首付(.*?)税费(.*?)万\(仅供参考\)\s+.*?   # 首付,税费 (down payment, taxes)
36 | .*?
37 | .*?
38 | (.*?)      # 户型 (layout)
39 | (.*?)      # 楼层 (floor)
40 | .*?
41 | .*?
42 | (.*?)      # 朝向 (orientation)
43 | (.*?)      # 装修 (decoration)
44 | .*?
45 | .*?
46 | (.*?)      # 面积 (area)
47 | (.*?)      # 建设时间 (year built / building type)
48 | .*?
49 | .*?
50 | .*?
51 | .*?        # 小区链接,小区名 (community link, community name)
52 | .*?        # 小区所在区域 (district): 昌平,北七家,五环到六环
53 | 看房时间(.*?)      # 看房时间 (viewing time)
54 | 链家编号(.*?).*?   # 链家编号 (Lianjia listing id)
55 | .*?
56 | ''',re.X|re.S)
57 | pattern_tag = re.compile(r'''(.*?)''',re.X|re.S)
58 | pattern_para = re.compile(r'''(.*?)''',re.X|re.S)
59 | # strip special markup
60 | #p_single = re.compile(r'<img.*?>') # strip image tags
61 | #p_pair = re.compile(r'<(.*?)\s?.*?>(.*?)</\1>') # strip paired tags (external links)
62 | p_html = re.compile(r'(<[^>]+>)|(&nbsp;)',re.S)
63 |
64 | # 抓取资讯首页
65 | # http://jiqizhixin.com/edge/p/1
66 | #["燕城苑南北两居,业主诚心出售,看房方便。", "南北通透两居,视野好,集中供暖!", "35", "2", "345", "40464", "121万 ", "13.8", "2室2厅", "高楼层/共6层", "南 北", "平层/简装", "85.26平米", "1995年建/板楼", "/xiaoqu/1111027381547/", "燕城苑北区", "/ershoufang/changping/", "昌平", "/ershoufang/beiqijia/", "北七家", " 五至六环", "有租户需要预约", "101100960378"]
67 | output_format = ['房源','备注','关注人数','看过人数','总价','均价','首付','税费','户型','楼层','朝向','装修','面积','年代','小区链接','小区名称','区链接','区名','镇链接','镇名','街道','看房','编号']
68 | print('\t'.join(output_format))
69 | page_list = ['101100791393','101100960378']
70 | #for page in page_list:
71 | for line in file('house_id.txt'):
72 | page = line.strip().strip('.html')
73 | #curl_path = 'curl http://bj.lianjia.com/ershoufang/101100960378.html'
74 | curl_path = 'curl http://bj.lianjia.com/ershoufang/%s.html'%(page)
75 | content = subprocess.check_output(curl_path,shell=True); #如果命令执行的返回值不为0,则会抛出CalledProcessError的错误
76 | #print(content)
77 | result = re.findall(pattern_article,content)
78 | output_dict = dict(zip(output_format,result[0]))
79 | #output = re.findall(pattern_article,content.decode('utf8'))
80 | print('\t'.join(result[0]))
81 | #print(json.dumps(result[0],ensure_ascii=False))
82 | #print(json.dumps(output_dict,ensure_ascii=False))
83 |
84 |
85 |
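
The loop above indexes result[0] directly, which raises IndexError whenever a listing page fails to download or its layout changes. A minimal sketch of a guarded version of the same zip-into-dict step; the helper name extract_fields is hypothetical and not part of the script:

# Minimal sketch: pair the captured groups with output_format and skip
# pages where pattern_article does not match. Helper name is hypothetical.
import re

def extract_fields(pattern, field_names, html):
    matches = re.findall(pattern, html)
    if not matches:                      # download failed or page layout changed
        return None
    return dict(zip(field_names, matches[0]))

# usage sketch:
#   info = extract_fields(pattern_article, output_format, content)
#   if info:
#       print('\t'.join(info[k] for k in output_format))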
--------------------------------------------------------------------------------
/python/mysql_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python2.7
2 | # coding=utf-8
3 | # Reference: http://www.cnblogs.com/fnng/p/3565912.html
4 | '''
5 | #mysql初始化-shell
6 | mysql=/usr/local/mysql/bin/mysql
7 | $mysql -uroot -pwqw < init.sql
8 | ------
9 | $mysql -uroot -p123456 < init.sql
20 | #df[... > 117]             # filter rows by a numeric threshold
21 | #df[df.time<'2016-07-20']  # filter rows by date
22 | #new.values.tolist()       # DataFrame -> nested list
23 | #df.sort(columns='time')   # sort (sort_values in newer pandas)
24 |
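
The commented lines above list the pandas operations this test touches; a small self-contained illustration of the same calls. The DataFrame and its column names are invented for the example:

# Self-contained illustration of the filters noted above; data is made up.
import pandas as pd

df = pd.DataFrame({'count': [90, 120, 130],
                   'time': ['2016-07-19', '2016-07-21', '2016-07-18']})
print(df[df['count'] > 117])            # filter by numeric value
print(df[df['time'] < '2016-07-20'])    # filter by (string) date
print(df.values.tolist())               # DataFrame -> list of rows
print(df.sort_values(by='time'))        # sort (df.sort was the older API)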
--------------------------------------------------------------------------------
/python/pylint.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding:utf8
3 | """
4 | test sample. google编码规范:(URL不受80字符限制)
5 | https://zh-google-styleguide.readthedocs.io/en/latest/google-python-styleguide/python_style_rules/
6 | 2017-11-24
7 | wangqiwen@didichuxing.com
8 | """
9 | #import的包一定要使用;import包分成3部分,依次排序:①系统包②第三方包③自定义包。每部分按照字母顺序排序,一次不能导入多个包
10 | import sys
11 |
12 | class MyClass(object):
13 | """class测试: 类名满足Pascal风格"""
14 | public_name = '-public-' # public
15 | _myname = '-protected' # protected
16 | __private_name = '-private-' # private
17 |
18 | def __init__(self, name="wang"):
19 | self._myname = name
20 | print '我的名字是%s'%(self._myname)
21 |
22 | def say(self):
23 | """打招呼"""
24 | print '你好,我是%s,%s,%s'%(self._myname, self.public_name, self.__private_name)
25 | return 'yes'
26 |
27 | def modify(self, name="-"):
28 | """更改属性值"""
29 | self._myname = name
30 |
31 | def my_fun(value=0, delta=9):
32 | """
33 | 外部函数:名字_连接。多参数时,逗号后面加一个空格
34 | """
35 | res = value + delta
36 | return res
37 |
38 | def main():
39 | """main function"""
40 | # local variables inside main() stay lowercase; only module-level names are expected to be UPPER_CASE constants
41 | value = 3
42 | new = my_fun(value)
43 | v_result = MyClass("wqw")
44 | # protected/private attributes should not be accessed from outside the class: v_result._myname, v_result.__private_name
45 | #超过80字符时,可以用\换行,注:(),[]时可省略\
46 | print >> sys.stdout, 'hello,related values are listed as : %s , %s,I am \
47 | %s,%s ...'%(value, new, v_result.say(), v_result.public_name)
48 | print >> sys.stdout, 'hello,related values are listed as : %s , %s,I am %s,%s ...'%(value, new, v_result.say(), v_result.public_name) # pylint: disable=line-too-long
49 | #参考:How do I disable a Pylint warning?
50 | #https://stackoverflow.com/questions/4341746/how-do-i-disable-a-pylint-warning
51 |
52 | if __name__ == '__main__':
53 | A = 3 # 此处为全局变量,一律大写
54 | main()
55 |
56 | # */* vim: set expandtab ts=4 sw=4 sts=4 tw=400: */
57 |
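
The long-line example above silences one message with a trailing comment; pylint also accepts scoped disables. A short sketch, where invalid-name (C0103) is a real pylint message id and the variable is invented:

# pylint: disable=invalid-name    # placed at module level, applies to the module block
badName = 3                       # would otherwise trigger invalid-name (C0103)
print(badName)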
--------------------------------------------------------------------------------
/python/python-coding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wqw547243068/wangqiwen/07b64ae47d91581e1c339f40bc765fd7815b47ff/python/python-coding.png
--------------------------------------------------------------------------------
/python/python入门神图.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wqw547243068/wangqiwen/07b64ae47d91581e1c339f40bc765fd7815b47ff/python/python入门神图.jpg
--------------------------------------------------------------------------------
/python/samplt.py:
--------------------------------------------------------------------------------
1 | #https://bitbucket.org/gastlygem/documents/src/e4749bcf2e73/sample.py
2 | # -*- coding: utf-8 -*-
3 | # 给 Python 初学者的超快速脚本解说
4 |
5 | import os
6 |
7 | def main():
8 | print '你好, 世界!'
9 | print "单引号,双引号,其实是一码事"
10 | print '字符串内的引号需被转义(如 O\'Neil)'
11 | print "换个不同的引号就无需转义了(看 O'Neil)"
12 |
13 | print """三引号(亦可以是三个单引号)可以安全地处理单双引号混用,例如:
14 | O'Neil 说: "姚明太瘦。"
15 | 姚明说: "O'Neil 太老。"
16 | 而且还能跨行,跨行后的格式也能被保留。
17 | """
18 |
19 | print '=' * 10
20 | print '这将直接执行', os.getcwd()
21 |
22 | add(5, 10)
23 |
24 | counter = 0
25 | counter += 1
26 |
27 | food = ['苹果', '杏子', '李子', '梨']
28 | for i in food:
29 | print '俺就爱整只: %s' % i
30 |
31 | print '从0数到9'
32 | for i in range(10):
33 | print i
34 |
35 | def add(param1, param2):
36 | """做了点加法.
37 | 喔,其实还胡乱判断了一气。
38 | """
39 | # 这也是一个注释。
40 | res = param1 + param2
41 | print '%s + %s = %s' %(param1, param2, res)
42 |
43 | if res < 50:
44 | print '这个这个'
45 | elif res >= 50 and (param1 == 42 or param2 == 24):
46 | print '那个那个'
47 | else:
48 | print '嗯哼...'
49 |
50 | return res # 注释还可以像这样直接跟在一句代码的后面
51 |
52 | if __name__ == '__main__':
53 | main()
54 |
--------------------------------------------------------------------------------
/python/w2v.py:
--------------------------------------------------------------------------------
1 | # 参考地址:http://www.52nlp.cn/%E4%B8%AD%E8%8B%B1%E6%96%87%E7%BB%B4%E5%9F%BA%E7%99%BE%E7%A7%91%E8%AF%AD%E6%96%99%E4%B8%8A%E7%9A%84word2vec%E5%AE%9E%E9%AA%8C
2 | import logging
3 | import os
4 | import time
5 |
6 | import gensim
7 | from gensim.models import word2vec
8 | import jieba
9 | #import nltk
10 | import json
11 |
12 | #a=jieba.cut(str,cut_all=False)
13 | #print '/'.join(a)
14 |
15 | logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s',level=logging.INFO)
16 | start1 = time.clock()
17 | input_file_name = u'E:/百度云/IT技术_new/编程语言/python/demo/word/result.txt' # 原始文件Unicode编码
18 | input_file_f = open(input_file_name,'r')
19 | #contents = input_file_f.read() # 整个文件读到一个变量里
20 | print '读取文件耗时:',time.clock()
21 | #sentences = [i.strip().split(" ") for i in contents[:10]]
22 | sentences = []
23 | print '转换后:\n','|'.join(['&'.join(i) for i in sentences])
24 | # 开始逐行处理
25 | for line in input_file_f.readlines():
26 | #按行读取
27 | sentences.append(line.strip().split(" "))
28 | #print '行数:%s,内容:\n'%(len(sentences)),json.dumps(sentences,ensure_ascii=False)
29 | #sentences是句子序列,句子又是单词列表,比如,sentences = [['first', 'sentence'], ['second', 'sentence']]
30 | model = word2vec.Word2Vec(sentences,min_count=2,size=200) #min_count表示小于该数的单词会被剔除,默认值为5;size表示神经网络的隐藏层单元数,默认为100
31 | #保存生成的训练模型
32 | output_model = u'E:/百度云/IT技术_new/编程语言/python/demo/word/model'
33 | model.save(output_model)#加载模型文件new_model = gensim.models.Word2Vec.load('model/mymodel4')
34 | #=================
35 | #加载模型文件
36 | new_model = gensim.models.Word2Vec.load(output_model)
37 | dir(new_model) # 多种函数方法,
38 | print new_model.vector_size # 词向量维度
39 | print ','.join(new_model.index2word) # index2word保存单词
40 | # 计算指定词的所以相似词
41 | test_word = '经理'
42 | similar_word_list = new_model.most_similar(test_word)
43 | print json.dumps(similar_word_list,ensure_ascii=False)
44 | #print json.dumps(similar_word_list,ensure_ascii=False,indent=4)
45 | # 抽取北京的搜索session:select query_list from user_satisfy_query where dt=20160918 and province rlike '^010' and count > 1;
46 | #print json.dumps(new_model.most_similar(u'天安门'),ensure_ascii=False)
47 | #In [76]: print json.dumps(new_model.most_similar(u'旅店'),ensure_ascii=False)
48 | #[["莫泰", 0.8472937345504761], ["易佰", 0.8139138221740723], ["168", 0.7009128928184509], ["连锁", 0.6979336738586426], ["旅馆", 0.6874777674674988], ["旺子成", 0.6520262360572815], ["快捷", 0.6426747441291809], ["家庭旅馆", 0.6317397356033325], ["人在旅途", 0.6164605021476746], ["寺易佰", 0.6112728714942932]]
49 | #In [77]: print json.dumps(new_model.most_similar(u'菜馆'),ensure_ascii=False)
50 | #[["家常菜", 0.8295753598213196], ["风味", 0.8144116401672363], ["正宗", 0.8008058071136475], ["菜", 0.787124514579773], ["饺子馆", 0.7830443382263184], ["刀削面", 0.7752013802528381], ["特色", 0.7629570364952087], ["面馆", 0.7591361403465271], ["面", 0.7421250939369202], ["农家菜", 0.7410575747489929]]
51 | #In [158]: print json.dumps(new_model.most_similar(u'软件园'),ensure_ascii=False)
52 | #[["用友", 0.7017531991004944], ["金蝶", 0.6142528057098389], ["孵化器", 0.5947192907333374], ["网易", 0.5910834074020386], ["f11", 0.584527850151062], ["软件", 0.5816747546195984], ["租贷", 0.5489269495010376], ["卵", 0.5268262624740601], ["鲜花网", 0.5116425156593323], ["广联达", 0.507921576499939]]
53 | #In [171]: print json.dumps(new_model.most_similar(u'美食'),ensure_ascii=False)
54 | #[["中餐", 0.8337364196777344], ["川菜", 0.7456749677658081], ["快餐", 0.7315336465835571], ["西餐", 0.6596412658691406], ["自助餐", 0.6401817202568054], ["老姬", 0.6020432710647583], ["日本料理", 0.5849108099937439], ["合利屋", 0.5827316045761108], ["nokia", 0.5804284811019897], ["早点", 0.5785887241363525]]
55 | #In [176]: print json.dumps(new_model.most_similar(u'麦当劳'),ensure_ascii=False)
56 | #[["肯德基", 0.857654869556427], ["肯德鸡", 0.6457746028900146], ["KFC", 0.6434839963912964], ["kfc", 0.6308714151382446], ["街鼎", 0.6141167283058167], ["FSDT", 0.589178204536438], ["康得基", 0.5770742893218994], ["得来", 0.5747169852256775], ["十佛营", 0.5702893137931824], ["必胜客", 0.5698955655097961]]
57 | print '(1)找某个词的相似词汇如下:\n词汇\t相似度\n','\n'.join(['%s\t%s'%(i[0],i[1]) for i in similar_word_list])
58 | # 计算任意两个词的相似度
59 | word_1 = '经理';word_2 = '数据'
60 | print '(2)任意两个词汇的相似度(%s与%s)'%(word_1,word_2),new_model.similarity(word_1,word_2)
61 | word_set_1 = ['经理','效率'];word_set_2 = ['数据','流程','重复']
62 | print '(3)两个数据集间的余弦距离(%s)与(%s):'%(json.dumps(word_set_1,ensure_ascii=False),json.dumps(word_set_2,ensure_ascii=False)),new_model.n_similarity(word_set_1, word_set_2)
63 | print '(4)找集合中不同的一项:(%s)'%(json.dumps(word_set_2,ensure_ascii=False)),new_model.doesnt_match(word_set_2)
64 | # 独特的组合加减法
65 | print json.dumps(new_model.most_similar(positive=[u'麦当劳'],negative=[u'肯德基',u'真功夫']),ensure_ascii=False)
66 |
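
Because the script above depends on a local corpus path, here is a minimal self-contained sketch of the same gensim calls on a tiny invented corpus, using the pre-4.0 gensim interface assumed above (Word2Vec takes size= and exposes most_similar directly):

# Minimal word2vec sketch on a toy in-memory corpus; sentences are invented.
from gensim.models import word2vec

sentences = [['经理', '审批', '流程'],
             ['数据', '流程', '重复'],
             ['经理', '查看', '数据']] * 20   # repeat so every word passes min_count
model = word2vec.Word2Vec(sentences, min_count=2, size=50)
model.save('toy.model')                       # hypothetical path, for illustration
loaded = word2vec.Word2Vec.load('toy.model')
print(loaded.most_similar('经理'))            # nearest neighbours of one word
print(loaded.similarity('经理', '数据'))       # cosine similarity of two words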
--------------------------------------------------------------------------------
/qr_code.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wqw547243068/wangqiwen/07b64ae47d91581e1c339f40bc765fd7815b47ff/qr_code.gif
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/LICENCE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/RL_cover.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wqw547243068/wangqiwen/07b64ae47d91581e1c339f40bc765fd7815b47ff/rl/Reinforcement-learning-with-tensorflow/RL_cover.jpg
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/11_Dyna_Q/RL_brain.py:
--------------------------------------------------------------------------------
1 | """
2 | This part of code is the Dyna-Q learning brain, which is a brain of the agent.
3 | All decisions and learning processes are made in here.
4 |
5 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
6 | """
7 |
8 | import numpy as np
9 | import pandas as pd
10 |
11 |
12 | class QLearningTable:
13 | def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
14 | self.actions = actions # a list
15 | self.lr = learning_rate
16 | self.gamma = reward_decay
17 | self.epsilon = e_greedy
18 | self.q_table = pd.DataFrame(columns=self.actions)
19 |
20 | def choose_action(self, observation):
21 | self.check_state_exist(observation)
22 | # action selection
23 | if np.random.uniform() < self.epsilon:
24 | # choose best action
25 | state_action = self.q_table.ix[observation, :]
26 | state_action = state_action.reindex(np.random.permutation(state_action.index)) # some actions have same value
27 | action = state_action.argmax()
28 | else:
29 | # choose random action
30 | action = np.random.choice(self.actions)
31 | return action
32 |
33 | def learn(self, s, a, r, s_):
34 | self.check_state_exist(s_)
35 | q_predict = self.q_table.ix[s, a]
36 | if s_ != 'terminal':
37 | q_target = r + self.gamma * self.q_table.ix[s_, :].max() # next state is not terminal
38 | else:
39 | q_target = r # next state is terminal
40 | self.q_table.ix[s, a] += self.lr * (q_target - q_predict) # update
41 |
42 | def check_state_exist(self, state):
43 | if state not in self.q_table.index:
44 | # append new state to q table
45 | self.q_table = self.q_table.append(
46 | pd.Series(
47 | [0]*len(self.actions),
48 | index=self.q_table.columns,
49 | name=state,
50 | )
51 | )
52 |
53 |
54 | class EnvModel:
55 | """Similar to the memory buffer in DQN, you can store past experiences in here.
56 | Alternatively, the model can generate next state and reward signal accurately."""
57 | def __init__(self, actions):
58 | # the simplest case is to think about the model is a memory which has all past transition information
59 | self.actions = actions
60 | self.database = pd.DataFrame(columns=actions, dtype=np.object)
61 |
62 | def store_transition(self, s, a, r, s_):
63 | if s not in self.database.index:
64 | self.database = self.database.append(
65 | pd.Series(
66 | [None] * len(self.actions),
67 | index=self.database.columns,
68 | name=s,
69 | ))
70 | self.database.set_value(s, a, (r, s_))
71 |
72 | def sample_s_a(self):
73 | s = np.random.choice(self.database.index)
74 | a = np.random.choice(self.database.ix[s].dropna().index) # filter out the None value
75 | return s, a
76 |
77 | def get_r_s_(self, s, a):
78 | r, s_ = self.database.ix[s, a]
79 | return r, s_
80 |
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/11_Dyna_Q/maze_env.py:
--------------------------------------------------------------------------------
1 | """
2 | Reinforcement learning maze example.
3 |
4 | Red rectangle: explorer.
5 | Black rectangles: hells [reward = -1].
6 | Yellow bin circle: paradise [reward = +1].
7 | All other states: ground [reward = 0].
8 |
9 | This script is the environment part of this example. The RL is in RL_brain.py.
10 |
11 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
12 | """
13 |
14 |
15 | import numpy as np
16 | np.random.seed(1)
17 | import tkinter as tk
18 | import time
19 |
20 |
21 | UNIT = 40 # pixels
22 | MAZE_H = 4 # grid height
23 | MAZE_W = 4 # grid width
24 |
25 |
26 | class Maze(tk.Tk, object):
27 | def __init__(self):
28 | super(Maze, self).__init__()
29 | self.action_space = ['u', 'd', 'l', 'r']
30 | self.n_actions = len(self.action_space)
31 | self.title('maze')
32 | self.geometry('{0}x{1}'.format(MAZE_H * UNIT, MAZE_H * UNIT))
33 | self._build_maze()
34 |
35 | def _build_maze(self):
36 | self.canvas = tk.Canvas(self, bg='white',
37 | height=MAZE_H * UNIT,
38 | width=MAZE_W * UNIT)
39 |
40 | # create grids
41 | for c in range(0, MAZE_W * UNIT, UNIT):
42 | x0, y0, x1, y1 = c, 0, c, MAZE_H * UNIT
43 | self.canvas.create_line(x0, y0, x1, y1)
44 | for r in range(0, MAZE_H * UNIT, UNIT):
45 | x0, y0, x1, y1 = 0, r, MAZE_H * UNIT, r
46 | self.canvas.create_line(x0, y0, x1, y1)
47 |
48 | # create origin
49 | origin = np.array([20, 20])
50 |
51 | # hell
52 | hell1_center = origin + np.array([UNIT * 2, UNIT])
53 | self.hell1 = self.canvas.create_rectangle(
54 | hell1_center[0] - 15, hell1_center[1] - 15,
55 | hell1_center[0] + 15, hell1_center[1] + 15,
56 | fill='black')
57 | # hell
58 | hell2_center = origin + np.array([UNIT, UNIT * 2])
59 | self.hell2 = self.canvas.create_rectangle(
60 | hell2_center[0] - 15, hell2_center[1] - 15,
61 | hell2_center[0] + 15, hell2_center[1] + 15,
62 | fill='black')
63 |
64 | # create oval
65 | oval_center = origin + UNIT * 2
66 | self.oval = self.canvas.create_oval(
67 | oval_center[0] - 15, oval_center[1] - 15,
68 | oval_center[0] + 15, oval_center[1] + 15,
69 | fill='yellow')
70 |
71 | # create red rect
72 | self.rect = self.canvas.create_rectangle(
73 | origin[0] - 15, origin[1] - 15,
74 | origin[0] + 15, origin[1] + 15,
75 | fill='red')
76 |
77 | # pack all
78 | self.canvas.pack()
79 |
80 | def reset(self):
81 | self.update()
82 | time.sleep(0.5)
83 | self.canvas.delete(self.rect)
84 | origin = np.array([20, 20])
85 | self.rect = self.canvas.create_rectangle(
86 | origin[0] - 15, origin[1] - 15,
87 | origin[0] + 15, origin[1] + 15,
88 | fill='red')
89 | # return observation
90 | return self.canvas.coords(self.rect)
91 |
92 | def step(self, action):
93 | s = self.canvas.coords(self.rect)
94 | base_action = np.array([0, 0])
95 | if action == 0: # up
96 | if s[1] > UNIT:
97 | base_action[1] -= UNIT
98 | elif action == 1: # down
99 | if s[1] < (MAZE_H - 1) * UNIT:
100 | base_action[1] += UNIT
101 | elif action == 2: # right
102 | if s[0] < (MAZE_W - 1) * UNIT:
103 | base_action[0] += UNIT
104 | elif action == 3: # left
105 | if s[0] > UNIT:
106 | base_action[0] -= UNIT
107 |
108 | self.canvas.move(self.rect, base_action[0], base_action[1]) # move agent
109 |
110 | s_ = self.canvas.coords(self.rect) # next state
111 |
112 | # reward function
113 | if s_ == self.canvas.coords(self.oval):
114 | reward = 1
115 | done = True
116 | elif s_ in [self.canvas.coords(self.hell1), self.canvas.coords(self.hell2)]:
117 | reward = -1
118 | done = True
119 | else:
120 | reward = 0
121 | done = False
122 |
123 | return s_, reward, done
124 |
125 | def render(self):
126 | # time.sleep(0.1)
127 | self.update()
128 |
129 |
130 |
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/11_Dyna_Q/run_this.py:
--------------------------------------------------------------------------------
1 | """
2 | Simplest model-based RL, Dyna-Q.
3 |
4 | Red rectangle: explorer.
5 | Black rectangles: hells [reward = -1].
6 | Yellow bin circle: paradise [reward = +1].
7 | All other states: ground [reward = 0].
8 |
9 | This script is the main part which controls the update method of this example.
10 | The RL is in RL_brain.py.
11 |
12 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
13 | """
14 |
15 | from maze_env import Maze
16 | from RL_brain import QLearningTable, EnvModel
17 |
18 |
19 | def update():
20 | for episode in range(40):
21 | s = env.reset()
22 | while True:
23 | env.render()
24 | a = RL.choose_action(str(s))
25 | s_, r, done = env.step(a)
26 | RL.learn(str(s), a, r, str(s_))
27 |
28 | # use a model to output (r, s_) by inputting (s, a)
29 | # the model in dyna Q version is just like a memory replay buffer
30 | env_model.store_transition(str(s), a, r, s_)
31 | for n in range(10): # learn 10 more times using the env_model
32 | ms, ma = env_model.sample_s_a() # ms in here is a str
33 | mr, ms_ = env_model.get_r_s_(ms, ma)
34 | RL.learn(ms, ma, mr, str(ms_))
35 |
36 | s = s_
37 | if done:
38 | break
39 |
40 | # end of game
41 | print('game over')
42 | env.destroy()
43 |
44 |
45 | if __name__ == "__main__":
46 | env = Maze()
47 | RL = QLearningTable(actions=list(range(env.n_actions)))
48 | env_model = EnvModel(actions=list(range(env.n_actions)))
49 |
50 | env.after(0, update)
51 | env.mainloop()
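
The comments in update() describe the Dyna-Q idea: each real step feeds one direct Q-learning update plus n planning updates sampled from the learned model. A stripped-down tabular sketch of that loop on an invented 5-state chain; everything here (environment, constants, names) is illustrative and not taken from the repo:

# Stripped-down Dyna-Q on a toy chain: move right to reach the goal state.
import numpy as np

N_STATES = 5
Q = np.zeros((N_STATES, 2))              # actions: 0 = left, 1 = right
model = {}                               # (s, a) -> (r, s_), the "EnvModel"
alpha, gamma, eps, n_planning = 0.1, 0.9, 0.1, 10
rng = np.random.default_rng(0)

def step(s, a):
    s_ = min(s + 1, N_STATES - 1) if a == 1 else max(s - 1, 0)
    return (1.0 if s_ == N_STATES - 1 else 0.0), s_

for episode in range(30):
    s = 0
    while s != N_STATES - 1:
        a = rng.integers(2) if rng.random() < eps else int(Q[s].argmax())
        r, s_ = step(s, a)
        Q[s, a] += alpha * (r + gamma * Q[s_].max() - Q[s, a])   # direct RL
        model[(s, a)] = (r, s_)                                  # model learning
        for _ in range(n_planning):                              # planning from the model
            (ps, pa), (pr, ps_) = list(model.items())[rng.integers(len(model))]
            Q[ps, pa] += alpha * (pr + gamma * Q[ps_].max() - Q[ps, pa])
        s = s_

print(np.round(Q, 2))    # the "right" column should dominate in every state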
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/12_Proximal_Policy_Optimization/simply_PPO.py:
--------------------------------------------------------------------------------
1 | """
2 | A simple version of Proximal Policy Optimization (PPO) using single thread.
3 |
4 | Based on:
5 | 1. Emergence of Locomotion Behaviours in Rich Environments (Google Deepmind): [https://arxiv.org/abs/1707.02286]
6 | 2. Proximal Policy Optimization Algorithms (OpenAI): [https://arxiv.org/abs/1707.06347]
7 |
8 | View more on my tutorial website: https://morvanzhou.github.io/tutorials
9 |
10 | Dependencies:
11 | tensorflow r1.2
12 | gym 0.9.2
13 | """
14 |
15 | import tensorflow as tf
16 | import numpy as np
17 | import matplotlib.pyplot as plt
18 | import gym
19 |
20 | EP_MAX = 1000
21 | EP_LEN = 200
22 | GAMMA = 0.9
23 | A_LR = 0.0001
24 | C_LR = 0.0002
25 | BATCH = 32
26 | A_UPDATE_STEPS = 10
27 | C_UPDATE_STEPS = 10
28 | S_DIM, A_DIM = 3, 1
29 | METHOD = [
30 | dict(name='kl_pen', kl_target=0.01, lam=0.5), # KL penalty
31 | dict(name='clip', epsilon=0.2), # Clipped surrogate objective, find this is better
32 | ][1] # choose the method for optimization
33 |
34 |
35 | class PPO(object):
36 |
37 | def __init__(self):
38 | self.sess = tf.Session()
39 | self.tfs = tf.placeholder(tf.float32, [None, S_DIM], 'state')
40 |
41 | # critic
42 | with tf.variable_scope('critic'):
43 | l1 = tf.layers.dense(self.tfs, 100, tf.nn.relu)
44 | self.v = tf.layers.dense(l1, 1)
45 | self.tfdc_r = tf.placeholder(tf.float32, [None, 1], 'discounted_r')
46 | self.advantage = self.tfdc_r - self.v
47 | self.closs = tf.reduce_mean(tf.square(self.advantage))
48 | self.ctrain_op = tf.train.AdamOptimizer(C_LR).minimize(self.closs)
49 |
50 | # actor
51 | pi, pi_params = self._build_anet('pi', trainable=True)
52 | oldpi, oldpi_params = self._build_anet('oldpi', trainable=False)
53 | with tf.variable_scope('sample_action'):
54 | self.sample_op = tf.squeeze(pi.sample(1), axis=0) # choosing action
55 | with tf.variable_scope('update_oldpi'):
56 | self.update_oldpi_op = [oldp.assign(p) for p, oldp in zip(pi_params, oldpi_params)]
57 |
58 | self.tfa = tf.placeholder(tf.float32, [None, A_DIM], 'action')
59 | self.tfadv = tf.placeholder(tf.float32, [None, 1], 'advantage')
60 | with tf.variable_scope('loss'):
61 | with tf.variable_scope('surrogate'):
62 | # ratio = tf.exp(pi.log_prob(self.tfa) - oldpi.log_prob(self.tfa))
63 | ratio = pi.prob(self.tfa) / oldpi.prob(self.tfa)
64 | surr = ratio * self.tfadv
65 | if METHOD['name'] == 'kl_pen':
66 | self.tflam = tf.placeholder(tf.float32, None, 'lambda')
67 | kl = tf.distributions.kl_divergence(oldpi, pi)
68 | self.kl_mean = tf.reduce_mean(kl)
69 | self.aloss = -(tf.reduce_mean(surr - self.tflam * kl))
70 | else: # clipping method, find this is better
71 | self.aloss = -tf.reduce_mean(tf.minimum(
72 | surr,
73 | tf.clip_by_value(ratio, 1.-METHOD['epsilon'], 1.+METHOD['epsilon'])*self.tfadv))
74 |
75 | with tf.variable_scope('atrain'):
76 | self.atrain_op = tf.train.AdamOptimizer(A_LR).minimize(self.aloss)
77 |
78 | tf.summary.FileWriter("log/", self.sess.graph)
79 |
80 | self.sess.run(tf.global_variables_initializer())
81 |
82 | def update(self, s, a, r):
83 | self.sess.run(self.update_oldpi_op)
84 | adv = self.sess.run(self.advantage, {self.tfs: s, self.tfdc_r: r})
85 | # adv = (adv - adv.mean())/(adv.std()+1e-6) # sometimes helpful
86 |
87 | # update actor
88 | if METHOD['name'] == 'kl_pen':
89 | for _ in range(A_UPDATE_STEPS):
90 | _, kl = self.sess.run(
91 | [self.atrain_op, self.kl_mean],
92 | {self.tfs: s, self.tfa: a, self.tfadv: adv, self.tflam: METHOD['lam']})
93 | if kl > 4*METHOD['kl_target']: # this is in Google's paper
94 | break
95 | if kl < METHOD['kl_target'] / 1.5: # adaptive lambda, this is in OpenAI's paper
96 | METHOD['lam'] /= 2
97 | elif kl > METHOD['kl_target'] * 1.5:
98 | METHOD['lam'] *= 2
99 | METHOD['lam'] = np.clip(METHOD['lam'], 1e-4, 10) # sometimes explode, this clipping is my solution
100 | else: # clipping method, find this is better (OpenAI's paper)
101 | [self.sess.run(self.atrain_op, {self.tfs: s, self.tfa: a, self.tfadv: adv}) for _ in range(A_UPDATE_STEPS)]
102 |
103 | # update critic
104 | [self.sess.run(self.ctrain_op, {self.tfs: s, self.tfdc_r: r}) for _ in range(C_UPDATE_STEPS)]
105 |
106 | def _build_anet(self, name, trainable):
107 | with tf.variable_scope(name):
108 | l1 = tf.layers.dense(self.tfs, 100, tf.nn.relu, trainable=trainable)
109 | mu = 2 * tf.layers.dense(l1, A_DIM, tf.nn.tanh, trainable=trainable)
110 | sigma = tf.layers.dense(l1, A_DIM, tf.nn.softplus, trainable=trainable)
111 | norm_dist = tf.distributions.Normal(loc=mu, scale=sigma)
112 | params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)
113 | return norm_dist, params
114 |
115 | def choose_action(self, s):
116 | s = s[np.newaxis, :]
117 | a = self.sess.run(self.sample_op, {self.tfs: s})[0]
118 | return np.clip(a, -2, 2)
119 |
120 | def get_v(self, s):
121 | if s.ndim < 2: s = s[np.newaxis, :]
122 | return self.sess.run(self.v, {self.tfs: s})[0, 0]
123 |
124 | env = gym.make('Pendulum-v0').unwrapped
125 | ppo = PPO()
126 | all_ep_r = []
127 |
128 | for ep in range(EP_MAX):
129 | s = env.reset()
130 | buffer_s, buffer_a, buffer_r = [], [], []
131 | ep_r = 0
132 | for t in range(EP_LEN): # in one episode
133 | env.render()
134 | a = ppo.choose_action(s)
135 | s_, r, done, _ = env.step(a)
136 | buffer_s.append(s)
137 | buffer_a.append(a)
138 | buffer_r.append((r+8)/8) # normalize reward, find to be useful
139 | s = s_
140 | ep_r += r
141 |
142 | # update ppo
143 | if (t+1) % BATCH == 0 or t == EP_LEN-1:
144 | v_s_ = ppo.get_v(s_)
145 | discounted_r = []
146 | for r in buffer_r[::-1]:
147 | v_s_ = r + GAMMA * v_s_
148 | discounted_r.append(v_s_)
149 | discounted_r.reverse()
150 |
151 | bs, ba, br = np.vstack(buffer_s), np.vstack(buffer_a), np.array(discounted_r)[:, np.newaxis]
152 | buffer_s, buffer_a, buffer_r = [], [], []
153 | ppo.update(bs, ba, br)
154 | if ep == 0: all_ep_r.append(ep_r)
155 | else: all_ep_r.append(all_ep_r[-1]*0.9 + ep_r*0.1)
156 | print(
157 | 'Ep: %i' % ep,
158 | "|Ep_r: %i" % ep_r,
159 | ("|Lam: %.4f" % METHOD['lam']) if METHOD['name'] == 'kl_pen' else '',
160 | )
161 |
162 | plt.plot(np.arange(len(all_ep_r)), all_ep_r)
163 | plt.xlabel('Episode');plt.ylabel('Moving averaged episode reward');plt.show()
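
METHOD above selects between the KL-penalty and clipped-surrogate objectives; a tiny numpy illustration of what the clipping in the 'clip' branch does to the per-sample surrogate. The ratios and advantages are invented just to show the effect:

# Clipped surrogate: L = -mean(min(ratio*adv, clip(ratio, 1-eps, 1+eps)*adv))
import numpy as np

eps = 0.2
ratio = np.array([0.5, 0.9, 1.0, 1.3, 2.0])   # pi(a|s) / pi_old(a|s)
adv = np.array([1.0, 1.0, -1.0, 1.0, 1.0])    # advantage estimates
surr = ratio * adv
clipped = np.clip(ratio, 1 - eps, 1 + eps) * adv
loss = -np.mean(np.minimum(surr, clipped))
print(np.minimum(surr, clipped))   # gains from ratios beyond 1+eps are capped
print(loss)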
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/1_command_line_reinforcement_learning/draw.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import matplotlib.animation as animation
4 |
5 |
6 | def update_line(num, data, line):
7 | line.set_data(data[..., :num])
8 | return line,
9 |
10 | fig = plt.figure()
11 | ax = fig.add_subplot(111)
12 |
13 |
14 | def update(icon_list):
15 | #icon_list = ['-', 'o', '-', '-', 'T', '-']
16 | print(icon_list)
17 | # plt.clf()
18 | list_len = len(icon_list)
19 | delta = 0.9/list_len
20 | color_dict = {'-':'g', 'o':'b', 'T':'r'}
21 | start_point = (0.05, 0.05)
22 | plt.text(0.5, 0.6, 'Episode 5', horizontalalignment='center', fontsize=12,
23 | verticalalignment='center', transform=ax.transAxes)
24 | for idx, item in enumerate(icon_list):
25 | color_value = color_dict[item]
26 | end_point = (start_point[0]+idx*delta, 0.3)
27 | ax.add_patch(plt.Rectangle((end_point[0], end_point[1]+delta), delta, delta*0.5,
28 | linestyle='--', edgecolor='b', linewidth=1, alpha=0.5))
29 | plt.text(end_point[0]+0.5*delta, end_point[1]+1.25*delta, idx, horizontalalignment='center', fontsize=12,
30 | verticalalignment='center', transform=ax.transAxes)
31 | ax.add_patch(plt.Rectangle(end_point, delta, delta,
32 | color='%s'%(color_value), linestyle='--', edgecolor='y', linewidth=1, alpha=0.5)) #, fill=None
33 | plt.text(end_point[0]+0.5*delta, end_point[1]+0.5*delta, item, horizontalalignment='center', fontsize=12,
34 | verticalalignment='center', transform=ax.transAxes)
35 | # plt.show()
36 | #ax.add_patch(plt.Rectangle((0.1,0.1),0.3,0.3))
37 | def init():
38 | plt.xlim(0, 1)
39 | plt.ylim(0, 1)
40 | plt.xlabel('x')
41 | plt.title('test')
42 | return l,
43 |
44 | data = [['-', 'o', '-', '-', 'T', '-'],
45 | ['-', '-', 'o', '-', 'T', '-'],
46 | ['-', '-', '-', 'o', 'T', '-']]
47 | # l, = plt.plot([], [], 'r-')
48 | # line_ani = animation.FuncAnimation(fig, update, data, interval=10, blit=True)
49 | for item in data:
50 | update(item)
51 | plt.show()
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/1_command_line_reinforcement_learning/treasure_on_right.py:
--------------------------------------------------------------------------------
1 | """
2 | A simple example for Reinforcement Learning using table lookup Q-learning method.
3 | An agent "o" is on the left of a 1 dimensional world, the treasure is on the rightmost location.
4 | Run this program to see how the agent improves its strategy for finding the treasure.
5 |
6 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
7 | """
8 |
9 | import numpy as np
10 | import pandas as pd
11 | import time
12 |
13 | np.random.seed(2) # reproducible
14 |
15 |
16 | N_STATES = 6 # the length of the 1 dimensional world
17 | ACTIONS = ['left', 'right'] # available actions
18 | EPSILON = 0.9 # greedy police
19 | ALPHA = 0.1 # learning rate
20 | GAMMA = 0.9 # discount factor
21 | MAX_EPISODES = 13 # maximum episodes
22 | FRESH_TIME = 0.3 # fresh time for one move
23 |
24 |
25 | def build_q_table(n_states, actions):
26 | table = pd.DataFrame(
27 | np.zeros((n_states, len(actions))), # q_table initial values
28 | columns=actions, # actions's name
29 | )
30 | # print(table) # show table
31 | return table
32 |
33 |
34 | def choose_action(state, q_table):
35 | # This is how to choose an action
36 | state_actions = q_table.iloc[state, :]
37 | if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()): # act non-greedy or state-action have no value
38 | action_name = np.random.choice(ACTIONS)
39 | else: # act greedy
40 | action_name = state_actions.idxmax() # replace argmax to idxmax as argmax means a different function in newer version of pandas
41 | return action_name
42 |
43 |
44 | def get_env_feedback(S, A):
45 | # This is how agent will interact with the environment
46 | if A == 'right': # move right
47 | if S == N_STATES - 2: # terminate
48 | S_ = 'terminal'
49 | R = 1
50 | else:
51 | S_ = S + 1
52 | R = 0
53 | else: # move left
54 | R = 0
55 | if S == 0:
56 | S_ = S # reach the wall
57 | else:
58 | S_ = S - 1
59 | return S_, R
60 |
61 |
62 | def update_env(S, episode, step_counter):
63 | # This is how environment be updated
64 | env_list = ['-']*(N_STATES-1) + ['T'] # '---------T' our environment
65 | if S == 'terminal':
66 | interaction = 'Episode %s: total_steps = %s' % (episode+1, step_counter)
67 | print('\r{}'.format(interaction), end='')
68 | time.sleep(2)
69 | print('\r ', end='')
70 | else:
71 | env_list[S] = 'o'
72 | interaction = ''.join(env_list)
73 | print('\r{}'.format(interaction), end='')
74 | time.sleep(FRESH_TIME)
75 |
76 |
77 | def rl():
78 | # main part of RL loop
79 | q_table = build_q_table(N_STATES, ACTIONS)
80 | for episode in range(MAX_EPISODES):
81 | step_counter = 0
82 | S = 0
83 | is_terminated = False
84 | update_env(S, episode, step_counter)
85 | while not is_terminated:
86 |
87 | A = choose_action(S, q_table)
88 | S_, R = get_env_feedback(S, A) # take action & get next state and reward
89 | q_predict = q_table.loc[S, A]
90 | if S_ != 'terminal':
91 | q_target = R + GAMMA * q_table.iloc[S_, :].max() # next state is not terminal
92 | else:
93 | q_target = R # next state is terminal
94 | is_terminated = True # terminate this episode
95 |
96 | q_table.loc[S, A] += ALPHA * (q_target - q_predict) # update
97 | S = S_ # move to next state
98 |
99 | update_env(S, episode, step_counter+1)
100 | step_counter += 1
101 | return q_table
102 |
103 |
104 | if __name__ == "__main__":
105 | q_table = rl()
106 | print('\r\nQ-table:\n')
107 | print(q_table)
108 |
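
The core of rl() above is the tabular update q_table.loc[S, A] += ALPHA * (q_target - q_predict). A one-step numeric check with the script's ALPHA and GAMMA; the next-state Q values are invented:

# One-step numeric check of the tabular Q-learning update used in rl().
ALPHA, GAMMA = 0.1, 0.9
q_sa = 0.0                      # current Q(S, A)
r = 0                           # reward for a non-terminal move
next_q = [0.0, 0.5]             # invented Q(S_, left), Q(S_, right)
q_target = r + GAMMA * max(next_q)
q_sa += ALPHA * (q_target - q_sa)
print(q_sa)                     # 0.045: value propagates one step back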
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/1_command_line_reinforcement_learning/treasure_on_right_wqw.py:
--------------------------------------------------------------------------------
1 |
2 | """
3 | A simple example for Reinforcement Learning using table lookup Q-learning method.
4 | An agent "o" is on the left of a 1 dimensional world, the treasure is on the rightmost location.
5 | Run this program to see how the agent improves its strategy for finding the treasure.
6 |
7 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
8 | """
9 |
10 | import numpy as np
11 | import pandas as pd
12 | import time
13 |
14 | np.random.seed(2) # reproducible
15 |
16 |
17 | N_STATES = 6 # 状态数目 the length of the 1 dimensional world
18 | ACTIONS = ['left', 'right'] # 可行动作列表 available actions
19 | EPSILON = 0.9 # 贪心因子 greedy police
20 | ALPHA = 0.1 # 学习率 learning rate
21 | GAMMA = 0.9 # 折扣损失 discount factor
22 | MAX_EPISODES = 13 # 最大训练回合 maximum episodes
23 | FRESH_TIME = 0.3 # 每回合休息时间 fresh time for one move
24 |
25 |
26 | import numpy as np
27 | import matplotlib.pyplot as plt
28 | import matplotlib.animation as animation
29 |
30 |
31 | def render():
32 | """
33 | 绘制网格图
34 | """
35 | def update_line(num, data, line):
36 | line.set_data(data[..., :num])
37 | return line,
38 |
39 | fig1 = plt.figure()
40 | data = np.random.rand(2, 25)
41 | l, = plt.plot([], [], 'r-')
42 | plt.xlim(0, 1)
43 | plt.ylim(0, 1)
44 | plt.xlabel('x')
45 | plt.title('test')
46 | line_ani = animation.FuncAnimation(fig1, update_line, 25, fargs=(data, l), interval=50, blit=True)
47 | plt.show()
48 |
49 |
50 | def build_q_table(n_states, actions):
51 | """
52 | 构建Q表, nXa
53 | """
54 | # Q表初始化
55 | table = pd.DataFrame(
56 | np.zeros((n_states, len(actions))), # q_table initial values
57 | columns=actions, # actions's name
58 | )
59 | # print(table) # show table
60 | return table
61 |
62 |
63 | def choose_action(state, q_table):
64 | """
65 | 策略函数:如何选择下一步动作 This is how to choose an action
66 | """
67 | # 获取该状态下所有动作奖励列表
68 | state_actions = q_table.iloc[state, :]
69 | if (np.random.uniform() > EPSILON) or ((state_actions == 0).all()):
70 | # 随机模式(探索)act non-greedy or state-action have no value
71 | action_name = np.random.choice(ACTIONS)
72 | else: # 贪婪模式(利用)act greedy
73 | action_name = state_actions.idxmax()
74 | # replace argmax to idxmax as argmax means a different function in newer version of pandas
75 | return action_name
76 |
77 |
78 | def get_env_feedback(S, A):
79 | """
80 | agent从环境中获取反馈,S状态下采取A获得的奖励R (S, A) -> R
81 | This is how agent will interact with the environment
82 | """
83 | if A == 'right':
84 | # move right
85 | if S == N_STATES - 2: # terminate
86 | S_ = 'terminal'
87 | R = 1
88 | else:
89 | S_ = S + 1
90 | R = 0
91 | else: # move left
92 | R = 0
93 | if S == 0:
94 | S_ = S # reach the wall
95 | else:
96 | S_ = S - 1
97 | return S_, R
98 |
99 |
100 | def update_env(S, episode, step_counter):
101 | # This is how environment be updated
102 | env_list = ['-']*(N_STATES-1) + ['T'] # '---------T' our environment
103 | if S == 'terminal':
104 | interaction = 'Episode %s: total_steps = %s' % (episode+1, step_counter)
105 | print(' => {}'.format(interaction))
106 | #print('\r{}'.format(interaction), end='')
107 | time.sleep(1)
108 | #print('\r ', end='')
109 | else:
110 | env_list[S] = 'o'
111 | interaction = ''.join(env_list)
112 | print('\r{}'.format(interaction), end='')
113 | time.sleep(FRESH_TIME)
114 |
115 |
116 | def rl():
117 | """ 强化学习程序主体 """
118 | # main part of RL loop
119 | q_table = build_q_table(N_STATES, ACTIONS)
120 | # 最多玩MAX_EPISODE局
121 | for episode in range(MAX_EPISODES):
122 | step_counter = 0
123 | S = 0
124 | is_terminated = False
125 | update_env(S, episode, step_counter)
126 | while not is_terminated:
127 | A = choose_action(S, q_table)
128 | S_, R = get_env_feedback(S, A) # take action & get next state and reward
129 | q_predict = q_table.loc[S, A]
130 | # Q Learning算法
131 | if S_ != 'terminal':
132 | q_target = R + GAMMA * q_table.iloc[S_, :].max() # next state is not terminal
133 | else:
134 | q_target = R # next state is terminal
135 | is_terminated = True # terminate this episode
136 | # 输出Q表
137 | print('\rQ-table: %s\n' % (q_table))
138 | q_table.loc[S, A] += ALPHA * (q_target - q_predict) # update
139 | S = S_ # move to next state
140 | update_env(S, episode, step_counter+1)
141 | step_counter += 1
142 | return q_table
143 |
144 |
145 | if __name__ == "__main__":
146 | q_table = rl()
147 | print('\r\nQ-table:')
148 | print(q_table)
149 |
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/2_Q_Learning_maze/RL_brain.py:
--------------------------------------------------------------------------------
1 | """
2 | This part of code is the Q learning brain, which is a brain of the agent.
3 | All decisions are made in here.
4 |
5 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
6 | """
7 |
8 | import numpy as np
9 | import pandas as pd
10 |
11 |
12 | class QLearningTable:
13 | def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
14 | self.actions = actions # a list
15 | self.lr = learning_rate
16 | self.gamma = reward_decay
17 | self.epsilon = e_greedy
18 | self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)
19 |
20 | def choose_action(self, observation):
21 | self.check_state_exist(observation)
22 | # action selection
23 | if np.random.uniform() < self.epsilon:
24 | # choose best action
25 | state_action = self.q_table.loc[observation, :]
26 | # some actions may have the same value, randomly choose one of these actions
27 | action = np.random.choice(state_action[state_action == np.max(state_action)].index)
28 | else:
29 | # choose random action
30 | action = np.random.choice(self.actions)
31 | return action
32 |
33 | def learn(self, s, a, r, s_):
34 | self.check_state_exist(s_)
35 | q_predict = self.q_table.loc[s, a]
36 | if s_ != 'terminal':
37 | q_target = r + self.gamma * self.q_table.loc[s_, :].max() # next state is not terminal
38 | else:
39 | q_target = r # next state is terminal
40 | self.q_table.loc[s, a] += self.lr * (q_target - q_predict) # update
41 |
42 | def check_state_exist(self, state):
43 | if state not in self.q_table.index:
44 | # append new state to q table
45 | self.q_table = self.q_table.append(
46 | pd.Series(
47 | [0]*len(self.actions),
48 | index=self.q_table.columns,
49 | name=state,
50 | )
51 | )
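
choose_action() above breaks ties among equally valued actions at random instead of always taking the first argmax. A tiny illustration of that idiom; the Series values are invented:

# Tie-breaking idiom from choose_action(): sample uniformly among argmax ties.
import numpy as np
import pandas as pd

state_action = pd.Series([0.5, 0.5, 0.1, 0.5], index=['u', 'd', 'l', 'r'])
best = state_action[state_action == np.max(state_action)].index   # ['u', 'd', 'r']
print(np.random.choice(best))   # each tied action is equally likely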
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/2_Q_Learning_maze/maze_env.py:
--------------------------------------------------------------------------------
1 | """
2 | Reinforcement learning maze example.
3 |
4 | Red rectangle: explorer.
5 | Black rectangles: hells [reward = -1].
6 | Yellow bin circle: paradise [reward = +1].
7 | All other states: ground [reward = 0].
8 |
9 | This script is the environment part of this example. The RL is in RL_brain.py.
10 |
11 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
12 | """
13 |
14 | import numpy as np
15 | import time
16 | import sys
17 |
18 | if sys.version_info.major == 2:
19 | import Tkinter as tk
20 | else:
21 | import tkinter as tk
22 |
23 |
24 | UNIT = 100 # pixels 格子大小,初始值为40
25 | HALF_UNIT = UNIT/2 # 格子位置基准
26 | MOVE_HALF = UNIT/2.5 # 运动节点的半径
27 | BOUND = 5 # 边界
28 | MAZE_H = 4 # grid height
29 | MAZE_W = 4 # grid width
30 |
31 |
32 | class Maze(tk.Tk, object):
33 | def __init__(self):
34 | super(Maze, self).__init__()
35 | self.action_space = ['u', 'd', 'l', 'r']
36 | self.n_actions = len(self.action_space)
37 | self.title('maze迷宫问题')
38 | self.geometry('{0}x{1}'.format(MAZE_H * UNIT, MAZE_H * UNIT))
39 | self._build_maze()
40 |
41 | def _build_maze(self):
42 | self.canvas = tk.Canvas(self, bg='white',
43 | height=MAZE_H * UNIT,
44 | width=MAZE_W * UNIT)
45 | # create grids 画网格
46 | for c in range(0, MAZE_W * UNIT, UNIT):
47 | x0, y0, x1, y1 = c, 0+BOUND, c, MAZE_H * UNIT
48 | self.canvas.create_line(x0, y0, x1, y1)
49 | for r in range(0, MAZE_H * UNIT, UNIT):
50 | x0, y0, x1, y1 = 0+BOUND, r, MAZE_W * UNIT, r
51 | self.canvas.create_line(x0, y0, x1, y1)
52 | # create origin
53 | origin = np.array([HALF_UNIT, HALF_UNIT])
54 | # hell
55 | hell1_center = origin + np.array([UNIT * 2, UNIT])
56 | self.hell1 = self.canvas.create_rectangle(
57 | hell1_center[0] - MOVE_HALF, hell1_center[1] - MOVE_HALF,
58 | hell1_center[0] + MOVE_HALF, hell1_center[1] + MOVE_HALF,
59 | fill='black')
60 | # hell
61 | hell2_center = origin + np.array([UNIT, UNIT * 2])
62 | self.hell2 = self.canvas.create_rectangle(
63 | hell2_center[0] - MOVE_HALF, hell2_center[1] - MOVE_HALF,
64 | hell2_center[0] + MOVE_HALF, hell2_center[1] + MOVE_HALF,
65 | fill='black')
66 | # create oval
67 | oval_center = origin + UNIT * 2
68 | self.oval = self.canvas.create_oval(
69 | oval_center[0] - MOVE_HALF, oval_center[1] - MOVE_HALF,
70 | oval_center[0] + MOVE_HALF, oval_center[1] + MOVE_HALF,
71 | fill='yellow')
72 | # create red rect
73 | self.rect = self.canvas.create_rectangle(
74 | origin[0] - MOVE_HALF, origin[1] - MOVE_HALF,
75 | origin[0] + MOVE_HALF, origin[1] + MOVE_HALF,
76 | fill='red')
77 | # pack all
78 | self.canvas.pack()
79 |
80 | def reset(self):
81 | self.update()
82 | time.sleep(0.5)
83 | self.canvas.delete(self.rect)
84 | origin = np.array([HALF_UNIT, HALF_UNIT])
85 | self.rect = self.canvas.create_rectangle(
86 | origin[0] - MOVE_HALF, origin[1] - MOVE_HALF,
87 | origin[0] + MOVE_HALF, origin[1] + MOVE_HALF,
88 | fill='red')
89 | # return observation
90 | return self.canvas.coords(self.rect)
91 |
92 | def step(self, action):
93 | s = self.canvas.coords(self.rect)
94 | base_action = np.array([0, 0])
95 | if action == 0: # up
96 | if s[1] > UNIT:
97 | base_action[1] -= UNIT
98 | elif action == 1: # down
99 | if s[1] < (MAZE_H - 1) * UNIT:
100 | base_action[1] += UNIT
101 | elif action == 2: # right
102 | if s[0] < (MAZE_W - 1) * UNIT:
103 | base_action[0] += UNIT
104 | elif action == 3: # left
105 | if s[0] > UNIT:
106 | base_action[0] -= UNIT
107 |
108 | self.canvas.move(self.rect, base_action[0], base_action[1]) # move agent
109 |
110 | s_ = self.canvas.coords(self.rect) # next state
111 |
112 | # reward function
113 | if s_ == self.canvas.coords(self.oval):
114 | reward = 1
115 | done = True
116 | s_ = 'terminal'
117 | elif s_ in [self.canvas.coords(self.hell1), self.canvas.coords(self.hell2)]:
118 | reward = -1
119 | done = True
120 | s_ = 'terminal'
121 | else:
122 | reward = 0
123 | done = False
124 |
125 | return s_, reward, done
126 |
127 | def render(self):
128 | time.sleep(0.1)
129 | self.update()
130 |
131 |
132 | def update():
133 | for t in range(10):
134 | s = env.reset()
135 | while True:
136 | env.render()
137 | a = 1
138 | s, r, done = env.step(a)
139 | if done:
140 | break
141 |
142 | if __name__ == '__main__':
143 | env = Maze()
144 | env.after(100, update)
145 | env.mainloop()
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/2_Q_Learning_maze/run_this.py:
--------------------------------------------------------------------------------
1 | """
2 | Reinforcement learning maze example.
3 |
4 | Red rectangle: explorer.
5 | Black rectangles: hells [reward = -1].
6 | Yellow bin circle: paradise [reward = +1].
7 | All other states: ground [reward = 0].
8 |
9 | This script is the main part which controls the update method of this example.
10 | The RL is in RL_brain.py.
11 |
12 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
13 | """
14 |
15 | from maze_env import Maze
16 | from RL_brain import QLearningTable
17 |
18 |
19 | def update():
20 | for episode in range(100):
21 | # initial observation
22 | observation = env.reset()
23 |
24 | while True:
25 | # fresh env
26 | env.render()
27 |
28 | # RL choose action based on observation
29 | action = RL.choose_action(str(observation))
30 |
31 | # RL take action and get next observation and reward
32 | observation_, reward, done = env.step(action)
33 |
34 | # RL learn from this transition
35 | RL.learn(str(observation), action, reward, str(observation_))
36 |
37 | # swap observation
38 | observation = observation_
39 |
40 | # break while loop when end of this episode
41 | if done:
42 | break
43 |
44 | # end of game
45 | print('game over')
46 | env.destroy()
47 |
48 | if __name__ == "__main__":
49 | env = Maze()
50 | RL = QLearningTable(actions=list(range(env.n_actions)))
51 |
52 | env.after(100, update)
53 | env.mainloop()
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/3_Sarsa_maze/RL_brain.py:
--------------------------------------------------------------------------------
1 | """
2 | This part of code is the Q learning brain, which is a brain of the agent.
3 | All decisions are made in here.
4 |
5 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
6 | """
7 |
8 | import numpy as np
9 | import pandas as pd
10 |
11 |
12 | class RL(object):
13 | def __init__(self, action_space, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
14 | self.actions = action_space # a list
15 | self.lr = learning_rate
16 | self.gamma = reward_decay
17 | self.epsilon = e_greedy
18 |
19 | self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)
20 |
21 | def check_state_exist(self, state):
22 | if state not in self.q_table.index:
23 | # append new state to q table
24 | self.q_table = self.q_table.append(
25 | pd.Series(
26 | [0]*len(self.actions),
27 | index=self.q_table.columns,
28 | name=state,
29 | )
30 | )
31 |
32 | def choose_action(self, observation):
33 | self.check_state_exist(observation)
34 | # action selection
35 | if np.random.rand() < self.epsilon:
36 | # choose best action
37 | state_action = self.q_table.loc[observation, :]
38 | # some actions may have the same value, randomly choose one of these actions
39 | action = np.random.choice(state_action[state_action == np.max(state_action)].index)
40 | else:
41 | # choose random action
42 | action = np.random.choice(self.actions)
43 | return action
44 |
45 | def learn(self, *args):
46 | pass
47 |
48 |
49 | # off-policy
50 | class QLearningTable(RL):
51 | def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
52 | super(QLearningTable, self).__init__(actions, learning_rate, reward_decay, e_greedy)
53 |
54 | def learn(self, s, a, r, s_):
55 | self.check_state_exist(s_)
56 | q_predict = self.q_table.loc[s, a]
57 | if s_ != 'terminal':
58 | q_target = r + self.gamma * self.q_table.loc[s_, :].max() # next state is not terminal
59 | else:
60 | q_target = r # next state is terminal
61 | self.q_table.loc[s, a] += self.lr * (q_target - q_predict) # update
62 |
63 |
64 | # on-policy
65 | class SarsaTable(RL):
66 |
67 | def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
68 | super(SarsaTable, self).__init__(actions, learning_rate, reward_decay, e_greedy)
69 |
70 | def learn(self, s, a, r, s_, a_):
71 | self.check_state_exist(s_)
72 | q_predict = self.q_table.loc[s, a]
73 | if s_ != 'terminal':
74 | q_target = r + self.gamma * self.q_table.loc[s_, a_] # next state is not terminal
75 | else:
76 | q_target = r # next state is terminal
77 | self.q_table.loc[s, a] += self.lr * (q_target - q_predict) # update
78 |
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/3_Sarsa_maze/maze_env.py:
--------------------------------------------------------------------------------
1 | """
2 | Reinforcement learning maze example.
3 |
4 | Red rectangle: explorer.
5 | Black rectangles: hells [reward = -1].
6 | Yellow bin circle: paradise [reward = +1].
7 | All other states: ground [reward = 0].
8 |
9 | This script is the environment part of this example. The RL is in RL_brain.py.
10 |
11 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
12 | """
13 |
14 | import numpy as np
15 | import time
16 | import sys
17 |
18 | if sys.version_info.major == 2:
19 | import Tkinter as tk
20 | else:
21 | import tkinter as tk
22 |
23 |
24 | UNIT = 100 # pixels 格子大小,初始值为40
25 | HALF_UNIT = UNIT/2 # 格子位置基准
26 | MOVE_HALF = UNIT/2.5 # 运动节点的半径
27 | BOUND = 5 # 边界
28 | MAZE_H = 4 # grid height
29 | MAZE_W = 4 # grid width
30 |
31 |
32 | class Maze(tk.Tk, object):
33 | def __init__(self):
34 | super(Maze, self).__init__()
35 | self.action_space = ['u', 'd', 'l', 'r']
36 | self.n_actions = len(self.action_space)
37 | self.title('maze迷宫问题')
38 | self.geometry('{0}x{1}'.format(MAZE_H * UNIT, MAZE_H * UNIT))
39 | self._build_maze()
40 |
41 | def _build_maze(self):
42 | self.canvas = tk.Canvas(self, bg='white',
43 | height=MAZE_H * UNIT,
44 | width=MAZE_W * UNIT)
45 | # create grids 画网格
46 | for c in range(0, MAZE_W * UNIT, UNIT):
47 | x0, y0, x1, y1 = c, 0+BOUND, c, MAZE_H * UNIT
48 | self.canvas.create_line(x0, y0, x1, y1)
49 | for r in range(0, MAZE_H * UNIT, UNIT):
50 | x0, y0, x1, y1 = 0+BOUND, r, MAZE_W * UNIT, r
51 | self.canvas.create_line(x0, y0, x1, y1)
52 | # create origin
53 | origin = np.array([HALF_UNIT, HALF_UNIT])
54 | # hell
55 | hell1_center = origin + np.array([UNIT * 2, UNIT])
56 | self.hell1 = self.canvas.create_rectangle(
57 | hell1_center[0] - MOVE_HALF, hell1_center[1] - MOVE_HALF,
58 | hell1_center[0] + MOVE_HALF, hell1_center[1] + MOVE_HALF,
59 | fill='black')
60 | # hell
61 | hell2_center = origin + np.array([UNIT, UNIT * 2])
62 | self.hell2 = self.canvas.create_rectangle(
63 | hell2_center[0] - MOVE_HALF, hell2_center[1] - MOVE_HALF,
64 | hell2_center[0] + MOVE_HALF, hell2_center[1] + MOVE_HALF,
65 | fill='black')
66 | # create oval
67 | oval_center = origin + UNIT * 2
68 | self.oval = self.canvas.create_oval(
69 | oval_center[0] - MOVE_HALF, oval_center[1] - MOVE_HALF,
70 | oval_center[0] + MOVE_HALF, oval_center[1] + MOVE_HALF,
71 | fill='yellow')
72 | # create red rect
73 | self.rect = self.canvas.create_rectangle(
74 | origin[0] - MOVE_HALF, origin[1] - MOVE_HALF,
75 | origin[0] + MOVE_HALF, origin[1] + MOVE_HALF,
76 | fill='red')
77 | # pack all
78 | self.canvas.pack()
79 |
80 | def reset(self):
81 | self.update()
82 | time.sleep(0.5)
83 | self.canvas.delete(self.rect)
84 | origin = np.array([HALF_UNIT, HALF_UNIT])
85 | self.rect = self.canvas.create_rectangle(
86 | origin[0] - MOVE_HALF, origin[1] - MOVE_HALF,
87 | origin[0] + MOVE_HALF, origin[1] + MOVE_HALF,
88 | fill='red')
89 | # return observation
90 | return self.canvas.coords(self.rect)
91 |
92 | def step(self, action):
93 | s = self.canvas.coords(self.rect)
94 | base_action = np.array([0, 0])
95 | if action == 0: # up
96 | if s[1] > UNIT:
97 | base_action[1] -= UNIT
98 | elif action == 1: # down
99 | if s[1] < (MAZE_H - 1) * UNIT:
100 | base_action[1] += UNIT
101 | elif action == 2: # right
102 | if s[0] < (MAZE_W - 1) * UNIT:
103 | base_action[0] += UNIT
104 | elif action == 3: # left
105 | if s[0] > UNIT:
106 | base_action[0] -= UNIT
107 |
108 | self.canvas.move(self.rect, base_action[0], base_action[1]) # move agent
109 |
110 | s_ = self.canvas.coords(self.rect) # next state
111 |
112 | # reward function
113 | if s_ == self.canvas.coords(self.oval):
114 | reward = 1
115 | done = True
116 | s_ = 'terminal'
117 | elif s_ in [self.canvas.coords(self.hell1), self.canvas.coords(self.hell2)]:
118 | reward = -1
119 | done = True
120 | s_ = 'terminal'
121 | else:
122 | reward = 0
123 | done = False
124 |
125 | return s_, reward, done
126 |
127 | def render(self):
128 | time.sleep(0.1)
129 | self.update()
130 |
131 |
132 | def update():
133 | for t in range(10):
134 | s = env.reset()
135 | while True:
136 | env.render()
137 | a = 1
138 | s, r, done = env.step(a)
139 | if done:
140 | break
141 |
142 | if __name__ == '__main__':
143 | env = Maze()
144 | env.after(100, update)
145 | env.mainloop()
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/3_Sarsa_maze/run_this.py:
--------------------------------------------------------------------------------
1 | """
2 | Sarsa is an on-policy (online) updating method for reinforcement learning.
3 |
4 | Unlike Q-learning, which updates off-policy, Sarsa updates with the action actually taken in the current trajectory.
5 |
6 | You will see that Sarsa is more cautious when the punishment is close, because it accounts for the exploratory behaviour it actually follows,
7 | while Q-learning is bolder because it only cares about the maximum-value behaviour.
8 | """
9 |
10 | from maze_env import Maze
11 | from RL_brain import SarsaTable
12 |
13 |
14 | def update():
15 | for episode in range(100):
16 | # initial observation
17 | observation = env.reset()
18 |
19 | # RL choose action based on observation
20 | action = RL.choose_action(str(observation))
21 |
22 | while True:
23 | # fresh env
24 | env.render()
25 |
26 | # RL take action and get next observation and reward
27 | observation_, reward, done = env.step(action)
28 |
29 | # RL choose action based on next observation
30 | action_ = RL.choose_action(str(observation_))
31 |
32 | # RL learn from this transition (s, a, r, s, a) ==> Sarsa
33 | RL.learn(str(observation), action, reward, str(observation_), action_)
34 |
35 | # swap observation and action
36 | observation = observation_
37 | action = action_
38 |
39 | # break while loop when end of this episode
40 | if done:
41 | break
42 |
43 | # end of game
44 | print('game over')
45 | env.destroy()
46 |
47 | if __name__ == "__main__":
48 | env = Maze()
49 | RL = SarsaTable(actions=list(range(env.n_actions)))
50 |
51 | env.after(100, update)
52 | env.mainloop()
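
The docstring above contrasts on-policy Sarsa with off-policy Q-learning; in RL_brain.py the only difference is the bootstrap term of q_target. A tiny numeric illustration with invented values for one transition (s, a, r, s_, a_):

# Same transition, two targets: Q-learning bootstraps on the max,
# Sarsa bootstraps on the action a_ the epsilon-greedy policy actually took.
GAMMA = 0.9
r = 0
q_next = {'safe': 0.2, 'risky': 0.8}   # invented Q(s_, .); 'risky' passes near a hell
a_ = 'safe'                            # action actually chosen in s_

q_target_qlearning = r + GAMMA * max(q_next.values())   # 0.72
q_target_sarsa = r + GAMMA * q_next[a_]                 # 0.18
print(q_target_qlearning, q_target_sarsa)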
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/4_Sarsa_lambda_maze/RL_brain.py:
--------------------------------------------------------------------------------
1 | """
2 | This part of code is the Q learning brain, which is a brain of the agent.
3 | All decisions are made in here.
4 |
5 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
6 | """
7 |
8 | import numpy as np
9 | import pandas as pd
10 |
11 |
12 | class RL(object):
13 | def __init__(self, action_space, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
14 | self.actions = action_space # a list
15 | self.lr = learning_rate
16 | self.gamma = reward_decay
17 | self.epsilon = e_greedy
18 |
19 | self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)
20 |
21 | def check_state_exist(self, state):
22 | if state not in self.q_table.index:
23 | # append new state to q table
24 | self.q_table = self.q_table.append(
25 | pd.Series(
26 | [0]*len(self.actions),
27 | index=self.q_table.columns,
28 | name=state,
29 | )
30 | )
31 |
32 | def choose_action(self, observation):
33 | self.check_state_exist(observation)
34 | # action selection
35 | if np.random.rand() < self.epsilon:
36 | # choose best action
37 | state_action = self.q_table.loc[observation, :]
38 | # some actions may have the same value; randomly choose one of these actions
39 | action = np.random.choice(state_action[state_action == np.max(state_action)].index)
40 | else:
41 | # choose random action
42 | action = np.random.choice(self.actions)
43 | return action
44 |
45 | def learn(self, *args):
46 | pass
47 |
48 |
49 | # backward eligibility traces
50 | class SarsaLambdaTable(RL):
51 | def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9, trace_decay=0.9):
52 | super(SarsaLambdaTable, self).__init__(actions, learning_rate, reward_decay, e_greedy)
53 |
54 | # backward view, eligibility trace.
55 | self.lambda_ = trace_decay
56 | self.eligibility_trace = self.q_table.copy()
57 |
58 | def check_state_exist(self, state):
59 | if state not in self.q_table.index:
60 | # append new state to q table
61 | to_be_append = pd.Series(
62 | [0] * len(self.actions),
63 | index=self.q_table.columns,
64 | name=state,
65 | )
66 | self.q_table = self.q_table.append(to_be_append)
67 |
68 | # also update eligibility trace
69 | self.eligibility_trace = self.eligibility_trace.append(to_be_append)
70 |
71 | def learn(self, s, a, r, s_, a_):
72 | self.check_state_exist(s_)
73 | q_predict = self.q_table.loc[s, a]
74 | if s_ != 'terminal':
75 | q_target = r + self.gamma * self.q_table.loc[s_, a_] # next state is not terminal
76 | else:
77 | q_target = r # next state is terminal
78 | error = q_target - q_predict
79 |
80 | # increase trace amount for visited state-action pair
81 |
82 | # Method 1:
83 | # self.eligibility_trace.loc[s, a] += 1
84 |
85 | # Method 2:
86 | self.eligibility_trace.loc[s, :] *= 0
87 | self.eligibility_trace.loc[s, a] = 1
88 |
89 | # Q update
90 | self.q_table += self.lr * error * self.eligibility_trace
91 |
92 | # decay eligibility trace after update
93 | self.eligibility_trace *= self.gamma*self.lambda_
94 |
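# Illustrative sketch of the two trace-update options commented in learn() above
# ("Method 1" accumulating vs. "Method 2" replacing), using a plain dict in place of
# the pandas DataFrame; the numbers are made up.
gamma, lambda_ = 0.9, 0.9
trace = {('s1', 'a0'): 0.5, ('s1', 'a1'): 0.2}

# Method 1 (accumulating trace): every visit adds 1, so revisits can push it above 1.
trace_acc = dict(trace)
trace_acc[('s1', 'a0')] += 1                      # -> 1.5

# Method 2 (replacing trace): zero the whole row for s1, then set the visited pair to 1.
trace_rep = {k: (0.0 if k[0] == 's1' else v) for k, v in trace.items()}
trace_rep[('s1', 'a0')] = 1.0                     # -> 1.0, ('s1', 'a1') -> 0.0

# After each learn() call both variants decay by gamma * lambda_.
trace_rep = {k: v * gamma * lambda_ for k, v in trace_rep.items()}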
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/4_Sarsa_lambda_maze/maze_env.py:
--------------------------------------------------------------------------------
1 | """
2 | Reinforcement learning maze example.
3 |
4 | Red rectangle: explorer.
5 | Black rectangles: hells [reward = -1].
6 | Yellow circle: paradise [reward = +1].
7 | All other states: ground [reward = 0].
8 |
9 | This script is the environment part of this example.
10 | The RL is in RL_brain.py.
11 |
12 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
13 | """
14 |
15 |
16 | import numpy as np
17 | import time
18 | import sys
19 | if sys.version_info.major == 2:
20 | import Tkinter as tk
21 | else:
22 | import tkinter as tk
23 |
24 |
25 | UNIT = 40 # pixels
26 | MAZE_H = 4 # grid height
27 | MAZE_W = 4 # grid width
28 |
29 |
30 | class Maze(tk.Tk, object):
31 | def __init__(self):
32 | super(Maze, self).__init__()
33 | self.action_space = ['u', 'd', 'l', 'r']
34 | self.n_actions = len(self.action_space)
35 | self.title('maze')
36 | self.geometry('{0}x{1}'.format(MAZE_H * UNIT, MAZE_H * UNIT))
37 | self._build_maze()
38 |
39 | def _build_maze(self):
40 | self.canvas = tk.Canvas(self, bg='white',
41 | height=MAZE_H * UNIT,
42 | width=MAZE_W * UNIT)
43 |
44 | # create grids
45 | for c in range(0, MAZE_W * UNIT, UNIT):
46 | x0, y0, x1, y1 = c, 0, c, MAZE_H * UNIT
47 | self.canvas.create_line(x0, y0, x1, y1)
48 | for r in range(0, MAZE_H * UNIT, UNIT):
49 | x0, y0, x1, y1 = 0, r, MAZE_W * UNIT, r
50 | self.canvas.create_line(x0, y0, x1, y1)
51 |
52 | # create origin
53 | origin = np.array([20, 20])
54 |
55 | # hell
56 | hell1_center = origin + np.array([UNIT * 2, UNIT])
57 | self.hell1 = self.canvas.create_rectangle(
58 | hell1_center[0] - 15, hell1_center[1] - 15,
59 | hell1_center[0] + 15, hell1_center[1] + 15,
60 | fill='black')
61 | # hell
62 | hell2_center = origin + np.array([UNIT, UNIT * 2])
63 | self.hell2 = self.canvas.create_rectangle(
64 | hell2_center[0] - 15, hell2_center[1] - 15,
65 | hell2_center[0] + 15, hell2_center[1] + 15,
66 | fill='black')
67 |
68 | # create oval
69 | oval_center = origin + UNIT * 2
70 | self.oval = self.canvas.create_oval(
71 | oval_center[0] - 15, oval_center[1] - 15,
72 | oval_center[0] + 15, oval_center[1] + 15,
73 | fill='yellow')
74 |
75 | # create red rect
76 | self.rect = self.canvas.create_rectangle(
77 | origin[0] - 15, origin[1] - 15,
78 | origin[0] + 15, origin[1] + 15,
79 | fill='red')
80 |
81 | # pack all
82 | self.canvas.pack()
83 |
84 | def reset(self):
85 | self.update()
86 | time.sleep(0.5)
87 | self.canvas.delete(self.rect)
88 | origin = np.array([20, 20])
89 | self.rect = self.canvas.create_rectangle(
90 | origin[0] - 15, origin[1] - 15,
91 | origin[0] + 15, origin[1] + 15,
92 | fill='red')
93 | # return observation
94 | return self.canvas.coords(self.rect)
95 |
96 | def step(self, action):
97 | s = self.canvas.coords(self.rect)
98 | base_action = np.array([0, 0])
99 | if action == 0: # up
100 | if s[1] > UNIT:
101 | base_action[1] -= UNIT
102 | elif action == 1: # down
103 | if s[1] < (MAZE_H - 1) * UNIT:
104 | base_action[1] += UNIT
105 | elif action == 2: # right
106 | if s[0] < (MAZE_W - 1) * UNIT:
107 | base_action[0] += UNIT
108 | elif action == 3: # left
109 | if s[0] > UNIT:
110 | base_action[0] -= UNIT
111 |
112 | self.canvas.move(self.rect, base_action[0], base_action[1]) # move agent
113 |
114 | s_ = self.canvas.coords(self.rect) # next state
115 |
116 | # reward function
117 | if s_ == self.canvas.coords(self.oval):
118 | reward = 1
119 | done = True
120 | s_ = 'terminal'
121 | elif s_ in [self.canvas.coords(self.hell1), self.canvas.coords(self.hell2)]:
122 | reward = -1
123 | done = True
124 | s_ = 'terminal'
125 | else:
126 | reward = 0
127 | done = False
128 |
129 | return s_, reward, done
130 |
131 | def render(self):
132 | time.sleep(0.05)
133 | self.update()
134 |
135 |
136 |
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/4_Sarsa_lambda_maze/run_this.py:
--------------------------------------------------------------------------------
1 | """
2 | Sarsa is an online (on-policy) updating method for reinforcement learning.
3 |
4 | Unlike Q-learning, which is off-policy and bootstraps from the greedy action, Sarsa updates along the trajectory it actually follows.
5 |
6 | You will see that Sarsa is more cautious when punishment is close, because it accounts for all of its behaviour,
7 | while Q-learning is bolder because it only cares about the value-maximizing behaviour.
8 | """
9 |
10 | from maze_env import Maze
11 | from RL_brain import SarsaLambdaTable
12 |
13 |
14 | def update():
15 | for episode in range(100):
16 | # initial observation
17 | observation = env.reset()
18 |
19 | # RL choose action based on observation
20 | action = RL.choose_action(str(observation))
21 |
22 | # initial all zero eligibility trace
23 | RL.eligibility_trace *= 0
24 |
25 | while True:
26 | # fresh env
27 | env.render()
28 |
29 | # RL take action and get next observation and reward
30 | observation_, reward, done = env.step(action)
31 |
32 | # RL choose action based on next observation
33 | action_ = RL.choose_action(str(observation_))
34 |
35 | # RL learn from this transition (s, a, r, s, a) ==> Sarsa
36 | RL.learn(str(observation), action, reward, str(observation_), action_)
37 |
38 | # swap observation and action
39 | observation = observation_
40 | action = action_
41 |
42 | # break while loop when end of this episode
43 | if done:
44 | break
45 |
46 | # end of game
47 | print('game over')
48 | env.destroy()
49 |
50 | if __name__ == "__main__":
51 | env = Maze()
52 | RL = SarsaLambdaTable(actions=list(range(env.n_actions)))
53 |
54 | env.after(100, update)
55 | env.mainloop()
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/5.1_Double_DQN/run_Pendulum.py:
--------------------------------------------------------------------------------
1 | """
2 | Double DQN & Natural DQN comparison,
3 | The Pendulum example.
4 |
5 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
6 |
7 | Using:
8 | Tensorflow: 1.0
9 | gym: 0.8.0
10 | """
11 |
12 |
13 | import gym
14 | from RL_brain import DoubleDQN
15 | import numpy as np
16 | import matplotlib.pyplot as plt
17 | import tensorflow as tf
18 |
19 |
20 | env = gym.make('Pendulum-v0')
21 | env = env.unwrapped
22 | env.seed(1)
23 | MEMORY_SIZE = 3000
24 | ACTION_SPACE = 11
25 |
26 | sess = tf.Session()
27 | with tf.variable_scope('Natural_DQN'):
28 | natural_DQN = DoubleDQN(
29 | n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE,
30 | e_greedy_increment=0.001, double_q=False, sess=sess
31 | )
32 |
33 | with tf.variable_scope('Double_DQN'):
34 | double_DQN = DoubleDQN(
35 | n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE,
36 | e_greedy_increment=0.001, double_q=True, sess=sess, output_graph=True)
37 |
38 | sess.run(tf.global_variables_initializer())
39 |
40 |
41 | def train(RL):
42 | total_steps = 0
43 | observation = env.reset()
44 | while True:
45 | # if total_steps - MEMORY_SIZE > 8000: env.render()
46 |
47 | action = RL.choose_action(observation)
48 |
49 | f_action = (action-(ACTION_SPACE-1)/2)/((ACTION_SPACE-1)/4) # convert to [-2 ~ 2] float actions
50 | observation_, reward, done, info = env.step(np.array([f_action]))
51 |
52 | reward /= 10 # normalize to a range of (-1, 0); r = 0 when the pendulum is upright
53 | # the Q target at upright state will be 0, because Q_target = r + gamma * Qmax(s', a') = 0 + gamma * 0
54 | # so when Q at this state is greater than 0, the agent overestimates the Q. Please refer to the final result.
55 |
56 | RL.store_transition(observation, action, reward, observation_)
57 |
58 | if total_steps > MEMORY_SIZE: # learning
59 | RL.learn()
60 |
61 | if total_steps - MEMORY_SIZE > 20000: # stop game
62 | break
63 |
64 | observation = observation_
65 | total_steps += 1
66 | return RL.q
67 |
68 | q_natural = train(natural_DQN)
69 | q_double = train(double_DQN)
70 |
71 | plt.plot(np.array(q_natural), c='r', label='natural')
72 | plt.plot(np.array(q_double), c='b', label='double')
73 | plt.legend(loc='best')
74 | plt.ylabel('Q eval')
75 | plt.xlabel('training steps')
76 | plt.grid()
77 | plt.show()
78 |
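# Illustrative sketch of the target that distinguishes the two agents compared above.
# DoubleDQN's learn() is defined in RL_brain.py (not shown here); the array names below
# are placeholders, not the class's actual attributes, and the values are made up.
import numpy as np

gamma, r = 0.9, -0.5
q_eval_next = np.array([0.2, 1.3, 0.7])     # online net's Q(s', .)
q_target_next = np.array([0.1, 0.9, 1.1])   # target net's Q(s', .)

# Natural DQN: take the max over the target net (tends to overestimate).
natural_target = r + gamma * q_target_next.max()
# Double DQN: select the action with the online net, evaluate it with the target net.
double_target = r + gamma * q_target_next[q_eval_next.argmax()]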
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/5.2_Prioritized_Replay_DQN/run_MountainCar.py:
--------------------------------------------------------------------------------
1 | """
2 | The DQN improvement: Prioritized Experience Replay (based on https://arxiv.org/abs/1511.05952)
3 |
4 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
5 |
6 | Using:
7 | Tensorflow: 1.0
8 | gym: 0.8.0
9 | """
10 |
11 |
12 | import gym
13 | from RL_brain import DQNPrioritizedReplay
14 | import matplotlib.pyplot as plt
15 | import tensorflow as tf
16 | import numpy as np
17 |
18 | env = gym.make('MountainCar-v0')
19 | env = env.unwrapped
20 | env.seed(21)
21 | MEMORY_SIZE = 10000
22 |
23 | sess = tf.Session()
24 | with tf.variable_scope('natural_DQN'):
25 | RL_natural = DQNPrioritizedReplay(
26 | n_actions=3, n_features=2, memory_size=MEMORY_SIZE,
27 | e_greedy_increment=0.00005, sess=sess, prioritized=False,
28 | )
29 |
30 | with tf.variable_scope('DQN_with_prioritized_replay'):
31 | RL_prio = DQNPrioritizedReplay(
32 | n_actions=3, n_features=2, memory_size=MEMORY_SIZE,
33 | e_greedy_increment=0.00005, sess=sess, prioritized=True, output_graph=True,
34 | )
35 | sess.run(tf.global_variables_initializer())
36 |
37 |
38 | def train(RL):
39 | total_steps = 0
40 | steps = []
41 | episodes = []
42 | for i_episode in range(20):
43 | observation = env.reset()
44 | while True:
45 | # env.render()
46 |
47 | action = RL.choose_action(observation)
48 |
49 | observation_, reward, done, info = env.step(action)
50 |
51 | if done: reward = 10
52 |
53 | RL.store_transition(observation, action, reward, observation_)
54 |
55 | if total_steps > MEMORY_SIZE:
56 | RL.learn()
57 |
58 | if done:
59 | print('episode ', i_episode, ' finished')
60 | steps.append(total_steps)
61 | episodes.append(i_episode)
62 | break
63 |
64 | observation = observation_
65 | total_steps += 1
66 | return np.vstack((episodes, steps))
67 |
68 | his_natural = train(RL_natural)
69 | his_prio = train(RL_prio)
70 |
71 | # compare based on first success
72 | plt.plot(his_natural[0, :], his_natural[1, :] - his_natural[1, 0], c='b', label='natural DQN')
73 | plt.plot(his_prio[0, :], his_prio[1, :] - his_prio[1, 0], c='r', label='DQN with prioritized replay')
74 | plt.legend(loc='best')
75 | plt.ylabel('total training time')
76 | plt.xlabel('episode')
77 | plt.grid()
78 | plt.show()
79 |
80 |
81 |
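# Illustrative sketch of proportional prioritized sampling, the idea behind
# DQNPrioritizedReplay (its implementation in RL_brain.py is not shown here and is
# typically backed by a sum tree for efficiency). Values below are made up.
import numpy as np

alpha, beta = 0.6, 0.4
abs_td_errors = np.array([0.01, 0.5, 1.2, 0.05])   # |TD error| per stored transition

priorities = (abs_td_errors + 1e-6) ** alpha
probs = priorities / priorities.sum()              # sampling probabilities
batch_idx = np.random.choice(len(probs), size=2, p=probs)

# Importance-sampling weights correct the bias from non-uniform sampling.
is_weights = (len(probs) * probs[batch_idx]) ** (-beta)
is_weights /= is_weights.max()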
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/5.3_Dueling_DQN/run_Pendulum.py:
--------------------------------------------------------------------------------
1 | """
2 | Dueling DQN & Natural DQN comparison
3 |
4 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
5 |
6 | Using:
7 | Tensorflow: 1.0
8 | gym: 0.8.0
9 | """
10 |
11 |
12 | import gym
13 | from RL_brain import DuelingDQN
14 | import numpy as np
15 | import matplotlib.pyplot as plt
16 | import tensorflow as tf
17 |
18 |
19 | env = gym.make('Pendulum-v0')
20 | env = env.unwrapped
21 | env.seed(1)
22 | MEMORY_SIZE = 3000
23 | ACTION_SPACE = 25
24 |
25 | sess = tf.Session()
26 | with tf.variable_scope('natural'):
27 | natural_DQN = DuelingDQN(
28 | n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE,
29 | e_greedy_increment=0.001, sess=sess, dueling=False)
30 |
31 | with tf.variable_scope('dueling'):
32 | dueling_DQN = DuelingDQN(
33 | n_actions=ACTION_SPACE, n_features=3, memory_size=MEMORY_SIZE,
34 | e_greedy_increment=0.001, sess=sess, dueling=True, output_graph=True)
35 |
36 | sess.run(tf.global_variables_initializer())
37 |
38 |
39 | def train(RL):
40 | acc_r = [0]
41 | total_steps = 0
42 | observation = env.reset()
43 | while True:
44 | # if total_steps-MEMORY_SIZE > 9000: env.render()
45 |
46 | action = RL.choose_action(observation)
47 |
48 | f_action = (action-(ACTION_SPACE-1)/2)/((ACTION_SPACE-1)/4) # [-2 ~ 2] float actions
49 | observation_, reward, done, info = env.step(np.array([f_action]))
50 |
51 | reward /= 10 # normalize to a range of (-1, 0)
52 | acc_r.append(reward + acc_r[-1]) # accumulated reward
53 |
54 | RL.store_transition(observation, action, reward, observation_)
55 |
56 | if total_steps > MEMORY_SIZE:
57 | RL.learn()
58 |
59 | if total_steps-MEMORY_SIZE > 15000:
60 | break
61 |
62 | observation = observation_
63 | total_steps += 1
64 | return RL.cost_his, acc_r
65 |
66 | c_natural, r_natural = train(natural_DQN)
67 | c_dueling, r_dueling = train(dueling_DQN)
68 |
69 | plt.figure(1)
70 | plt.plot(np.array(c_natural), c='r', label='natural')
71 | plt.plot(np.array(c_dueling), c='b', label='dueling')
72 | plt.legend(loc='best')
73 | plt.ylabel('cost')
74 | plt.xlabel('training steps')
75 | plt.grid()
76 |
77 | plt.figure(2)
78 | plt.plot(np.array(r_natural), c='r', label='natural')
79 | plt.plot(np.array(r_dueling), c='b', label='dueling')
80 | plt.legend(loc='best')
81 | plt.ylabel('accumulated reward')
82 | plt.xlabel('training steps')
83 | plt.grid()
84 |
85 | plt.show()
86 |
87 |
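# Illustrative sketch of the dueling head used by DuelingDQN (defined in RL_brain.py,
# not shown here): Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)). Numbers are made up.
import numpy as np

V = 1.5                              # state value
A = np.array([0.2, -0.1, 0.4])       # one advantage per action
Q = V + (A - A.mean())               # subtracting the mean keeps V and A identifiable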
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/5_Deep_Q_Network/maze_env.py:
--------------------------------------------------------------------------------
1 | """
2 | Reinforcement learning maze example.
3 |
4 | Red rectangle: explorer.
5 | Black rectangles: hells [reward = -1].
6 | Yellow circle: paradise [reward = +1].
7 | All other states: ground [reward = 0].
8 |
9 | This script is the environment part of this example.
10 | The RL is in RL_brain.py.
11 |
12 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
13 | """
14 | import numpy as np
15 | import time
16 | import sys
17 | if sys.version_info.major == 2:
18 | import Tkinter as tk
19 | else:
20 | import tkinter as tk
21 |
22 | UNIT = 40 # pixels
23 | MAZE_H = 4 # grid height
24 | MAZE_W = 4 # grid width
25 |
26 |
27 | class Maze(tk.Tk, object):
28 | def __init__(self):
29 | super(Maze, self).__init__()
30 | self.action_space = ['u', 'd', 'l', 'r']
31 | self.n_actions = len(self.action_space)
32 | self.n_features = 2
33 | self.title('maze')
34 | self.geometry('{0}x{1}'.format(MAZE_H * UNIT, MAZE_H * UNIT))
35 | self._build_maze()
36 |
37 | def _build_maze(self):
38 | self.canvas = tk.Canvas(self, bg='white',
39 | height=MAZE_H * UNIT,
40 | width=MAZE_W * UNIT)
41 |
42 | # create grids
43 | for c in range(0, MAZE_W * UNIT, UNIT):
44 | x0, y0, x1, y1 = c, 0, c, MAZE_H * UNIT
45 | self.canvas.create_line(x0, y0, x1, y1)
46 | for r in range(0, MAZE_H * UNIT, UNIT):
47 | x0, y0, x1, y1 = 0, r, MAZE_W * UNIT, r
48 | self.canvas.create_line(x0, y0, x1, y1)
49 |
50 | # create origin
51 | origin = np.array([20, 20])
52 |
53 | # hell
54 | hell1_center = origin + np.array([UNIT * 2, UNIT])
55 | self.hell1 = self.canvas.create_rectangle(
56 | hell1_center[0] - 15, hell1_center[1] - 15,
57 | hell1_center[0] + 15, hell1_center[1] + 15,
58 | fill='black')
59 | # hell
60 | # hell2_center = origin + np.array([UNIT, UNIT * 2])
61 | # self.hell2 = self.canvas.create_rectangle(
62 | # hell2_center[0] - 15, hell2_center[1] - 15,
63 | # hell2_center[0] + 15, hell2_center[1] + 15,
64 | # fill='black')
65 |
66 | # create oval
67 | oval_center = origin + UNIT * 2
68 | self.oval = self.canvas.create_oval(
69 | oval_center[0] - 15, oval_center[1] - 15,
70 | oval_center[0] + 15, oval_center[1] + 15,
71 | fill='yellow')
72 |
73 | # create red rect
74 | self.rect = self.canvas.create_rectangle(
75 | origin[0] - 15, origin[1] - 15,
76 | origin[0] + 15, origin[1] + 15,
77 | fill='red')
78 |
79 | # pack all
80 | self.canvas.pack()
81 |
82 | def reset(self):
83 | self.update()
84 | time.sleep(0.1)
85 | self.canvas.delete(self.rect)
86 | origin = np.array([20, 20])
87 | self.rect = self.canvas.create_rectangle(
88 | origin[0] - 15, origin[1] - 15,
89 | origin[0] + 15, origin[1] + 15,
90 | fill='red')
91 | # return observation
92 | return (np.array(self.canvas.coords(self.rect)[:2]) - np.array(self.canvas.coords(self.oval)[:2]))/(MAZE_H*UNIT)
93 |
94 | def step(self, action):
95 | s = self.canvas.coords(self.rect)
96 | base_action = np.array([0, 0])
97 | if action == 0: # up
98 | if s[1] > UNIT:
99 | base_action[1] -= UNIT
100 | elif action == 1: # down
101 | if s[1] < (MAZE_H - 1) * UNIT:
102 | base_action[1] += UNIT
103 | elif action == 2: # right
104 | if s[0] < (MAZE_W - 1) * UNIT:
105 | base_action[0] += UNIT
106 | elif action == 3: # left
107 | if s[0] > UNIT:
108 | base_action[0] -= UNIT
109 |
110 | self.canvas.move(self.rect, base_action[0], base_action[1]) # move agent
111 |
112 | next_coords = self.canvas.coords(self.rect) # next state
113 |
114 | # reward function
115 | if next_coords == self.canvas.coords(self.oval):
116 | reward = 1
117 | done = True
118 | elif next_coords in [self.canvas.coords(self.hell1)]:
119 | reward = -1
120 | done = True
121 | else:
122 | reward = 0
123 | done = False
124 | s_ = (np.array(next_coords[:2]) - np.array(self.canvas.coords(self.oval)[:2]))/(MAZE_H*UNIT)
125 | return s_, reward, done
126 |
127 | def render(self):
128 | # time.sleep(0.01)
129 | self.update()
130 |
131 |
132 |
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/5_Deep_Q_Network/run_this.py:
--------------------------------------------------------------------------------
1 | from maze_env import Maze
2 | from RL_brain import DeepQNetwork
3 |
4 |
5 | def run_maze():
6 | step = 0
7 | for episode in range(300):
8 | # initial observation
9 | observation = env.reset()
10 |
11 | while True:
12 | # fresh env
13 | env.render()
14 |
15 | # RL choose action based on observation
16 | action = RL.choose_action(observation)
17 |
18 | # RL take action and get next observation and reward
19 | observation_, reward, done = env.step(action)
20 |
21 | RL.store_transition(observation, action, reward, observation_)
22 |
23 | if (step > 200) and (step % 5 == 0):
24 | RL.learn()
25 |
26 | # swap observation
27 | observation = observation_
28 |
29 | # break while loop when end of this episode
30 | if done:
31 | break
32 | step += 1
33 |
34 | # end of game
35 | print('game over')
36 | env.destroy()
37 |
38 |
39 | if __name__ == "__main__":
40 | # maze game
41 | env = Maze()
42 | RL = DeepQNetwork(env.n_actions, env.n_features,
43 | learning_rate=0.01,
44 | reward_decay=0.9,
45 | e_greedy=0.9,
46 | replace_target_iter=200,
47 | memory_size=2000,
48 | # output_graph=True
49 | )
50 | env.after(100, run_maze)
51 | env.mainloop()
52 | RL.plot_cost()
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/6_OpenAI_gym/run_CartPole.py:
--------------------------------------------------------------------------------
1 | """
2 | Deep Q network,
3 |
4 | Using:
5 | Tensorflow: 1.0
6 | gym: 0.7.3
7 | """
8 |
9 |
10 | import gym
11 | from RL_brain import DeepQNetwork
12 |
13 | env = gym.make('CartPole-v0')
14 | env = env.unwrapped
15 |
16 | print(env.action_space)
17 | print(env.observation_space)
18 | print(env.observation_space.high)
19 | print(env.observation_space.low)
20 |
21 | RL = DeepQNetwork(n_actions=env.action_space.n,
22 | n_features=env.observation_space.shape[0],
23 | learning_rate=0.01, e_greedy=0.9,
24 | replace_target_iter=100, memory_size=2000,
25 | e_greedy_increment=0.001,)
26 |
27 | total_steps = 0
28 |
29 |
30 | for i_episode in range(100):
31 |
32 | observation = env.reset()
33 | ep_r = 0
34 | while True:
35 | env.render()
36 |
37 | action = RL.choose_action(observation)
38 |
39 | observation_, reward, done, info = env.step(action)
40 |
41 | # the smaller the pole angle and the closer the cart is to the center, the better
42 | x, x_dot, theta, theta_dot = observation_
43 | r1 = (env.x_threshold - abs(x))/env.x_threshold - 0.8
44 | r2 = (env.theta_threshold_radians - abs(theta))/env.theta_threshold_radians - 0.5
45 | reward = r1 + r2
46 |
47 | RL.store_transition(observation, action, reward, observation_)
48 |
49 | ep_r += reward
50 | if total_steps > 1000:
51 | RL.learn()
52 |
53 | if done:
54 | print('episode: ', i_episode,
55 | 'ep_r: ', round(ep_r, 2),
56 | ' epsilon: ', round(RL.epsilon, 2))
57 | break
58 |
59 | observation = observation_
60 | total_steps += 1
61 |
62 | RL.plot_cost()
63 |
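# Quick numeric check of the shaped reward used in the training loop above, assuming
# CartPole-v0's default thresholds (x_threshold = 2.4, theta_threshold_radians = 12
# degrees expressed in radians).
x_threshold = 2.4
theta_threshold = 12 * 2 * 3.141592653589793 / 360    # ~0.209 rad

def shaped_reward(x, theta):
    r1 = (x_threshold - abs(x)) / x_threshold - 0.8
    r2 = (theta_threshold - abs(theta)) / theta_threshold - 0.5
    return r1 + r2

print(shaped_reward(0.0, 0.0))                         # 0.7: centred cart, upright pole
print(shaped_reward(x_threshold, theta_threshold))     # -1.3: about to terminate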
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/6_OpenAI_gym/run_MountainCar.py:
--------------------------------------------------------------------------------
1 | """
2 | Deep Q network,
3 |
4 | Using:
5 | Tensorflow: 1.0
6 | gym: 0.8.0
7 | """
8 |
9 |
10 | import gym
11 | from RL_brain import DeepQNetwork
12 |
13 | env = gym.make('MountainCar-v0')
14 | env = env.unwrapped
15 |
16 | print(env.action_space)
17 | print(env.observation_space)
18 | print(env.observation_space.high)
19 | print(env.observation_space.low)
20 |
21 | RL = DeepQNetwork(n_actions=3, n_features=2, learning_rate=0.001, e_greedy=0.9,
22 | replace_target_iter=300, memory_size=3000,
23 | e_greedy_increment=0.0002,)
24 |
25 | total_steps = 0
26 |
27 |
28 | for i_episode in range(10):
29 |
30 | observation = env.reset()
31 | ep_r = 0
32 | while True:
33 | env.render()
34 |
35 | action = RL.choose_action(observation)
36 |
37 | observation_, reward, done, info = env.step(action)
38 |
39 | position, velocity = observation_
40 |
41 | # the higher the better
42 | reward = abs(position - (-0.5)) # r in [0, 1]
43 |
44 | RL.store_transition(observation, action, reward, observation_)
45 |
46 | if total_steps > 1000:
47 | RL.learn()
48 |
49 | ep_r += reward
50 | if done:
51 | get = '| Get' if observation_[0] >= env.unwrapped.goal_position else '| ----'
52 | print('Epi: ', i_episode,
53 | get,
54 | '| Ep_r: ', round(ep_r, 4),
55 | '| Epsilon: ', round(RL.epsilon, 2))
56 | break
57 |
58 | observation = observation_
59 | total_steps += 1
60 |
61 | RL.plot_cost()
62 |
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/7_Policy_gradient_softmax/RL_brain.py:
--------------------------------------------------------------------------------
1 | """
2 | This part of code is the reinforcement learning brain, which is a brain of the agent.
3 | All decisions are made in here.
4 |
5 | Policy Gradient, Reinforcement Learning.
6 |
7 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
8 |
9 | Using:
10 | Tensorflow: 1.0
11 | gym: 0.8.0
12 | """
13 |
14 | import numpy as np
15 | import tensorflow as tf
16 |
17 | # reproducible
18 | np.random.seed(1)
19 | tf.set_random_seed(1)
20 |
21 |
22 | class PolicyGradient:
23 | def __init__(
24 | self,
25 | n_actions,
26 | n_features,
27 | learning_rate=0.01,
28 | reward_decay=0.95,
29 | output_graph=False,
30 | ):
31 | self.n_actions = n_actions
32 | self.n_features = n_features
33 | self.lr = learning_rate
34 | self.gamma = reward_decay
35 |
36 | self.ep_obs, self.ep_as, self.ep_rs = [], [], []
37 |
38 | self._build_net()
39 |
40 | self.sess = tf.Session()
41 |
42 | if output_graph:
43 | # $ tensorboard --logdir=logs
44 | # http://0.0.0.0:6006/
45 | # tf.train.SummaryWriter will soon be deprecated; use the following
46 | tf.summary.FileWriter("logs/", self.sess.graph)
47 |
48 | self.sess.run(tf.global_variables_initializer())
49 |
50 | def _build_net(self):
51 | with tf.name_scope('inputs'):
52 | self.tf_obs = tf.placeholder(tf.float32, [None, self.n_features], name="observations")
53 | self.tf_acts = tf.placeholder(tf.int32, [None, ], name="actions_num")
54 | self.tf_vt = tf.placeholder(tf.float32, [None, ], name="actions_value")
55 | # fc1
56 | layer = tf.layers.dense(
57 | inputs=self.tf_obs,
58 | units=10,
59 | activation=tf.nn.tanh, # tanh activation
60 | kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
61 | bias_initializer=tf.constant_initializer(0.1),
62 | name='fc1'
63 | )
64 | # fc2
65 | all_act = tf.layers.dense(
66 | inputs=layer,
67 | units=self.n_actions,
68 | activation=None,
69 | kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
70 | bias_initializer=tf.constant_initializer(0.1),
71 | name='fc2'
72 | )
73 |
74 | self.all_act_prob = tf.nn.softmax(all_act, name='act_prob') # use softmax to convert to probability
75 |
76 | with tf.name_scope('loss'):
77 | # maximizing total reward (log_p * R) is the same as minimizing -(log_p * R), and TF only provides minimize(loss)
78 | neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=all_act, labels=self.tf_acts) # this is negative log of chosen action
79 | # or in this way:
80 | # neg_log_prob = tf.reduce_sum(-tf.log(self.all_act_prob)*tf.one_hot(self.tf_acts, self.n_actions), axis=1)
81 | loss = tf.reduce_mean(neg_log_prob * self.tf_vt) # reward guided loss
82 |
83 | with tf.name_scope('train'):
84 | self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
85 |
86 | def choose_action(self, observation):
87 | prob_weights = self.sess.run(self.all_act_prob, feed_dict={self.tf_obs: observation[np.newaxis, :]})
88 | action = np.random.choice(range(prob_weights.shape[1]), p=prob_weights.ravel()) # sample an action according to the action probabilities
89 | return action
90 |
91 | def store_transition(self, s, a, r):
92 | self.ep_obs.append(s)
93 | self.ep_as.append(a)
94 | self.ep_rs.append(r)
95 |
96 | def learn(self):
97 | # discount and normalize episode reward
98 | discounted_ep_rs_norm = self._discount_and_norm_rewards()
99 |
100 | # train on episode
101 | self.sess.run(self.train_op, feed_dict={
102 | self.tf_obs: np.vstack(self.ep_obs), # shape=[None, n_obs]
103 | self.tf_acts: np.array(self.ep_as), # shape=[None, ]
104 | self.tf_vt: discounted_ep_rs_norm, # shape=[None, ]
105 | })
106 |
107 | self.ep_obs, self.ep_as, self.ep_rs = [], [], [] # empty episode data
108 | return discounted_ep_rs_norm
109 |
110 | def _discount_and_norm_rewards(self):
111 | # discount episode rewards
112 | discounted_ep_rs = np.zeros_like(self.ep_rs)
113 | running_add = 0
114 | for t in reversed(range(0, len(self.ep_rs))):
115 | running_add = running_add * self.gamma + self.ep_rs[t]
116 | discounted_ep_rs[t] = running_add
117 |
118 | # normalize episode rewards
119 | discounted_ep_rs -= np.mean(discounted_ep_rs)
120 | discounted_ep_rs /= np.std(discounted_ep_rs)
121 | return discounted_ep_rs
122 |
123 |
124 |
125 |
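# Worked example of _discount_and_norm_rewards above for a 3-step episode:
# returns are accumulated backwards through the episode, then standardised.
import numpy as np

ep_rs, gamma = [1.0, 1.0, 1.0], 0.99
discounted = np.zeros_like(ep_rs)
running_add = 0.0
for t in reversed(range(len(ep_rs))):
    running_add = running_add * gamma + ep_rs[t]
    discounted[t] = running_add
# discounted == [2.9701, 1.99, 1.0]
normalized = (discounted - discounted.mean()) / discounted.std()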
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/7_Policy_gradient_softmax/run_CartPole.py:
--------------------------------------------------------------------------------
1 | """
2 | Policy Gradient, Reinforcement Learning.
3 |
4 | The cart pole example
5 |
6 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
7 |
8 | Using:
9 | Tensorflow: 1.0
10 | gym: 0.8.0
11 | """
12 |
13 | import gym
14 | from RL_brain import PolicyGradient
15 | import matplotlib.pyplot as plt
16 |
17 | DISPLAY_REWARD_THRESHOLD = 400 # renders environment if total episode reward is greater than this threshold
18 | RENDER = False # rendering wastes time
19 |
20 | env = gym.make('CartPole-v0')
21 | env.seed(1) # reproducible, general Policy gradient has high variance
22 | env = env.unwrapped
23 |
24 | print(env.action_space)
25 | print(env.observation_space)
26 | print(env.observation_space.high)
27 | print(env.observation_space.low)
28 |
29 | RL = PolicyGradient(
30 | n_actions=env.action_space.n,
31 | n_features=env.observation_space.shape[0],
32 | learning_rate=0.02,
33 | reward_decay=0.99,
34 | # output_graph=True,
35 | )
36 |
37 | for i_episode in range(3000):
38 |
39 | observation = env.reset()
40 |
41 | while True:
42 | if RENDER: env.render()
43 |
44 | action = RL.choose_action(observation)
45 |
46 | observation_, reward, done, info = env.step(action)
47 |
48 | RL.store_transition(observation, action, reward)
49 |
50 | if done:
51 | ep_rs_sum = sum(RL.ep_rs)
52 |
53 | if 'running_reward' not in globals():
54 | running_reward = ep_rs_sum
55 | else:
56 | running_reward = running_reward * 0.99 + ep_rs_sum * 0.01
57 | if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True # rendering
58 | print("episode:", i_episode, " reward:", int(running_reward))
59 |
60 | vt = RL.learn()
61 |
62 | if i_episode == 0:
63 | plt.plot(vt) # plot the episode vt
64 | plt.xlabel('episode steps')
65 | plt.ylabel('normalized state-action value')
66 | plt.show()
67 | break
68 |
69 | observation = observation_
70 |
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/7_Policy_gradient_softmax/run_MountainCar.py:
--------------------------------------------------------------------------------
1 | """
2 | Policy Gradient, Reinforcement Learning.
3 |
4 | The mountain car example
5 |
6 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
7 |
8 | Using:
9 | Tensorflow: 1.0
10 | gym: 0.8.0
11 | """
12 |
13 | import gym
14 | from RL_brain import PolicyGradient
15 | import matplotlib.pyplot as plt
16 |
17 | DISPLAY_REWARD_THRESHOLD = -2000 # renders environment if total episode reward is greater than this threshold
18 | # episode: 154 reward: -10667
19 | # episode: 387 reward: -2009
20 | # episode: 489 reward: -1006
21 | # episode: 628 reward: -502
22 |
23 | RENDER = False # rendering wastes time
24 |
25 | env = gym.make('MountainCar-v0')
26 | env.seed(1) # reproducible, general Policy gradient has high variance
27 | env = env.unwrapped
28 |
29 | print(env.action_space)
30 | print(env.observation_space)
31 | print(env.observation_space.high)
32 | print(env.observation_space.low)
33 |
34 | RL = PolicyGradient(
35 | n_actions=env.action_space.n,
36 | n_features=env.observation_space.shape[0],
37 | learning_rate=0.02,
38 | reward_decay=0.995,
39 | # output_graph=True,
40 | )
41 |
42 | for i_episode in range(1000):
43 |
44 | observation = env.reset()
45 |
46 | while True:
47 | if RENDER: env.render()
48 |
49 | action = RL.choose_action(observation)
50 |
51 | observation_, reward, done, info = env.step(action) # reward = -1 in all cases
52 |
53 | RL.store_transition(observation, action, reward)
54 |
55 | if done:
56 | # calculate running reward
57 | ep_rs_sum = sum(RL.ep_rs)
58 | if 'running_reward' not in globals():
59 | running_reward = ep_rs_sum
60 | else:
61 | running_reward = running_reward * 0.99 + ep_rs_sum * 0.01
62 | if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True # rendering
63 |
64 | print("episode:", i_episode, " reward:", int(running_reward))
65 |
66 | vt = RL.learn() # train
67 |
68 | if i_episode == 30:
69 | plt.plot(vt) # plot the episode vt
70 | plt.xlabel('episode steps')
71 | plt.ylabel('normalized state-action value')
72 | plt.show()
73 |
74 | break
75 |
76 | observation = observation_
77 |
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/8_Actor_Critic_Advantage/AC_CartPole.py:
--------------------------------------------------------------------------------
1 | """
2 | Actor-Critic using TD-error as the Advantage, Reinforcement Learning.
3 |
4 | The cart pole example. The policy oscillates.
5 |
6 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
7 |
8 | Using:
9 | tensorflow 1.0
10 | gym 0.8.0
11 | """
12 |
13 | import numpy as np
14 | import tensorflow as tf
15 | import gym
16 |
17 | np.random.seed(2)
18 | tf.set_random_seed(2) # reproducible
19 |
20 | # Hyperparameters
21 | OUTPUT_GRAPH = False
22 | MAX_EPISODE = 3000
23 | DISPLAY_REWARD_THRESHOLD = 200 # renders environment if total episode reward is greater than this threshold
24 | MAX_EP_STEPS = 1000 # maximum time step in one episode
25 | RENDER = False # rendering wastes time
26 | GAMMA = 0.9 # reward discount in TD error
27 | LR_A = 0.001 # learning rate for actor
28 | LR_C = 0.01 # learning rate for critic
29 |
30 | env = gym.make('CartPole-v0')
31 | env.seed(1) # reproducible
32 | env = env.unwrapped
33 |
34 | N_F = env.observation_space.shape[0]
35 | N_A = env.action_space.n
36 |
37 |
38 | class Actor(object):
39 | def __init__(self, sess, n_features, n_actions, lr=0.001):
40 | self.sess = sess
41 |
42 | self.s = tf.placeholder(tf.float32, [1, n_features], "state")
43 | self.a = tf.placeholder(tf.int32, None, "act")
44 | self.td_error = tf.placeholder(tf.float32, None, "td_error") # TD_error
45 |
46 | with tf.variable_scope('Actor'):
47 | l1 = tf.layers.dense(
48 | inputs=self.s,
49 | units=20, # number of hidden units
50 | activation=tf.nn.relu,
51 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights
52 | bias_initializer=tf.constant_initializer(0.1), # biases
53 | name='l1'
54 | )
55 |
56 | self.acts_prob = tf.layers.dense(
57 | inputs=l1,
58 | units=n_actions, # output units
59 | activation=tf.nn.softmax, # get action probabilities
60 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights
61 | bias_initializer=tf.constant_initializer(0.1), # biases
62 | name='acts_prob'
63 | )
64 |
65 | with tf.variable_scope('exp_v'):
66 | log_prob = tf.log(self.acts_prob[0, self.a])
67 | self.exp_v = tf.reduce_mean(log_prob * self.td_error) # advantage (TD_error) guided loss
68 |
69 | with tf.variable_scope('train'):
70 | self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v) # minimize(-exp_v) = maximize(exp_v)
71 |
72 | def learn(self, s, a, td):
73 | s = s[np.newaxis, :]
74 | feed_dict = {self.s: s, self.a: a, self.td_error: td}
75 | _, exp_v = self.sess.run([self.train_op, self.exp_v], feed_dict)
76 | return exp_v
77 |
78 | def choose_action(self, s):
79 | s = s[np.newaxis, :]
80 | probs = self.sess.run(self.acts_prob, {self.s: s}) # get probabilities for all actions
81 | return np.random.choice(np.arange(probs.shape[1]), p=probs.ravel()) # return an int
82 |
83 |
84 | class Critic(object):
85 | def __init__(self, sess, n_features, lr=0.01):
86 | self.sess = sess
87 |
88 | self.s = tf.placeholder(tf.float32, [1, n_features], "state")
89 | self.v_ = tf.placeholder(tf.float32, [1, 1], "v_next")
90 | self.r = tf.placeholder(tf.float32, None, 'r')
91 |
92 | with tf.variable_scope('Critic'):
93 | l1 = tf.layers.dense(
94 | inputs=self.s,
95 | units=20, # number of hidden units
96 | activation=tf.nn.relu, # None
97 | # in theory this should be linear to guarantee the actor's convergence,
98 | # but a linear approximator hardly seems to learn the correct Q.
99 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights
100 | bias_initializer=tf.constant_initializer(0.1), # biases
101 | name='l1'
102 | )
103 |
104 | self.v = tf.layers.dense(
105 | inputs=l1,
106 | units=1, # output units
107 | activation=None,
108 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights
109 | bias_initializer=tf.constant_initializer(0.1), # biases
110 | name='V'
111 | )
112 |
113 | with tf.variable_scope('squared_TD_error'):
114 | self.td_error = self.r + GAMMA * self.v_ - self.v
115 | self.loss = tf.square(self.td_error) # TD_error = (r+gamma*V_next) - V_eval
116 | with tf.variable_scope('train'):
117 | self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss)
118 |
119 | def learn(self, s, r, s_):
120 | s, s_ = s[np.newaxis, :], s_[np.newaxis, :]
121 |
122 | v_ = self.sess.run(self.v, {self.s: s_})
123 | td_error, _ = self.sess.run([self.td_error, self.train_op],
124 | {self.s: s, self.v_: v_, self.r: r})
125 | return td_error
126 |
127 |
128 | sess = tf.Session()
129 |
130 | actor = Actor(sess, n_features=N_F, n_actions=N_A, lr=LR_A)
131 | critic = Critic(sess, n_features=N_F, lr=LR_C) # we need a good teacher, so the teacher should learn faster than the actor
132 |
133 | sess.run(tf.global_variables_initializer())
134 |
135 | if OUTPUT_GRAPH:
136 | tf.summary.FileWriter("logs/", sess.graph)
137 |
138 | for i_episode in range(MAX_EPISODE):
139 | s = env.reset()
140 | t = 0
141 | track_r = []
142 | while True:
143 | if RENDER: env.render()
144 |
145 | a = actor.choose_action(s)
146 |
147 | s_, r, done, info = env.step(a)
148 |
149 | if done: r = -20
150 |
151 | track_r.append(r)
152 |
153 | td_error = critic.learn(s, r, s_) # gradient = grad[r + gamma * V(s_) - V(s)]
154 | actor.learn(s, a, td_error) # true_gradient = grad[logPi(s,a) * td_error]
155 |
156 | s = s_
157 | t += 1
158 |
159 | if done or t >= MAX_EP_STEPS:
160 | ep_rs_sum = sum(track_r)
161 |
162 | if 'running_reward' not in globals():
163 | running_reward = ep_rs_sum
164 | else:
165 | running_reward = running_reward * 0.95 + ep_rs_sum * 0.05
166 | if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True # rendering
167 | print("episode:", i_episode, " reward:", int(running_reward))
168 | break
169 |
170 |
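# Schematic sketch of the actor/critic coupling in the loop above, with scalars
# standing in for the network outputs (values are made up).
gamma, r = 0.9, 1.0
V_s, V_s_ = 2.0, 2.5          # critic's value estimates for s and s_

# Critic: regress V(s) towards r + gamma * V(s_); the residual is the TD error.
td_error = r + gamma * V_s_ - V_s            # 1.25
# Actor: use that TD error as the advantage, i.e. maximize log(pi(a|s)) * td_error
# (the code above minimizes its negative with Adam).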
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/8_Actor_Critic_Advantage/AC_continue_Pendulum.py:
--------------------------------------------------------------------------------
1 | """
2 | Actor-Critic with continuous action using TD-error as the Advantage, Reinforcement Learning.
3 |
4 | The Pendulum example (based on https://github.com/dennybritz/reinforcement-learning/blob/master/PolicyGradient/Continuous%20MountainCar%20Actor%20Critic%20Solution.ipynb)
5 |
6 | Does not converge reliably; the policy oscillates.
7 |
8 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
9 |
10 | Using:
11 | tensorflow r1.3
12 | gym 0.8.0
13 | """
14 |
15 | import tensorflow as tf
16 | import numpy as np
17 | import gym
18 |
19 | np.random.seed(2)
20 | tf.set_random_seed(2) # reproducible
21 |
22 |
23 | class Actor(object):
24 | def __init__(self, sess, n_features, action_bound, lr=0.0001):
25 | self.sess = sess
26 |
27 | self.s = tf.placeholder(tf.float32, [1, n_features], "state")
28 | self.a = tf.placeholder(tf.float32, None, name="act")
29 | self.td_error = tf.placeholder(tf.float32, None, name="td_error") # TD_error
30 |
31 | l1 = tf.layers.dense(
32 | inputs=self.s,
33 | units=30, # number of hidden units
34 | activation=tf.nn.relu,
35 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights
36 | bias_initializer=tf.constant_initializer(0.1), # biases
37 | name='l1'
38 | )
39 |
40 | mu = tf.layers.dense(
41 | inputs=l1,
42 | units=1, # number of hidden units
43 | activation=tf.nn.tanh,
44 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights
45 | bias_initializer=tf.constant_initializer(0.1), # biases
46 | name='mu'
47 | )
48 |
49 | sigma = tf.layers.dense(
50 | inputs=l1,
51 | units=1, # output units
52 | activation=tf.nn.softplus, # get action probabilities
53 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights
54 | bias_initializer=tf.constant_initializer(1.), # biases
55 | name='sigma'
56 | )
57 | global_step = tf.Variable(0, trainable=False)
58 | # self.e = epsilon = tf.train.exponential_decay(2., global_step, 1000, 0.9)
59 | self.mu, self.sigma = tf.squeeze(mu*2), tf.squeeze(sigma+0.1)
60 | self.normal_dist = tf.distributions.Normal(self.mu, self.sigma)
61 |
62 | self.action = tf.clip_by_value(self.normal_dist.sample(1), action_bound[0], action_bound[1])
63 |
64 | with tf.name_scope('exp_v'):
65 | log_prob = self.normal_dist.log_prob(self.a) # loss without advantage
66 | self.exp_v = log_prob * self.td_error # advantage (TD_error) guided loss
67 | # Add cross entropy cost to encourage exploration
68 | self.exp_v += 0.01*self.normal_dist.entropy()
69 |
70 | with tf.name_scope('train'):
71 | self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v, global_step) # minimize(-exp_v) = maximize(exp_v)
72 |
73 | def learn(self, s, a, td):
74 | s = s[np.newaxis, :]
75 | feed_dict = {self.s: s, self.a: a, self.td_error: td}
76 | _, exp_v = self.sess.run([self.train_op, self.exp_v], feed_dict)
77 | return exp_v
78 |
79 | def choose_action(self, s):
80 | s = s[np.newaxis, :]
81 | return self.sess.run(self.action, {self.s: s}) # get probabilities for all actions
82 |
83 |
84 | class Critic(object):
85 | def __init__(self, sess, n_features, lr=0.01):
86 | self.sess = sess
87 | with tf.name_scope('inputs'):
88 | self.s = tf.placeholder(tf.float32, [1, n_features], "state")
89 | self.v_ = tf.placeholder(tf.float32, [1, 1], name="v_next")
90 | self.r = tf.placeholder(tf.float32, name='r')
91 |
92 | with tf.variable_scope('Critic'):
93 | l1 = tf.layers.dense(
94 | inputs=self.s,
95 | units=30, # number of hidden units
96 | activation=tf.nn.relu,
97 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights
98 | bias_initializer=tf.constant_initializer(0.1), # biases
99 | name='l1'
100 | )
101 |
102 | self.v = tf.layers.dense(
103 | inputs=l1,
104 | units=1, # output units
105 | activation=None,
106 | kernel_initializer=tf.random_normal_initializer(0., .1), # weights
107 | bias_initializer=tf.constant_initializer(0.1), # biases
108 | name='V'
109 | )
110 |
111 | with tf.variable_scope('squared_TD_error'):
112 | self.td_error = tf.reduce_mean(self.r + GAMMA * self.v_ - self.v)
113 | self.loss = tf.square(self.td_error) # TD_error = (r+gamma*V_next) - V_eval
114 | with tf.variable_scope('train'):
115 | self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss)
116 |
117 | def learn(self, s, r, s_):
118 | s, s_ = s[np.newaxis, :], s_[np.newaxis, :]
119 |
120 | v_ = self.sess.run(self.v, {self.s: s_})
121 | td_error, _ = self.sess.run([self.td_error, self.train_op],
122 | {self.s: s, self.v_: v_, self.r: r})
123 | return td_error
124 |
125 |
126 | OUTPUT_GRAPH = False
127 | MAX_EPISODE = 1000
128 | MAX_EP_STEPS = 200
129 | DISPLAY_REWARD_THRESHOLD = -100 # renders environment if total episode reward is greater than this threshold
130 | RENDER = False # rendering wastes time
131 | GAMMA = 0.9
132 | LR_A = 0.001 # learning rate for actor
133 | LR_C = 0.01 # learning rate for critic
134 |
135 | env = gym.make('Pendulum-v0')
136 | env.seed(1) # reproducible
137 | env = env.unwrapped
138 |
139 | N_S = env.observation_space.shape[0]
140 | A_BOUND = env.action_space.high
141 |
142 | sess = tf.Session()
143 |
144 | actor = Actor(sess, n_features=N_S, lr=LR_A, action_bound=[-A_BOUND, A_BOUND])
145 | critic = Critic(sess, n_features=N_S, lr=LR_C)
146 |
147 | sess.run(tf.global_variables_initializer())
148 |
149 | if OUTPUT_GRAPH:
150 | tf.summary.FileWriter("logs/", sess.graph)
151 |
152 | for i_episode in range(MAX_EPISODE):
153 | s = env.reset()
154 | t = 0
155 | ep_rs = []
156 | while True:
157 | # if RENDER:
158 | env.render()
159 | a = actor.choose_action(s)
160 |
161 | s_, r, done, info = env.step(a)
162 | r /= 10
163 |
164 | td_error = critic.learn(s, r, s_) # gradient = grad[r + gamma * V(s_) - V(s)]
165 | actor.learn(s, a, td_error) # true_gradient = grad[logPi(s,a) * td_error]
166 |
167 | s = s_
168 | t += 1
169 | ep_rs.append(r)
170 | if t > MAX_EP_STEPS:
171 | ep_rs_sum = sum(ep_rs)
172 | if 'running_reward' not in globals():
173 | running_reward = ep_rs_sum
174 | else:
175 | running_reward = running_reward * 0.9 + ep_rs_sum * 0.1
176 | if running_reward > DISPLAY_REWARD_THRESHOLD: RENDER = True # rendering
177 | print("episode:", i_episode, " reward:", int(running_reward))
178 | break
179 |
180 |
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/9_Deep_Deterministic_Policy_Gradient_DDPG/DDPG_update.py:
--------------------------------------------------------------------------------
1 | """
2 | Deep Deterministic Policy Gradient (DDPG), Reinforcement Learning.
3 | DDPG is an Actor-Critic based algorithm.
4 | Pendulum example.
5 |
6 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
7 |
8 | Using:
9 | tensorflow 1.0
10 | gym 0.8.0
11 | """
12 |
13 | import tensorflow as tf
14 | import numpy as np
15 | import gym
16 | import time
17 |
18 |
19 | ##################### hyper parameters ####################
20 |
21 | MAX_EPISODES = 200
22 | MAX_EP_STEPS = 200
23 | LR_A = 0.001 # learning rate for actor
24 | LR_C = 0.002 # learning rate for critic
25 | GAMMA = 0.9 # reward discount
26 | TAU = 0.01 # soft replacement
27 | MEMORY_CAPACITY = 10000
28 | BATCH_SIZE = 32
29 |
30 | RENDER = False
31 | ENV_NAME = 'Pendulum-v0'
32 |
33 | ############################### DDPG ####################################
34 |
35 | class DDPG(object):
36 | def __init__(self, a_dim, s_dim, a_bound,):
37 | self.memory = np.zeros((MEMORY_CAPACITY, s_dim * 2 + a_dim + 1), dtype=np.float32)
38 | self.pointer = 0
39 | self.sess = tf.Session()
40 |
41 | self.a_dim, self.s_dim, self.a_bound = a_dim, s_dim, a_bound,
42 | self.S = tf.placeholder(tf.float32, [None, s_dim], 's')
43 | self.S_ = tf.placeholder(tf.float32, [None, s_dim], 's_')
44 | self.R = tf.placeholder(tf.float32, [None, 1], 'r')
45 |
46 | with tf.variable_scope('Actor'):
47 | self.a = self._build_a(self.S, scope='eval', trainable=True)
48 | a_ = self._build_a(self.S_, scope='target', trainable=False)
49 | with tf.variable_scope('Critic'):
50 | # when calculating q for the td_error, self.a is fed with the actions stored in memory;
51 | # when updating the Actor, self.a comes from the Actor network itself
52 | q = self._build_c(self.S, self.a, scope='eval', trainable=True)
53 | q_ = self._build_c(self.S_, a_, scope='target', trainable=False)
54 |
55 | # networks parameters
56 | self.ae_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/eval')
57 | self.at_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/target')
58 | self.ce_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/eval')
59 | self.ct_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/target')
60 |
61 | # target net replacement
62 | self.soft_replace = [tf.assign(t, (1 - TAU) * t + TAU * e)
63 | for t, e in zip(self.at_params + self.ct_params, self.ae_params + self.ce_params)]
64 |
65 | q_target = self.R + GAMMA * q_
66 | # in the feed_dict for the td_error, self.a should be replaced with the actions stored in memory
67 | td_error = tf.losses.mean_squared_error(labels=q_target, predictions=q)
68 | self.ctrain = tf.train.AdamOptimizer(LR_C).minimize(td_error, var_list=self.ce_params)
69 |
70 | a_loss = - tf.reduce_mean(q) # maximize the q
71 | self.atrain = tf.train.AdamOptimizer(LR_A).minimize(a_loss, var_list=self.ae_params)
72 |
73 | self.sess.run(tf.global_variables_initializer())
74 |
75 | def choose_action(self, s):
76 | return self.sess.run(self.a, {self.S: s[np.newaxis, :]})[0]
77 |
78 | def learn(self):
79 | # soft target replacement
80 | self.sess.run(self.soft_replace)
81 |
82 | indices = np.random.choice(MEMORY_CAPACITY, size=BATCH_SIZE)
83 | bt = self.memory[indices, :]
84 | bs = bt[:, :self.s_dim]
85 | ba = bt[:, self.s_dim: self.s_dim + self.a_dim]
86 | br = bt[:, -self.s_dim - 1: -self.s_dim]
87 | bs_ = bt[:, -self.s_dim:]
88 |
89 | self.sess.run(self.atrain, {self.S: bs})
90 | self.sess.run(self.ctrain, {self.S: bs, self.a: ba, self.R: br, self.S_: bs_})
91 |
92 | def store_transition(self, s, a, r, s_):
93 | transition = np.hstack((s, a, [r], s_))
94 | index = self.pointer % MEMORY_CAPACITY # replace the old memory with new memory
95 | self.memory[index, :] = transition
96 | self.pointer += 1
97 |
98 | def _build_a(self, s, scope, trainable):
99 | with tf.variable_scope(scope):
100 | net = tf.layers.dense(s, 30, activation=tf.nn.relu, name='l1', trainable=trainable)
101 | a = tf.layers.dense(net, self.a_dim, activation=tf.nn.tanh, name='a', trainable=trainable)
102 | return tf.multiply(a, self.a_bound, name='scaled_a')
103 |
104 | def _build_c(self, s, a, scope, trainable):
105 | with tf.variable_scope(scope):
106 | n_l1 = 30
107 | w1_s = tf.get_variable('w1_s', [self.s_dim, n_l1], trainable=trainable)
108 | w1_a = tf.get_variable('w1_a', [self.a_dim, n_l1], trainable=trainable)
109 | b1 = tf.get_variable('b1', [1, n_l1], trainable=trainable)
110 | net = tf.nn.relu(tf.matmul(s, w1_s) + tf.matmul(a, w1_a) + b1)
111 | return tf.layers.dense(net, 1, trainable=trainable) # Q(s,a)
112 |
113 | ############################### training ####################################
114 |
115 | env = gym.make(ENV_NAME)
116 | env = env.unwrapped
117 | env.seed(1)
118 |
119 | s_dim = env.observation_space.shape[0]
120 | a_dim = env.action_space.shape[0]
121 | a_bound = env.action_space.high
122 |
123 | ddpg = DDPG(a_dim, s_dim, a_bound)
124 |
125 | var = 3 # control exploration
126 | t1 = time.time()
127 | for i in range(MAX_EPISODES):
128 | s = env.reset()
129 | ep_reward = 0
130 | for j in range(MAX_EP_STEPS):
131 | if RENDER:
132 | env.render()
133 |
134 | # Add exploration noise
135 | a = ddpg.choose_action(s)
136 | a = np.clip(np.random.normal(a, var), -2, 2) # add randomness to action selection for exploration
137 | s_, r, done, info = env.step(a)
138 |
139 | ddpg.store_transition(s, a, r / 10, s_)
140 |
141 | if ddpg.pointer > MEMORY_CAPACITY:
142 | var *= .9995 # decay the action randomness
143 | ddpg.learn()
144 |
145 | s = s_
146 | ep_reward += r
147 | if j == MAX_EP_STEPS-1:
148 | print('Episode:', i, ' Reward: %i' % int(ep_reward), 'Explore: %.2f' % var, )
149 | # if ep_reward > -300:RENDER = True
150 | break
151 | print('Running time: ', time.time() - t1)
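# Equivalent NumPy sketch of the soft_replace op above: each learning step every target
# parameter moves a fraction TAU towards its eval-network counterpart (values made up).
import numpy as np

TAU = 0.01
t = np.array([0.5, -1.0])     # target-network parameter
e = np.array([0.8, -0.4])     # eval-network parameter
t = (1 - TAU) * t + TAU * e   # -> [0.503, -0.994]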
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/9_Deep_Deterministic_Policy_Gradient_DDPG/DDPG_update2.py:
--------------------------------------------------------------------------------
1 | """
2 | Note: This is an updated version of my previous code.
3 | For the target network, I use a moving average to softly replace the target parameters instead of using the assign function.
4 | This gives roughly a 20% speed-up on my machine (CPU).
5 |
6 | Deep Deterministic Policy Gradient (DDPG), Reinforcement Learning.
7 | DDPG is an Actor-Critic based algorithm.
8 | Pendulum example.
9 |
10 | View more on my tutorial page: https://morvanzhou.github.io/tutorials/
11 |
12 | Using:
13 | tensorflow 1.0
14 | gym 0.8.0
15 | """
16 |
17 | import tensorflow as tf
18 | import numpy as np
19 | import gym
20 | import time
21 |
22 |
23 | ##################### hyper parameters ####################
24 |
25 | MAX_EPISODES = 200
26 | MAX_EP_STEPS = 200
27 | LR_A = 0.001 # learning rate for actor
28 | LR_C = 0.002 # learning rate for critic
29 | GAMMA = 0.9 # reward discount
30 | TAU = 0.01 # soft replacement
31 | MEMORY_CAPACITY = 10000
32 | BATCH_SIZE = 32
33 |
34 | RENDER = False
35 | ENV_NAME = 'Pendulum-v0'
36 |
37 |
38 | ############################### DDPG ####################################
39 |
40 |
41 | class DDPG(object):
42 | def __init__(self, a_dim, s_dim, a_bound,):
43 | self.memory = np.zeros((MEMORY_CAPACITY, s_dim * 2 + a_dim + 1), dtype=np.float32)
44 | self.pointer = 0
45 | self.sess = tf.Session()
46 |
47 | self.a_dim, self.s_dim, self.a_bound = a_dim, s_dim, a_bound,
48 | self.S = tf.placeholder(tf.float32, [None, s_dim], 's')
49 | self.S_ = tf.placeholder(tf.float32, [None, s_dim], 's_')
50 | self.R = tf.placeholder(tf.float32, [None, 1], 'r')
51 |
52 | self.a = self._build_a(self.S,)
53 | q = self._build_c(self.S, self.a, )
54 | a_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Actor')
55 | c_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Critic')
56 | ema = tf.train.ExponentialMovingAverage(decay=1 - TAU) # soft replacement
57 |
58 | def ema_getter(getter, name, *args, **kwargs):
59 | return ema.average(getter(name, *args, **kwargs))
60 |
61 | target_update = [ema.apply(a_params), ema.apply(c_params)] # soft update operation
62 | a_ = self._build_a(self.S_, reuse=True, custom_getter=ema_getter) # replaced target parameters
63 | q_ = self._build_c(self.S_, a_, reuse=True, custom_getter=ema_getter)
64 |
65 | a_loss = - tf.reduce_mean(q) # maximize the q
66 | self.atrain = tf.train.AdamOptimizer(LR_A).minimize(a_loss, var_list=a_params)
67 |
68 | with tf.control_dependencies(target_update): # soft replacement happens here
69 | q_target = self.R + GAMMA * q_
70 | td_error = tf.losses.mean_squared_error(labels=q_target, predictions=q)
71 | self.ctrain = tf.train.AdamOptimizer(LR_C).minimize(td_error, var_list=c_params)
72 |
73 | self.sess.run(tf.global_variables_initializer())
74 |
75 | def choose_action(self, s):
76 | return self.sess.run(self.a, {self.S: s[np.newaxis, :]})[0]
77 |
78 | def learn(self):
79 | indices = np.random.choice(MEMORY_CAPACITY, size=BATCH_SIZE)
80 | bt = self.memory[indices, :]
81 | bs = bt[:, :self.s_dim]
82 | ba = bt[:, self.s_dim: self.s_dim + self.a_dim]
83 | br = bt[:, -self.s_dim - 1: -self.s_dim]
84 | bs_ = bt[:, -self.s_dim:]
85 |
86 | self.sess.run(self.atrain, {self.S: bs})
87 | self.sess.run(self.ctrain, {self.S: bs, self.a: ba, self.R: br, self.S_: bs_})
88 |
89 | def store_transition(self, s, a, r, s_):
90 | transition = np.hstack((s, a, [r], s_))
91 | index = self.pointer % MEMORY_CAPACITY # replace the old memory with new memory
92 | self.memory[index, :] = transition
93 | self.pointer += 1
94 |
95 | def _build_a(self, s, reuse=None, custom_getter=None):
96 | trainable = True if reuse is None else False
97 | with tf.variable_scope('Actor', reuse=reuse, custom_getter=custom_getter):
98 | net = tf.layers.dense(s, 30, activation=tf.nn.relu, name='l1', trainable=trainable)
99 | a = tf.layers.dense(net, self.a_dim, activation=tf.nn.tanh, name='a', trainable=trainable)
100 | return tf.multiply(a, self.a_bound, name='scaled_a')
101 |
102 | def _build_c(self, s, a, reuse=None, custom_getter=None):
103 | trainable = True if reuse is None else False
104 | with tf.variable_scope('Critic', reuse=reuse, custom_getter=custom_getter):
105 | n_l1 = 30
106 | w1_s = tf.get_variable('w1_s', [self.s_dim, n_l1], trainable=trainable)
107 | w1_a = tf.get_variable('w1_a', [self.a_dim, n_l1], trainable=trainable)
108 | b1 = tf.get_variable('b1', [1, n_l1], trainable=trainable)
109 | net = tf.nn.relu(tf.matmul(s, w1_s) + tf.matmul(a, w1_a) + b1)
110 | return tf.layers.dense(net, 1, trainable=trainable) # Q(s,a)
111 |
112 |
113 | ############################### training ####################################
114 |
115 | env = gym.make(ENV_NAME)
116 | env = env.unwrapped
117 | env.seed(1)
118 |
119 | s_dim = env.observation_space.shape[0]
120 | a_dim = env.action_space.shape[0]
121 | a_bound = env.action_space.high
122 |
123 | ddpg = DDPG(a_dim, s_dim, a_bound)
124 |
125 | var = 3 # control exploration
126 | t1 = time.time()
127 | for i in range(MAX_EPISODES):
128 | s = env.reset()
129 | ep_reward = 0
130 | for j in range(MAX_EP_STEPS):
131 | if RENDER:
132 | env.render()
133 |
134 | # Add exploration noise
135 | a = ddpg.choose_action(s)
136 | a = np.clip(np.random.normal(a, var), -2, 2) # add randomness to action selection for exploration
137 | s_, r, done, info = env.step(a)
138 |
139 | ddpg.store_transition(s, a, r / 10, s_)
140 |
141 | if ddpg.pointer > MEMORY_CAPACITY:
142 | var *= .9995 # decay the action randomness
143 | ddpg.learn()
144 |
145 | s = s_
146 | ep_reward += r
147 | if j == MAX_EP_STEPS-1:
148 | print('Episode:', i, ' Reward: %i' % int(ep_reward), 'Explore: %.2f' % var, )
149 | # if ep_reward > -300:RENDER = True
150 | break
151 |
152 | print('Running time: ', time.time() - t1)
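The target networks above never get an explicit assign op: wrapping `a_params` and `c_params` in `tf.train.ExponentialMovingAverage(decay=1 - TAU)` and reading them back through `ema_getter` amounts to the standard DDPG soft update

$$\theta' \leftarrow (1-\tau)\,\theta' + \tau\,\theta, \qquad \tau = \text{TAU},$$

and the `tf.control_dependencies(target_update)` block simply forces this shadow-copy update to run together with every critic training step.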
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/Curiosity_Model/Curiosity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wqw547243068/wangqiwen/07b64ae47d91581e1c339f40bc765fd7815b47ff/rl/Reinforcement-learning-with-tensorflow/contents/Curiosity_Model/Curiosity.png
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/Curiosity_Model/Curiosity.py:
--------------------------------------------------------------------------------
1 | """This is a simple implementation of [Large-Scale Study of Curiosity-Driven Learning](https://arxiv.org/abs/1808.04355)"""
2 |
3 | import numpy as np
4 | import tensorflow as tf
5 | import gym
6 | import matplotlib.pyplot as plt
7 |
8 |
9 | class CuriosityNet:
10 | def __init__(
11 | self,
12 | n_a,
13 | n_s,
14 | lr=0.01,
15 | gamma=0.98,
16 | epsilon=0.95,
17 | replace_target_iter=300,
18 | memory_size=10000,
19 | batch_size=128,
20 | output_graph=False,
21 | ):
22 | self.n_a = n_a
23 | self.n_s = n_s
24 | self.lr = lr
25 | self.gamma = gamma
26 | self.epsilon = epsilon
27 | self.replace_target_iter = replace_target_iter
28 | self.memory_size = memory_size
29 | self.batch_size = batch_size
30 |
31 | # total learning step
32 | self.learn_step_counter = 0
33 | self.memory_counter = 0
34 |
35 | # initialize zero memory [s, a, r, s_]
36 | self.memory = np.zeros((self.memory_size, n_s * 2 + 2))
37 | self.tfs, self.tfa, self.tfr, self.tfs_, self.dyn_train, self.dqn_train, self.q, self.int_r = \
38 | self._build_nets()
39 |
40 | t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_net')
41 | e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='eval_net')
42 |
43 | with tf.variable_scope('hard_replacement'):
44 | self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
45 |
46 | self.sess = tf.Session()
47 |
48 | if output_graph:
49 | tf.summary.FileWriter("logs/", self.sess.graph)
50 |
51 | self.sess.run(tf.global_variables_initializer())
52 |
53 | def _build_nets(self):
54 | tfs = tf.placeholder(tf.float32, [None, self.n_s], name="s") # input State
55 | tfa = tf.placeholder(tf.int32, [None, ], name="a") # input Action
56 | tfr = tf.placeholder(tf.float32, [None, ], name="ext_r") # extrinsic reward
57 | tfs_ = tf.placeholder(tf.float32, [None, self.n_s], name="s_") # input Next State
58 |
59 | # dynamics net
60 | dyn_s_, curiosity, dyn_train = self._build_dynamics_net(tfs, tfa, tfs_)
61 |
62 | # normal RL model
63 | total_reward = tf.add(curiosity, tfr, name="total_r")
64 | q, dqn_loss, dqn_train = self._build_dqn(tfs, tfa, total_reward, tfs_)
65 | return tfs, tfa, tfr, tfs_, dyn_train, dqn_train, q, curiosity
66 |
67 | def _build_dynamics_net(self, s, a, s_):
68 | with tf.variable_scope("dyn_net"):
69 | float_a = tf.expand_dims(tf.cast(a, dtype=tf.float32, name="float_a"), axis=1, name="2d_a")
70 | sa = tf.concat((s, float_a), axis=1, name="sa")
71 | encoded_s_ = s_ # here we use s_ as the encoded s_
72 |
73 | dyn_l = tf.layers.dense(sa, 32, activation=tf.nn.relu)
74 | dyn_s_ = tf.layers.dense(dyn_l, self.n_s) # predicted s_
75 | with tf.name_scope("int_r"):
76 | squared_diff = tf.reduce_sum(tf.square(encoded_s_ - dyn_s_), axis=1) # intrinsic reward
77 |
78 | # It is better to reduce the learning rate in order to stay curious
79 | train_op = tf.train.RMSPropOptimizer(self.lr, name="dyn_opt").minimize(tf.reduce_mean(squared_diff))
80 | return dyn_s_, squared_diff, train_op
81 |
82 | def _build_dqn(self, s, a, r, s_):
83 | with tf.variable_scope('eval_net'):
84 | e1 = tf.layers.dense(s, 128, tf.nn.relu)
85 | q = tf.layers.dense(e1, self.n_a, name="q")
86 | with tf.variable_scope('target_net'):
87 | t1 = tf.layers.dense(s_, 128, tf.nn.relu)
88 | q_ = tf.layers.dense(t1, self.n_a, name="q_")
89 |
90 | with tf.variable_scope('q_target'):
91 | q_target = r + self.gamma * tf.reduce_max(q_, axis=1, name="Qmax_s_")
92 |
93 | with tf.variable_scope('q_wrt_a'):
94 | a_indices = tf.stack([tf.range(tf.shape(a)[0], dtype=tf.int32), a], axis=1)
95 | q_wrt_a = tf.gather_nd(params=q, indices=a_indices)
96 |
97 | loss = tf.losses.mean_squared_error(labels=q_target, predictions=q_wrt_a) # TD error
98 | train_op = tf.train.RMSPropOptimizer(self.lr, name="dqn_opt").minimize(
99 | loss, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "eval_net"))
100 | return q, loss, train_op
101 |
102 | def store_transition(self, s, a, r, s_):
103 | transition = np.hstack((s, [a, r], s_))
104 | # replace the old memory with new memory
105 | index = self.memory_counter % self.memory_size
106 | self.memory[index, :] = transition
107 | self.memory_counter += 1
108 |
109 | def choose_action(self, observation):
110 | # to have batch dimension when feed into tf placeholder
111 | s = observation[np.newaxis, :]
112 |
113 | if np.random.uniform() < self.epsilon:
114 | # forward feed the observation and get q value for every actions
115 | actions_value = self.sess.run(self.q, feed_dict={self.tfs: s})
116 | action = np.argmax(actions_value)
117 | else:
118 | action = np.random.randint(0, self.n_a)
119 | return action
120 |
121 | def learn(self):
122 | # check to replace target parameters
123 | if self.learn_step_counter % self.replace_target_iter == 0:
124 | self.sess.run(self.target_replace_op)
125 |
126 | # sample batch memory from all memory
127 | top = self.memory_size if self.memory_counter > self.memory_size else self.memory_counter
128 | sample_index = np.random.choice(top, size=self.batch_size)
129 | batch_memory = self.memory[sample_index, :]
130 |
131 | bs, ba, br, bs_ = batch_memory[:, :self.n_s], batch_memory[:, self.n_s], \
132 | batch_memory[:, self.n_s + 1], batch_memory[:, -self.n_s:]
133 | self.sess.run(self.dqn_train, feed_dict={self.tfs: bs, self.tfa: ba, self.tfr: br, self.tfs_: bs_})
134 | if self.learn_step_counter % 1000 == 0: # delay training in order to stay curious
135 | self.sess.run(self.dyn_train, feed_dict={self.tfs: bs, self.tfa: ba, self.tfs_: bs_})
136 | self.learn_step_counter += 1
137 |
138 |
139 | env = gym.make('MountainCar-v0')
140 | env = env.unwrapped
141 |
142 | dqn = CuriosityNet(n_a=3, n_s=2, lr=0.01, output_graph=False)
143 | ep_steps = []
144 | for epi in range(200):
145 | s = env.reset()
146 | steps = 0
147 | while True:
148 | env.render()
149 | a = dqn.choose_action(s)
150 | s_, r, done, info = env.step(a)
151 | dqn.store_transition(s, a, r, s_)
152 | dqn.learn()
153 | if done:
154 | print('Epi: ', epi, "| steps: ", steps)
155 | ep_steps.append(steps)
156 | break
157 | s = s_
158 | steps += 1
159 |
160 | plt.plot(ep_steps)
161 | plt.ylabel("steps")
162 | plt.xlabel("episode")
163 | plt.show()
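In the dynamics-based curiosity model above, the intrinsic reward is the forward model's prediction error on the (un-encoded) next state, and it is added to the environment reward before the DQN target is formed:

$$r^{int}_t = \lVert \hat{s}_{t+1} - s_{t+1} \rVert_2^2, \qquad y_t = (r^{ext}_t + r^{int}_t) + \gamma \max_{a'} Q'(s_{t+1}, a'),$$

where $\hat{s}_{t+1}$ is `dyn_s_`, the output of the small dense network fed with the state-action pair `sa`.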
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/contents/Curiosity_Model/Random_Network_Distillation.py:
--------------------------------------------------------------------------------
1 | """This is a simple implementation of [Exploration by Random Network Distillation](https://arxiv.org/abs/1810.12894)"""
2 |
3 | import numpy as np
4 | import tensorflow as tf
5 | import gym
6 | import matplotlib.pyplot as plt
7 |
8 |
9 | class CuriosityNet:
10 | def __init__(
11 | self,
12 | n_a,
13 | n_s,
14 | lr=0.01,
15 | gamma=0.95,
16 | epsilon=1.,
17 | replace_target_iter=300,
18 | memory_size=10000,
19 | batch_size=128,
20 | output_graph=False,
21 | ):
22 | self.n_a = n_a
23 | self.n_s = n_s
24 | self.lr = lr
25 | self.gamma = gamma
26 | self.epsilon = epsilon
27 | self.replace_target_iter = replace_target_iter
28 | self.memory_size = memory_size
29 | self.batch_size = batch_size
30 | self.s_encode_size = 1000 # give a hard job for predictor to learn
31 |
32 | # total learning step
33 | self.learn_step_counter = 0
34 | self.memory_counter = 0
35 |
36 | # initialize zero memory [s, a, r, s_]
37 | self.memory = np.zeros((self.memory_size, n_s * 2 + 2))
38 | self.tfs, self.tfa, self.tfr, self.tfs_, self.pred_train, self.dqn_train, self.q = \
39 | self._build_nets()
40 |
41 | t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_net')
42 | e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='eval_net')
43 |
44 | with tf.variable_scope('hard_replacement'):
45 | self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
46 |
47 | self.sess = tf.Session()
48 |
49 | if output_graph:
50 | tf.summary.FileWriter("logs/", self.sess.graph)
51 |
52 | self.sess.run(tf.global_variables_initializer())
53 |
54 | def _build_nets(self):
55 | tfs = tf.placeholder(tf.float32, [None, self.n_s], name="s") # input State
56 | tfa = tf.placeholder(tf.int32, [None, ], name="a") # input Action
57 | tfr = tf.placeholder(tf.float32, [None, ], name="ext_r") # extrinsic reward
58 | tfs_ = tf.placeholder(tf.float32, [None, self.n_s], name="s_") # input Next State
59 |
60 | # fixed random net
61 | with tf.variable_scope("random_net"):
62 | rand_encode_s_ = tf.layers.dense(tfs_, self.s_encode_size)
63 |
64 | # predictor
65 | ri, pred_train = self._build_predictor(tfs_, rand_encode_s_)
66 |
67 | # normal RL model
68 | q, dqn_loss, dqn_train = self._build_dqn(tfs, tfa, ri, tfr, tfs_)
69 | return tfs, tfa, tfr, tfs_, pred_train, dqn_train, q
70 |
71 | def _build_predictor(self, s_, rand_encode_s_):
72 | with tf.variable_scope("predictor"):
73 | net = tf.layers.dense(s_, 128, tf.nn.relu)
74 | out = tf.layers.dense(net, self.s_encode_size)
75 |
76 | with tf.name_scope("int_r"):
77 | ri = tf.reduce_sum(tf.square(rand_encode_s_ - out), axis=1) # intrinsic reward
78 | train_op = tf.train.RMSPropOptimizer(self.lr, name="predictor_opt").minimize(
79 | tf.reduce_mean(ri), var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "predictor"))
80 |
81 | return ri, train_op
82 |
83 | def _build_dqn(self, s, a, ri, re, s_):
84 | with tf.variable_scope('eval_net'):
85 | e1 = tf.layers.dense(s, 128, tf.nn.relu)
86 | q = tf.layers.dense(e1, self.n_a, name="q")
87 | with tf.variable_scope('target_net'):
88 | t1 = tf.layers.dense(s_, 128, tf.nn.relu)
89 | q_ = tf.layers.dense(t1, self.n_a, name="q_")
90 |
91 | with tf.variable_scope('q_target'):
92 | q_target = re + ri + self.gamma * tf.reduce_max(q_, axis=1, name="Qmax_s_")
93 |
94 | with tf.variable_scope('q_wrt_a'):
95 | a_indices = tf.stack([tf.range(tf.shape(a)[0], dtype=tf.int32), a], axis=1)
96 | q_wrt_a = tf.gather_nd(params=q, indices=a_indices)
97 |
98 | loss = tf.losses.mean_squared_error(labels=q_target, predictions=q_wrt_a) # TD error
99 | train_op = tf.train.RMSPropOptimizer(self.lr, name="dqn_opt").minimize(
100 | loss, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "eval_net"))
101 | return q, loss, train_op
102 |
103 | def store_transition(self, s, a, r, s_):
104 | transition = np.hstack((s, [a, r], s_))
105 | # replace the old memory with new memory
106 | index = self.memory_counter % self.memory_size
107 | self.memory[index, :] = transition
108 | self.memory_counter += 1
109 |
110 | def choose_action(self, observation):
111 | # to have batch dimension when feed into tf placeholder
112 | s = observation[np.newaxis, :]
113 |
114 | if np.random.uniform() < self.epsilon:
115 | # forward feed the observation and get q value for every actions
116 | actions_value = self.sess.run(self.q, feed_dict={self.tfs: s})
117 | action = np.argmax(actions_value)
118 | else:
119 | action = np.random.randint(0, self.n_a)
120 | return action
121 |
122 | def learn(self):
123 | # check to replace target parameters
124 | if self.learn_step_counter % self.replace_target_iter == 0:
125 | self.sess.run(self.target_replace_op)
126 |
127 | # sample batch memory from all memory
128 | top = self.memory_size if self.memory_counter > self.memory_size else self.memory_counter
129 | sample_index = np.random.choice(top, size=self.batch_size)
130 | batch_memory = self.memory[sample_index, :]
131 |
132 | bs, ba, br, bs_ = batch_memory[:, :self.n_s], batch_memory[:, self.n_s], \
133 | batch_memory[:, self.n_s + 1], batch_memory[:, -self.n_s:]
134 | self.sess.run(self.dqn_train, feed_dict={self.tfs: bs, self.tfa: ba, self.tfr: br, self.tfs_: bs_})
135 | if self.learn_step_counter % 100 == 0: # delay training in order to stay curious
136 | self.sess.run(self.pred_train, feed_dict={self.tfs_: bs_})
137 | self.learn_step_counter += 1
138 |
139 |
140 | env = gym.make('MountainCar-v0')
141 | env = env.unwrapped
142 |
143 | dqn = CuriosityNet(n_a=3, n_s=2, lr=0.01, output_graph=False)
144 | ep_steps = []
145 | for epi in range(200):
146 | s = env.reset()
147 | steps = 0
148 | while True:
149 | # env.render()
150 | a = dqn.choose_action(s)
151 | s_, r, done, info = env.step(a)
152 | dqn.store_transition(s, a, r, s_)
153 | dqn.learn()
154 | if done:
155 | print('Epi: ', epi, "| steps: ", steps)
156 | ep_steps.append(steps)
157 | break
158 | s = s_
159 | steps += 1
160 |
161 | plt.plot(ep_steps)
162 | plt.ylabel("steps")
163 | plt.xlabel("episode")
164 | plt.show()
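Random Network Distillation replaces the forward model with a fixed, randomly initialized encoder $\phi$ (the `random_net` scope, never trained) and a predictor $\hat{\phi}$ trained to imitate it; novelty of a next state shows up as predictor error:

$$r^{int}_t = \lVert \phi(s_{t+1}) - \hat{\phi}(s_{t+1}) \rVert_2^2, \qquad y_t = r^{ext}_t + r^{int}_t + \gamma \max_{a'} Q'(s_{t+1}, a').$$

Because `pred_train` only optimizes variables in the `predictor` scope, the random target network stays fixed, which is what makes this error a usable novelty signal.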
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/experiments/2D_car/collision.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def intersection():
4 | p = np.array([0, 0])
5 | r = np.array([1, 1])
6 | q = np.array([0.1, 0.1])
7 | s = np.array([.1, .1])
8 |
9 | if np.cross(r, s) == 0 and np.cross((q-p), r) == 0: # collinear
10 | # t0 = (q − p) · r / (r · r)
11 | # t1 = (q + s − p) · r / (r · r) = t0 + s · r / (r · r)
12 | t0 = np.dot(q-p, r)/np.dot(r, r)
13 | t1 = t0 + np.dot(s, r)/np.dot(r, r)
14 | print(t1, t0)
15 | if ((np.dot(s, r) > 0) and (0 <= t1 - t0 <= 1)) or ((np.dot(s, r) <= 0) and (0 <= t0 - t1 <= 1)):
16 | print('collinear and overlapping, q_s in p_r')
17 | else:
18 | print('collinear and disjoint')
19 | elif np.cross(r, s) == 0 and np.cross((q-p), r) != 0: # parallel r × s = 0 and (q − p) × r ≠ 0,
20 | print('parallel')
21 | else:
22 | t = np.cross((q - p), s) / np.cross(r, s)
23 | u = np.cross((q - p), r) / np.cross(r, s)
24 | if 0 <= t <= 1 and 0 <= u <= 1:
25 | # If r × s ≠ 0 and 0 ≤ t ≤ 1 and 0 ≤ u ≤ 1, the two line segments meet at the point p + t r = q + u s
26 | print('intersection: ', p + t*r)
27 | else:
28 | print('not parallel and not intersect')
29 |
30 |
31 | def point2segment():
32 | p = np.array([-1, 1]) # coordination of point
33 | a = np.array([0, 1]) # coordination of line segment end 1
34 | b = np.array([1, 0]) # coordination of line segment end 2
35 | ab = b-a # line ab
36 | ap = p-a
37 |     distance = np.abs(np.cross(ab, ap)/np.linalg.norm(ab))  # d = |AB x AP| / |AB|
38 | print(distance)
39 |
40 | # angle Cos(θ) = A dot B /(|A||B|)
41 | bp = p-b
42 | cosTheta1 = np.dot(ap, ab) / (np.linalg.norm(ap) * np.linalg.norm(ab))
43 | theta1 = np.arccos(cosTheta1)
44 | cosTheta2 = np.dot(bp, ab) / (np.linalg.norm(bp) * np.linalg.norm(ab))
45 | theta2 = np.arccos(cosTheta2)
46 |     if np.pi/2 <= (theta1 % (np.pi*2)) <= 3 * np.pi / 2:
47 | print('out of a')
48 | elif -np.pi/2 <= (theta2 % (np.pi*2)) <= np.pi/2:
49 | print('out of b')
50 | else:
51 | print('between a and b')
52 |
53 |
54 |
55 | if __name__ == '__main__':
56 | point2segment()
57 | # intersection()
58 |
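For reference, `intersection()` is the usual 2-D cross-product test for segments $p + t\,r$ and $q + u\,s$ with $t, u \in [0, 1]$: if $r \times s \neq 0$,

$$t = \frac{(q - p) \times s}{r \times s}, \qquad u = \frac{(q - p) \times r}{r \times s},$$

and the segments meet at $p + t\,r$ iff $0 \le t \le 1$ and $0 \le u \le 1$; $r \times s = 0$ with $(q - p) \times r = 0$ is the collinear case, otherwise the segments are parallel and disjoint. `point2segment()` uses the point-to-line distance $d = |AB \times AP| / |AB|$ plus the angles at the two end points to decide whether the perpendicular foot actually falls between $A$ and $B$.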
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/experiments/Solve_BipedalWalker/log/events.out.tfevents.1490801027.Morvan:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wqw547243068/wangqiwen/07b64ae47d91581e1c339f40bc765fd7815b47ff/rl/Reinforcement-learning-with-tensorflow/experiments/Solve_BipedalWalker/log/events.out.tfevents.1490801027.Morvan
--------------------------------------------------------------------------------
/rl/Reinforcement-learning-with-tensorflow/experiments/Solve_LunarLander/run_LunarLander.py:
--------------------------------------------------------------------------------
1 | """
2 | Deep Q network,
3 |
4 | LunarLander-v2 example
5 |
6 | Using:
7 | Tensorflow: 1.0
8 | gym: 0.8.0
9 | """
10 |
11 |
12 | import gym
13 | from gym import wrappers
14 | from DuelingDQNPrioritizedReplay import DuelingDQNPrioritizedReplay
15 |
16 | env = gym.make('LunarLander-v2')
17 | # env = env.unwrapped
18 | env.seed(1)
19 |
20 | N_A = env.action_space.n
21 | N_S = env.observation_space.shape[0]
22 | MEMORY_CAPACITY = 50000
23 | TARGET_REP_ITER = 2000
24 | MAX_EPISODES = 900
25 | E_GREEDY = 0.95
26 | E_INCREMENT = 0.00001
27 | GAMMA = 0.99
28 | LR = 0.0001
29 | BATCH_SIZE = 32
30 | HIDDEN = [400, 400]
31 | RENDER = True
32 |
33 | RL = DuelingDQNPrioritizedReplay(
34 | n_actions=N_A, n_features=N_S, learning_rate=LR, e_greedy=E_GREEDY, reward_decay=GAMMA,
35 | hidden=HIDDEN, batch_size=BATCH_SIZE, replace_target_iter=TARGET_REP_ITER,
36 | memory_size=MEMORY_CAPACITY, e_greedy_increment=E_INCREMENT,)
37 |
38 |
39 | total_steps = 0
40 | running_r = 0
41 | r_scale = 100
42 | for i_episode in range(MAX_EPISODES):
43 | s = env.reset() # (coord_x, coord_y, vel_x, vel_y, angle, angular_vel, l_leg_on_ground, r_leg_on_ground)
44 | ep_r = 0
45 | while True:
46 | if total_steps > MEMORY_CAPACITY: env.render()
47 | a = RL.choose_action(s)
48 | s_, r, done, _ = env.step(a)
49 | if r == -100: r = -30
50 | r /= r_scale
51 |
52 | ep_r += r
53 | RL.store_transition(s, a, r, s_)
54 | if total_steps > MEMORY_CAPACITY:
55 | RL.learn()
56 | if done:
57 | land = '| Landed' if r == 100/r_scale else '| ------'
58 | running_r = 0.99 * running_r + 0.01 * ep_r
59 | print('Epi: ', i_episode,
60 | land,
61 | '| Epi_R: ', round(ep_r, 2),
62 | '| Running_R: ', round(running_r, 2),
63 | '| Epsilon: ', round(RL.epsilon, 3))
64 | break
65 |
66 | s = s_
67 | total_steps += 1
68 |
69 |
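Two details worth noting in the loop above: the crash reward of −100 is clipped to −30 and every reward is divided by `r_scale = 100` before entering the replay buffer, and progress is tracked with an exponential moving average over episodes,

$$\bar{R} \leftarrow 0.99\,\bar{R} + 0.01\,R_{ep},$$

so `Running_R` reacts slowly and gives a smoother learning curve than the raw episode return.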
--------------------------------------------------------------------------------
/rl/openai/gym_hello.py:
--------------------------------------------------------------------------------
1 | import gym
2 | env = gym.make('CartPole-v0')
3 | env.reset()  # reset the environment to its initial state
4 | for _ in range(1000):
5 |     env.render()  # redraw the current frame
6 | env.step(env.action_space.sample()) # take a random action
--------------------------------------------------------------------------------
/rl/openai/gym_test.py:
--------------------------------------------------------------------------------
1 | import gym
2 | env = gym.make("Taxi-v1")
3 | observation = env.reset()
4 | for _ in range(1000):
5 | env.render()
6 | action = env.action_space.sample() # your agent here (this takes random actions)
7 | observation, reward, done, info = env.step(action)
--------------------------------------------------------------------------------
/rl/readme.md:
--------------------------------------------------------------------------------
1 | ## Reinforcement Learning
2 |
3 | - Install the DeepMind trfl toolkit:
4 | ```shell
5 | pip install git+git://github.com/deepmind/trfl.git
6 | ```
7 |
8 |
9 |
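After installing, a quick sanity check (a hypothetical snippet, not part of this repo; it assumes TensorFlow ≥ 1.8 is already installed) is just to import the package:

```python
# hypothetical install check: confirm trfl imports against the local TensorFlow
import tensorflow as tf  # trfl requires TF >= 1.8
import trfl

print(tf.__version__)   # installed TensorFlow version
print(trfl.__file__)    # where trfl was installed
```

See `rl/ucl/test.py` below for a small graph built with `trfl.qlearning`.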
--------------------------------------------------------------------------------
/rl/ucl/test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding:utf8
3 |
4 | # pip install tensorflow  # requires TensorFlow 1.8 or later
5 | # pip install git+git://github.com/deepmind/trfl.git
6 | import tensorflow as tf
7 | import trfl
8 |
9 | # Q-values for the previous and next timesteps, shape [batch_size, num_actions].
10 | q_tm1 = tf.get_variable(
11 | "q_tm1", initializer=[[1., 1., 0.], [1., 2., 0.]], dtype=tf.float32)
12 | q_t = tf.get_variable(
13 | "q_t", initializer=[[0., 1., 0.], [1., 2., 0.]], dtype=tf.float32)
14 |
15 | # Action indices, discounts and rewards, shape [batch_size].
16 | a_tm1 = tf.constant([0, 1], dtype=tf.int32)
17 | r_t = tf.constant([1, 1], dtype=tf.float32)
18 | pcont_t = tf.constant([0, 1], dtype=tf.float32) # the discount factor
19 |
20 | # Q-learning loss, and auxiliary data.
21 | loss, q_learning = trfl.qlearning(q_tm1, a_tm1, r_t, pcont_t, q_t)
22 |
23 | reduced_loss = tf.reduce_mean(loss)
24 | optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
25 | train_op = optimizer.minimize(reduced_loss)
26 |
27 |
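The script above only constructs the graph; no session is ever run. A minimal sketch of executing it (assuming TF 1.x session semantics and reusing the names defined above) could look like this:

```python
# hypothetical continuation of rl/ucl/test.py: run a few optimization steps on the toy batch
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(10):
        _, loss_value = sess.run([train_op, reduced_loss])
        print('step %d  q-learning loss %.4f' % (step, loss_value))
```

Since `q_tm1` and `q_t` are variables and the optimizer minimizes the TD loss, the printed loss should shrink over these few steps.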
--------------------------------------------------------------------------------
/web/Untitled Diagram.drawio:
--------------------------------------------------------------------------------
1 | 1ddNc6IwGAfwT8N1B5KAcFTb7vbQS7sze85ChExDHifGt/30+yBB0eBMZ2odykXyzwvhF4wS0Hm9+2n4snqBQqiAhMUuoA8BISSNCX40yb5NIpqFbVIaWbjsFLzJf8KFXbO1LMTqrKEFUFYuz8MctBa5Pcu4MbA9b7YAdX7VJS+FF7zlXPnpH1nYyqVRkp0qfglZVu7SKZm0FX95/l4aWGt3vYDQxeFoq2vejeVudFXxAra9iD4GdG4AbHtW7+ZCNbgdW9vv6Urtcd5GaPuRDiSKqeu04Wotulkf5mb3nQd2QnoszBZSqTkoMIcK+hSmkzDGHJY8l7ZZ7yTE4soaeBddQw266VvZWmEpaoYBbfvDHA7M3VSEsWJ39ZaiIxQ+gQJqYc0em7gOlGY/4raTe/xomHbJ9rSacejWoOot5DHk7gkqj+OfFPHEQV5HJZ9Ajch0OhsXajS5RI3ub0o/YSrSOGFkVKYsHYEp80yf9QZvQIL2cPFWrSNxWzZhWOZKlhoLOXYT6DRrVCRuplNXUcuiUPdzJdmlK27QA7BRNgA7uZFr7Lm+ig2o9XeGTcYAm3iwvw0v5Hd2ZZ5rHN/ddeK5vkglLXcjDbB6jB/Q6i1FlNxGL068bTQb0mMDeOxGeKmHNwe9UBL/mo4bj7FLPDb4I/SVeJmPV3FYjVzO3wzvLtcN05NDHa5zMXI7/ytL2RfaYfH0RnWo67230sf/
--------------------------------------------------------------------------------
/web/pydown/pydown.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | pydown
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | Markdown + deck.js
34 | Simple html presentation maker
35 | You just need to write markdown file
36 | Support code highlighting
37 |
def hello():
38 | print 'hello world'
39 |
40 | You can customize with css
41 | like dropping the centering
42 | Easy to use
43 |
44 | - write your slides markdown file
45 | - python main.py md directory
46 |
47 | The previous slide just looks like this
48 |
!SLIDE left
49 |
50 | ##Easy to use
51 |
52 | 1. write your slides markdown file
53 | 2. python main.py md directory
54 |
55 | Just simple
56 | and enjoy yourself
57 |
61 |
62 |
63 |
64 |
65 | 8
66 | /
67 | 8
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
85 |
86 |
87 |
88 |
--------------------------------------------------------------------------------
/web/pydown/pydown_files/deck.scale.css:
--------------------------------------------------------------------------------
1 | /* Remove this line if you are embedding deck.js in a page and
2 | using the scale extension. */
3 | .csstransforms {
4 | overflow: hidden;
5 | }
6 |
7 | .csstransforms .deck-container.deck-scale:not(.deck-menu) > .slide {
8 | -webkit-box-sizing: padding-box;
9 | -moz-box-sizing: padding-box;
10 | box-sizing: padding-box;
11 | width: 100%;
12 | padding-bottom: 20px;
13 | }
14 | .csstransforms .deck-container.deck-scale:not(.deck-menu) > .slide > .deck-slide-scaler {
15 | -webkit-transform-origin: 50% 0;
16 | -moz-transform-origin: 50% 0;
17 | -o-transform-origin: 50% 0;
18 | -ms-transform-origin: 50% 0;
19 | transform-origin: 50% 0;
20 | }
21 |
22 | .csstransforms .deck-container.deck-menu .deck-slide-scaler {
23 | -webkit-transform: none !important;
24 | -moz-transform: none !important;
25 | -o-transform: none !important;
26 | -ms-transform: none !important;
27 | transform: none !important;
28 | }
29 |
--------------------------------------------------------------------------------
/web/pydown/pydown_files/deck.scale.js.下载:
--------------------------------------------------------------------------------
1 | /*!
2 | Deck JS - deck.scale
3 | Copyright (c) 2011-2012 Caleb Troughton
4 | Dual licensed under the MIT license and GPL license.
5 | https://github.com/imakewebthings/deck.js/blob/master/MIT-license.txt
6 | https://github.com/imakewebthings/deck.js/blob/master/GPL-license.txt
7 | */
8 |
9 | /*
10 | This module adds automatic scaling to the deck. Slides are scaled down
11 | using CSS transforms to fit within the deck container. If the container is
12 | big enough to hold the slides without scaling, no scaling occurs. The user
13 | can disable and enable scaling with a keyboard shortcut.
14 |
15 | Note: CSS transforms may make Flash videos render incorrectly. Presenters
16 | that need to use video may want to disable scaling to play them. HTML5 video
17 | works fine.
18 | */
19 | (function($, deck, window, undefined) {
20 | var $d = $(document),
21 | $w = $(window),
22 | baseHeight, // Value to scale against
23 | timer, // Timeout id for debouncing
24 | rootSlides,
25 |
26 | /*
27 | Internal function to do all the dirty work of scaling the slides.
28 | */
29 | scaleDeck = function() {
30 | var opts = $[deck]('getOptions'),
31 | obh = opts.baseHeight,
32 | $container = $[deck]('getContainer'),
33 | baseHeight = obh ? obh : $container.height();
34 |
35 | // Scale each slide down if necessary (but don't scale up)
36 | $.each(rootSlides, function(i, $slide) {
37 | var slideHeight = $slide.innerHeight(),
38 | $scaler = $slide.find('.' + opts.classes.scaleSlideWrapper),
39 | scale = $container.hasClass(opts.classes.scale) ?
40 | baseHeight / slideHeight :
41 | 1;
42 |
43 | $.each('Webkit Moz O ms Khtml'.split(' '), function(i, prefix) {
44 | if (scale === 1) {
45 | $scaler.css(prefix + 'Transform', '');
46 | }
47 | else {
48 | $scaler.css(prefix + 'Transform', 'scale(' + scale + ')');
49 | }
50 | });
51 | });
52 | }
53 |
54 | /*
55 | Extends defaults/options.
56 |
57 | options.classes.scale
58 | This class is added to the deck container when scaling is enabled.
59 | It is enabled by default when the module is included.
60 |
61 | options.classes.scaleSlideWrapper
62 | Scaling is done using a wrapper around the contents of each slide. This
63 | class is applied to that wrapper.
64 |
65 | options.keys.scale
66 | The numeric keycode used to toggle enabling and disabling scaling.
67 |
68 | options.baseHeight
69 | When baseHeight is falsy, as it is by default, the deck is scaled in
70 | proportion to the height of the deck container. You may instead specify
71 | a height as a number of px, and slides will be scaled against this
72 | height regardless of the container size.
73 |
74 | options.scaleDebounce
75 | Scaling on the browser resize event is debounced. This number is the
76 | threshold in milliseconds. You can learn more about debouncing here:
77 | http://unscriptable.com/index.php/2009/03/20/debouncing-javascript-methods/
78 |
79 | */
80 | $.extend(true, $[deck].defaults, {
81 | classes: {
82 | scale: 'deck-scale',
83 | scaleSlideWrapper: 'deck-slide-scaler'
84 | },
85 |
86 | keys: {
87 | scale: 83 // s
88 | },
89 |
90 | baseHeight: null,
91 | scaleDebounce: 200
92 | });
93 |
94 | /*
95 | jQuery.deck('disableScale')
96 |
97 | Disables scaling and removes the scale class from the deck container.
98 | */
99 | $[deck]('extend', 'disableScale', function() {
100 | $[deck]('getContainer').removeClass($[deck]('getOptions').classes.scale);
101 | scaleDeck();
102 | });
103 |
104 | /*
105 | jQuery.deck('enableScale')
106 |
107 | Enables scaling and adds the scale class to the deck container.
108 | */
109 | $[deck]('extend', 'enableScale', function() {
110 | $[deck]('getContainer').addClass($[deck]('getOptions').classes.scale);
111 | scaleDeck();
112 | });
113 |
114 | /*
115 | jQuery.deck('toggleScale')
116 |
117 | Toggles between enabling and disabling scaling.
118 | */
119 | $[deck]('extend', 'toggleScale', function() {
120 | var $c = $[deck]('getContainer');
121 | $[deck]($c.hasClass($[deck]('getOptions').classes.scale) ?
122 | 'disableScale' : 'enableScale');
123 | });
124 |
125 | $d.bind('deck.init', function() {
126 | var opts = $[deck]('getOptions'),
127 | slideTest = $.map([
128 | opts.classes.before,
129 | opts.classes.previous,
130 | opts.classes.current,
131 | opts.classes.next,
132 | opts.classes.after
133 | ], function(el, i) {
134 | return '.' + el;
135 | }).join(', ');
136 |
137 | // Build top level slides array
138 | rootSlides = [];
139 | $.each($[deck]('getSlides'), function(i, $el) {
140 | if (!$el.parentsUntil(opts.selectors.container, slideTest).length) {
141 | rootSlides.push($el);
142 | }
143 | });
144 |
145 | // Use a wrapper on each slide to handle content scaling
146 | $.each(rootSlides, function(i, $slide) {
147 | $slide.children().wrapAll('');
148 | });
149 |
150 | // Debounce the resize scaling
151 | $w.unbind('resize.deckscale').bind('resize.deckscale', function() {
152 | window.clearTimeout(timer);
153 | timer = window.setTimeout(scaleDeck, opts.scaleDebounce);
154 | })
155 | // Scale once on load, in case images or something change layout
156 | .unbind('load.deckscale').bind('load.deckscale', scaleDeck);
157 |
158 | // Bind key events
159 | $d.unbind('keydown.deckscale').bind('keydown.deckscale', function(e) {
160 | if (e.which === opts.keys.scale || $.inArray(e.which, opts.keys.scale) > -1) {
161 | $[deck]('toggleScale');
162 | e.preventDefault();
163 | }
164 | });
165 |
166 | // Enable scale on init
167 | $[deck]('enableScale');
168 | });
169 | })(jQuery, 'deck', this);
170 |
171 |
--------------------------------------------------------------------------------
/web/pydown/pydown_files/deck.status.css:
--------------------------------------------------------------------------------
1 | .deck-container .deck-status {
2 | position: absolute;
3 | bottom: 10px;
4 | right: 5px;
5 | color: #888;
6 | z-index: 3;
7 | margin: 0;
8 | }
9 |
10 | body.deck-container .deck-status {
11 | position: fixed;
12 | }
13 |
14 | @media print {
15 | .deck-status {
16 | display: none;
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/web/pydown/pydown_files/deck.status.js.下载:
--------------------------------------------------------------------------------
1 | /*!
2 | Deck JS - deck.status
3 | Copyright (c) 2011 Caleb Troughton
4 | Dual licensed under the MIT license and GPL license.
5 | https://github.com/imakewebthings/deck.js/blob/master/MIT-license.txt
6 | https://github.com/imakewebthings/deck.js/blob/master/GPL-license.txt
7 | */
8 |
9 | /*
10 | This module adds a (current)/(total) style status indicator to the deck.
11 | */
12 | (function($, deck, undefined) {
13 | var $d = $(document),
14 |
15 | updateCurrent = function(e, from, to) {
16 | var opts = $[deck]('getOptions');
17 |
18 | $(opts.selectors.statusCurrent).text(opts.countNested ?
19 | to + 1 :
20 | $[deck]('getSlide', to).data('rootSlide')
21 | );
22 | };
23 |
24 | /*
25 | Extends defaults/options.
26 |
27 | options.selectors.statusCurrent
28 | The element matching this selector displays the current slide number.
29 |
30 | options.selectors.statusTotal
31 | The element matching this selector displays the total number of slides.
32 |
33 | options.countNested
34 | If false, only top level slides will be counted in the current and
35 | total numbers.
36 | */
37 | $.extend(true, $[deck].defaults, {
38 | selectors: {
39 | statusCurrent: '.deck-status-current',
40 | statusTotal: '.deck-status-total'
41 | },
42 |
43 | countNested: true
44 | });
45 |
46 | $d.bind('deck.init', function() {
47 | var opts = $[deck]('getOptions'),
48 | slides = $[deck]('getSlides'),
49 | $current = $[deck]('getSlide'),
50 | ndx;
51 |
52 | // Set total slides once
53 | if (opts.countNested) {
54 | $(opts.selectors.statusTotal).text(slides.length);
55 | }
56 | else {
57 | /* Determine root slides by checking each slide's ancestor tree for
58 | any of the slide classes. */
59 | var rootIndex = 1,
60 | slideTest = $.map([
61 | opts.classes.before,
62 | opts.classes.previous,
63 | opts.classes.current,
64 | opts.classes.next,
65 | opts.classes.after
66 | ], function(el, i) {
67 | return '.' + el;
68 | }).join(', ');
69 |
70 | /* Store the 'real' root slide number for use during slide changes. */
71 | $.each(slides, function(i, $el) {
72 | var $parentSlides = $el.parentsUntil(opts.selectors.container, slideTest);
73 |
74 | $el.data('rootSlide', $parentSlides.length ?
75 | $parentSlides.last().data('rootSlide') :
76 | rootIndex++
77 | );
78 | });
79 |
80 | $(opts.selectors.statusTotal).text(rootIndex - 1);
81 | }
82 |
83 | // Find where we started in the deck and set initial state
84 | $.each(slides, function(i, $el) {
85 | if ($el === $current) {
86 | ndx = i;
87 | return false;
88 | }
89 | });
90 | updateCurrent(null, ndx, ndx);
91 | })
92 | /* Update current slide number with each change event */
93 | .bind('deck.change', updateCurrent);
94 | })(jQuery, 'deck');
95 |
96 |
--------------------------------------------------------------------------------
/web/pydown/pydown_files/horizontal-slide.css:
--------------------------------------------------------------------------------
1 | .csstransitions.csstransforms {
2 | overflow-x: hidden;
3 | }
4 | .csstransitions.csstransforms .deck-container > .slide {
5 | -webkit-transition: -webkit-transform 500ms ease-in-out;
6 | -moz-transition: -moz-transform 500ms ease-in-out;
7 | -ms-transition: -ms-transform 500ms ease-in-out;
8 | -o-transition: -o-transform 500ms ease-in-out;
9 | transition: transform 500ms ease-in-out;
10 | }
11 | .csstransitions.csstransforms .deck-container:not(.deck-menu) > .slide {
12 | position: absolute;
13 | top: 0;
14 | left: 0;
15 | -webkit-box-sizing: border-box;
16 | -moz-box-sizing: border-box;
17 | box-sizing: border-box;
18 | width: 100%;
19 | padding: 0 48px;
20 | }
21 | .csstransitions.csstransforms .deck-container:not(.deck-menu) > .slide .slide {
22 | position: relative;
23 | left: 0;
24 | top: 0;
25 | -webkit-transition: -webkit-transform 500ms ease-in-out, opacity 500ms ease-in-out;
26 | -moz-transition: -moz-transform 500ms ease-in-out, opacity 500ms ease-in-out;
27 | -ms-transition: -ms-transform 500ms ease-in-out, opacity 500ms ease-in-out;
28 | -o-transition: -o-transform 500ms ease-in-out, opacity 500ms ease-in-out;
29 | transition: -webkit-transform 500ms ease-in-out, opacity 500ms ease-in-out;
30 | }
31 | .csstransitions.csstransforms .deck-container:not(.deck-menu) > .slide .deck-next, .csstransitions.csstransforms .deck-container:not(.deck-menu) > .slide .deck-after {
32 | visibility: visible;
33 | -webkit-transform: translate3d(200%, 0, 0);
34 | -moz-transform: translate(200%, 0);
35 | -ms-transform: translate(200%, 0);
36 | -o-transform: translate(200%, 0);
37 | transform: translate3d(200%, 0, 0);
38 | }
39 | .csstransitions.csstransforms .deck-container:not(.deck-menu) > .deck-previous {
40 | -webkit-transform: translate3d(-200%, 0, 0);
41 | -moz-transform: translate(-200%, 0);
42 | -ms-transform: translate(-200%, 0);
43 | -o-transform: translate(-200%, 0);
44 | transform: translate3d(-200%, 0, 0);
45 | }
46 | .csstransitions.csstransforms .deck-container:not(.deck-menu) > .deck-before {
47 | -webkit-transform: translate3d(-400%, 0, 0);
48 | -moz-transform: translate(-400%, 0);
49 | -ms-transform: translate(-400%, 0);
50 | -o-transform: translate(-400%, 0);
51 | transform: translate3d(-400%, 0, 0);
52 | }
53 | .csstransitions.csstransforms .deck-container:not(.deck-menu) > .deck-next {
54 | -webkit-transform: translate3d(200%, 0, 0);
55 | -moz-transform: translate(200%, 0);
56 | -ms-transform: translate(200%, 0);
57 | -o-transform: translate(200%, 0);
58 | transform: translate3d(200%, 0, 0);
59 | }
60 | .csstransitions.csstransforms .deck-container:not(.deck-menu) > .deck-after {
61 | -webkit-transform: translate3d(400%, 0, 0);
62 | -moz-transform: translate(400%, 0);
63 | -ms-transform: translate(400%, 0);
64 | -o-transform: translate(400%, 0);
65 | transform: translate3d(400%, 0, 0);
66 | }
67 | .csstransitions.csstransforms .deck-container:not(.deck-menu) > .deck-before .slide, .csstransitions.csstransforms .deck-container:not(.deck-menu) > .deck-previous .slide {
68 | visibility: visible;
69 | }
70 | .csstransitions.csstransforms .deck-container:not(.deck-menu) > .deck-child-current {
71 | -webkit-transform: none;
72 | -moz-transform: none;
73 | -ms-transform: none;
74 | -o-transform: none;
75 | transform: none;
76 | }
77 |
--------------------------------------------------------------------------------
/web/pydown/pydown_files/md_hl.css:
--------------------------------------------------------------------------------
1 | /*pygments style from Armin Ronacher*/
2 | pre .hll { background-color: #ffffcc }
3 | pre .c { color: #8f5902; font-style: italic } /* Comment */
4 | pre .err { color: #a40000; border: 1px solid #ef2929 } /* Error */
5 | pre .g { color: #000000 } /* Generic */
6 | pre .k { color: #204a87; font-weight: bold } /* Keyword */
7 | pre .l { color: #000000 } /* Literal */
8 | pre .n { color: #000000 } /* Name */
9 | pre .o { color: #ce5c00; font-weight: bold } /* Operator */
10 | pre .x { color: #000000 } /* Other */
11 | pre .p { color: #000000; font-weight: bold } /* Punctuation */
12 | pre .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */
13 | pre .cp { color: #8f5902; font-style: italic } /* Comment.Preproc */
14 | pre .c1 { color: #8f5902; font-style: italic } /* Comment.Single */
15 | pre .cs { color: #8f5902; font-style: italic } /* Comment.Special */
16 | pre .gd { color: #a40000 } /* Generic.Deleted */
17 | pre .ge { color: #000000; font-style: italic } /* Generic.Emph */
18 | pre .gr { color: #ef2929 } /* Generic.Error */
19 | pre .gh { color: #000080; font-weight: bold } /* Generic.Heading */
20 | pre .gi { color: #00A000 } /* Generic.Inserted */
21 | pre .go { color: #000000; font-style: italic } /* Generic.Output */
22 | pre .gp { color: #8f5902 } /* Generic.Prompt */
23 | pre .gs { color: #000000; font-weight: bold } /* Generic.Strong */
24 | pre .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
25 | pre .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */
26 | pre .kc { color: #204a87; font-weight: bold } /* Keyword.Constant */
27 | pre .kd { color: #204a87; font-weight: bold } /* Keyword.Declaration */
28 | pre .kn { color: #204a87; font-weight: bold } /* Keyword.Namespace */
29 | pre .kp { color: #204a87; font-weight: bold } /* Keyword.Pseudo */
30 | pre .kr { color: #204a87; font-weight: bold } /* Keyword.Reserved */
31 | pre .kt { color: #204a87; font-weight: bold } /* Keyword.Type */
32 | pre .ld { color: #000000 } /* Literal.Date */
33 | pre .m { color: #0000cf; font-weight: bold } /* Literal.Number */
34 | pre .s { color: #4e9a06 } /* Literal.String */
35 | pre .na { color: #c4a000 } /* Name.Attribute */
36 | pre .nb { color: #204a87 } /* Name.Builtin */
37 | pre .nc { color: #000000 } /* Name.Class */
38 | pre .no { color: #000000 } /* Name.Constant */
39 | pre .nd { color: #5c35cc; font-weight: bold } /* Name.Decorator */
40 | pre .ni { color: #ce5c00 } /* Name.Entity */
41 | pre .ne { color: #cc0000; font-weight: bold } /* Name.Exception */
42 | pre .nf { color: #000000 } /* Name.Function */
43 | pre .nl { color: #f57900 } /* Name.Label */
44 | pre .nn { color: #000000 } /* Name.Namespace */
45 | pre .nx { color: #000000 } /* Name.Other */
46 | pre .py { color: #000000 } /* Name.Property */
47 | pre .nt { color: #204a87; font-weight: bold } /* Name.Tag */
48 | pre .nv { color: #000000 } /* Name.Variable */
49 | pre .ow { color: #204a87; font-weight: bold } /* Operator.Word */
50 | pre .w { color: #f8f8f8; text-decoration: underline } /* Text.Whitespace */
51 | pre .mf { color: #0000cf; font-weight: bold } /* Literal.Number.Float */
52 | pre .mh { color: #0000cf; font-weight: bold } /* Literal.Number.Hex */
53 | pre .mi { color: #0000cf; font-weight: bold } /* Literal.Number.Integer */
54 | pre .mo { color: #0000cf; font-weight: bold } /* Literal.Number.Oct */
55 | pre .sb { color: #4e9a06 } /* Literal.String.Backtick */
56 | pre .sc { color: #4e9a06 } /* Literal.String.Char */
57 | pre .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */
58 | pre .s2 { color: #4e9a06 } /* Literal.String.Double */
59 | pre .se { color: #4e9a06 } /* Literal.String.Escape */
60 | pre .sh { color: #4e9a06 } /* Literal.String.Heredoc */
61 | pre .si { color: #4e9a06 } /* Literal.String.Interpol */
62 | pre .sx { color: #4e9a06 } /* Literal.String.Other */
63 | pre .sr { color: #4e9a06 } /* Literal.String.Regex */
64 | pre .s1 { color: #4e9a06 } /* Literal.String.Single */
65 | pre .ss { color: #4e9a06 } /* Literal.String.Symbol */
66 | pre .bp { color: #3465a4 } /* Name.Builtin.Pseudo */
67 | pre .vc { color: #000000 } /* Name.Variable.Class */
68 | pre .vg { color: #000000 } /* Name.Variable.Global */
69 | pre .vi { color: #000000 } /* Name.Variable.Instance */
70 | pre .il { color: #0000cf; font-weight: bold } /* Literal.Number.Integer.Long */
71 |
--------------------------------------------------------------------------------
/web/timeline/643.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wqw547243068/wangqiwen/07b64ae47d91581e1c339f40bc765fd7815b47ff/web/timeline/643.jpg
--------------------------------------------------------------------------------
/web/timeline/css/about.css:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wqw547243068/wangqiwen/07b64ae47d91581e1c339f40bc765fd7815b47ff/web/timeline/css/about.css
--------------------------------------------------------------------------------
/web/timeline/timeline.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
12 | 静态可以折叠时光轴
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
20170111
25 |
-
26 | 20170111 22:32:45
27 |
出入口系统
28 |
29 |
-
30 | 20170111 21:00:31
31 |
停车场系统
32 |
33 |
-
34 | 20170111 17:30:45
35 |
楼宇门禁系统
36 |
37 |
38 |
39 |
40 |
20170112
41 |
-
42 | 20170112 14:03:41
43 |
视频监控系统
44 |
45 |
-
46 | 20170112 11:24:47
47 |
电子巡更系统
48 |
49 |
50 |
51 |
20170113
52 |
- 20170112 14:03:41
视频监控系统
53 |
- 20170112 14:03:41
视频监控系统
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
72 |
73 |
74 |
--------------------------------------------------------------------------------
/web/timeline/tl.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
28 |
29 |
30 |
31 |
32 |
33 | -
34 |
35 |
36 |
2018-05-01
37 |
天气晴朗
38 |
适合跑步
39 |
查看更多>>
40 |
41 |
42 | -
43 |
44 |
45 |
2018-05-02
46 |
天气晴朗
47 |
适合跑步
48 |
查看更多>>
49 |
50 |
51 | -
52 |
53 |
54 |
2018-05-03
55 |
天气晴朗
56 |
适合跑步
57 |
查看更多>>
58 |
59 |
60 | -
61 |
62 |
63 |
2018-05-04
64 |
天气晴朗
65 |
适合跑步
66 |
查看更多>>
67 |
68 |
69 |
70 |
71 |
72 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/学习资料汇总:
--------------------------------------------------------------------------------
1 | Collected study materials
2 |
3 | [2017-03-18] [Sequence learning: Markov models](http://blog.csdn.net/dark_scope/article/details/61417336)
4 | [2017-03-27] [Algorithm optimization: how to escape saddle points](http://www.csuldw.com/2016/07/10/2016-07-10-saddlepoints/?utm_source=tuicool&utm_medium=referral)
5 |
--------------------------------------------------------------------------------
/微软-ML算法指南.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wqw547243068/wangqiwen/07b64ae47d91581e1c339f40bc765fd7815b47ff/微软-ML算法指南.png
--------------------------------------------------------------------------------