├── .gitattributes
├── .github
└── FUNDING.yml
├── .gitignore
├── Causal
└── PR Causal Introduction.md
├── Control
├── 1_LQR.md
├── 1_LQR_code.py
├── 2_DMP.md
└── 3_DMP2.md
├── Efficient
├── PR Efficient Ⅰ:机器人中的数据高效强化学习.md
├── PR Efficient Ⅱ:Bayesian Transfer RL with prior knowledge.md
├── PR Efficient Ⅲ:Efficient RL for Multi-Step Visual Tasks with Sim2real.md
├── PR Efficient Ⅳ:五分钟内让四足机器人学会行走.md
├── PR Efficient Ⅴ:DERL with self-predictive representations.md
├── PR Efficient Ⅵ:从RL的五个方面分析Sample Efficient.md
└── PR Efficient Ⅶ:Efficient RL 中表征学习的理论基础.md
├── Federated Learning
└── FLⅠ:联邦学习(Federated Learning)入门指南.md
├── Imitation learning
├── 01Introduction.md
├── 02DAgger.assets
│ ├── image-20200514200538762.png
│ ├── 微信截图_20200514220002.png
│ ├── 微信截图_20200514220137.png
│ ├── 微信截图_20200514220221.png
│ ├── 微信截图_20200514220412.png
│ ├── 微信截图_20200514220508.png
│ └── 微信截图_20200514220651.png
├── 02DAgger.md
├── 03EnsembleDAgger.assets
│ ├── image-20200514223244922.png
│ └── 微信截图_20200514225330.png
├── 03EnsembleDAgger.md
├── EnsembleDAgger.assets
│ ├── image-20200514131357018.png
│ └── image-20200514131458469.png
└── Introduction.assets
│ ├── 1_9gdENk_iThuoha-ZJK4oOQ.jpeg
│ ├── 1_P076bt-xcC3mKyYCINzSFg.jpeg
│ ├── 1_RkCKUyRW68fAuysDgWhuMA.png
│ ├── 1_UuY1bsit07pwijg1pSOQgQ.jpeg
│ ├── image-20200513095032814.png
│ ├── image-20200513095203592.png
│ ├── image-20200513110523699.png
│ ├── stacking_demo.gif
│ ├── v2-61bb833f9464f5c7fc088045f26c909d_1440w.png
│ └── v2-9ad04f29683121bb7870e0589b8ec389_1440w.png
├── LICENSE
├── MARL
├── MARL Ⅰ:A Selective Overview of Theories and Algorithms.assets
│ ├── 2020-09-06_23-17-55.jpg
│ ├── 2020-09-08_12-50-46.jpg
│ ├── image-20200906172748898.png
│ └── image-20200906225644703.png
├── MARL Ⅰ:A Selective Overview of Theories and Algorithms.md
├── MARL Ⅱ:QD-learning.assets
│ └── image-20200914210232950.png
└── MARL Ⅱ:QD-learning.md
├── MBRL
├── Model-Based RL Ⅲ 从源码读懂PILCO.md
└── img
│ ├── equation.svg
│ ├── image-20200505172434162.png
│ ├── 微信截图_20200505223246.png
│ └── 微信截图_20200505223309.png
├── Memory
└── Memory systems 2018 – towards a new paradigm.md
├── Paper Reading
├── Bayesian Relational Memory for Semantic Visual Navigation.assets
│ ├── image-20200703144120242.png
│ ├── image-20200703144159048.png
│ ├── image-20200703144238614.png
│ ├── image-20200703151212485.png
│ ├── image-20200703154225724.png
│ └── image-20200703154359760.png
├── Bayesian Relational Memory for Semantic Visual Navigation.md
├── Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets
│ ├── image-20200705111106393.png
│ ├── image-20200705111939601.png
│ ├── image-20200705123909206.png
│ ├── image-20200705130626188.png
│ ├── image-20200705131656750.png
│ ├── image-20200705131744033.png
│ ├── image-20200705131747173.png
│ ├── image-20200705131859781.png
│ ├── image-20200705131926516.png
│ └── image-20200705132141819.png
├── Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.md
├── Learning to Adapt in Dynamic, Real-World Environments through Meta-Reinforcement Learning.md
├── Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets
│ ├── image-20200702211649011.png
│ ├── image-20200702213128415.png
│ ├── image-20200703101618018.png
│ ├── image-20200703101914142.png
│ ├── image-20200703104319711.png
│ ├── image-20200703121626950.png
│ ├── image-20200703121644122.png
│ ├── image-20200703121752526.png
│ └── image-20200703121906914.png
├── Learning to learn how to learn Self-adaptive visual navigation using meta-learning.md
├── Long Range Neural Navigation Policies for the Real World.md
├── Scene memory transformer for embodied agents in long-horizon tasks.assets
│ ├── image-20200706152627903.png
│ ├── image-20200706153724992.png
│ ├── image-20200706165235163.png
│ ├── image-20200706175359541.png
│ ├── image-20200706182841042.png
│ ├── image-20200706182939318.png
│ ├── image-20200706183146814.png
│ ├── image-20200706183211120.png
│ └── image-20200706190803028.png
├── Scene memory transformer for embodied agents in long-horizon tasks.md
├── Semi-parametric topological memory for navigation.assets
│ ├── image-20200706125057832.png
│ ├── image-20200706125603144.png
│ ├── image-20200706140848398.png
│ ├── image-20200706140947547.png
│ └── image-20200706141720545.png
├── Semi-parametric topological memory for navigation.md
├── Target driven visual navigation exploiting object relationships.assets
│ ├── image-20200702140946820.png
│ ├── image-20200702151238953.png
│ ├── image-20200702154153781.png
│ ├── image-20200702161215057.png
│ ├── image-20200702163714131.png
│ ├── image-20200702164548441.png
│ └── image-20200702172920825.png
├── Target driven visual navigation exploiting object relationships.md
└── Uncertainty-Aware Reinforcement Learning for Collision Avoidance.md
├── Perspective
├── PR Perspective Ⅰ:Embodied AI 的新浪潮.md
└── PR Perspective Ⅱ:Robot Learning思考.md
├── Preliminary
├── A Simple Guide for NN.assets
│ ├── 20160707204048899.gif
│ ├── 853467-20160630141449671-1058672778.png
│ ├── 853467-20160630152018906-1524325812.png
│ ├── 853467-20160630154317562-311369571.png
│ ├── equation-1584587262781.svg
│ ├── equation-1584587282205.svg
│ ├── equation-1584587725413.svg
│ └── equation.svg
├── A Simple Guide for NN.md
├── Preliminary RL basic knowledge.assets
│ ├── 131433102201.jpg
│ └── 132312526273.jpg
├── Preliminary of RL 1.md
├── Preliminary of RL 2.assets
│ ├── 1042406-20180812184148124-1485684702.jpg
│ ├── 1560008119444.png
│ ├── 201019414696.png
│ ├── 201019447506.png
│ ├── 201019462191.png
│ ├── 221402112851854.png
│ ├── 221402155049842.png
│ ├── 221402163881216.png
│ ├── 221402175506201.png
│ ├── interview-14.png
│ ├── v2-111ca0554c4504c7aefc9a14d0d92d2f_1440w.jpg
│ ├── v2-6c24d01db0b8b94589b2fe6a6efcc7b2_1440w.jpg
│ ├── v2-ef32f6901c6a5b8f6eafd8d478ff83ef_1440w.jpg
│ ├── v2-f3c12050c797196c7c37b003905a8d30_1440w.jpg
│ └── 屏幕快照 2016-01-05 下午9.48.30.png
├── Preliminary of RL 2.md
├── Preliminary of RL 3.assets
│ └── equation.svg
├── Preliminary of RL 3.md
├── Preliminary of RL 5.md
├── Reinforcement Learning Notes.assets
│ ├── 0_kt9_Z41qxgiI0CDl
│ ├── 0_kt9_Z41qxgiI0CDl-1575448739547
│ ├── 0_kt9_Z41qxgiI0CDl-1575448742639
│ ├── 0_kt9_Z41qxgiI0CDl-1575448756568
│ ├── 0_oh-lF13hYWt2Bd6V_
│ ├── 1564474069516.png
│ ├── 1564549789614.png
│ ├── 76a319586cd215c8f2075b938fc6f6e07c81714b.svg
│ ├── 8795d42bd263dcbe55d123e7466b2dd5091490a7.svg
│ ├── 9ed1a541005a48d51b624c3b329897064ec2c065.svg
│ ├── a325c9e05fa2ccce85eb2384ca00b4888d1c7824.svg
│ ├── a5132668c0af8733656505c5fb6c1dff4a7907a1.svg
│ ├── dc4621f81a5205e6ae31a35b87c54316e043deda-1575549924223.svg
│ ├── dc4621f81a5205e6ae31a35b87c54316e043deda-1575549965545.svg
│ ├── dc4621f81a5205e6ae31a35b87c54316e043deda.svg
│ ├── image-20191204161236516.png
│ ├── image-20191204164022284.png
│ ├── image-20191204200910603.png
│ ├── image-20191204201005583.png
│ ├── image-20191204204627884.png
│ ├── image-20191204205459823.png
│ ├── image-20191205092649257.png
│ ├── image-20191205102211427.png
│ ├── image-20191205103234091.png
│ ├── image-20191205103636621.png
│ ├── image-20191205103810941.png
│ ├── image-20191205104741531.png
│ ├── image-20191205105318993.png
│ ├── image-20191205110708645.png
│ ├── image-20191205111303995.png
│ ├── image-20191205121930328.png
│ └── image-20191205190844049.png
├── Reinforcement Learning Notes.md
└── img
│ ├── 1558592857137.png
│ ├── 1558614556514.png
│ ├── 1560008119444.png
│ ├── 2019-04-10 19-14-32 的屏幕截图.png
│ ├── 2019-04-10 19-17-30 的屏幕截图.png
│ ├── 2019-04-10 21-00-18 的屏幕截图.png
│ ├── 3-3-1.png
│ ├── 3-3-2.png
│ ├── 4-1-1-1554948278323.jpg
│ ├── 4-1-1.jpg
│ ├── 4-5-4.png
│ ├── 4155986-e77eec1baba5aeea.webp
│ ├── 5-1-1.png
│ ├── DQN3.png
│ ├── sl4.png
│ └── 屏幕快照 2016-01-05 下午9.48.30.png
├── Probabilistic Robotics
├── PR GaussianProcessRegression.assets
│ ├── 1_9xMQMnSPnAFkWqIY2jvIpQ.png
│ ├── 1_IdGgdrY_n_9_YfkaCh-dag.png
│ ├── 1_YAPmNXea5gKoH3uyRrtITQ.png
│ ├── 1_zNQg-o-C2JELQFQjEEDrLw.png
│ ├── Illustration-of-a-bivariate-Gaussian-distribution-The-marginal-and-joint-probability.png
│ ├── Sun, 10 May 2020 143250.png
│ ├── image-20200510155719330.png
│ ├── v2-3c25a927c217f13a055794377635faaf_1440w.jpg
│ └── 微信图片编辑_20200510130051.jpg
├── PR HMC&MH&Gibbs.assets
│ ├── 2018-05-09-gibbs-100.png
│ ├── gibbssampler-2dnormal1.png
│ ├── image__14_.png
│ ├── true.jpg
│ └── v2-08cb302ac37b757ee390705d822f87f2_1440w.jpg
├── PR HMC&MH&Gibbs.md
├── PR HMM.md
├── PR IS&MCMC.assets
│ ├── 1_3nBb4AqcriLcENdpBp4fpQ@2x.png
│ ├── 1_AZBh2kDanLoTFmb3yzErGQ@2x.png
│ ├── 1_HclnWfZrh7Nzuj2_aHkPCQ.png
│ ├── 1_hKQcryMc6fbcS7r-g0sriQ.png
│ ├── 20190511000705.png
│ ├── 5cdd91aec0102b09bad70aff4bd0e9b2.jpg
│ ├── importance_sampling_concept.png
│ ├── v2-9514f7703820b5bf99c98405eb413359_1440w.jpg
│ └── v2-eb0945aa2185df958f4568e58300e77a_1440w.gif
├── PR IS&MCMC.md
├── PR Ⅰ MLE&MAP.md
├── PR Ⅱ Bayesian
├── PR Ⅱ MCMC&EM.md
├── PR Ⅲ Bayesian_MCMC.assets
│ ├── 1_3nBb4AqcriLcENdpBp4fpQ@2x.png
│ ├── 1_AZBh2kDanLoTFmb3yzErGQ@2x.png
│ ├── Beta_9_7.png
│ └── Example-of-Bayesian-inference-with-a-prior-distribution-a-posterior-distribution-and.png
├── PR Ⅲ GaussianProcessRegression.md
├── PR Ⅳ BayesFilter.md
├── PR Ⅳ BayesNeuralNetwork.assets
│ ├── bayes_nn.png
│ ├── bayesian_statistics.jpg
│ └── extrapolation_graph.png
├── PR Ⅳ BayesNeuralNetwork.md
├── PR Ⅴ GMM.assets
│ ├── aHR0cDovL2ltZy5ibG9nLmNzZG4ubmV0LzIwMTcwMzAyMTc1NDQyMjcy.jfif
│ └── aHR0cDovL2ltZy5ibG9nLmNzZG4ubmV0LzIwMTcwMzAyMTc1NTQ5ODc3.jfif
├── PR Ⅴ GMM.md
├── PR Ⅵ BayesGraph.md
├── PR Ⅶ VariationalInference.md
├── PR Ⅷ MeanField.md
├── PR Ⅸ Entropy.assets
│ ├── OIP.jfif
│ ├── image-20200519155921660.png
│ ├── image-20200523201708073.png
│ └── multicolored-abstract-painting-1095624-710x210.jpg
├── PR Ⅸ Entropy.md
├── Probabilistic in Robotic (PR).png
└── Probabilistic in Robotic (PR).xmind
├── README.md
├── RL from Demonstration
├── Deep_Q_From_Demonstration.assets
│ ├── image-20200522121546661.png
│ ├── image-20200522121626682.png
│ └── image-20200522122022338.png
├── Deep_Q_From_Demonstration.md
├── RLfrom_Imperfect_Demonstration.assets
│ ├── image-20200524205516760.png
│ ├── image-20200524213207117.png
│ ├── image-20200524215212011.png
│ ├── image-20200524215647493.png
│ ├── image-20200524221043525.png
│ ├── image-20200524221144933.png
│ ├── image-20200524221938482.png
│ └── image-20200524222429294.png
└── RLfrom_Imperfect_Demonstration.md
├── ROS
├── ROS Ⅰ:An Introduction.md
├── ROS Ⅱ:报错解决方案集锦.md
├── ROS Ⅲ:ROS 话题.md
├── ROS Ⅳ:ROS 消息&服务.md
└── ROS 机器人实战Ⅰ:TurtleBot3 Simulation SLAM + Navigation.md
├── Reasoning
├── PR Reasoning Ⅰ:Bandit问题与 UCB UCT AlphaGo.md
├── PR Reasoning Ⅱ:Inductive bias 归纳偏置及其在深度学习中的应用.md
├── PR Reasoning Ⅲ:基于图表征的关系推理框架 —— Graph Network.md
├── PR Reasoning Ⅳ:数理逻辑(命题逻辑、谓词逻辑)知识整理.md
├── PR Reasoning Ⅴ:命题推理与First Order Logic Reasoning.md
├── PR Reasoning Ⅵ:Counterfactual Reasoning 反事实推理及其在深度学习中的应用.md
├── PR Reasoning Ⅶ:Graph Reasoning 基于图的推理.md
├── PR Reasoning 序:Reasoning Robotics 推理机器人.md
└── Relational inductive biases, deep learning, and graph networks.md
├── Related Works
├── A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets
│ ├── image-20200528152451409.png
│ ├── image-20200528201113694.png
│ ├── image-20200528213920267.png
│ ├── image-20200529151635542.png
│ ├── image-20200529151707215.png
│ └── image-20200529154918336.png
├── A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.md
├── Deep Reinforcement Learning for Robotic Manipulation with Asynchronous Off-Policy Updates.md
├── End-to-End Robotic Reinforcement Learning without Reward Engineering.assets
│ ├── image-20191208214135640.png
│ ├── image-20191209102302915.png
│ └── image-20191209104206015.png
├── End-to-End Robotic Reinforcement Learning without Reward Engineering.md
├── IROS2019速读(一).md
├── IROS2019速读(三).assets
│ ├── image-20191221113203207.png
│ ├── image-20191221114139768.png
│ ├── image-20191221114316092.png
│ ├── image-20191221200948469.png
│ └── image-20191221234256181.png
├── IROS2019速读(三).md
├── IROS2019速读(二).assets
│ └── image-20191219185107956.png
├── IROS2019速读(二).md
├── IROS2019速读(五).md
├── IROS2019速读(四).assets
│ ├── dream_2.gif
│ ├── dream_6.gif
│ ├── image-20191222112555208.png
│ ├── image-20191222123748477.png
│ └── image-20191222144804275.png
├── IROS2019速读(四).md
├── IROS2019速读.assets
│ ├── image-20191216203301798.png
│ ├── image-20191216211111079.png
│ ├── image-20191216212441606.png
│ └── image-20191217203423494.png
├── Meta learning An Introduction.assets
│ ├── 1_AcaPiikZErVv_iFJzWekQg.gif
│ ├── NTM.png
│ ├── combine-slow-fast-weights.png
│ ├── equation-1577535254476.svg
│ ├── equation.svg
│ ├── few-shot-classification.png
│ ├── image-20191226194740259.png
│ ├── image-20191226200939808.png
│ ├── image-20191226202123590.png
│ ├── image-20191226202326065.png
│ ├── lstm-meta-learner.png
│ ├── maml-algo.png
│ ├── maml.png
│ ├── mann-meta-learning.png
│ ├── matching-networks.png
│ ├── meta-network.png
│ ├── prototypical-networks-1577419209148.png
│ ├── prototypical-networks.png
│ ├── relation-network.png
│ ├── reptile-algo.png
│ ├── reptile_vs_FOMAML.png
│ ├── siamese-conv-net.png
│ ├── train-meta-learner.png
│ └── v2-2d61ff11eb1a5a9e52d6c12eb333eb4b_hd.jpg
├── Meta learning An Introduction.md
├── Meta-Reinforcement-Learning An Introduction.md
├── Overcoming Exploration in Reinforcement Learning with Demonstrations.assets
│ └── image-20191211211229554.png
├── Overcoming Exploration in Reinforcement Learning with Demonstrations.md
├── The Predictron End-To-End Learning and Planning.assets
│ ├── image-20191211163322741.png
│ ├── image-20191211201710251.png
│ ├── image-20191211202507981.png
│ ├── image-20191212102538657.png
│ └── image-20191212102714873.png
├── The Predictron End-To-End Learning and Planning.md
├── When to Trust Your Model Model-Based Policy Optimization.assets
│ └── image-20191215201141993.png
├── When to Trust Your Model Model-Based Policy Optimization.md
├── 智源大会笔记.assets
│ ├── image-20200624101204649.png
│ ├── image-20200624101305253.png
│ ├── image-20200624101402324.png
│ ├── image-20200624101416544.png
│ ├── image-20200624101704078.png
│ ├── image-20200624101836768.png
│ ├── image-20200624102016044.png
│ ├── image-20200624102239093.png
│ ├── image-20200624102756731.png
│ ├── image-20200624102845779.png
│ ├── image-20200624102945812.png
│ ├── image-20200624103018972.png
│ ├── image-20200624103109295.png
│ ├── image-20200624103145785.png
│ ├── image-20200624104915752.png
│ ├── image-20200624105312169.png
│ ├── image-20200624105434860.png
│ ├── image-20200624105534892.png
│ ├── image-20200624110446944.png
│ └── image-20200624110634783.png
└── 智源大会笔记.md
├── Representation
└── Repre 1:Introduction.md
├── Robotics
└── Bimanual coordination.md
├── Simulator
├── MuJoCo机器人建模教程.assets
│ ├── download.html
│ ├── grid1.png
│ ├── grid2.png
│ ├── grid2pin.png
│ ├── openai-robotics-hand-with-cube-solved-crop-2000w.jpg
│ ├── particle2.png
│ └── unnamed.png
├── MuJoCo机器人建模教程.md
├── MuJoCo详细使用指南.md
├── PyBullet详细使用指南.md
└── Sim2real.md
├── Structured
├── PR Structure Ⅱ .assets
│ ├── image-20200719135624489.png
│ ├── image-20200719135706275.png
│ ├── image-20200719141215552.png
│ ├── image-20200719141544422.png
│ ├── image-20200719141645589.png
│ ├── image-20200719142143932.png
│ ├── image-20200719160240612.png
│ ├── image-20200719160620909.png
│ ├── image-20200719161117095.png
│ ├── image-20200719161134980.png
│ ├── image-20200719163653512.png
│ ├── image-20200719164027556.png
│ ├── image-20200719164332203.png
│ └── image-20200719164745855.png
├── PR Structured Ⅰ GNN.assets
│ ├── image-20200712132319666.png
│ ├── image-20200712152008297.png
│ ├── image-20200712152040834.png
│ ├── image-20200712152050882.png
│ ├── image-20200712154103209.png
│ ├── image-20200712155738229.png
│ ├── image-20200712161616515.png
│ ├── image-20200712165321733.png
│ ├── image-20200712170431243.png
│ ├── image-20200712171214741.png
│ ├── image-20200712172004136.png
│ ├── image-20200712172227154.png
│ ├── image-20200712174617755.png
│ ├── image-20200712175300499.png
│ ├── image-20200712175439303.png
│ ├── image-20200712175812940.png
│ ├── image-20200712180112851.png
│ ├── image-20200712195813501.png
│ ├── image-20200712203453039.png
│ ├── image-20200712212800180.png
│ ├── image-20200712212854141.png
│ ├── image-20200712213349638.png
│ ├── image-20200712213614662.png
│ ├── image-20200712214932503.png
│ ├── image-20200712215137652.png
│ ├── image-20200712230809276.png
│ ├── image-20200712231606429.png
│ └── image-20200719135706275.png
├── PR Structured Ⅱ:Structured Probabilistic Model Ⅰ.md
├── PR Structured Ⅲ:马尔可夫、隐马尔可夫 HMM 、条件随机场 CRF 全解析及其python实现.md
├── PR Structured Ⅳ:General Conditional Random Field (CRF).md
├── PR Structured Ⅴ:GraphRNN——依次生成节点和边的图生成模型.md
└── PR StructuredⅠ:Graph Neural Network An Introduction .md
├── Tools
├── Atlas
│ ├── Atlas 使用指南.assets
│ │ └── Atlas软硬件架构.png
│ ├── Atlas 使用指南.md
│ ├── Atlas安装配置流程.eps
│ ├── Atlas安装配置流程.png
│ ├── Atlas软硬件架构.eps
│ └── Atlas软硬件架构.png
├── C++部署Pytorch模型方法.docx
├── Docker
│ ├── Docker Ⅰ:安装与测试指南.md
│ ├── Docker Ⅱ:管理与使用命令手册.md
│ ├── Docker Ⅲ:Nvidia Docker安装与测试指南.md
│ ├── Docker Ⅳ:Nvidia Docker使用命令手册.md
│ └── Docker Ⅴ:Docker与Nvidia Docker踩坑与解决方案记录集.md
├── Habitat
│ └── Habitat Challenge提交指南.md
├── Tools 1:Qt 转 PyQt5 的 Pycharm 插件.md
├── Tools 3:python socket 服务器与客户端双向通信.md
├── Tools 4:Python三行转并行——真香.md
├── Tools 5:Python三行转并行后续——全局变量.md
├── Tools 6:如何用Readthedoc写一份优雅的技术文档.md
├── Tools 7:Python颜色设置.md
├── Tools 8:Tex符号大全.md
├── Tools 9:Zotero使用指南.md
├── Ubuntu
│ └── Ubuntu系统问题.md
└── color.py
├── Utils
├── HTML2PDF.py
├── PDFselector.py
└── basic_plot.py
└── img
└── image-20230825121432059.png
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.pdf filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: https://skylark0924.github.io/img/pay.png # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
13 |
14 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Reasoning/BOOKS
2 |
3 | *.pdf
4 | .idea/
5 | .vscode/
6 |
7 | .DS_Store
8 |
--------------------------------------------------------------------------------
/Causal/PR Causal Introduction.md:
--------------------------------------------------------------------------------
1 | # PR Causal Ⅰ:An Introduction
2 |
3 | [TOC]
4 |
5 | ## Materials
6 |
7 | Introductory blog post: [因果推理一:从 Yule-Simpson's Paradox 讲起](https://cosx.org/2012/03/causality1-simpson-paradox/)
8 |
9 | Book: [Causality: Models, Reasoning, and Inference - Judea Pearl](http://bayes.cs.ucla.edu/BOOK-2K/)
10 |
11 | Papers:
12 |
13 | - [An Introduction to Causal Inference - Judea Pearl](http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2836213&tool=pmcentrez&rendertype=abstract)
14 | - [Causal inference in statistics: An overview](https://ftp.cs.ucla.edu/pub/stat_ser/r350.pdf)
15 | - [Causal Reasoning from Meta-reinforcement Learning](http://arxiv.org/abs/1901.08162)
16 | - [Causal Discovery with Reinforcement Learning](http://arxiv.org/abs/1906.04477)
17 |
18 |
19 |
20 | ## Abstract
21 |
22 | This chapter introduces the concept of **causal inference** through simple examples, surveys the current state of research in causal inference, and explains how it differs from and relates to traditional probabilistic approaches such as Bayesian networks.
23 |
24 |
25 |
26 | ## Introduction
27 |
28 |
29 |
30 | ## Methods
31 |
32 |
33 |
34 | ## Difference & Function
35 |
36 |
--------------------------------------------------------------------------------
/Efficient/PR Efficient Ⅳ:五分钟内让四足机器人学会行走.md:
--------------------------------------------------------------------------------
1 | #! https://zhuanlan.zhihu.com/p/360314680
2 | 
3 | # PR Efficient Ⅳ: Teaching a Quadruped Robot to Walk on Its Own Within Five Minutes
4 | > **One Efficient paper a day brings robot learning one step closer to the real world.**
5 |
6 | In my observation, papers on Efficient RL usually achieve safe exploration at the same time. The two goals are naturally connected: efficiency is mostly obtained by compressing the state or action space, and it is easy to see how pruning some unsafe exploration falls out of that process. Combining efficiency with safe exploration moves reinforcement learning one step closer to deployment on real-world robots.
7 |
8 | Today we look at a 2019 CoRL paper from Google Robotics:
9 | 
10 |
11 | The results are quite impressive: starting from data collected with random robot actions, the robot learns a walking gait with only 4.5 minutes of data collection in total. Even including rollouts and experiment resets, it takes no more than 10 minutes: 36 episodes (45,000 control steps).
12 |
13 | ## Main Contribution
14 | We all know that model-based RL is a more reasonable and more efficient choice than model-free RL on robots, so I will not belabor that point here.
15 |
16 | For model-based RL on real-world robotics, this paper mainly addresses the following **three problems**:
17 | 1. The learned model needs to be sufficiently **accurate** for long-horizon planning. Even small short-term errors, accumulated over a long horizon, are fatal to control;
18 | 2. **Real-time** action planning at a high control frequency. Real-time constraints are a practical issue that robotic tasks, especially low-level controllers, cannot avoid;
19 | 3. **Safe** data collection for model learning is nontrivial, to prevent mechanical failures/damages.
20 |
21 | The corresponding **solutions** proposed in this paper:
22 | 1. **Accurate**: use a **multi-step loss** to prevent error accumulation in long-horizon prediction;
23 | 2. **Real-time**: run planning and control **in parallel**, and use the learned model to predict ahead so as to compensate for the time lag between planning and control, achieving **asynchronous control**;
24 | 3. **Safe**: use a trajectory generator to ensure the **smoothness** of planned actions.
25 |
26 | Let us now look at each of these contributions in detail.
27 |
28 | ## Multi-step loss
29 | **Why use a multi-step loss, rather than the model ensembles commonly used in MVE/STEVE/MBPO, to mitigate the effect of model error?**
30 |
31 | Ensembles work well too, but they increase planning time and hurt real-time performance.
32 |
33 | **Form of the multi-step loss**
34 | The ordinary single-step loss:
35 |
36 | 
37 |
38 | The multi-step loss:
39 | 
40 |
41 | Here $f_\theta(s_t, a_t)$ is the neural network that, as is customary in model-based RL, fits the state difference $s_{t+1} - s_t$. Its role in suppressing long-horizon model error is fairly self-evident.
42 |
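To make the multi-step idea concrete, here is a minimal PyTorch-style sketch (my own illustration, not the authors' code): a dynamics network `f_theta(s, a)` that predicts state differences is rolled forward on its *own* predictions for `K` steps, and the mean-squared error against the logged states is accumulated along the way, so long-horizon drift shows up directly in the loss.

```python
import torch

def multi_step_loss(f_theta, states, actions, K):
    """Sketch of a K-step prediction loss for a learned dynamics model.

    f_theta(s, a) predicts the state difference s_{t+1} - s_t.
    states:  tensor of shape (T+1, state_dim) from a logged trajectory
    actions: tensor of shape (T, action_dim)
    """
    loss = 0.0
    T = actions.shape[0]
    for t in range(T - K + 1):
        s_pred = states[t]
        for k in range(K):
            # roll the model forward on its own predictions,
            # so accumulated long-horizon error is penalized
            s_pred = s_pred + f_theta(s_pred, actions[t + k])
            loss = loss + torch.mean((s_pred - states[t + k + 1]) ** 2)
    return loss / (T - K + 1)
```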
43 | ## Parallel and asynchronous
44 | 
45 |
46 | Parallelization inevitably means the two stages take mismatched amounts of time. The paper therefore uses the learned model from the previous section so that, at time $t$, the planner can plan ahead for time $t+T$, compensating for the planning delay.
47 |
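As a rough illustration of the idea (my own sketch, with hypothetical `model` and `planner` callables and a planning latency of `T` control steps): while the controller keeps executing the previously planned actions, the planner starts from the state the learned model predicts `T` steps ahead, so the plan is ready for the state the robot will actually be in.

```python
def plan_ahead(model, planner, s_t, queued_actions, T):
    """Sketch: compensate planning latency with the learned model.

    model(s, a) predicts the state difference; queued_actions are the
    actions the controller will execute while this planning call runs.
    """
    s_future = s_t
    for a in queued_actions[:T]:
        s_future = s_future + model(s_future, a)  # predict T steps ahead
    # plan for the state the robot is expected to reach, not the current one
    return planner(s_future)
```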
48 | ## Smoothness
49 | 
50 | The concept of trajectory generators (TGs) here is borrowed from [Policies Modulating Trajectory Generators - Google, CoRL 2018](https://arxiv.org/pdf/1910.02812.pdf). From a quick read, a TG is a behavior generator that injects **memory** and **prior knowledge** into the controller, enabling the robot to produce complex behaviors or gaits that better match human expectations.
51 |
52 | In addition, the paper also smooths the sequence of output actions.
53 | 
54 |
55 | ## Experiments
56 | ### On-robot experiments
57 | 
58 |
59 | ### Efficiency comparison with model-free methods
60 | Learning efficiency is an order of magnitude higher than model-free methods. There is no comparison against SOTA model-based methods here, but considering that five minutes of training already gives these results, it is still quite good.
61 |
62 | 
63 |
64 | ## Conclusion
65 | From the efficiency perspective the results are solid, and the paper also considers and solves many **practical engineering problems**; it is well worth learning from.
--------------------------------------------------------------------------------
/Efficient/PR Efficient Ⅴ:DERL with self-predictive representations.md:
--------------------------------------------------------------------------------
1 | #! https://zhuanlan.zhihu.com/p/360526111
2 | 
3 | # PR Efficient Ⅴ: Self-Predictive Representations Let an RL Agent Understand the World Efficiently
4 | > One Efficient paper a day brings robot learning one step closer to the real world.
5 |
6 | > I'm going for the columnist badge, so please follow the RL in Robotics column and help me out~
7 |
8 | This paper is from ICLR 2021 and is a first step toward my idea of **combining model-based RL with representation learning**.
9 | 
10 |
11 | ## Motivation
12 | Inspired by recent work in semi-supervised and self-supervised learning, the paper maps the state space into a latent space and uses self-supervision to generate effectively unlimited training data on the fly from the natural structure of the data, thereby improving data efficiency. The RL algorithm can use this learned representation model to predict the latent state representations several steps into the future.
13 |
14 | ## Method
15 | 
16 | ### Online and Target Encoder
17 | An **online encoder** $f_o$ maps the observed state $s_t$ to a representation $z_t\triangleq f_o(s_t)$. This encoder is only applied to the current time step.
18 |
19 | What is actually used for predicting future state representations is the target encoder $f_m$. It is not updated by gradients; its parameters track the online encoder via:
20 |
21 | $$
22 | \theta_m\leftarrow \tau \theta_m + (1-\tau)\theta_o\\
23 | $$
24 |
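In code this is the usual exponential-moving-average target update. A minimal sketch (my own illustration, assuming both encoders are `torch.nn.Module`s with matching parameter order):

```python
import torch

@torch.no_grad()
def update_target_encoder(online_encoder, target_encoder, tau=0.99):
    """EMA update: theta_m <- tau * theta_m + (1 - tau) * theta_o."""
    for p_o, p_m in zip(online_encoder.parameters(), target_encoder.parameters()):
        p_m.data.mul_(tau).add_((1.0 - tau) * p_o.data)
```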
25 | ### Transition Model
26 | $$
27 | \hat{z}_{t+k+1} \triangleq h(\hat{z}_{t+k},a_{t+k})
28 | $$
29 |
30 | ### Projection Heads
31 | The latent representations from the online and target encoders are each projected into a smaller latent space to ease learning, and a prediction head $q$ is introduced to learn the relationship between the online projection and the target projection.
32 | 
33 |
34 | ### Prediction Loss
35 | The SPR future-prediction loss is computed by summing the cosine similarities between the predicted and observed representations from time $t$ to $t+K$. (Like the multi-step loss in the previous post, this limits long-horizon learned-model error.)
36 | 
37 |
38 | The prediction loss enters training as an auxiliary loss, specifically $\mathcal{L}_\theta^{total} = \mathcal{L}_\theta^{RL}+\lambda \mathcal{L}_\theta^{SPR}$.
39 |
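A minimal sketch of this loss (my own illustration; how the predicted and target latents are produced is assumed to follow the description above): the negative cosine similarity is summed over the K prediction steps and then enters the total objective with weight $\lambda$.

```python
import torch
import torch.nn.functional as F

def spr_loss(pred_latents, target_latents):
    """Sum of negative cosine similarities over K prediction steps.

    pred_latents, target_latents: tensors of shape (K, batch, dim),
    i.e. the online predictions and the target-encoder projections
    for steps t+1 ... t+K.
    """
    return -F.cosine_similarity(pred_latents, target_latents, dim=-1).sum(0).mean()

# total objective: L_total = L_RL + lambda * L_SPR  (SPR acts as an auxiliary loss)
```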
40 | ### Algorithm
41 | 
42 |
43 | ## Conclusion
44 | In fact, the highlight of this paper is Section 3, Related Work, which **surveys a large amount of recent progress on data-efficient RL and on combining RL with state representations**. I summarize it here:
45 | 1. **SiMPLe** (Kaiser et al., 2019): learns a pixel-level transition model of Atari to generate simulated training data, achieving good scores on several games in the 100k-frame setting, but still requiring **weeks** of training time;
46 | 2. **Data-Efficient Rainbow (DER) and OTRainbow** (Hasselt et al. (2019) and Kielak (2020)): introduce Rainbow variants tuned for sample efficiency, which achieve comparable or better performance with much less computation;
47 | 3. (Hafner et al., 2019; Lee et al., 2019; Hafner et al., 2020): improve sample efficiency with latent-space models trained with a **reconstruction loss**;
48 | 4. **DrQ** (Yarats et al., 2021) and **RAD** (Laskin et al., 2020): find that modest data augmentation can greatly improve sample efficiency in RL;
49 | 5. **CURL** (Srinivas et al., 2020): proposes combining image augmentation and a contrastive loss for representation learning in RL. However, the follow-up RAD results (Laskin et al., 2020) suggest that most of CURL's benefit comes from the image augmentation rather than the contrastive loss. (This is why the present paper keeps emphasizing that its model is compatible with data augmentation.)
50 | 6. **CPC** (Oord et al., 2018), **CPC|Action** (Guo et al., 2018), **ST-DIM** (Anand et al., 2019) and **DRIML** (Mazoure et al., 2020): optimize various temporal contrastive losses in reinforcement learning environments;
51 | 7. Kipf et al. (2019): propose learning object-oriented **contrastive representations** by training a **structured transition model** based on **graph neural networks**;
52 | 8. **DeepMDP** (Gelada et al., 2019): trains a transition model with an unnormalized L2 loss to predict representations of future states, together with a reward-prediction objective;
53 | 9. **PBL** (Guo et al., 2020): predicts representations of future states using two separate target networks trained directly by gradient descent.
54 |
55 |
56 |
57 | A major limitation of this paper is that **self-predictive representations rely on fully observed global states, restricting its applications to video games**; it is still not suitable for real-world robotics.
--------------------------------------------------------------------------------
/Efficient/PR Efficient Ⅵ:从RL的五个方面分析Sample Efficient.md:
--------------------------------------------------------------------------------
1 | # PR Efficient Ⅵ: Analyzing Sample Efficiency from Five Aspects of RL
2 |
3 | This paper is Prof. Yang Yu's IJCAI-18 work.
4 | 
5 | > One Efficient paper a day brings robot learning one step closer to the real world.
6 |
7 | > I'm going for the columnist badge, so please follow the RL in Robotics column and help me out~
8 |
9 | The paper discusses possible ways to reduce the sample cost of reinforcement learning from five aspects: exploration, optimization, environment modeling, experience transfer, and abstraction.
--------------------------------------------------------------------------------
/Efficient/PR Efficient Ⅶ:Efficient RL 中表征学习的理论基础.md:
--------------------------------------------------------------------------------
1 | #! https://zhuanlan.zhihu.com/p/360596249
2 | 
3 | # PR Efficient Ⅶ: A Theoretical Study of How Representation Learning Affects Efficient RL
4 | > Cover image from [Link](https://www.google.com.hk/url?sa=i&url=http%3A%2F%2Fwww.allwhitebackground.com%2Fblue-minimalist-wallpapers.html%2Fdownload%2F29937&psig=AOvVaw234IM3ZqsztPUI2kQMh9vw&ust=1617032639790000&source=images&cd=vfe&ved=0CAIQjRxqFwoTCKCE6pOq0-8CFQAAAAAdAAAAABAJ)
5 |
6 | > One Efficient paper a day brings robot learning one step closer to the real world.
7 |
8 | > I'm going for the columnist badge, so please follow the RL in Robotics column and help me out~
9 |
10 | This paper is from ICLR 2020: [Is a Good Representation Sufficient for Sample Efficient Reinforcement Learning?](https://link.zhihu.com/?target=https%3A//arxiv.org/pdf/1910.03016.pdf).
11 | 
12 |
13 |
14 | Recent work has studied only learned-model error in model-based RL, and rarely the necessary conditions for Efficient RL. This paper finds that, from a statistical point of view, the requirements a representation must satisfy for sample-efficient RL are **far stricter** than the conventional approximation viewpoint suggests. The main results establish clear thresholds for RL, showing that there are hard limits on what counts as good function approximation (in terms of the dimensionality of the representation). These lower bounds highlight that **unless the quality of its function approximation exceeds certain strict thresholds, a good representation is not sufficient for Efficient RL.** The paper sets out to understand whether efficient learning is possible when we have access to an accurate (and compact) parametric representation.
15 |
16 | ## Proof
17 | Since my theory background is not strong, I will not reproduce the proofs here (facepalm; "not good enough yet still trying to analyze papers" describes me). I recommend the article by [@张楚珩](https://www.zhihu.com/people/zhang-chu-heng):
18 |
19 | [【强化学习 101】Representation Lower Bound](https://zhuanlan.zhihu.com/p/100213425 'card')
20 |
21 | ## Results
22 | 1. For value-based learning, the paper shows that even if the $Q$ functions of all policies can be approximated by linear functions of the given representation with approximation error $\delta=\Omega\left(\sqrt{\dfrac{H}{d}}\right)$, where $d$ is the representation dimension and $H$ is the planning horizon, the agent still needs an exponential number of samples to find a near-optimal policy;
23 | 2. For model-based learning, the paper shows that even if the transition matrix and the reward function can be approximated by linear functions of the given representation with approximation error $\delta=\Omega\left(\sqrt{\dfrac{H}{d}}\right)$, the agent still needs an exponential number of samples to find a near-optimal policy;
24 | 3. For policy-based learning, the paper shows that even if the optimal policy can be perfectly predicted by a linear function of the given representation with a strictly positive margin, the agent still needs an exponential number of samples to find a near-optimal policy.
25 |
26 | These lower bounds hold even for deterministic systems, and even when the transition model is known. The results highlight the following insights:
27 | 1. There is a strict threshold lower bound on the worst-case approximation quality of the representation;
28 | 2. The efficiency problem is not caused by exploration in the conventional sense; an unknown reward function alone is enough to make the problem intractable;
29 | 3. The lower bounds do not arise because the agent cannot do efficient supervised learning: if the data distribution were fixed, the assumptions do admit polynomial sample-complexity upper bounds;
30 | 4. The main difficulty comes from distribution mismatch.
31 |
32 | ## Separations
33 | The paper also compares several algorithmic settings.
34 |
35 | Perfect representation vs. good-but-not-perfect representation
36 |
37 | Conclusion: **a better representation has a provable exponential benefit.**
38 |
39 | Value-based learning vs. policy-based learning
40 |
41 | Conclusion: **a representation's ability to predict the Q function is a much stronger property than its ability to predict the optimal policy.**
42 |
43 | Supervised learning vs. reinforcement learning
44 |
45 | Conclusion: **the sample complexity depends exponentially on the planning horizon H.**
46 |
47 | Imitation learning vs. reinforcement learning
48 |
49 | Conclusion: **with function approximation, policy-based RL is an order of magnitude worse than IL.**
50 |
51 |
--------------------------------------------------------------------------------
/Federated Learning/FLⅠ:联邦学习(Federated Learning)入门指南.md:
--------------------------------------------------------------------------------
1 | # FLⅠ: A Beginner's Guide to Federated Learning
2 |
3 | [TOC]
4 |
5 |
6 |
7 | ## Distributed Machine Learning
8 |
9 |
10 |
11 |
12 |
13 | ## Differences Between Federated Learning and Distributed Learning
14 |
15 |
16 |
17 | ## Communication Issues
18 |
19 |
20 |
21 | ## The Federated Averaging Algorithm
22 |
23 |
24 |
25 | ## Privacy Leakage and Privacy Protection
26 |
27 |
28 |
29 | ## Security Issues
30 |
31 |
32 |
33 |
34 |
35 | ## Conclusion
36 |
37 |
38 |
39 | ## Reference
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/Imitation learning/02DAgger.assets/image-20200514200538762.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/02DAgger.assets/image-20200514200538762.png
--------------------------------------------------------------------------------
/Imitation learning/02DAgger.assets/微信截图_20200514220002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/02DAgger.assets/微信截图_20200514220002.png
--------------------------------------------------------------------------------
/Imitation learning/02DAgger.assets/微信截图_20200514220137.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/02DAgger.assets/微信截图_20200514220137.png
--------------------------------------------------------------------------------
/Imitation learning/02DAgger.assets/微信截图_20200514220221.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/02DAgger.assets/微信截图_20200514220221.png
--------------------------------------------------------------------------------
/Imitation learning/02DAgger.assets/微信截图_20200514220412.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/02DAgger.assets/微信截图_20200514220412.png
--------------------------------------------------------------------------------
/Imitation learning/02DAgger.assets/微信截图_20200514220508.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/02DAgger.assets/微信截图_20200514220508.png
--------------------------------------------------------------------------------
/Imitation learning/02DAgger.assets/微信截图_20200514220651.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/02DAgger.assets/微信截图_20200514220651.png
--------------------------------------------------------------------------------
/Imitation learning/03EnsembleDAgger.assets/image-20200514223244922.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/03EnsembleDAgger.assets/image-20200514223244922.png
--------------------------------------------------------------------------------
/Imitation learning/03EnsembleDAgger.assets/微信截图_20200514225330.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/03EnsembleDAgger.assets/微信截图_20200514225330.png
--------------------------------------------------------------------------------
/Imitation learning/03EnsembleDAgger.md:
--------------------------------------------------------------------------------
1 | ## Imitation Learning Ⅲ: EnsembleDAgger
2 |
3 | **EnsembleDAgger: A Bayesian Approach to Safe Imitation Learning**
4 |
5 | 2019 | Paper | Stanford & UIUC
6 |
7 | *Kunal Menda, Katherine Driggs-Campbell, and Mykel J. Kochenderfer*
8 |
9 | Imitation Learning (IL) uses expert demonstrations to guide exploration. As introduced in the first chapter, IL can fail badly when it encounters situations not covered by the dataset, which is fatal on a real robot and is also a weakness of the DAgger algorithm. This paper therefore proposes a **safe** imitation learning method based on a Bayesian approach.
10 |
11 | ### Preliminary
12 |
13 | Imitation Learning & DAgger
14 |
15 | https://zhuanlan.zhihu.com/p/140348314
16 |
17 | https://zhuanlan.zhihu.com/p/140939491
18 |
19 | Bayesian Methods
20 |
21 | https://zhuanlan.zhihu.com/p/139478368
22 |
23 | https://zhuanlan.zhihu.com/p/139523520
24 |
25 | *p.s. Building up your own knowledge system really is an interesting thing.*
26 |
27 | ### DAgger & SafeDAgger
28 |
29 | If we redraw the structure of the DAgger algorithm from the previous chapter as in the figure below (the underlying idea is unchanged), we can see that the algorithms that follow differ only in the **Decision Rule**:
30 |
31 | 
32 |
33 | 
34 |
35 | A recent paper already proposed SafeDAgger, which uses the discrepancy between the expert and the novice as the criterion for judging whether an action is safe.
36 |
37 | ### EnsembleDAgger
38 |
39 | This paper extends DAgger into a **probabilistic space**, aiming to minimize expert intervention while bounding the probability of failure. **Measuring *doubt* by quantifying uncertainty and confidence, and using that to judge safety, is clearly more reasonable than SafeDAgger.** To quantify this doubt, the paper uses an ensemble of neural networks to estimate the **variance** of the novice's action in a given state, showing that this can effectively approximate Gaussian processes (GPs) even in complex high-dimensional spaces. See the Preliminary section for why GPs and Bayesian methods are used here.
40 |
41 | 
42 |
43 | EnsembleDAgger adds a variance check to the decision rule, which is called the Doubt rule; a minimal sketch of the idea follows below.
44 |
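Here is my own sketch of such a decision rule (my reading of the figure, not the authors' code; the threshold names `tau` and `chi` are hypothetical): the novice acts only if its mean action is close to the expert's *and* the ensemble variance (the doubt) is small; otherwise control falls back to the expert.

```python
import numpy as np

def ensemble_dagger_action(ensemble, expert, state, tau, chi):
    """Sketch of an EnsembleDAgger-style decision rule.

    ensemble: list of novice policies; expert: expert policy.
    tau: bound on discrepancy to the expert's action.
    chi: bound on the ensemble variance (the novice's "doubt").
    """
    novice_actions = np.stack([pi(state) for pi in ensemble])
    mean_action = novice_actions.mean(axis=0)
    doubt = novice_actions.var(axis=0).max()          # epistemic-uncertainty proxy
    discrepancy = np.linalg.norm(mean_action - expert(state))
    if discrepancy < tau and doubt < chi:
        return mean_action   # safe enough: let the novice act
    return expert(state)     # otherwise defer to the expert
```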
45 | **Why does this make things safe? Here are the arguments given in the paper:**
46 |
47 | 1. The expert prefers trajectories that avoid failure states, and rarely visits near failure states, implying that states dissimilar to those in expert trajectories (or states unfamiliar to the novice) are likely to be in closer proximity to failure states.
48 | 2. Following from (1), and by capturing epistemic uncertainty, or lack of familiarity with states in the training dataset, the novice’s doubt provides a model-free proxy for proximity to failure states.
49 | 3. In order to constrain the probability of encountering a failure state, the discrepancy between the action taken and the expert’s action is less than some bound.
50 | 4. The ideal bounds should be state-dependent, such that the bound is tighter in close proximity to failure states.
51 | 5. Following from (2, 4), the bound on discrepancy should decrease as the novice’s doubt increases.
52 |
53 | Hmm, that doesn't sound all that convincing...
54 |
55 | ### Closing remarks
56 |
57 | This paper is not very mathematical and offers no theoretical justification, and the idea feels somewhat too simple and forced. Turning one hyperparameter into two is an odd kind of novelty. Given that everything has already been made probabilistic, why not go one step further and do blended control, which would be even safer? Of course, if you are reading this and had the same thought: sorry, you are too late, because I have already written a paper on that idea.
58 |
59 |
--------------------------------------------------------------------------------
/Imitation learning/EnsembleDAgger.assets/image-20200514131357018.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/EnsembleDAgger.assets/image-20200514131357018.png
--------------------------------------------------------------------------------
/Imitation learning/EnsembleDAgger.assets/image-20200514131458469.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/EnsembleDAgger.assets/image-20200514131458469.png
--------------------------------------------------------------------------------
/Imitation learning/Introduction.assets/1_9gdENk_iThuoha-ZJK4oOQ.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/Introduction.assets/1_9gdENk_iThuoha-ZJK4oOQ.jpeg
--------------------------------------------------------------------------------
/Imitation learning/Introduction.assets/1_P076bt-xcC3mKyYCINzSFg.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/Introduction.assets/1_P076bt-xcC3mKyYCINzSFg.jpeg
--------------------------------------------------------------------------------
/Imitation learning/Introduction.assets/1_RkCKUyRW68fAuysDgWhuMA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/Introduction.assets/1_RkCKUyRW68fAuysDgWhuMA.png
--------------------------------------------------------------------------------
/Imitation learning/Introduction.assets/1_UuY1bsit07pwijg1pSOQgQ.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/Introduction.assets/1_UuY1bsit07pwijg1pSOQgQ.jpeg
--------------------------------------------------------------------------------
/Imitation learning/Introduction.assets/image-20200513095032814.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/Introduction.assets/image-20200513095032814.png
--------------------------------------------------------------------------------
/Imitation learning/Introduction.assets/image-20200513095203592.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/Introduction.assets/image-20200513095203592.png
--------------------------------------------------------------------------------
/Imitation learning/Introduction.assets/image-20200513110523699.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/Introduction.assets/image-20200513110523699.png
--------------------------------------------------------------------------------
/Imitation learning/Introduction.assets/stacking_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/Introduction.assets/stacking_demo.gif
--------------------------------------------------------------------------------
/Imitation learning/Introduction.assets/v2-61bb833f9464f5c7fc088045f26c909d_1440w.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/Introduction.assets/v2-61bb833f9464f5c7fc088045f26c909d_1440w.png
--------------------------------------------------------------------------------
/Imitation learning/Introduction.assets/v2-9ad04f29683121bb7870e0589b8ec389_1440w.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Imitation learning/Introduction.assets/v2-9ad04f29683121bb7870e0589b8ec389_1440w.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Skylark
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MARL/MARL Ⅰ:A Selective Overview of Theories and Algorithms.assets/2020-09-06_23-17-55.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/MARL/MARL Ⅰ:A Selective Overview of Theories and Algorithms.assets/2020-09-06_23-17-55.jpg
--------------------------------------------------------------------------------
/MARL/MARL Ⅰ:A Selective Overview of Theories and Algorithms.assets/2020-09-08_12-50-46.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/MARL/MARL Ⅰ:A Selective Overview of Theories and Algorithms.assets/2020-09-08_12-50-46.jpg
--------------------------------------------------------------------------------
/MARL/MARL Ⅰ:A Selective Overview of Theories and Algorithms.assets/image-20200906172748898.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/MARL/MARL Ⅰ:A Selective Overview of Theories and Algorithms.assets/image-20200906172748898.png
--------------------------------------------------------------------------------
/MARL/MARL Ⅰ:A Selective Overview of Theories and Algorithms.assets/image-20200906225644703.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/MARL/MARL Ⅰ:A Selective Overview of Theories and Algorithms.assets/image-20200906225644703.png
--------------------------------------------------------------------------------
/MARL/MARL Ⅱ:QD-learning.assets/image-20200914210232950.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/MARL/MARL Ⅱ:QD-learning.assets/image-20200914210232950.png
--------------------------------------------------------------------------------
/MBRL/img/equation.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MBRL/img/image-20200505172434162.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/MBRL/img/image-20200505172434162.png
--------------------------------------------------------------------------------
/MBRL/img/微信截图_20200505223246.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/MBRL/img/微信截图_20200505223246.png
--------------------------------------------------------------------------------
/MBRL/img/微信截图_20200505223309.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/MBRL/img/微信截图_20200505223309.png
--------------------------------------------------------------------------------
/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703144120242.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703144120242.png
--------------------------------------------------------------------------------
/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703144159048.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703144159048.png
--------------------------------------------------------------------------------
/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703144238614.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703144238614.png
--------------------------------------------------------------------------------
/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703151212485.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703151212485.png
--------------------------------------------------------------------------------
/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703154225724.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703154225724.png
--------------------------------------------------------------------------------
/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703154359760.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Bayesian Relational Memory for Semantic Visual Navigation.assets/image-20200703154359760.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705111106393.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705111106393.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705111939601.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705111939601.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705123909206.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705123909206.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705130626188.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705130626188.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705131656750.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705131656750.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705131744033.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705131744033.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705131747173.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705131747173.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705131859781.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705131859781.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705131926516.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705131926516.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705132141819.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.assets/image-20200705132141819.png
--------------------------------------------------------------------------------
/Paper Reading/Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships.md:
--------------------------------------------------------------------------------
1 | # Improving Target-driven Visual Navigation with Attention on 3D Spatial Relationships
2 |
3 | [Paper](http://arxiv.org/abs/2005.02153) | Code | 2020
4 |
5 | *Yunlian Lv, Ning Xie, Yimin Shi, Zijiao Wang, and Heng Tao Shen*
6 |
7 | > **Paper Reading**: Task of **Active Visual Navigation**
8 |
9 | ## Introduction
10 |
11 | This is the fourth simulation-only paper, and I have given up hoping for physical usage in this area. It is the familiar AI2-THOR simulator again; let's see what the idea is this time.
12 |
13 | The idea of the paper is as follows:
14 |
15 | 1. Extract the **3D spatial relationships** between objects from the images seen during exploration to build relational graphs;
16 | 2. Use a **GCN** to obtain features for the graph nodes;
17 | 3. An **attention mechanism** learns which parts of the observed image to attend to, in order to guide the policy's exploration;
18 | 4. Use an **HER**-like method, which the paper calls target skill extension (TSE), to learn from failed exploration and address the sparse-reward problem;
19 | 5. In addition, the paper uses **imitation learning** with demonstration data to assist the agent.
20 |
21 | **Two things are a bit different in this paper:**
22 |
23 | - There is an extra stop action (but how is this different from done?);
24 | - Movable objects are placed in the environment to increase task complexity, e.g., an egg inside a box only counts as visible once the box has been opened.
25 |
26 |
27 |
28 |
29 |
30 | 
31 |
32 | ## The Proposed Method
33 |
34 | 
35 |
36 | ### Base Algorithm
37 |
38 | As in the previous papers, A3C with an LSTM network is used as the base RL algorithm, in the hope of learning a mapping from 2D images to actions in the 3D scene. (Wouldn't SAC or other algorithms work? Why must it be A3C?)
39 |
40 | Similar to the approach from Fei-Fei Li's group, the target image and the observation image are both taken as input.
41 |
42 | ### Visual Features
43 |
44 | 1. A **ResNet50** pretrained on ImageNet serves as the feature extractor for both the target and observation images;
45 | 2. A **deep siamese network** is used to concatenate the two.
46 |
47 | The rest is in the figure, so I will not repeat it.
48 |
49 | ### Spatial Relationships
50 |
51 | This part is the real essence of the paper. It points out that a 3D knowledge graph $G=(V,E)$ can encode several kinds of spatial relations (front, left, right, in, up, under).
52 |
53 | - Nodes V: represent objects
54 | - Edges E: represent the relation between two objects
55 |
56 | YOLOv3 is used as the object detector.
57 |
58 | 
59 |
60 | 1. Relation extraction module: objects that appear together are connected by an edge;
61 | 2. Build the adjacency matrix A from the knowledge graph, and initialize the node features $H_0$ as one-hot feature vectors;
62 | 3. Normalize A to get $\hat A$, so that each node also includes its own node feature;
63 | 4. A GNN aggregates node information along edges to produce new node representations, so a GCN is used to compute the node feature vectors, $H^{(l+1)}=\sigma\left(\hat{A} H^{(l)} W^{(l)}\right)$ (see the sketch after this list);
64 | 5. $H_t, H_g$ encode the spatial feature information obtained from the GCN for the target and observation images;
65 | 6. An attention mechanism produces the attention vector $H_a$, which serves as the state input of the RL algorithm.
66 |
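To make step 4 concrete, here is a minimal sketch of one GCN layer in the form used above (my own illustration; the symmetric normalization with added self-loops is an assumption, following the standard GCN formulation rather than anything stated in the paper):

```python
import numpy as np

def normalize_adjacency(A):
    """A_hat = D^{-1/2} (A + I) D^{-1/2}: self-loops keep each node's own feature."""
    A_hat = A + np.eye(A.shape[0])
    d_inv_sqrt = 1.0 / np.sqrt(A_hat.sum(axis=1))
    return A_hat * d_inv_sqrt[:, None] * d_inv_sqrt[None, :]

def gcn_layer(A_hat, H, W):
    """One propagation step: H^{(l+1)} = ReLU(A_hat @ H^{(l)} @ W^{(l)})."""
    return np.maximum(A_hat @ H @ W, 0.0)
```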
67 | ### Sub-targets Extraction
68 |
69 | This part imitates HER to deal with sparse rewards. Since A3C is multi-threaded, a target learned in one thread may still be encountered by other threads. The paper therefore combines A3C with sub-targets for data augmentation and calls this the target skill extension (TSE) module.
70 |
71 | 
72 |
73 | Whenever a new object, or a target belonging to another thread, appears in the observation, it is treated as a sub-target. A single trajectory can thus be split into multiple trajectories with different targets, which alleviates the sparse reward problem, as sketched below.
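
A rough sketch of this HER-style relabeling under my own assumptions: each step exposes a hypothetical `detected_objects` field, and a relabeled trajectory gets reward 1 only at the step where its sub-target first appears. This is only an illustration of the idea, not the paper's implementation.

```python
def split_by_subtargets(trajectory, candidate_targets):
    """Split one (possibly failed) trajectory into several shorter ones,
    each relabeled with a sub-target that was actually reached on the way."""
    sub_trajectories = []
    for t, step in enumerate(trajectory):
        reached = set(step["detected_objects"]) & set(candidate_targets)
        for obj in reached:
            relabeled = [
                {**s, "target": obj, "reward": 1.0 if i == t else 0.0}
                for i, s in enumerate(trajectory[: t + 1])
            ]
            sub_trajectories.append(relabeled)
    return sub_trajectories
```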
74 |
75 | ### Imitation Learning
76 |
77 | To help the agent and speed up training, the paper provides one demonstration trajectory for **each training target**.
78 |
79 |
80 |
81 | ## Experiment
82 |
83 | ### Baseline
84 |
85 | 
86 |
87 | ### Results
88 |
89 | 
90 |
91 | 
92 |
93 | 
--------------------------------------------------------------------------------
/Paper Reading/Learning to Adapt in Dynamic, Real-World Environments through Meta-Reinforcement Learning.md:
--------------------------------------------------------------------------------
1 | # Learning to Adapt in Dynamic, Real-World Environments through Meta-Reinforcement Learning
2 |
3 | [Paper](https://sites.google.com/berkeley.edu/metaadaptivecontrol) | [Code](https://github.com/iclavera/learning_to_adapt) | ICLR 2019
4 |
5 | *Anusha Nagabandi, Ignasi Clavera, Simin Liu, Ronald S. Fearing, Pieter Abbeel, Sergey Levine, & Chelsea Finn*
6 |
7 | > **Paper Reading**: **Meta-learning**
--------------------------------------------------------------------------------
/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200702211649011.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200702211649011.png
--------------------------------------------------------------------------------
/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200702213128415.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200702213128415.png
--------------------------------------------------------------------------------
/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703101618018.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703101618018.png
--------------------------------------------------------------------------------
/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703101914142.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703101914142.png
--------------------------------------------------------------------------------
/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703104319711.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703104319711.png
--------------------------------------------------------------------------------
/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703121626950.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703121626950.png
--------------------------------------------------------------------------------
/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703121644122.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703121644122.png
--------------------------------------------------------------------------------
/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703121752526.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703121752526.png
--------------------------------------------------------------------------------
/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703121906914.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Learning to learn how to learn Self-adaptive visual navigation using meta-learning.assets/image-20200703121906914.png
--------------------------------------------------------------------------------
/Paper Reading/Long Range Neural Navigation Policies for the Real World.md:
--------------------------------------------------------------------------------
1 | # Long Range Neural Navigation Policies for the Real World
2 |
3 | [Paper](http://arxiv.org/abs/1903.09870) | Code | 2019
4 |
5 | *Ayzaan Wahid, Alexander Toshev, Marek Fiser and Tsang-Wei Edward Lee*
6 |
7 | ## Introduction
8 |
9 | Surprise! **The first real-world paper**: finally, no more simulation-only work! The idea resembles hierarchical RL:
10 |
11 | - High-level policy: understands the image and produces a long-term plan;
12 | - Low-level policy: converts the long-term plan into safe and robust, platform-specific low-level commands.
13 |
14 | Correspondingly, the observation carries more information: it is no longer pure visual input, but information processed by SLAM.
--------------------------------------------------------------------------------
/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706152627903.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706152627903.png
--------------------------------------------------------------------------------
/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706153724992.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706153724992.png
--------------------------------------------------------------------------------
/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706165235163.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706165235163.png
--------------------------------------------------------------------------------
/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706175359541.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706175359541.png
--------------------------------------------------------------------------------
/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706182841042.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706182841042.png
--------------------------------------------------------------------------------
/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706182939318.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706182939318.png
--------------------------------------------------------------------------------
/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706183146814.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706183146814.png
--------------------------------------------------------------------------------
/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706183211120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706183211120.png
--------------------------------------------------------------------------------
/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706190803028.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Scene memory transformer for embodied agents in long-horizon tasks.assets/image-20200706190803028.png
--------------------------------------------------------------------------------
/Paper Reading/Semi-parametric topological memory for navigation.assets/image-20200706125057832.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Semi-parametric topological memory for navigation.assets/image-20200706125057832.png
--------------------------------------------------------------------------------
/Paper Reading/Semi-parametric topological memory for navigation.assets/image-20200706125603144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Semi-parametric topological memory for navigation.assets/image-20200706125603144.png
--------------------------------------------------------------------------------
/Paper Reading/Semi-parametric topological memory for navigation.assets/image-20200706140848398.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Semi-parametric topological memory for navigation.assets/image-20200706140848398.png
--------------------------------------------------------------------------------
/Paper Reading/Semi-parametric topological memory for navigation.assets/image-20200706140947547.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Semi-parametric topological memory for navigation.assets/image-20200706140947547.png
--------------------------------------------------------------------------------
/Paper Reading/Semi-parametric topological memory for navigation.assets/image-20200706141720545.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Semi-parametric topological memory for navigation.assets/image-20200706141720545.png
--------------------------------------------------------------------------------
/Paper Reading/Semi-parametric topological memory for navigation.md:
--------------------------------------------------------------------------------
1 | # Semi-parametric topological memory for navigation
2 |
3 | [Paper](https://arxiv.org/abs/1803.00653) | [Code](https://github.com/nsavinov/SPTM) | ICLR 2018
4 |
5 | *Nikolay Savinov, Alexey Dosovitskiy, Vladlen Koltun*
6 |
7 | > **Paper Reading**: Task of **Active Visual Navigation**
8 |
9 | ## Introduction
10 |
11 | Inspired by landmark-based navigation in animals, this paper proposes a new memory architecture to achieve generalization in visual navigation. The structure, called **semi-parametric topological memory (SPTM)**, consists of a (non-parametric) **graph** whose nodes correspond to locations in the environment, and a (parametric) **deep network** that retrieves nodes from the graph based on observations.
12 |
13 | As before, the graph stores connectivity between landmarks. The paper uses SPTM as the planning module (just like the model in model-based RL, except this time it is a graph model).
14 |
15 |
16 |
17 | ## Method
18 |
19 | ### Semi-parametric Topological Memory
20 |
21 | 
22 |
23 | One odd thing: I initially took $o^w$ in the figure above to be a state representation, **but it turns out to be a sub-target!** That makes it easy to understand why the locomotion network later takes both the waypoint observation and the current observation as input. Let me explain step by step:
24 |
25 | **Retrieval Net:**
26 |
27 | This is a retrieval network used to estimate the similarity of two observations. It is first pretrained on a set of environments in a **self-supervised** way, using trajectories from the agent's random exploration. In essence this is a classification task, so how is the self-supervision realized? (A sampling sketch follows the list below.)
28 |
29 | 1. From the random exploration trajectories, sample triplets $\left\langle\mathbf{o}_{i}, \mathbf{o}_{j}, y_{i j}\right\rangle$, where y is a binary label indicating whether the two observations are close enough. Since exploration is continuous, if the sampled timestamps lie within a threshold (l = 20 timesteps in the paper), then y = 1; conversely, once they are separated by another threshold (M×l = 5×20 steps in the paper), y = 0, meaning the two observations are far enough apart;
30 | 2. The two observations go through the siamese structure introduced earlier, with ResNet-18 as the feature extractor;
31 | 3. The two feature vectors are concatenated and passed through five fully connected layers, ending in a 2-way softmax;
32 | 4. The cross-entropy loss is computed.
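
A minimal sketch of the triplet sampling above, using the paper's thresholds (l = 20, M = 5) but otherwise illustrative names:

```python
import random

def sample_triplet(trajectory, l=20, M=5):
    """Sample (o_i, o_j, y): y = 1 if the two observations are within l steps,
    y = 0 if they are at least M*l steps apart; pairs in between are skipped."""
    T = len(trajectory)
    while True:
        i = random.randrange(T)
        if random.random() < 0.5:                       # try a positive pair
            j = min(T - 1, i + random.randint(1, l))
            return trajectory[i], trajectory[j], 1
        far = [j for j in range(T) if abs(j - i) >= M * l]
        if far:                                         # negative pair, if one exists
            return trajectory[i], random.choice(far), 0
```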
33 |
34 | **Memory graph:**
35 |
36 | This graph is also built from exploration data. Given some observation sequences, each node in the graph stores one observation (a puzzling design choice??), and two nodes are connected by an edge when:
37 |
38 | - they come from consecutive timestamps;
39 | - the observations are close enough, as judged by the retrieval net from the previous section.
40 |
41 | **Finding the waypoint:**
42 |
43 | During actual navigation, SPTM provides the locomotion net with a waypoint that acts like a sub-target. This happens in three steps:
44 |
45 | 1. **Localization:** the agent localizes itself and the goal in the memory graph, using the retrieval net's distance comparison;
46 | 2. **Planning:** Dijkstra's algorithm finds the shortest path from the current position to the goal in the graph;
47 | 3. **Waypoint Selection:** a sub-target is chosen along the shortest path; it should be confidently reachable, neither too far nor too close, again relying on the retrieval net (a schematic sketch follows this list).
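
A schematic of these three steps, assuming a `retrieval_similarity(o1, o2)` stand-in for the retrieval net and a networkx graph whose nodes store their observation in an `obs` attribute; the confidence threshold is made up for illustration.

```python
import networkx as nx

def find_waypoint(graph, obs, goal_obs, retrieval_similarity, min_conf=0.6):
    """1. Localize agent and goal, 2. plan with Dijkstra,
    3. pick the farthest still-confidently-reachable node as the waypoint."""
    def localize(o):
        return max(graph.nodes,
                   key=lambda n: retrieval_similarity(o, graph.nodes[n]["obs"]))

    start, goal = localize(obs), localize(goal_obs)       # 1. localization
    path = nx.dijkstra_path(graph, start, goal)           # 2. planning
    waypoint = path[min(1, len(path) - 1)]
    for node in path[1:]:                                 # 3. waypoint selection
        if retrieval_similarity(obs, graph.nodes[node]["obs"]) >= min_conf:
            waypoint = node
        else:
            break
    return graph.nodes[waypoint]["obs"]
```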
48 |
49 |
50 |
51 | 
52 |
53 | ### Locomotion Network
54 |
55 | Again using trajectory data from random exploration, two sufficiently close observations are taken as input and the corresponding action as the label, and the network is trained with supervised learning. Its architecture is the same as the retrieval net described above.
56 |
57 | > **This network is trained with plain supervised learning! I assumed this was an RL paper, since RL is what I work on, but it turns out to be pure DL. My apologies for not reading the problem statement carefully.**
58 |
59 | 
60 |
61 | ## Experiment
62 |
63 | ### Baselines
64 |
65 | 
66 |
67 | Note that the first baseline is A3C, and oddly it is not even given the goal. The whole paper exudes an inexplicable confidence, yet in the end it only compares against vanilla A3C and vanilla LSTM???
68 |
69 | > Ever since realizing this is a pure DL paper, I have had deep doubts.
70 |
71 | ### Results
72 |
73 | 
74 |
75 | Don't be fooled by the test set above, which makes it look like strong generalization. In fact, the paper states:
76 |
77 | When given a new maze, the agent first explores randomly in simulation for 10,500 steps, and the agent is in fact driven manually through the maze. They also provide exploration examples: https://sites.google.com/view/SPTM
78 |
79 | ## Conclusion
80 |
81 | This paper is **relatively early (2018)** and its idea is fairly simple. In my view, its main problems are:
82 |
83 | 1. I cannot convince myself that this kind of random exploration differs in any meaningful way from building a map in advance;
84 | 2. It is the first time I have seen graph nodes that bluntly store raw observations;
85 | 3. The paper leans heavily on the retrieval net to judge observation similarity; I thought this was a good idea, but without the preliminary exploration it is worthless;
86 | 4. By design, the approach clearly lacks generalization in any broad sense; the agent seems to memorize each path rather than learn to navigate, and conceptually it is outclassed by the meta-learning methods covered earlier.
87 |
88 |
--------------------------------------------------------------------------------
/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702140946820.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702140946820.png
--------------------------------------------------------------------------------
/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702151238953.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702151238953.png
--------------------------------------------------------------------------------
/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702154153781.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702154153781.png
--------------------------------------------------------------------------------
/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702161215057.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702161215057.png
--------------------------------------------------------------------------------
/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702163714131.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702163714131.png
--------------------------------------------------------------------------------
/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702164548441.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702164548441.png
--------------------------------------------------------------------------------
/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702172920825.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Paper Reading/Target driven visual navigation exploiting object relationships.assets/image-20200702172920825.png
--------------------------------------------------------------------------------
/Paper Reading/Uncertainty-Aware Reinforcement Learning for Collision Avoidance.md:
--------------------------------------------------------------------------------
1 | # Uncertainty-Aware Reinforcement Learning for Collision Avoidance
2 |
3 | [Paper](http://arxiv.org/abs/1702.01182) | [Code](https://github.com/w0617/Uncertainty-aware-Reinforcement-Learning-for-Collision-Avoidance) | [Video](https://sites.google.com/site/probcoll) | 2017
4 |
5 | *Gregory Kahn, Adam Villaflor, Vitchyr Pong, Pieter Abbeel, Sergey Levine*
6 |
7 | > **Paper Reading**: Task of **Collision Avoidance**
8 |
9 |
10 |
11 | ## Introduction
12 |
13 | A paper from Pieter Abbeel's and Sergey Levine's groups, which aims to use uncertainty estimation to let the robot explore **safely**. It adopts a model-based structure: a collision prediction model is learned from exploration data and used to assess the model's uncertainty. The robot slows down where collisions are likely and speeds up where confidence is high.
14 |
15 |
16 |
17 | ## Uncertainty-Aware Collision Prediction
18 |
19 | A brief outline of the idea:
20 |
21 | - Taking the current state $x_t$, observation $o_t$, and control sequence $u_{t:T+H}$ as input, a NN followed by a logistic function L outputs the probability that the robot collides;
22 | - A risk-averse collision evaluator:
23 |   - builds a collision loss function and adds it into the total loss;
24 |   - to give the NN correct uncertainty estimates, the paper uses bootstrapping and dropout techniques (a minimal sketch follows this list).
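
A minimal sketch of how bootstrapping plus dropout could yield such an uncertainty estimate; the network shape, feature vector, and sample counts are placeholders rather than the paper's setup.

```python
import torch
import torch.nn as nn

class CollisionNet(nn.Module):
    """Map a flattened (state, obs, control-sequence) feature vector to a collision probability."""
    def __init__(self, in_dim, hidden=64, p_drop=0.2):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden), nn.ReLU(), nn.Dropout(p_drop),
            nn.Linear(hidden, hidden), nn.ReLU(), nn.Dropout(p_drop),
            nn.Linear(hidden, 1),
        )

    def forward(self, x):
        return torch.sigmoid(self.net(x))      # logistic output: collision probability

def collision_uncertainty(models, x, n_dropout_samples=20):
    """models: a bootstrap ensemble, each trained on a resampled dataset.
    Keep dropout active at prediction time and return mean/std over all samples."""
    preds = []
    for m in models:
        m.train()                               # dropout stays on
        with torch.no_grad():
            preds += [m(x) for _ in range(n_dropout_samples)]
    preds = torch.stack(preds)
    return preds.mean(dim=0), preds.std(dim=0)
```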
--------------------------------------------------------------------------------
/Perspective/PR Perspective Ⅱ:Robot Learning思考.md:
--------------------------------------------------------------------------------
1 | #! https://zhuanlan.zhihu.com/p/395562430
2 | # Recent Events and Reflections in Robot Learning
3 |
4 | Having finished a graduation trip of over half a month, I am about to start the new journey of my PhD. I have not studied for a few months, so today I will summarize recent progress and major events (and gossip) in Robot Learning and Reinforcement Learning.
5 | > These are personal thoughts; they may differ from previous posts and be somewhat scattered.
6 |
7 | ## Gossip in Pure RL
8 | Let's start with pure RL. The biggest piece of gossip is Professor Yu Yang's answer to the question [**What are the current bottlenecks in reinforcement learning?**](https://www.zhihu.com/question/449478247/answer/2001407526):
9 |
10 | [What are the current bottlenecks in reinforcement learning? - Yu Yang's answer - Zhihu](https://www.zhihu.com/question/449478247/answer/2001407526)
11 |
12 | The answer above has since been edited; plenty of kind people took screenshots of the original.
13 |
14 | Just three characters:
15 |
16 | 
17 |
18 | "Unusable" is in fact largely true. After all, the highlight of reinforcement learning lies in its trial-and-error spirit, and in other respects one really should not hold high expectations for it. Seen from two angles:
19 |
20 | 1. Many monotonous, repetitive, precision- or safety-critical tasks simply do not need it; PID is already perfect for them, so why bother with these antics? This is obvious in industrial robotics, for example.
21 | 2. Most mainstream academic RL works on games and simulated data, which honestly feels of little practical significance. If RL agents beat us all at games, what fun is left in playing them? More amusingly, a while ago my new PhD advisor asked me to review two IROS 2021 submissions, one of which claimed to tackle automated forestry logging and transport. Reading the problem description in the introduction, my reaction was: wow, this is so complex that solving it would put AGI within reach. Then, unexpectedly, the authors simplified the problem drastically, all the way down to a [FetchPickAndPlace-v1](https://gym.openai.com/envs/FetchPickAndPlace-v1/). I was dumbfounded: what does this still have to do with forests or logging? Wanting to contribute to real problems is commendable, but this is quite a stretch.
22 |
23 | 
24 |
25 | Even so, for a heavyweight of reinforcement learning like Professor Yu to give such a sweeping answer, my first reaction was that there was more to the story.
26 |
27 | 
28 |
29 | Only after reading Professor Yu's follow-up updates and his column post [A rant about reinforcement learning being "unusable" - Yu Yang - Zhihu](https://zhuanlan.zhihu.com/p/391032165) did I realize this was probably a deliberate course correction. In short, he wants to stress that RL research should move closer to real problems rather than merely chasing SOTA. Come to think of it, isn't that exactly what we in robotics are supposed to do? This field is not only a typical application-oriented engineering discipline, it also seems to have no benchmarks to grind anyway. So it pays to follow what the big names say; it constantly reinforces one's confidence in the chosen path.
30 |
31 | ## RL in Robotics
32 | Now let's talk about applying RL, or other learning methods, to robotics.
33 |
34 | A shocking piece of news in robot learning is that OpenAI, a leading RL research lab, could not resist the lure of money and disbanded its asset-heavy, data-inefficient robotics group to focus on profitable projects. This is understandable; it is hardly the first time a "leading" robotics company has folded or been sold. But it reminds me once again that, having set out on this path, one cannot single-mindedly do research alone; commercialization prospects must always be kept in mind, or one ends up either starving or dutifully working for someone else.
35 |
36 | Not long ago I was still using the video of [OpenAI's one-handed Rubik's Cube robot](https://openai.com/blog/solving-rubiks-cube/) to motivate junior students, showing them the bright future of intelligent robots; only now do I realize the robot revolution is still far away. The video is below, so everyone can relive this "miracle".
37 |
38 | Fortunately, people keep pressing forward on this road. One OpenAI robotics group falls, and thousands of robotics groups will rise, until the sparks of the robot revolution become a prairie fire. Alphabet has recently resumed its exploration of robot commercialization, founding a brand-new Robotics + AI company, Intrinsic, focused on smarter industrial control software for robots.
39 |
40 |
41 |
42 | Recently I have had several meetings with my new PhD advisor, both group meetings and one-on-ones, and we have gotten to know each other a bit. The research plan he proposed centers on real-world robot learning, and considers using representations to improve the robot's understanding of its environment. Of course, this can also be stated the other way around: represent, reduce the dimensionality of, or map the environment (both external and internal) into some latent space... the meaning is the same, and the robot then makes decisions on top of that. This is essentially identical to the research proposal I originally sent him; I have no idea how I found an advisor whose direction matches mine so well, so applying to schools really is a matter of serendipity. Back to the research itself: since its founding, this column has stated in its table-of-contents article:
43 |
44 | > This is a private learning repository about **R**einforcement learning techniques, **R**easoning, and **R**epresentation learning used in **R**obotics, founded for **Real intelligence**.
45 |
46 | Thus, from the very beginning this column has not focused on reinforcement learning alone, but on all applicable learning methods in robotics. During my PhD I will keep writing notes around robot learning, and everyone is welcome to support this rookie with **likes + favorites + follows**!
47 |
48 |
49 |
50 |
51 | This post is purely personal reflection and does not constitute investment advice!
--------------------------------------------------------------------------------
/Preliminary/A Simple Guide for NN.assets/20160707204048899.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/A Simple Guide for NN.assets/20160707204048899.gif
--------------------------------------------------------------------------------
/Preliminary/A Simple Guide for NN.assets/853467-20160630141449671-1058672778.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/A Simple Guide for NN.assets/853467-20160630141449671-1058672778.png
--------------------------------------------------------------------------------
/Preliminary/A Simple Guide for NN.assets/853467-20160630152018906-1524325812.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/A Simple Guide for NN.assets/853467-20160630152018906-1524325812.png
--------------------------------------------------------------------------------
/Preliminary/A Simple Guide for NN.assets/853467-20160630154317562-311369571.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/A Simple Guide for NN.assets/853467-20160630154317562-311369571.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary RL basic knowledge.assets/131433102201.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary RL basic knowledge.assets/131433102201.jpg
--------------------------------------------------------------------------------
/Preliminary/Preliminary RL basic knowledge.assets/132312526273.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary RL basic knowledge.assets/132312526273.jpg
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/1042406-20180812184148124-1485684702.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/1042406-20180812184148124-1485684702.jpg
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/1560008119444.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/1560008119444.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/201019414696.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/201019414696.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/201019447506.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/201019447506.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/201019462191.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/201019462191.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/221402112851854.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/221402112851854.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/221402155049842.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/221402155049842.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/221402163881216.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/221402163881216.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/221402175506201.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/221402175506201.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/interview-14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/interview-14.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/v2-111ca0554c4504c7aefc9a14d0d92d2f_1440w.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/v2-111ca0554c4504c7aefc9a14d0d92d2f_1440w.jpg
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/v2-6c24d01db0b8b94589b2fe6a6efcc7b2_1440w.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/v2-6c24d01db0b8b94589b2fe6a6efcc7b2_1440w.jpg
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/v2-ef32f6901c6a5b8f6eafd8d478ff83ef_1440w.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/v2-ef32f6901c6a5b8f6eafd8d478ff83ef_1440w.jpg
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/v2-f3c12050c797196c7c37b003905a8d30_1440w.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/v2-f3c12050c797196c7c37b003905a8d30_1440w.jpg
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 2.assets/屏幕快照 2016-01-05 下午9.48.30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Preliminary of RL 2.assets/屏幕快照 2016-01-05 下午9.48.30.png
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 3.md:
--------------------------------------------------------------------------------
1 | ## Preliminary of RL Ⅲ: on-policy, off-policy & Model-based, Model-free
2 |
3 | ### on-policy & off-policy
4 |
5 | > On-policy methods attempt to evaluate or improve the policy that is used to make decisions, whereas off-policy methods evaluate or improve a policy different from that used to generate the data.
6 | >
7 | > **Difference:** whether the Q-value update follows the policy currently being executed (on-policy) or a different/new policy (off-policy).
8 | >
9 | > —— Sutton, RL: an introduction
10 |
11 | **on-policy:** the **interaction/sampling policy** and the **policy being evaluated and improved** are the same policy (in Chinese, 同策略).
12 |
13 | **off-policy:** the **interaction/sampling policy** and the **policy being evaluated and improved** are different policies (in Chinese, 异策略).
14 |
15 | This difference can be read in two ways:
16 |
17 | 1. The policy being iterated is not the policy currently used for interaction (Q-learning vs. Sarsa);
18 | 2. The experience used during policy iteration was not collected by the current policy (DQN and other algorithms with experience replay; this case is straightforward, so no further explanation).
19 |
20 | Take off-policy Q-learning and on-policy Sarsa as examples:
21 |
22 | Q-learning:
23 | $$
24 | \mathrm{Q}(\mathrm{s}, \mathrm{a}) \leftarrow \mathrm{Q}(\mathrm{s}, \mathrm{a})+\alpha\left(\mathrm{R}(\mathrm{s})+\gamma \max _{a^{\prime}} \mathrm{Q}\left(\mathrm{s}^{\prime}, \mathrm{a}^{\prime}\right)-\mathrm{Q}(\mathrm{s}, \mathrm{a})\right)
25 | $$
26 | Sarsa:
27 | $$
28 | \mathrm{Q}(\mathrm{s}, \mathrm{a}) \leftarrow \mathrm{Q}(\mathrm{s}, \mathrm{a})+\alpha\left(\mathrm{R}(\mathrm{s})+\gamma \mathrm{Q}\left(\mathrm{s}^{\prime}, \mathrm{a}^{\prime}\right)-\mathrm{Q}(\mathrm{s}, \mathrm{a})\right)
29 | $$
30 | When estimating the policy at step $T$, Q-learning uses the step-$T+1$ action with the maximum $Q$ value, i.e. a greedy policy. However, the action actually taken at the next step is not necessarily that one, so it is off-policy.
31 |
32 | Sarsa, when estimating the policy at step $T$, uses the step-$T+1$ action chosen by the current step-$T$ policy itself, hence it is on-policy.
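
A tabular sketch of the two updates side by side (an ε-greedy behavior policy is assumed for both; names are illustrative):

```python
import numpy as np

def q_learning_update(Q, s, a, r, s_next, alpha=0.1, gamma=0.99):
    # off-policy: the target uses the greedy action at s', whatever is taken next
    Q[s, a] += alpha * (r + gamma * np.max(Q[s_next]) - Q[s, a])

def sarsa_update(Q, s, a, r, s_next, a_next, alpha=0.1, gamma=0.99):
    # on-policy: the target uses the action a' actually chosen by the current policy
    Q[s, a] += alpha * (r + gamma * Q[s_next, a_next] - Q[s, a])
```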
33 |
34 | [For several other descriptions, see: [What is the difference between on-policy and off-policy in reinforcement learning?](https://www.zhihu.com/question/57159315)]
35 |
36 | ---
37 |
38 | ### Model-based & Model-free
39 |
40 | **Model-based:** First, a disambiguation: model-based in RL does not mean the environment model or the state transition probabilities are known. Rather, it means **learning an environment model or some other mapping** from experience, and using this learned model to speed up the policy iteration process. This approach is better suited to fields such as robotics.
41 |
42 | > For detailed notes on model-based methods, see the Model-based RL series in this column, which I will keep updating.
43 |
44 | **Model-free:** Model-free covers the SOTA algorithms we hear about all the time, such as DQN, DDPG, and PPO. The difference from model-based lies in whether experience is used for anything beyond policy iteration. Clearly, any model-free method can be turned into a model-based one; model-based is just a framework into which any model-free algorithm can be plugged.
45 |
46 | **Other uses of experience:** Besides policy iteration, experience can also be used to (a Dyna-style sketch follows this list):
47 |
48 | - Fit an environment model and an immediate reward model: $R,S'\leftarrow Model(S,A)$, serving as a new data source to supplement training
49 |
50 | > Dyna, ME-TRPO, NAF
51 |
52 | - Fit a future value function and the immediate reward: $R,V'\leftarrow Model(S,A)$, to assist decision making
53 |
54 | > VPN, I2A
55 |
56 | - Fit future Q values: $Q\leftarrow Model(S,A)$, to improve the quality of Q-value estimates by rolling them out in the environment model
57 |
58 | > MVE, STEVE, MBPO
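
As referenced above, a tiny Dyna-Q-style sketch of the first use case: a learned transition/reward model generates imagined transitions that supplement the real data. The dictionary-based model and Q-table layout are my own simplifications, not a specific paper's implementation.

```python
import random

def dyna_q_planning(Q, model, n_planning_steps, alpha=0.1, gamma=0.99):
    """Q: dict of dicts, Q[s][a] -> value, for (s, a) visited in real experience.
    model: dict mapping (s, a) -> (r, s_next), fitted from real experience.
    Each planning step replays one imagined transition and updates Q."""
    for _ in range(n_planning_steps):
        (s, a), (r, s_next) = random.choice(list(model.items()))
        best_next = max(Q[s_next].values()) if s_next in Q else 0.0
        Q[s][a] += alpha * (r + gamma * best_next - Q[s][a])
```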
59 |
60 |
61 |
62 | **Reference**
63 |
64 | [1] [[Model-based] A collection of model-based reinforcement learning papers](https://zhuanlan.zhihu.com/p/72642285)
65 |
66 | ---
67 |
68 | ### Rollout
69 |
70 | This term appears frequently in model-based algorithms. I usually translate it as "**展开**" (unrolling) or "**模型展开**" (model rollout); it describes how the learned model is used to speed up training.
71 |
72 | **Practical meaning:** At the current state, start from each possible action and sample paths according to a given policy; the Q value of each action at the current state is then estimated from the accumulated rewards over many samples. Intuitively: **standing at an intersection (current state), you first use the map in your head (learned model) to imagine the consequences (future reward) of each road ahead (action).** A minimal sketch follows the comparison below.
73 |
74 | - In MC, sampling gradually makes the information more accurate, so the policy can be improved more accurately.
75 | - In a rollout, sampling gathers some information about each step ahead; after updating with that information, a choice is made and the agent moves on to the next state (the focus remains mainly on the current state).
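
A sketch of rollout-based action evaluation at the current state, assuming a learned model with a `model.step(s, a) -> (s_next, r)` interface and some rollout `policy`; all names and horizons are illustrative.

```python
def rollout_action_values(state, actions, model, policy,
                          n_rollouts=10, horizon=15, gamma=0.99):
    """Estimate Q(state, a) for each candidate action by unrolling the learned
    model for a few imagined steps and averaging the discounted returns."""
    q_estimates = {}
    for a0 in actions:
        returns = []
        for _ in range(n_rollouts):
            s, a, ret, discount = state, a0, 0.0, 1.0
            for _ in range(horizon):
                s, r = model.step(s, a)     # imagined transition from the learned model
                ret += discount * r
                discount *= gamma
                a = policy(s)               # follow the rollout policy afterwards
            returns.append(ret)
        q_estimates[a0] = sum(returns) / len(returns)
    return q_estimates
```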
76 |
77 |
--------------------------------------------------------------------------------
/Preliminary/Preliminary of RL 5.md:
--------------------------------------------------------------------------------
1 | # Preliminary of RL Ⅴ: Challenges in RL
2 |
3 | - [Preliminary of RL Ⅴ: Challenges in RL](#preliminary-of-rl-ⅴ-challenges-in-rl)
4 | - [Model-free RL](#model-free-rl)
5 | - [Overestimation](#overestimation)
6 | - [Model-based RL](#model-based-rl)
7 | - [Model error](#model-error)
8 | - [MARL](#marl)
9 | - [OfflineRL](#offlinerl)
10 | - [Distributional shift](#distributional-shift)
11 |
12 | ## Model-free RL
13 |
14 | ### Overestimation
15 |
16 | ## Model-based RL
17 |
18 | ### Model error
19 |
20 | ## MARL
21 |
22 | ## OfflineRL
23 |
24 | ### Distributional shift
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/0_kt9_Z41qxgiI0CDl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/0_kt9_Z41qxgiI0CDl
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/0_kt9_Z41qxgiI0CDl-1575448739547:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/0_kt9_Z41qxgiI0CDl-1575448739547
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/0_kt9_Z41qxgiI0CDl-1575448742639:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/0_kt9_Z41qxgiI0CDl-1575448742639
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/0_kt9_Z41qxgiI0CDl-1575448756568:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/0_kt9_Z41qxgiI0CDl-1575448756568
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/0_oh-lF13hYWt2Bd6V_:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/0_oh-lF13hYWt2Bd6V_
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/1564474069516.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/1564474069516.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/1564549789614.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/1564549789614.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/76a319586cd215c8f2075b938fc6f6e07c81714b.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/8795d42bd263dcbe55d123e7466b2dd5091490a7.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191204161236516.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191204161236516.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191204164022284.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191204164022284.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191204200910603.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191204200910603.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191204201005583.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191204201005583.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191204204627884.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191204204627884.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191204205459823.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191204205459823.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205092649257.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205092649257.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205102211427.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205102211427.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205103234091.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205103234091.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205103636621.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205103636621.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205103810941.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205103810941.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205104741531.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205104741531.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205105318993.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205105318993.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205110708645.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205110708645.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205111303995.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205111303995.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205121930328.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205121930328.png
--------------------------------------------------------------------------------
/Preliminary/Reinforcement Learning Notes.assets/image-20191205190844049.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/Reinforcement Learning Notes.assets/image-20191205190844049.png
--------------------------------------------------------------------------------
/Preliminary/img/1558592857137.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/1558592857137.png
--------------------------------------------------------------------------------
/Preliminary/img/1558614556514.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/1558614556514.png
--------------------------------------------------------------------------------
/Preliminary/img/1560008119444.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/1560008119444.png
--------------------------------------------------------------------------------
/Preliminary/img/2019-04-10 19-14-32 的屏幕截图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/2019-04-10 19-14-32 的屏幕截图.png
--------------------------------------------------------------------------------
/Preliminary/img/2019-04-10 19-17-30 的屏幕截图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/2019-04-10 19-17-30 的屏幕截图.png
--------------------------------------------------------------------------------
/Preliminary/img/2019-04-10 21-00-18 的屏幕截图.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/2019-04-10 21-00-18 的屏幕截图.png
--------------------------------------------------------------------------------
/Preliminary/img/3-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/3-3-1.png
--------------------------------------------------------------------------------
/Preliminary/img/3-3-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/3-3-2.png
--------------------------------------------------------------------------------
/Preliminary/img/4-1-1-1554948278323.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/4-1-1-1554948278323.jpg
--------------------------------------------------------------------------------
/Preliminary/img/4-1-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/4-1-1.jpg
--------------------------------------------------------------------------------
/Preliminary/img/4-5-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/4-5-4.png
--------------------------------------------------------------------------------
/Preliminary/img/4155986-e77eec1baba5aeea.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/4155986-e77eec1baba5aeea.webp
--------------------------------------------------------------------------------
/Preliminary/img/5-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/5-1-1.png
--------------------------------------------------------------------------------
/Preliminary/img/DQN3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/DQN3.png
--------------------------------------------------------------------------------
/Preliminary/img/sl4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/sl4.png
--------------------------------------------------------------------------------
/Preliminary/img/屏幕快照 2016-01-05 下午9.48.30.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Preliminary/img/屏幕快照 2016-01-05 下午9.48.30.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR GaussianProcessRegression.assets/1_9xMQMnSPnAFkWqIY2jvIpQ.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR GaussianProcessRegression.assets/1_9xMQMnSPnAFkWqIY2jvIpQ.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR GaussianProcessRegression.assets/1_IdGgdrY_n_9_YfkaCh-dag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR GaussianProcessRegression.assets/1_IdGgdrY_n_9_YfkaCh-dag.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR GaussianProcessRegression.assets/1_YAPmNXea5gKoH3uyRrtITQ.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR GaussianProcessRegression.assets/1_YAPmNXea5gKoH3uyRrtITQ.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR GaussianProcessRegression.assets/1_zNQg-o-C2JELQFQjEEDrLw.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR GaussianProcessRegression.assets/1_zNQg-o-C2JELQFQjEEDrLw.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR GaussianProcessRegression.assets/Illustration-of-a-bivariate-Gaussian-distribution-The-marginal-and-joint-probability.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR GaussianProcessRegression.assets/Illustration-of-a-bivariate-Gaussian-distribution-The-marginal-and-joint-probability.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR GaussianProcessRegression.assets/Sun, 10 May 2020 143250.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR GaussianProcessRegression.assets/Sun, 10 May 2020 143250.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR GaussianProcessRegression.assets/image-20200510155719330.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR GaussianProcessRegression.assets/image-20200510155719330.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR GaussianProcessRegression.assets/v2-3c25a927c217f13a055794377635faaf_1440w.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR GaussianProcessRegression.assets/v2-3c25a927c217f13a055794377635faaf_1440w.jpg
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR GaussianProcessRegression.assets/微信图片编辑_20200510130051.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR GaussianProcessRegression.assets/微信图片编辑_20200510130051.jpg
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR HMC&MH&Gibbs.assets/2018-05-09-gibbs-100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR HMC&MH&Gibbs.assets/2018-05-09-gibbs-100.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR HMC&MH&Gibbs.assets/gibbssampler-2dnormal1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR HMC&MH&Gibbs.assets/gibbssampler-2dnormal1.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR HMC&MH&Gibbs.assets/image__14_.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR HMC&MH&Gibbs.assets/image__14_.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR HMC&MH&Gibbs.assets/true.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR HMC&MH&Gibbs.assets/true.jpg
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR HMC&MH&Gibbs.assets/v2-08cb302ac37b757ee390705d822f87f2_1440w.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR HMC&MH&Gibbs.assets/v2-08cb302ac37b757ee390705d822f87f2_1440w.jpg
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR HMM.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR HMM.md
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR IS&MCMC.assets/1_3nBb4AqcriLcENdpBp4fpQ@2x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR IS&MCMC.assets/1_3nBb4AqcriLcENdpBp4fpQ@2x.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR IS&MCMC.assets/1_AZBh2kDanLoTFmb3yzErGQ@2x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR IS&MCMC.assets/1_AZBh2kDanLoTFmb3yzErGQ@2x.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR IS&MCMC.assets/1_HclnWfZrh7Nzuj2_aHkPCQ.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR IS&MCMC.assets/1_HclnWfZrh7Nzuj2_aHkPCQ.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR IS&MCMC.assets/1_hKQcryMc6fbcS7r-g0sriQ.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR IS&MCMC.assets/1_hKQcryMc6fbcS7r-g0sriQ.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR IS&MCMC.assets/20190511000705.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR IS&MCMC.assets/20190511000705.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR IS&MCMC.assets/5cdd91aec0102b09bad70aff4bd0e9b2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR IS&MCMC.assets/5cdd91aec0102b09bad70aff4bd0e9b2.jpg
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR IS&MCMC.assets/importance_sampling_concept.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR IS&MCMC.assets/importance_sampling_concept.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR IS&MCMC.assets/v2-9514f7703820b5bf99c98405eb413359_1440w.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR IS&MCMC.assets/v2-9514f7703820b5bf99c98405eb413359_1440w.jpg
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR IS&MCMC.assets/v2-eb0945aa2185df958f4568e58300e77a_1440w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR IS&MCMC.assets/v2-eb0945aa2185df958f4568e58300e77a_1440w.gif
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅱ Bayesian:
--------------------------------------------------------------------------------
1 | ## Probabilistic in Robotics Ⅱ: Bayesian Estimation/Inference
2 |
 3 | **Statistical inference** aims to learn about what cannot be observed from what can be observed. That is, it is the process of drawing conclusions from observed variables (usually the effects) in a population or a sample, for example point estimates, confidence intervals, or interval estimates of latent variables (usually the causes) in that population or sample.
4 |
 5 | ### Bayesian Estimation / Inference
 6 |
 7 | When we contrasted the Bayesian and frequentist world views in the previous chapter, we stressed the Bayesian position that **the parameter is a random variable**. Yet the maximum a posteriori (MAP) estimate is a single fixed value $\theta^*$, one particular point of the parameter's distribution. That hardly carries the Bayesian spirit through to the end.
 8 |
 9 | **Bayesian estimation** is the more thoroughgoing Bayesian approach and a further extension of MAP: instead of estimating one particular value of $\theta$, it estimates its entire **distribution**.
10 |
11 | In Bayesian estimation the evidence $P(X)$ is no longer dropped, because we are no longer producing a point estimate of the parameter $\theta$ alone.
12 |
13 | Bayes' rule, discrete form:
14 | $$
15 | P(\theta | X)=\frac{P(X | \theta) P(\theta)}{P(X)}
16 | $$
17 | Bayes' rule, continuous form:
18 | $$
19 | P(\theta | X)=\frac{P(X | \theta) P(\theta)}{\int_{\Theta} P(X | \theta) P(\theta) d \theta}
20 | $$
21 | Clearly this is not an easy formula to work with, especially the denominator of the continuous form (the **normalizing constant**) $P(X)=\int_{\Theta} P(X | \theta) P(\theta) d \theta$ (an application of the law of total probability, remember? 🧐).
22 |
23 | ### Bayesian Estimation by Hand
24 |
25 | Let us try it on the coin-flipping example from before, with a slightly different sample: six heads and four tails.
26 |
27 | The following is adapted from [Bayesian estimation, maximum likelihood estimation, and maximum a posteriori estimation](https://link.zhihu.com/?target=http%3A//noahsnail.com/2018/05/17/2018-05-17-%E8%B4%9D%E5%8F%B6%E6%96%AF%E4%BC%B0%E8%AE%A1%E3%80%81%E6%9C%80%E5%A4%A7%E4%BC%BC%E7%84%B6%E4%BC%B0%E8%AE%A1%E3%80%81%E6%9C%80%E5%A4%A7%E5%90%8E%E9%AA%8C%E6%A6%82%E7%8E%87%E4%BC%B0%E8%AE%A1/).
28 |
29 | A conjugate prior makes the problem tractable. The conjugate prior of the binomial parameter is the Beta distribution; since the likelihood of θ is binomial, in Bayesian estimation we assume the prior of θ follows $P(\theta) \sim \operatorname{Beta}(\alpha, \beta)$, whose probability density is:
30 | $$
31 | f(x ; \alpha, \beta)=\frac{1}{B(\alpha, \beta)} x^{\alpha-1}(1-x)^{\beta-1}
32 | $$
33 | Bayes' rule can therefore be written as:
34 | $$
35 | \begin{aligned}
36 | P(\theta | X) &=\frac{P(X | \theta) P(\theta)}{\int_{\Theta} P(X | \theta) P(\theta) d \theta} \\
37 | &=\frac{\theta^{6}(1-\theta)^{4} \frac{\theta^{\alpha-1}(1-\theta)^{\beta-1}}{B(\alpha, \beta)}}{\int_{\Theta} \theta^{6}(1-\theta)^{4} \frac{\theta^{\alpha-1}(1-\theta)^{\beta-1}}{B(\alpha, \beta)} d \theta} \\
38 | &=\frac{\theta^{\alpha+6-1}(1-\theta)^{\beta+4-1}}{\int_{\Theta} \theta^{\alpha+6-1}(1-\theta)^{\beta+4-1} d \theta} \\
39 | &=\frac{\theta^{\alpha+6-1}(1-\theta)^{\beta+4-1}}{B(\alpha+6, \beta+4)} \\
40 | &=\frac{1}{B(\alpha+6, \beta+4)}\, \theta^{(\alpha+6)-1}(1-\theta)^{(\beta+4)-1} \\
41 | &=\operatorname{Beta}(\theta | \alpha+6, \beta+4)
42 | \end{aligned}
43 | $$
44 | The derivation shows that $P(\theta | X) \sim \operatorname{Beta}(\theta | \alpha+6, \beta+4)$. Here $B$, the Beta function, is a normalizing constant that makes the whole density integrate to 1, and $\operatorname{Beta}(\theta|\alpha+6,\beta+4)$ is the result of the Bayesian estimation.
45 |
46 | If the distribution of θ obtained by Bayesian estimation has a finite mean, the posterior expectation can be used as the estimate of θ. Suppose $α=3, β=3$; the prior then peaks at 0.5, and $P(θ|X) \sim \operatorname{Beta}(θ|9,7)$. The curve of Beta(θ|9,7) is shown below:
47 |
48 | 
49 |
50 | The figure shows that with $α=3, β=3$ the estimate $\hat{\theta}$ should lie a little below 0.6. Using the mean of the Beta distribution, $E(θ)=\dfrac{α}{α+β}$, we can, just as with MAP, produce a single value for the parameter:
51 | $$
52 | \hat{\theta}=\int_{\Theta} \theta P(\theta | X) d \theta=E(\theta)=\frac{\alpha}{\alpha+\beta}=\frac{9}{9+7}=0.5625
53 | $$
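To make the arithmetic concrete, here is a minimal Python sketch (assuming `scipy` is installed; the variable names are only illustrative) that builds the Beta(α+6, β+4) posterior for α=β=3 and checks that its mean is 0.5625:

```python
from scipy import stats

alpha_prior, beta_prior = 3, 3       # Beta(3, 3) prior, peaked at 0.5
heads, tails = 6, 4                  # observed sample: six heads, four tails

# Conjugacy: Beta prior + binomial likelihood -> Beta posterior
posterior = stats.beta(alpha_prior + heads, beta_prior + tails)   # Beta(9, 7)

print(posterior.mean())              # 0.5625 = 9 / (9 + 7)
print(posterior.interval(0.95))      # a 95% credible interval for theta
```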
54 | **Solution steps:**
55 |
56 | - Determine the likelihood function of the parameter
57 | - Determine the prior of the parameter; it should be a conjugate prior of the likelihood, so that the posterior stays in the same family
58 | - Determine the functional form of the posterior
59 | - Solve for the posterior of the parameter with Bayes' rule
60 |
61 |
62 |
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅱ MCMC&EM.md:
--------------------------------------------------------------------------------
1 | ## Probabilistic in Robotics Ⅱ
2 |
 3 | In my other column, [Machine Learning 格物志](https://zhuanlan.zhihu.com/c_1236984830903996416), I have already introduced the **naive Bayes** method used as a **classifier**:
4 |
5 | https://zhuanlan.zhihu.com/p/136676940
6 |
 7 | **The naive Bayes algorithm assumes that the features are mutually independent.**
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅲ Bayesian_MCMC.assets/1_3nBb4AqcriLcENdpBp4fpQ@2x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅲ Bayesian_MCMC.assets/1_3nBb4AqcriLcENdpBp4fpQ@2x.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅲ Bayesian_MCMC.assets/1_AZBh2kDanLoTFmb3yzErGQ@2x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅲ Bayesian_MCMC.assets/1_AZBh2kDanLoTFmb3yzErGQ@2x.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅲ Bayesian_MCMC.assets/Beta_9_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅲ Bayesian_MCMC.assets/Beta_9_7.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅲ Bayesian_MCMC.assets/Example-of-Bayesian-inference-with-a-prior-distribution-a-posterior-distribution-and.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅲ Bayesian_MCMC.assets/Example-of-Bayesian-inference-with-a-prior-distribution-a-posterior-distribution-and.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅳ BayesFilter.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅳ BayesFilter.md
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅳ BayesNeuralNetwork.assets/bayes_nn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅳ BayesNeuralNetwork.assets/bayes_nn.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅳ BayesNeuralNetwork.assets/bayesian_statistics.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅳ BayesNeuralNetwork.assets/bayesian_statistics.jpg
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅳ BayesNeuralNetwork.assets/extrapolation_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅳ BayesNeuralNetwork.assets/extrapolation_graph.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅴ GMM.assets/aHR0cDovL2ltZy5ibG9nLmNzZG4ubmV0LzIwMTcwMzAyMTc1NDQyMjcy.jfif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅴ GMM.assets/aHR0cDovL2ltZy5ibG9nLmNzZG4ubmV0LzIwMTcwMzAyMTc1NDQyMjcy.jfif
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅴ GMM.assets/aHR0cDovL2ltZy5ibG9nLmNzZG4ubmV0LzIwMTcwMzAyMTc1NTQ5ODc3.jfif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅴ GMM.assets/aHR0cDovL2ltZy5ibG9nLmNzZG4ubmV0LzIwMTcwMzAyMTc1NTQ5ODc3.jfif
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅴ GMM.md:
--------------------------------------------------------------------------------
 1 | ## PR Ⅴ: Gaussian Mixture Model (GMM)
2 |
 3 | After the probabilistic neural network (the Bayesian Neural Network), we can also do **clustering** (unsupervised learning) probabilistically. That is today's topic: the Gaussian Mixture Model, abbreviated GMM.
 4 |
 5 | ### 1. The Gaussian Distribution
 6 |
 7 | The basics of the Gaussian distribution were covered earlier:
 8 |
 9 | [the earlier post on the Gaussian distribution](https://zhuanlan.zhihu.com/p/139478368)
10 |
11 | It gives the plots and the density formulas from the univariate up to the multivariate Gaussian; skim it and come back.
12 |
13 | ### 2. The Gaussian Mixture Model
14 |
15 | Whether univariate or multivariate, those are still a single Gaussian; only the number of feature variables differs. A GMM is a **mixture (linear combination)** of several Gaussians, used when a single data set contains several different distributions.
16 |
17 | 
18 |
19 | 
20 |
21 | Probability density function:
22 | $$
23 | p(x)=\sum_{j=1}^{k} \alpha_{j} \phi\left(x | \mu_{j}, \Sigma_{j}\right)
24 | $$
25 |
26 | - $k$ is the number of Gaussian components;
27 | - $\alpha_j$ is the weight of each component, with $\sum_{j=1}^k\alpha_j=1$;
28 | - $\phi\left(x | \mu_{j}, \Sigma_{j}\right)$ is the Gaussian density with parameters $(\mu_{j}, \Sigma_{j})$; in the univariate case it reads:
29 |
30 | $$
31 | \phi(x|\mu_{j}, \sigma_{j}^{2})=\frac{1}{\sigma_{j} \sqrt{2 \pi}} \exp \left(-\frac{(x-\mu_{j})^{2}}{2 \sigma_{j}^{2}}\right)
32 | $$
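As a quick numerical check of the mixture density above, here is a small numpy sketch (all numbers are made up for illustration) that evaluates a two-component univariate mixture on a grid and verifies that it still integrates to roughly 1:

```python
import numpy as np

def gauss_pdf(x, mu, sigma):
    """Univariate Gaussian density phi(x | mu, sigma^2)."""
    return np.exp(-(x - mu) ** 2 / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi))

# Two components whose weights sum to 1
alphas = np.array([0.3, 0.7])
mus    = np.array([-2.0, 1.5])
sigmas = np.array([0.8, 1.2])

x = np.linspace(-6.0, 6.0, 601)
p = sum(a * gauss_pdf(x, m, s) for a, m, s in zip(alphas, mus, sigmas))

print(np.trapz(p, x))   # ≈ 1: the mixture is still a proper density
```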
33 |
34 | Parameter estimation for a GMM means estimating $\theta=(\alpha_1,\dots,\alpha_k;\ \mu_1,\dots,\mu_k;\ \Sigma_1,\dots,\Sigma_k)$. If we try the earlier maximum likelihood estimation on the sample set $X=\left\{x^{(1)}, \cdots, x^{(m)}\right\}$, the log-likelihood is:
35 | $$
36 | L(\alpha, \mu, \Sigma)=\sum_{i=1}^{m} \log p\left(x^{(i)} ; \alpha, \mu, \Sigma\right)=\sum_{i=1}^{m} \log \sum_{z^{(i)}=1}^{k} p\left(x^{(i)} | z^{(i)} ; \mu, \Sigma\right) p\left(z^{(i)} ; \alpha\right)
37 | $$
38 | Clearly we have no way of knowing the component memberships behind the coefficients $\alpha$, so this maximum likelihood estimation cannot be carried any further.
39 |
40 | ### 3. Latent Variables
41 |
42 | **Maximum likelihood estimation (MLE), maximum a posteriori estimation (MAP), and Bayesian inference (BI)** can all be applied directly only under one premise: every variable of the probabilistic model is an **observed variable**. Only then can the given data be plugged in directly to estimate the parameters.
43 |
44 | Sometimes, however, the variables also include **hidden/latent variables**, and the methods above can no longer be used directly.
45 |
46 | **What a latent variable is:**
47 |
48 | In contrast to observed variables, latent variables are random variables that cannot be observed, variables that exist in theory but for which no concrete data are available (incomplete data), or abstract quantities such as mental states or behaviours.
49 |
51 |
52 | ### 4. The EM Algorithm
53 |
54 | The **Expectation-Maximization (EM) algorithm** performs MLE / MAP estimation of the parameters of probabilistic models **that contain latent variables**.
55 |
56 | Each iteration has two steps (a toy sketch follows the list):
57 |
58 | - E-step: compute the expectation
59 | - M-step: maximize
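The following is only a toy numpy sketch of those two steps for a univariate, two-component GMM (the data and initial values are made up; it is not a full derivation):

```python
import numpy as np

def gauss_pdf(x, mu, sigma):
    return np.exp(-(x - mu) ** 2 / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi))

def em_step(x, alphas, mus, sigmas):
    # E-step: responsibilities gamma[i, j] = P(z_i = j | x_i, current parameters)
    dens = np.stack([a * gauss_pdf(x, m, s)
                     for a, m, s in zip(alphas, mus, sigmas)], axis=1)
    gamma = dens / dens.sum(axis=1, keepdims=True)

    # M-step: re-estimate weights, means, and variances from the responsibilities
    n_j = gamma.sum(axis=0)
    alphas = n_j / len(x)
    mus = (gamma * x[:, None]).sum(axis=0) / n_j
    sigmas = np.sqrt((gamma * (x[:, None] - mus) ** 2).sum(axis=0) / n_j)
    return alphas, mus, sigmas

# Toy data: 30% from N(-2, 0.8^2), 70% from N(1.5, 1.2^2)
rng = np.random.default_rng(0)
x = np.concatenate([rng.normal(-2.0, 0.8, 150), rng.normal(1.5, 1.2, 350)])

params = (np.array([0.5, 0.5]), np.array([-1.0, 1.0]), np.array([1.0, 1.0]))
for _ in range(100):
    params = em_step(x, *params)
print(params)   # roughly (0.3, 0.7), (-2, 1.5), (0.8, 1.2), up to component order
```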
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅵ BayesGraph.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅵ BayesGraph.md
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅶ VariationalInference.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅶ VariationalInference.md
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅷ MeanField.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅷ MeanField.md
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅸ Entropy.assets/OIP.jfif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅸ Entropy.assets/OIP.jfif
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅸ Entropy.assets/image-20200519155921660.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅸ Entropy.assets/image-20200519155921660.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅸ Entropy.assets/image-20200523201708073.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅸ Entropy.assets/image-20200523201708073.png
--------------------------------------------------------------------------------
/Probabilistic Robotics/PR Ⅸ Entropy.assets/multicolored-abstract-painting-1095624-710x210.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/PR Ⅸ Entropy.assets/multicolored-abstract-painting-1095624-710x210.jpg
--------------------------------------------------------------------------------
/Probabilistic Robotics/Probabilistic in Robotic (PR).png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/Probabilistic in Robotic (PR).png
--------------------------------------------------------------------------------
/Probabilistic Robotics/Probabilistic in Robotic (PR).xmind:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Probabilistic Robotics/Probabilistic in Robotic (PR).xmind
--------------------------------------------------------------------------------
/RL from Demonstration/Deep_Q_From_Demonstration.assets/image-20200522121546661.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/Deep_Q_From_Demonstration.assets/image-20200522121546661.png
--------------------------------------------------------------------------------
/RL from Demonstration/Deep_Q_From_Demonstration.assets/image-20200522121626682.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/Deep_Q_From_Demonstration.assets/image-20200522121626682.png
--------------------------------------------------------------------------------
/RL from Demonstration/Deep_Q_From_Demonstration.assets/image-20200522122022338.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/Deep_Q_From_Demonstration.assets/image-20200522122022338.png
--------------------------------------------------------------------------------
/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524205516760.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524205516760.png
--------------------------------------------------------------------------------
/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524213207117.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524213207117.png
--------------------------------------------------------------------------------
/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524215212011.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524215212011.png
--------------------------------------------------------------------------------
/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524215647493.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524215647493.png
--------------------------------------------------------------------------------
/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524221043525.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524221043525.png
--------------------------------------------------------------------------------
/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524221144933.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524221144933.png
--------------------------------------------------------------------------------
/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524221938482.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524221938482.png
--------------------------------------------------------------------------------
/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524222429294.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/RL from Demonstration/RLfrom_Imperfect_Demonstration.assets/image-20200524222429294.png
--------------------------------------------------------------------------------
/ROS/ROS Ⅰ:An Introduction.md:
--------------------------------------------------------------------------------
1 | # ROS Ⅰ:An Introduction
2 |
3 | [[TOC]]
 4 | # Overall Structure
 5 | **Computation graph level, filesystem level, community level**
6 | 
7 |
8 |
9 |
10 | ## Computation Graph Level
11 | Much like TensorFlow, the computation graph is the peer-to-peer network through which ROS processes data. At runtime, every process and the data processing it performs are represented in this peer-to-peer network. This level involves several key concepts: **nodes, messages, topics, and services**.
12 | 
13 | ### **Nodes**
14 | A node is a process that performs computation. ROS scales through code modularity: a typical system is composed of many nodes, and in this context a node can also be called a "software module". The term "node" makes a running ROS system easy to visualize: when many nodes run at once, the peer-to-peer communication can be drawn as a graph in which the processes are the vertices and the peer-to-peer connections are the arcs.
15 | ### **Messages**
16 | Nodes communicate with each other by passing messages. Each message is a strictly typed data structure. The standard primitive types (integer, floating point, boolean, etc.) are supported, as are arrays of primitives. Messages may contain arbitrarily nested structures and arrays (much like C structs).
17 | ### **Topics**
18 | \
19 | Messages are delivered in a **publish/subscribe** fashion. A node publishes messages to a given topic, and a node interested in a certain kind of data subscribes to the relevant topic. There may be several publishers and subscribers on the same topic at the same time. In general, publishers and subscribers are unaware of each other's existence. (A minimal rospy sketch follows.)
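Purely as an illustration (the node and topic names are made up; `rospy` and `std_msgs` ship with a standard ROS installation), a minimal publisher/subscriber pair might look like this, with the two functions running in two separate nodes:

```python
#!/usr/bin/env python
import rospy
from std_msgs.msg import String

def talker():
    rospy.init_node('talker')
    pub = rospy.Publisher('chatter', String, queue_size=10)
    rate = rospy.Rate(10)                      # publish at 10 Hz
    while not rospy.is_shutdown():
        pub.publish(String(data='hello world'))
        rate.sleep()

def listener():
    rospy.init_node('listener')
    rospy.Subscriber('chatter', String,
                     lambda msg: rospy.loginfo('I heard: %s', msg.data))
    rospy.spin()                               # keep the node alive, waiting for callbacks
```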
20 | ### **Services**
21 | Although the topic-based publish/subscribe model is a flexible communication paradigm, its broadcast-style routing is not suited to the **synchronous request/reply transactions** that can simplify node design. In ROS this is called a service, defined by a string name and a pair of strictly typed messages: one for the request and one for the reply. This is analogous to web services, which are defined by URIs and carry fully typed request and reply documents. Note that, unlike topics, only one node may advertise a service under any particular name, just as only one web server can own a given URI. (An illustrative sketch follows.)
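Again only as an illustration (the service name is invented; `Trigger` is a real type from `std_srvs`), a synchronous request/reply pair in rospy might look like this:

```python
import rospy
from std_srvs.srv import Trigger, TriggerResponse

def handle_ping(req):
    # The single node advertising the service answers each request
    return TriggerResponse(success=True, message='pong')

def server():
    rospy.init_node('ping_server')
    rospy.Service('ping', Trigger, handle_ping)
    rospy.spin()

def client():
    rospy.init_node('ping_client')
    rospy.wait_for_service('ping')
    ping = rospy.ServiceProxy('ping', Trigger)
    print(ping().message)      # the call blocks until the reply arrives
```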
22 | On top of these concepts, a controller is needed to keep all the nodes running in an orderly way; this is the ROS Master.
23 | The ROS Master provides registration and lookup for the rest of the computation graph via RPC (Remote Procedure Call). Without the Master, nodes cannot find one another, exchange messages, or invoke services.
24 | For example, the model by which the Master coordinates nodes subscribing and publishing messages looks like this:
25 | 
26 |
27 | The Master stores topic and service registration information for the ROS nodes. Nodes communicate with the Master to report their registrations. As they do so, they receive information about other registered nodes and establish connections with them. The Master also calls back to these nodes when the registration information changes, allowing nodes to create connections to new nodes dynamically.
28 | Connections between nodes are direct; the Master only provides lookup information, much like a DNS server. A node that subscribes to a topic requests a connection to the nodes publishing that topic, and the connection is established once both sides agree on a connection protocol.\
29 | Additionally, the Master also brokers services:\
30 | 
31 |
32 | ## Filesystem Level
33 | The filesystem level refers to how ROS source code is organized on disk. ROS contains countless nodes, messages, services, tools, and library files, and an effective structure is needed to manage all of this code. At the filesystem level there are two important concepts: **packages and stacks**.
34 | ### Packages
35 | \
36 | ROS software is organized into packages. A package may contain nodes, ROS-dependent libraries, datasets, configuration files, third-party software, or anything else that logically belongs together. The goal of a package is to provide an easy-to-use structure that encourages software reuse. In general, ROS packages are small and focused.
37 | ### Stacks
38 | \
39 | A stack is a collection of packages that together provide a complete capability, such as the "navigation stack". Stacks carry version numbers and are the key unit by which ROS software is released.
40 |
41 | ### File structure overview
42 | ROS is a distributed processing framework. This lets executables be designed individually and loosely coupled at runtime. These processes can be grouped into packages and stacks so that they can be shared and distributed. The figure below shows the concrete layout of packages and stacks on disk:\
43 | 
44 |
45 | **Manifests** (**manifest.xml**): provide metadata about a package, including its license, its dependencies on other packages, and language-specific information such as compiler flags (compilation/optimization options).\
46 | **Stack manifests** (**stack.xml**): provide metadata about a stack, including its license and its dependencies on other stacks.
47 |
48 | ## Community Level
49 | The community level is how code is released and shared across the ROS network,
50 | \
51 | through a federated system of code repositories that lets collaboration itself be distributed. This design, from the filesystem level up to the community level, makes independent development and deployment possible. It is precisely this distributed structure that has allowed ROS to grow so quickly, with the number of packages in the repositories increasing exponentially.
--------------------------------------------------------------------------------
/ROS/ROS Ⅲ:ROS 话题.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/ROS/ROS Ⅲ:ROS 话题.md
--------------------------------------------------------------------------------
/ROS/ROS Ⅳ:ROS 消息&服务.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/ROS/ROS Ⅳ:ROS 消息&服务.md
--------------------------------------------------------------------------------
/Reasoning/PR Reasoning Ⅴ:命题推理与First Order Logic Reasoning.md:
--------------------------------------------------------------------------------
1 | 
2 | # PR Reasoning Ⅴ: Propositional Reasoning and First-Order Logic Reasoning
--------------------------------------------------------------------------------
/Reasoning/PR Reasoning Ⅵ:Counterfactual Reasoning 反事实推理及其在深度学习中的应用.md:
--------------------------------------------------------------------------------
1 | 
2 | # PR Reasoning Ⅵ: Counterfactual Reasoning and Its Applications in Deep Learning
3 |
4 |
--------------------------------------------------------------------------------
/Reasoning/PR Reasoning Ⅶ:Graph Reasoning 基于图的推理.md:
--------------------------------------------------------------------------------
1 | # [Survey] Graph Reasoning: How to Reason with Graph Structures
2 |
3 | # PR Reasoning Ⅶ: Graph Reasoning (Graph-Based Reasoning)
4 |
5 | [[TOC]]
6 |
--------------------------------------------------------------------------------
/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200528152451409.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200528152451409.png
--------------------------------------------------------------------------------
/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200528201113694.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200528201113694.png
--------------------------------------------------------------------------------
/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200528213920267.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200528213920267.png
--------------------------------------------------------------------------------
/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200529151635542.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200529151635542.png
--------------------------------------------------------------------------------
/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200529151707215.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200529151707215.png
--------------------------------------------------------------------------------
/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200529154918336.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/A_survey_on_PS_for_Learning_Robot_controllers_in_a_Handful_of_trials.assets/image-20200529154918336.png
--------------------------------------------------------------------------------
/Related Works/Deep Reinforcement Learning for Robotic Manipulation with Asynchronous Off-Policy Updates.md:
--------------------------------------------------------------------------------
1 | # Deep Reinforcement Learning for Robotic Manipulation with Asynchronous Off-Policy Updates
2 |
3 | Shixiang Gu∗,1,2,3 and Ethan Holly∗,1 and Timothy Lillicrap 4 and Sergey Levine 1,5
4 |
5 | ∗equal contribution, 1 Google Brain, 2 University of Cambridge, 3 MPI Tübingen, 4 Google DeepMind, 5 UC Berkeley
6 |
7 | Paper: https://arxiv.org/pdf/1610.00633.pdf
--------------------------------------------------------------------------------
/Related Works/End-to-End Robotic Reinforcement Learning without Reward Engineering.assets/image-20191208214135640.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/End-to-End Robotic Reinforcement Learning without Reward Engineering.assets/image-20191208214135640.png
--------------------------------------------------------------------------------
/Related Works/End-to-End Robotic Reinforcement Learning without Reward Engineering.assets/image-20191209102302915.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/End-to-End Robotic Reinforcement Learning without Reward Engineering.assets/image-20191209102302915.png
--------------------------------------------------------------------------------
/Related Works/End-to-End Robotic Reinforcement Learning without Reward Engineering.assets/image-20191209104206015.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/End-to-End Robotic Reinforcement Learning without Reward Engineering.assets/image-20191209104206015.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(三).assets/image-20191221113203207.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(三).assets/image-20191221113203207.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(三).assets/image-20191221114139768.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(三).assets/image-20191221114139768.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(三).assets/image-20191221114316092.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(三).assets/image-20191221114316092.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(三).assets/image-20191221200948469.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(三).assets/image-20191221200948469.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(三).assets/image-20191221234256181.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(三).assets/image-20191221234256181.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(二).assets/image-20191219185107956.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(二).assets/image-20191219185107956.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(二).md:
--------------------------------------------------------------------------------
 1 | # IROS 2019 Quick Reads (Part 2)
2 |
3 | ## Learning Physics-Based Manipulation in Clutter: Combining Image-Based Generalization and Look-Ahead Planning
4 |
5 | Paper: http://arxiv.org/abs/1904.02223
6 |
7 | Video: https://youtu.be/EmkUQfyvwkY
8 |
9 | ### Keywords
10 |
11 | end-to-end learning, planning-based look-ahead, sequential decision, generalization, motion planning
12 |
13 | ### Main Idea
14 |
15 | Base Model: Domain Randomization + State Representation + (CNN+DNN) + $\epsilon$-RHP(Receding Horizon Planning) + Mask RCNN
16 |
17 | 
18 |
19 | A brief outline of the paper's pipeline:
20 |
21 | 1. Run Mask R-CNN on the image data for instance segmentation
22 | 2. Use domain randomization to improve the generalization of the value function, so that it is unaffected by: the shape and scale of the objects, the clutter density, the target location, and the initial pose of the objects and end-effector.
23 | 3. Build an imagination space in the robot's "mind" (a simulator based on Box2D)
24 | 4. Learn a value-function network built from a CNN+DNN, trained with double Q-learning
25 | 5. Roll out in the simulator with the RHP exploration policy
26 | 6. Output the most probable action and execute it on the real-world robot
27 | 7. Return to step 1
28 |
29 | ### Opinion
30 |
31 | Awesome work! The real-time simulator avoids the usual sim-first-then-real pipeline, giving robots the ability to think and plan in their own mind. Semantic segmentation and state representation let the robot exploit object shapes to make grasping easier.
32 |
33 |
34 |
35 | ## Motion Planning for a Continuum Robotic Mobile Lamp: Defining and Navigating the Configuration Space
36 |
37 | ### Keywords
38 |
39 | continuum robots, [RRT](https://www.cnblogs.com/21207-iHome/p/7210543.html)(rapidly exploring random tree)
40 |
41 | ### Main Idea
42 |
43 | Main contributions:
44 |
45 | 1. Configuration-space analysis of a continuum robot (single section) :milky_way:
46 | 2. Comparison of the configuration spaces of continuum and rigid-body robots
47 | 3. Geometric constraints of continuum robots
48 |
49 | ### Opinion
50 |
51 | 扩展了连续体机器人的运动规划理论
52 |
53 |
54 |
55 | ## Soft Action Particle Deep Reinforcement Learning for a Continuous Action Space
56 |
57 | Code: https://github.com/rllab-snu/soft_action_particle_method
58 |
59 | Video: https://www.youtube.com/watch?v=4PdSHtH_cB4
60 |
61 | ### Keywords
62 |
63 | on-policy, particle
64 |
65 | ### Main Idea
66 |
67 | Today's actor-critic algorithms need a large number of parameters, which leads to overfitting and makes tuning difficult. This paper proposes a new actor-critic architecture in which the actor network of conventional AC is replaced by a set of action particles, and the policy probabilities are represented by the state-action value network.
68 |
69 | 1. Partition the action space into $N_A$ Gaussians: $M=\left\{\mu_{i}\right\}_{i=1}^{N_{A}}, S=\left\{\sigma_{i}\right\}_{i=1}^{N_{A}}$
70 |
71 | 2. During training, the agent keeps sampling a new action particle set $A$:
72 |
73 | $a_{i}=\mu_{i}+\epsilon_{i} \sigma_{i}, \quad \epsilon_{i} \sim \mathcal{N}(0,1)$
74 |
75 | 3. The policy distribution is defined as follows,
76 |
77 | $$
78 | \pi_{\theta}\left(a_{i} | s\right) \triangleq \frac{\mathbb{E}_{\epsilon_{i}}\left[\exp \left(Q_{\theta}\left(s, a_{i}\right) / \alpha\right)\right]}{\sum_{m_{j} \in M, \sigma_{j} \in S} \mathbb{E}_{\epsilon_{j}}\left[\exp \left(Q_{\theta}\left(s, a_{j}\right) / \alpha\right)\right]}
79 | $$
80 |
81 | 4. Update the Q-function with soft Q-learning
82 | 5. To keep the optimal action from falling outside $M$, the action particles are also moved in the direction of the largest action value
83 | 6. To keep the action particles from clustering together after the update, pairwise Euclidean distances are computed; whenever a distance falls below a threshold, one of the two particles is deleted and then resampled (a small numpy sketch of the particle softmax in step 3 follows this list)
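Just to make the particle-softmax policy in step 3 concrete, here is a rough numpy sketch; the Q-function below is a made-up stand-in for the learned network, and the expectation over ε is estimated by simple Monte Carlo sampling:

```python
import numpy as np

rng = np.random.default_rng(0)

def q_value(state, action):
    # Hypothetical stand-in for the learned state-action value network
    return -np.sum((action - 0.3) ** 2) + state.sum()

def particle_policy(state, M, S, alpha=0.1, n_eps=32):
    """pi(a_i | s) ∝ E_eps[exp(Q(s, mu_i + eps * sigma_i) / alpha)], Monte Carlo estimate."""
    scores = []
    for mu, sigma in zip(M, S):
        eps = rng.standard_normal((n_eps,) + mu.shape)
        qs = np.array([q_value(state, mu + e * sigma) for e in eps])
        scores.append(np.exp(qs / alpha).mean())
    scores = np.array(scores)
    return scores / scores.sum()          # normalized over all particles

M = rng.uniform(-1, 1, size=(8, 2))       # 8 particle means in a 2-D action space
S = np.full((8, 2), 0.1)                  # shared exploration std
probs = particle_policy(np.zeros(3), M, S)
print(probs, probs.sum())                 # a proper distribution over the particles
```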
84 |
85 | ### Opinion
86 |
87 | This is in effect a combination of evolutionary methods and RL, of particle-swarm ideas and SAC. It cuts the number of parameters dramatically while preserving accuracy, which makes it a worthwhile approach.
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(五).md:
--------------------------------------------------------------------------------
 1 | # IROS 2019 Paper Quick Reads (Part 5)
2 |
3 | ## Learning Real-time Closed Loop Robotic Reaching from Monocular Vision by Exploiting A Control Lyapunov Function Structure
4 |
5 | ### Keywords
6 |
7 | Lyapunov function, multi-goal,
8 |
9 | ### Main Idea
10 |
11 |
12 |
13 | ### Opinion
14 |
15 |
16 |
17 |
18 |
19 | ## Learning Virtual Grasp with Failed Demonstrations via Bayesian Inverse Reinforcement Learning
20 |
21 | ### Keywords
22 |
23 | Failed demonstration, Bayesian Inverse Reinforcement Learning(BIRL)
24 |
25 | ### Main Idea
26 |
27 |
28 |
29 | ### Opinion
30 |
31 |
32 |
33 | ## TendencyRL: Multi-stage Discriminative Hints for Efficient Goal-Oriented Reverse Curriculum Learning
34 |
35 | ### Keywords
36 |
37 |
38 |
39 | ### Main Idea
40 |
41 |
42 |
43 | ### Opinion
44 |
45 |
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(四).assets/dream_2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(四).assets/dream_2.gif
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(四).assets/dream_6.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(四).assets/dream_6.gif
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(四).assets/image-20191222112555208.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(四).assets/image-20191222112555208.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(四).assets/image-20191222123748477.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(四).assets/image-20191222123748477.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读(四).assets/image-20191222144804275.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读(四).assets/image-20191222144804275.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读.assets/image-20191216203301798.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读.assets/image-20191216203301798.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读.assets/image-20191216211111079.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读.assets/image-20191216211111079.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读.assets/image-20191216212441606.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读.assets/image-20191216212441606.png
--------------------------------------------------------------------------------
/Related Works/IROS2019速读.assets/image-20191217203423494.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/IROS2019速读.assets/image-20191217203423494.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/1_AcaPiikZErVv_iFJzWekQg.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/1_AcaPiikZErVv_iFJzWekQg.gif
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/NTM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/NTM.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/combine-slow-fast-weights.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/combine-slow-fast-weights.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/equation-1577535254476.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/equation.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/few-shot-classification.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/few-shot-classification.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/image-20191226194740259.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/image-20191226194740259.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/image-20191226200939808.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/image-20191226200939808.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/image-20191226202123590.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/image-20191226202123590.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/image-20191226202326065.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/image-20191226202326065.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/lstm-meta-learner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/lstm-meta-learner.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/maml-algo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/maml-algo.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/maml.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/maml.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/mann-meta-learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/mann-meta-learning.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/matching-networks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/matching-networks.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/meta-network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/meta-network.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/prototypical-networks-1577419209148.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/prototypical-networks-1577419209148.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/prototypical-networks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/prototypical-networks.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/relation-network.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/relation-network.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/reptile-algo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/reptile-algo.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/reptile_vs_FOMAML.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/reptile_vs_FOMAML.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/siamese-conv-net.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/siamese-conv-net.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/train-meta-learner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/train-meta-learner.png
--------------------------------------------------------------------------------
/Related Works/Meta learning An Introduction.assets/v2-2d61ff11eb1a5a9e52d6c12eb333eb4b_hd.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Meta learning An Introduction.assets/v2-2d61ff11eb1a5a9e52d6c12eb333eb4b_hd.jpg
--------------------------------------------------------------------------------
/Related Works/Meta-Reinforcement-Learning An Introduction.md:
--------------------------------------------------------------------------------
1 | # Meta-Reinforcement-Learning: An Introduction
2 |
 3 | Before turning to Meta-Reinforcement-Learning, this column has already given a fairly complete summary of Meta-Learning:
4 |
5 | 1. Meta-Learning: An Introduction Ⅰ: [[Zhihu](https://zhuanlan.zhihu.com/p/99730942)] [[Github](https://github.com/Skylark0924/Reinforcement-Learning-in-Robotics/blob/master/Related%20Works/Meta%20learning%20An%20Introduction.md)]
6 | 2. Meta-Learning: An Introduction Ⅱ: [[Zhihu](https://zhuanlan.zhihu.com/p/100035717)] [[Github](https://github.com/Skylark0924/Reinforcement-Learning-in-Robotics/blob/master/Related%20Works/Meta%20learning%20An%20Introduction.md)]
7 | 3. Meta-Learning: An Introduction Ⅲ: [[Zhihu](https://zhuanlan.zhihu.com/p/100266389)] [[Github](https://github.com/Skylark0924/Reinforcement-Learning-in-Robotics/blob/master/Related%20Works/Meta%20learning%20An%20Introduction.md)]
8 |
9 | Informally, Meta-Learning builds an architecture that is itself able to learn, so that the model is not tied to one specific task but can adapt quickly to unseen tasks. Research on Meta-Learning falls roughly into three categories: Metric-Based (metric-learning methods), Optimization-Based (learning an optimizer that generalizes), and Model-Based (architectures designed to generalize). Today we start studying Meta-Reinforcement-Learning.
10 |
11 | Meta-Reinforcement-Learning, as the name suggests, applies the ideas of meta-learning to reinforcement learning tasks: after learning a set of tasks, the agent exploits its internal structure to, in effect, derive a new RL procedure for learning a new task.
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/Related Works/Overcoming Exploration in Reinforcement Learning with Demonstrations.assets/image-20191211211229554.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/Overcoming Exploration in Reinforcement Learning with Demonstrations.assets/image-20191211211229554.png
--------------------------------------------------------------------------------
/Related Works/Overcoming Exploration in Reinforcement Learning with Demonstrations.md:
--------------------------------------------------------------------------------
1 | # Overcoming Exploration in Reinforcement Learning with Demonstrations
2 |
3 | Paper: https://arxiv.org/pdf/1709.10089.pdf
4 |
5 | Website: http://ashvin.me/demoddpg-website/
6 |
7 | ## Main Idea
8 |
9 | They combine **demonstrations** with RL to overcome the **sparse reward** problem and successfully learn to perform long-horizon, multi-step robotics tasks with continuous control such as stacking blocks with a robot arm.
10 |
11 | This method is based on **Deep Deterministic Policy Gradient** (DDPG) and **Hindsight Experience Replay** (HER, a state-of-the-art method for overcoming the sparse-reward problem).
12 |
13 | ## Method
14 |
15 | ### Demonstration Buffer
16 |
17 | They build a second replay buffer $R_D$ that stores the demonstrations in the same format as $R$, and train on both buffers together with the exploration experience.
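
As a rough illustration (my own sketch, not the authors' code; the list-based buffers and the demo/exploration split below are assumptions), a training minibatch can simply mix transitions from both buffers:

```python
import numpy as np

def sample_batch(R, R_D, batch_size=128, n_demo=32):
    """Draw a minibatch mixing exploration transitions from R
    with demonstration transitions from R_D (plain Python lists here)."""
    idx_r = np.random.randint(len(R), size=batch_size - n_demo)
    idx_d = np.random.randint(len(R_D), size=n_demo)
    return [R[i] for i in idx_r] + [R_D[i] for i in idx_d]
```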
18 |
19 | ### Behavior Cloning Loss (BC Loss)
20 |
21 | $$
22 | L_{B C}=\sum_{i=1}^{N_{D}}\left\|\pi\left(s_{i} | \theta_{\pi}\right)-a_{i}\right\|^{2}
23 | $$
24 |
25 | This loss is applied only to the $R_D$ examples when training the actor. They use Equation 1 as an auxiliary loss for RL and find that it improves learning significantly.
26 | $$
27 | \lambda_{1} \nabla_{\theta_{\pi}} J-\lambda_{2} \nabla_{\theta_{\pi}} L_{B C}
28 | $$
29 | where $\theta_\pi$ are the actor parameters. Note that $J$ is maximized while $L_{BC}$ is minimized.
30 |
31 | **Using this loss directly prevents the learned policy from improving significantly beyond the demonstration policy, as the actor is always tied back to the demonstrations.**
32 |
33 | ### Q-Filter
34 |
35 | This is a trick to handle **the possibility that the demonstration is no better than the current policy**: a filter applies the **BC Loss** only on samples where $Q(s_i, a_i) > Q(s_i, \pi(s_i))$.
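
A minimal NumPy sketch of the Q-filtered BC loss (my own illustration, not the paper's implementation; the toy arrays below are made up): the BC term only counts demo samples whose action the critic rates higher than the current policy's action.

```python
import numpy as np

def q_filtered_bc_loss(pi_actions, demo_actions, q_demo, q_pi):
    """L_BC with the Q-filter: squared action errors summed over the
    demo samples where Q(s_i, a_i) > Q(s_i, pi(s_i))."""
    mask = (q_demo > q_pi).astype(float)
    per_sample = np.sum((pi_actions - demo_actions) ** 2, axis=-1)
    return np.sum(mask * per_sample)

# Toy demonstration batch: N_D = 3 transitions with 2-D actions.
pi_actions = np.array([[0.1, 0.2], [0.5, 0.5], [0.3, 0.1]])    # pi(s_i)
demo_actions = np.array([[0.0, 0.2], [0.4, 0.6], [0.3, 0.1]])  # a_i
q_demo = np.array([1.0, 0.5, 0.9])   # Q(s_i, a_i)
q_pi = np.array([0.8, 0.7, 0.9])     # Q(s_i, pi(s_i))

L_BC = q_filtered_bc_loss(pi_actions, demo_actions, q_demo, q_pi)
# The actor is then updated along lambda_1 * grad(J) - lambda_2 * grad(L_BC).
print(L_BC)
```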
36 |
37 | ### Resets to demonstration states
38 |
39 | To cope with sparse rewards in this long-horizon task, they reset some episodes to start from a given state in the demonstration buffer and use that demonstration's final state as the goal.
40 |
41 | ### In conclusion
42 |
43 | **Main method = (BC Loss + Q-Filter + HER + Reset) + DDPG**
44 |
45 | ## Experiment
46 |
47 | ### Task
48 |
49 | Block Stacking: stack 2-6 blocks
50 |
51 | 
52 |
53 | ### Ablation Experiment
54 |
55 | - BC Loss: Without the behavior cloning loss, the method is significantly worse on every task they try. Since stacking the tower higher is risky and could lower the reward if the agent knocks over a block that is already correctly placed, the agent tends to stop after reaching a certain reward; the BC loss forces it to continue.
56 | - Q-Filter: accelerates the learning process.
57 | - Reset: same motivation as the BC loss; resetting from demonstration states alleviates the problem because the agent regularly experiences higher rewards.
58 |
59 | ## Disadvantage
60 |
61 | Impractical outside of simulation due to low sample efficiency: training takes about **1 million timesteps**, roughly **6 hours** of real-world interaction time.
62 |
63 | ## Conclusion for the note
64 |
65 | Honestly, this paper reminds me how small humanity is: even after piling up this many tricks and training for nearly 1 million timesteps, a human-built robot can only stack 2-6 blocks, and tremblingly at that. What a complicated world!
--------------------------------------------------------------------------------
/Related Works/The Predictron End-To-End Learning and Planning.assets/image-20191211163322741.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/The Predictron End-To-End Learning and Planning.assets/image-20191211163322741.png
--------------------------------------------------------------------------------
/Related Works/The Predictron End-To-End Learning and Planning.assets/image-20191211201710251.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/The Predictron End-To-End Learning and Planning.assets/image-20191211201710251.png
--------------------------------------------------------------------------------
/Related Works/The Predictron End-To-End Learning and Planning.assets/image-20191211202507981.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/The Predictron End-To-End Learning and Planning.assets/image-20191211202507981.png
--------------------------------------------------------------------------------
/Related Works/The Predictron End-To-End Learning and Planning.assets/image-20191212102538657.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/The Predictron End-To-End Learning and Planning.assets/image-20191212102538657.png
--------------------------------------------------------------------------------
/Related Works/The Predictron End-To-End Learning and Planning.assets/image-20191212102714873.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/The Predictron End-To-End Learning and Planning.assets/image-20191212102714873.png
--------------------------------------------------------------------------------
/Related Works/When to Trust Your Model Model-Based Policy Optimization.assets/image-20191215201141993.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/When to Trust Your Model Model-Based Policy Optimization.assets/image-20191215201141993.png
--------------------------------------------------------------------------------
/Related Works/When to Trust Your Model Model-Based Policy Optimization.md:
--------------------------------------------------------------------------------
1 | # When to Trust Your Model: Model-Based Policy Optimization
2 |
3 | 
4 |
5 | Paper:
6 |
7 | Code: https://github.com/JannerM/mbpo
8 |
9 | Website: https://people.eecs.berkeley.edu/~janner/mbpo/
10 |
11 | ## Main Idea
12 |
13 | - Formulate model-based RL algorithms with monotonic improvement guarantees
14 | - Use short model-based rollouts
15 |
16 |
17 |
18 | ## Keywords
19 |
20 | Model-based, monotonic improvement guarantee, branched rollout
21 |
22 | ## Preliminaries
23 |
24 | 1. I have always been somewhat confused about the exact meaning of "rollout"; I only knew it refers to one trial, one trajectory. Today I looked it up and found the following explanation:
25 | > The standard use of “rollout” (also called a “playout”) is in regard to an execution of a policy from the current state when there is some uncertainty about the next state or outcome - it is one simulation from your current state. The purpose is for an agent to evaluate many possible next actions in order to find an action that will maximize value (long-term expected reward).
26 |
27 | The term originates from the paper [Tesauro and Galperin NIPS 1997](http://papers.nips.cc/paper/1302-on-line-policy-improvement-using-monte-carlo-search.pdf).
28 |
29 | 2. Horizon: the number of time steps that we sample or simulate.
30 |
31 | ## Method
32 |
33 |
34 | $$
35 | \eta[\pi] \geq \hat{\eta}[\pi]-C
36 | $$
37 |
38 | $$
39 | \eta[\pi] \geq \hat{\eta}[\pi]-\underbrace{\left[\frac{2 \gamma r_{\max }\left(\epsilon_{m}+2 \epsilon_{\pi}\right)}{(1-\gamma)^{2}}+\frac{4 r_{\max } \epsilon_{\pi}}{(1-\gamma)}\right]}_{C\left(\epsilon_{m}, \epsilon_{\pi}\right)}
40 | $$
41 |
42 | ### Branched Rollout
43 |
44 | $$
45 | \eta[\pi] \geq \eta^{\mathrm{branch}}[\pi]-2 r_{\max }\left[\frac{\gamma^{k+1} \epsilon_{\pi}}{(1-\gamma)^{2}}+\frac{\gamma^{k}+2}{(1-\gamma)} \epsilon_{\pi}+\frac{k}{1-\gamma}\left(\epsilon_{m}+2 \epsilon_{\pi}\right)\right]
46 | $$
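
To get a feel for the bound, here is a small numerical sketch (my own, with arbitrarily chosen error values, not the paper's code) that evaluates the penalty term above as a function of the branch length $k$:

```python
import numpy as np

def branched_penalty(k, gamma, eps_pi, eps_m, r_max=1.0):
    """Penalty term of the branched-rollout bound, as a function of k."""
    return 2 * r_max * (
        gamma ** (k + 1) * eps_pi / (1 - gamma) ** 2
        + (gamma ** k + 2) * eps_pi / (1 - gamma)
        + k / (1 - gamma) * (eps_m + 2 * eps_pi)
    )

ks = np.arange(0, 51)
penalty = branched_penalty(ks, gamma=0.99, eps_pi=0.01, eps_m=0.005)
# Branch length that minimizes the penalty for these (assumed) error values.
print(int(ks[np.argmin(penalty)]), float(penalty.min()))
```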
47 |
48 |
49 |
50 | ### Model Generalization
51 |
52 |
53 | $$
54 | \eta[\pi] \geq \eta^{\mathrm{branch}}[\pi]-2 r_{\max }\left[\frac{\gamma^{k+1} \epsilon_{\pi}}{(1-\gamma)^{2}}+\frac{\gamma^{k} \epsilon_{\pi}}{(1-\gamma)}+\frac{k}{1-\gamma}\left(\epsilon_{m^{\prime}}\right)\right]
55 | $$
56 |
57 | $$
58 | \hat{\epsilon}_{m^{\prime}}\left(\epsilon_{\pi}\right) \approx \epsilon_{m}+\epsilon_{\pi} \frac{\mathrm{d} \epsilon_{m^{\prime}}}{\mathrm{d} \epsilon_{\pi}}
59 | $$
60 |
61 |
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624101204649.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624101204649.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624101305253.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624101305253.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624101402324.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624101402324.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624101416544.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624101416544.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624101704078.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624101704078.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624101836768.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624101836768.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624102016044.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624102016044.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624102239093.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624102239093.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624102756731.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624102756731.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624102845779.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624102845779.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624102945812.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624102945812.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624103018972.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624103018972.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624103109295.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624103109295.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624103145785.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624103145785.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624104915752.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624104915752.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624105312169.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624105312169.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624105434860.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624105434860.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624105534892.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624105534892.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624110446944.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624110446944.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.assets/image-20200624110634783.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Related Works/智源大会笔记.assets/image-20200624110634783.png
--------------------------------------------------------------------------------
/Related Works/智源大会笔记.md:
--------------------------------------------------------------------------------
1 | # Satinder Singh
2 |
3 | ### WHAT CAN LEARNED INTRINSIC REWARDS CAPTURE?
4 |
5 | 
6 |
7 | 
8 |
9 | 
10 |
11 | 
12 |
13 | Non-stationary
14 |
15 | 
16 |
17 | Blue: positive; red: negative
18 |
19 | 
20 |
21 | 
22 |
23 | ### Second
24 |
25 | General Value Functions (GVFs)
26 |
27 | 
28 |
29 | 
30 |
31 | 
32 |
33 | 
34 |
35 |
36 |
37 | 
38 |
39 | # 安波 (Bo An)
40 |
41 | Competitive environments, multi-agent, game theory
42 |
43 | #### Large-scale games using learning automata
44 |
45 | 
46 |
47 | #### When is RL needed
48 |
49 | - The problem is not easy to model explicitly
50 | - Large scale
51 | - Non-convex and cannot be approximated
52 | - No domain structure can be exploited
53 |
54 | 
55 |
56 | Contract bridge
57 |
58 | 
59 |
60 | HRL
61 |
62 | 
63 |
64 | 
--------------------------------------------------------------------------------
/Representation/Repre 1:Introduction.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Representation/Repre 1:Introduction.md
--------------------------------------------------------------------------------
/Robotics/Bimanual coordination.md:
--------------------------------------------------------------------------------
1 | # Bimanual coordination
2 |
3 | - [Bimanual coordination](#bimanual-coordination)
4 | - [Function-based](#function-based)
5 | - [Bimanual twisting](#bimanual-twisting)
6 | - [Redundancy Resolution for Dual-Arm Robots Inspired by Human Asymmetric Bimanual Action: Formulation and Experiments](#redundancy-resolution-for-dual-arm-robots-inspired-by-human-asymmetric-bimanual-action-formulation-and-experiments)
7 | - [Robot modeling](#robot-modeling)
8 | - [Learning-based](#learning-based)
9 |
10 | ## Function-based
11 |
12 | ### Bimanual twisting
13 | 
14 |
15 | 
16 |
17 |
18 |
19 | ### Redundancy Resolution for Dual-Arm Robots Inspired by Human Asymmetric Bimanual Action: Formulation and Experiments
20 | #### Robot modeling
21 | 
22 | The velocity of A relative to B is:
23 | $$
24 | \dot{\mathrm{x}}_{R}=-\dot{\mathrm{x}}_{A}+\dot{\mathrm{x}}_{B}=\mathbf{J}_{R} \dot{\mathbf{q}}
25 | $$
26 | where the relative Jacobian is
27 | $$
28 | \mathbf{J}_{R}=\left[-{ }^{R} \mathbf{R}_{A} \mathbf{J}_{A} \quad{ }^{R} \mathbf{R}_{T}{ }^{T} \mathbf{R}_{B} \mathbf{J}_{B}\right]
29 | $$
30 | The joint velocities, with the absolute motion of A resolved in the null space of the relative task, are:
31 | $$
32 | \dot{\mathbf{q}}=\mathbf{J}_{R}^{+} \dot{\mathbf{x}}_{R}+\left(\mathbf{I}-\mathbf{J}_{R}^{+} \mathbf{J}_{R}\right)\left[\begin{array}{ll}
33 | \mathbf{J}_{A} & \mathbf{0}
34 | \end{array}\right]^{+} \dot{\mathbf{x}}_{A}
35 | $$
36 | where $+$ denotes the pseudo-inverse. The last term means that the right hand (the tool end-effector) moves relative to the left hand (the reference end-effector). Moreover, the absolute motion of the left hand is unconstrained, so it can be anywhere in the reachable workspace. This unconstrained motion can be used for self-motion such as obstacle avoidance or joint-limit avoidance, i.e., the left arm adjusts its position in the workspace so that the right arm does not reach its joint limits while performing the asymmetric bimanual task.
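
A tiny NumPy sketch of this velocity-level resolution (my own, with random Jacobians and the rotation matrices taken as identity purely to check the algebra):

```python
import numpy as np

np.random.seed(0)
n_a, n_b, m = 7, 7, 6                      # joints per arm, task-space dimension
J_A = np.random.randn(m, n_a)              # reference (left) arm Jacobian
J_B = np.random.randn(m, n_b)              # tool (right) arm Jacobian
J_R = np.hstack([-J_A, J_B])               # relative Jacobian (rotations = identity here)

x_R_dot = np.random.randn(m)               # desired relative velocity
x_A_dot = np.random.randn(m)               # desired absolute velocity of arm A

J_R_pinv = np.linalg.pinv(J_R)
N = np.eye(n_a + n_b) - J_R_pinv @ J_R     # null-space projector of J_R
J_A_aug = np.hstack([J_A, np.zeros((m, n_b))])   # [J_A  0]

# q_dot = J_R^+ x_R_dot + (I - J_R^+ J_R) [J_A 0]^+ x_A_dot
q_dot = J_R_pinv @ x_R_dot + N @ np.linalg.pinv(J_A_aug) @ x_A_dot
print(np.allclose(J_R @ q_dot, x_R_dot))   # the relative task is still satisfied
```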
37 |
38 | The redundancy resolution at the joint-acceleration level is
39 | $$
40 | \ddot{\mathbf{q}}=\mathbf{J}_{R}^{+}\left(\ddot{\mathbf{x}}_{R}-\dot{\mathbf{J}}_{R} \dot{\mathbf{q}}\right)+\left(\mathbf{I}-\mathbf{J}_{R}^{+} \mathbf{J}_{R}\right) \boldsymbol{\eta}
41 | $$
42 |
43 | **Relative impedance control**
44 | The torque-level controller is designed using time-delay estimation:
45 | $$
46 | \boldsymbol{\tau}_{u}=\overline{\mathbf{M}} \ddot{\mathbf{q}}+\boldsymbol{\tau}_{u(t-L)}-\overline{\mathbf{M}} \ddot{\mathbf{q}}_{(t-L)}
47 | $$
48 | $\ddot{\mathbf{q}}$ is computed from the acceleration-level resolution above, where $\ddot{\mathbf{x}}_R$ is obtained from the target dynamics, which describe the dynamic relation between the relative motion and the relative force of the two end-effectors:
49 | $$
50 | \mathbf{f}_{R}=\mathbf{M}_{R d}\left(\ddot{\mathbf{x}}_{R d}-\ddot{\mathbf{x}}_{R}\right)+\mathbf{B}_{R d}\left(\dot{\mathbf{x}}_{R d}-\dot{\mathbf{x}}_{R}\right)+\mathbf{K}_{R d}\left(\mathbf{x}_{R d}-\mathbf{x}_{R}\right)
51 | $$
52 | $\mathbf{f}_{R}$ denotes the relative contact force, and $\mathbf{M}_{R d}, \mathbf{B}_{R d}, \mathbf{K}_{R d}$ denote the desired mass, damping, and stiffness matrices, respectively.
53 |
54 | ## Learning-based
55 |
56 |
--------------------------------------------------------------------------------
/Simulator/MuJoCo机器人建模教程.assets/grid1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Simulator/MuJoCo机器人建模教程.assets/grid1.png
--------------------------------------------------------------------------------
/Simulator/MuJoCo机器人建模教程.assets/grid2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Simulator/MuJoCo机器人建模教程.assets/grid2.png
--------------------------------------------------------------------------------
/Simulator/MuJoCo机器人建模教程.assets/grid2pin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Simulator/MuJoCo机器人建模教程.assets/grid2pin.png
--------------------------------------------------------------------------------
/Simulator/MuJoCo机器人建模教程.assets/openai-robotics-hand-with-cube-solved-crop-2000w.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Simulator/MuJoCo机器人建模教程.assets/openai-robotics-hand-with-cube-solved-crop-2000w.jpg
--------------------------------------------------------------------------------
/Simulator/MuJoCo机器人建模教程.assets/particle2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Simulator/MuJoCo机器人建模教程.assets/particle2.png
--------------------------------------------------------------------------------
/Simulator/MuJoCo机器人建模教程.assets/unnamed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Simulator/MuJoCo机器人建模教程.assets/unnamed.png
--------------------------------------------------------------------------------
/Simulator/MuJoCo详细使用指南.md:
--------------------------------------------------------------------------------
1 | # MuJoCo Detailed Usage Guide
2 |
3 |
4 | - [MuJoCo Detailed Usage Guide](#mujoco-detailed-usage-guide)
5 | - [Installation](#installation)
6 | - [Download](#download)
7 | - [Testing](#testing)
8 | - [Modeling](#modeling)
9 | - [Python API](#python-api)
10 | - [mujoco-py](#mujoco-py)
11 |
12 |
13 | ## Installation
14 | ### Download
15 | MuJoCo requires no installation: download the matching release from the [official site](https://mujoco.org/download) and unpack the archive. Its contents are laid out as follows:
16 |
17 | ```
18 | mujoco210
19 | bin - dynamic libraries, executables, MUJOCO_LOG.TXT
20 | doc - README.txt and REFERENCE.txt
21 | include - header files needed to develop with MuJoCo
22 | model - model collection (extra models available on the Forum)
23 | sample - code samples and makefile need to build them
24 | ```
25 | ### Testing
26 | ```
27 | Windows: simulate ..\model\humanoid.xml
28 | Linux and macOS: ./simulate ../model/humanoid.xml
29 | ```
30 |
31 | ## Modeling
32 |
33 |
34 |
35 | ## Python API
36 | ### mujoco-py
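A minimal mujoco-py sketch (assuming mujoco-py is installed and that the model path below, an assumption, points at the humanoid.xml shipped in the package downloaded above):

```python
from mujoco_py import load_model_from_path, MjSim, MjViewer

model = load_model_from_path("mujoco210/model/humanoid.xml")  # path is an assumption
sim = MjSim(model)        # physics simulation bound to the model
viewer = MjViewer(sim)    # opens an interactive rendering window

for _ in range(1000):
    sim.data.ctrl[:] = 0.0    # zero actuation on all actuators
    sim.step()                # advance the physics by one timestep
    viewer.render()
```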
37 |
38 |
39 |
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/Simulator/PyBullet详细使用指南.md:
--------------------------------------------------------------------------------
1 | # PyBullet 详细使用指南
--------------------------------------------------------------------------------
/Simulator/Sim2real.md:
--------------------------------------------------------------------------------
1 | # Sim2real in Robot Learning: An Introduction
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719135624489.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719135624489.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719135706275.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719135706275.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719141215552.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719141215552.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719141544422.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719141544422.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719141645589.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719141645589.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719142143932.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719142143932.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719160240612.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719160240612.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719160620909.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719160620909.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719161117095.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719161117095.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719161134980.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719161134980.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719163653512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719163653512.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719164027556.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719164027556.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719164332203.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719164332203.png
--------------------------------------------------------------------------------
/Structured/PR Structure Ⅱ .assets/image-20200719164745855.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structure Ⅱ .assets/image-20200719164745855.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712132319666.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712132319666.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712152008297.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712152008297.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712152040834.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712152040834.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712152050882.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712152050882.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712154103209.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712154103209.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712155738229.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712155738229.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712161616515.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712161616515.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712165321733.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712165321733.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712170431243.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712170431243.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712171214741.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712171214741.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712172004136.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712172004136.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712172227154.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712172227154.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712174617755.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712174617755.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712175300499.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712175300499.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712175439303.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712175439303.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712175812940.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712175812940.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712180112851.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712180112851.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712195813501.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712195813501.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712203453039.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712203453039.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712212800180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712212800180.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712212854141.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712212854141.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712213349638.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712213349638.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712213614662.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712213614662.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712214932503.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712214932503.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712215137652.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712215137652.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712230809276.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712230809276.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200712231606429.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200712231606429.png
--------------------------------------------------------------------------------
/Structured/PR Structured Ⅰ GNN.assets/image-20200719135706275.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Structured/PR Structured Ⅰ GNN.assets/image-20200719135706275.png
--------------------------------------------------------------------------------
/Tools/Atlas/Atlas 使用指南.assets/Atlas软硬件架构.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Tools/Atlas/Atlas 使用指南.assets/Atlas软硬件架构.png
--------------------------------------------------------------------------------
/Tools/Atlas/Atlas安装配置流程.eps:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Tools/Atlas/Atlas安装配置流程.eps
--------------------------------------------------------------------------------
/Tools/Atlas/Atlas安装配置流程.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Tools/Atlas/Atlas安装配置流程.png
--------------------------------------------------------------------------------
/Tools/Atlas/Atlas软硬件架构.eps:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Tools/Atlas/Atlas软硬件架构.eps
--------------------------------------------------------------------------------
/Tools/Atlas/Atlas软硬件架构.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Tools/Atlas/Atlas软硬件架构.png
--------------------------------------------------------------------------------
/Tools/C++部署Pytorch模型方法.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/Tools/C++部署Pytorch模型方法.docx
--------------------------------------------------------------------------------
/Tools/Docker/Docker Ⅰ:安装与测试指南.md:
--------------------------------------------------------------------------------
1 | # Docker Ⅰ: Installation and Testing Guide
2 |
3 | [[TOC]]
4 |
5 | > To do a good job, one must first sharpen one's tools.
6 |
7 | **System information**
8 |
9 | - Ubuntu 18.04
10 | - Nvidia RTX 2080 Ti GPU
11 | - Nvidia Driver Version: 440.33.01
12 | - CUDA Version: 10.2
13 |
14 | ## Installation
15 |
16 |
17 |
18 | ## Testing
19 | ### Start Docker
20 | ```
21 | $ sudo systemctl enable docker
22 | $ sudo systemctl start docker
23 | ```
24 | (no output)
25 |
26 |
27 | ### Test the installation
28 | ```
29 | $ docker run --rm hello-world
30 | ```
31 | The output should look like:
32 | ```
33 | Hello from Docker!
34 | This message shows that your installation appears to be working correctly.
35 |
36 | To generate this message, Docker took the following steps:
37 | 1. The Docker client contacted the Docker daemon.
38 | 2. The Docker daemon pulled the "hello-world" image from the Docker Hub.
39 | (amd64)
40 | 3. The Docker daemon created a new container from that image which runs the
41 | executable that produces the output you are currently reading.
42 | 4. The Docker daemon streamed that output to the Docker client, which sent it
43 | to your terminal.
44 |
45 | To try something more ambitious, you can run an Ubuntu container with:
46 | $ docker run -it ubuntu bash
47 |
48 | Share images, automate workflows, and more with a free Docker ID:
49 | https://hub.docker.com/
50 |
51 | For more examples and ideas, visit:
52 | https://docs.docker.com/get-started/
53 |
54 | ```
55 | If you see the output above, the installation is working correctly.\
56 | If this is the first time the hello-world image is run, Docker will pull it automatically, so the following appears before the output above:
57 | ```
58 | Unable to find image 'hello-world:latest' locally
59 | latest: Pulling from library/hello-world
60 | b8dfde127a29: Pull complete
61 | Digest: sha256:f2266cbfc127c960fd30e76b7c792dc23b588c0db76233517e1891a4e357d519
62 | Status: Downloaded newer image for hello-world:latest
63 | ```
64 |
65 | ## Uninstall
66 | ```
67 | $ sudo apt-get remove docker docker-engine docker.io
68 | ```
69 |
70 | ## Reference
71 | 1. [Docker —— 从入门到实践](https://yeasy.gitbook.io/docker_practice/)
--------------------------------------------------------------------------------
/Tools/Docker/Docker Ⅱ:管理与使用命令手册.md:
--------------------------------------------------------------------------------
1 | # Docker Ⅱ: Administration and Command Reference
2 |
3 | [[TOC]]
4 |
5 | > To do a good job, one must first sharpen one's tools.
6 |
7 | ## Images
8 | Before running a container, Docker needs the corresponding image to exist locally; if it does not, Docker downloads it from the image registry.
9 | ### List images
10 | ```
11 | $ sudo docker images -a
12 | ```
13 | or
14 | ```
15 | $ sudo docker images
16 | ```
17 | ### Pull images
18 | Similar to git:
19 | ```
20 | $ sudo docker pull [options] [Docker Registry address[:port]/]repository[:tag]
21 | ```
22 |
23 | ### Remove images
24 | ```
25 | $ sudo docker rmi <image_id>
26 | ```
27 | where `<image_id>` is the ID of the image you want to delete.
28 |
29 | **Force removal**\
30 | The command above may fail with:
31 | ```
32 | Error response from daemon: conflict: unable to delete fce289e99eb9 (must be forced) - image is being used by stopped container 3e92eacb4f6b
33 | ```
34 | Use `-f` to force the removal:
35 | ```
36 | $ sudo docker rmi -f <image_id>
37 | ```
38 |
39 | **Removing several at once**\
40 | Separate the image IDs with spaces:
41 | ```
42 | $ sudo docker rmi <image_id_1> <image_id_2> ...
43 | ```
44 |
45 | ## Containers
46 | ### List containers
47 | - `sudo docker ps` lists only running containers;
48 | - `sudo docker ps -a` lists all containers, including stopped ones;
49 | - `sudo docker ps -q` lists only the IDs of running containers, with no other information;
50 | - `sudo docker ps -q -a` lists only the IDs of all containers, with no other information.
51 |
52 | ### Start containers
53 | -
54 |
55 | ### Stop containers
56 | - `sudo docker stop $(docker ps -a -q)` stops all containers;
57 | - `sudo docker stop <container_id>` stops the specified container.
58 |
59 |
60 |
61 | ### Remove containers
62 | ```
63 | $ sudo docker rm <container_id>
64 | ```
65 | Removing several containers at once works the same way as for images.\
66 | Remove all containers:
67 | ```
68 | $ sudo docker rm $(sudo docker ps -a -q)
69 | ```
70 |
71 |
72 | ## Reference
73 | 1. [Docker —— 从入门到实践](https://yeasy.gitbook.io/docker_practice/)
74 | 2. [如何删除 Docker 镜像和容器](https://chinese.freecodecamp.org/news/how-to-remove-images-in-docker/)
75 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/Tools/Docker/Docker Ⅲ:Nvidia Docker安装与测试指南.md:
--------------------------------------------------------------------------------
1 | # Docker Ⅲ: Nvidia Docker Installation and Testing Guide
2 |
3 | [[TOC]]
4 |
5 | > To do a good job, one must first sharpen one's tools.
6 |
7 | **System information**
8 |
9 | - Ubuntu 18.04
10 | - Nvidia RTX 2080 Ti GPU
11 | - Nvidia Driver Version: 440.33.01
12 | - CUDA Version: 10.2
13 |
14 | ## Installation
15 | ```
16 | $ distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
17 | && curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
18 | && curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
19 | ```
20 |
21 | ```
22 | $ sudo apt-get update
23 | ```
24 |
25 | ```
26 | $ sudo apt-get install -y nvidia-docker2
27 | ```
28 |
29 | ```
30 | $ sudo systemctl restart docker
31 | ```
32 |
33 | ## Testing
34 | ```
35 | $ sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
36 | ```
37 | Check the version:
38 | ```
39 | $ sudo nvidia-docker version
40 |
41 | NVIDIA Docker: 2.6.0
42 | Client: Docker Engine - Community
43 | Version: 19.03.12
44 | API version: 1.40
45 | Go version: go1.13.10
46 | Git commit: 48a66213fe
47 | Built: Mon Jun 22 15:45:49 2020
48 | OS/Arch: linux/amd64
49 | Experimental: false
50 |
51 | Server: Docker Engine - Community
52 | Engine:
53 | Version: 19.03.12
54 | API version: 1.40 (minimum version 1.12)
55 | Go version: go1.13.10
56 | Git commit: 48a66213fe
57 | Built: Mon Jun 22 15:44:20 2020
58 | OS/Arch: linux/amd64
59 | Experimental: false
60 | containerd:
61 | Version: 1.2.13
62 | GitCommit: 7ad184331fa3e55e52b890ea95e65ba581ae3429
63 | runc:
64 | Version: 1.0.0-rc10
65 | GitCommit: dc9208a3303feef5b3839f4323d9beb36df0a9dd
66 | docker-init:
67 | Version: 0.18.0
68 | GitCommit: fec3683
69 |
70 | ```
71 |
72 | ## Uninstall
73 | ```
74 | $ sudo docker volume ls -q -f driver=nvidia-docker | xargs -r -I{} -n1 docker ps -q -a -f volume={} | xargs -r docker rm -f
75 | $ sudo apt-get purge nvidia-docker
76 | ```
--------------------------------------------------------------------------------
/Tools/Docker/Docker Ⅳ:Nvidia Docker使用命令手册.md:
--------------------------------------------------------------------------------
1 | # Docker Ⅳ: Nvidia Docker Command Reference
2 |
3 | [[TOC]]
4 |
5 | > To do a good job, one must first sharpen one's tools.
--------------------------------------------------------------------------------
/Tools/Docker/Docker Ⅴ:Docker与Nvidia Docker踩坑与解决方案记录集.md:
--------------------------------------------------------------------------------
1 | # Docker Ⅴ: Docker and Nvidia Docker Pitfalls and Fixes
2 |
3 | [[TOC]]
4 |
5 | > To do a good job, one must first sharpen one's tools.
6 |
7 | ## Docker
8 |
9 |
10 | ## Nvidia Docker
11 | ### GPG key Error
12 | **Problem log**
13 | While installing Nvidia Docker, adding Nvidia's repository and GPG key went fine, but running `sudo apt-get update` produced the following error:
14 | ```
15 | W: GPG error: http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 Release: The following signatures were invalid: BADSIG F60F4B3D7FA2AF80 cudatools
16 | W: The repository 'http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 Release' is not signed.
17 | ```
18 |
19 | **Initial analysis**
20 | This is clearly a network problem in the China region, which this [Reply](https://github.com/NVIDIA/nvidia-docker/issues/571#issuecomment-393783307) also corroborates. The fix is therefore to switch to a mirror source.
21 |
22 | ### libnvidia-tls.so.* missing
23 | Running the official test image fails:
24 | ```
25 | $ docker run --runtime=nvidia --rm nvidia/cuda:11.0-base nvidia-smi
26 | ```
27 | In principle, this command should print the GPU information, showing that the nvidia docker container was created successfully and ran the nvidia-smi command correctly inside it.\
28 | However...
29 |
30 | **Problem log**
31 | ```
32 | docker: Error response from daemon: OCI runtime create failed: container_linux.go:349: starting container process caused "process_linux.go:449: container init caused \"process_linux.go:432: running prestart hook 1 caused \\\"error running hook: exit status 1, stdout: , stderr: nvidia-container-cli: detection error: open failed: /usr/lib/x86_64-linux-gnu/libnvidia-tls.so.460.56: no such file or directory\\\\n\\\"\"": unknown.
33 | ```
34 | **Initial analysis**
35 | At first I naively copied the whole pile of errors and searched for it, only to find that many people hit this problem but, after much discussion, nobody had a solution; see [nvidia-docker/issues/1225](https://github.com/NVIDIA/nvidia-docker/issues/1225). Only when I read this [Reply](https://github.com/NVIDIA/nvidia-docker/issues/1225#issuecomment-694736952) by `klueska` did I realize that the prefix `docker: Error response from daemon: OCI runtime create failed: container_linux.go:349:...` is Docker's own error message, and the real Nvidia Docker error is what follows `stderr`, namely:
36 | ```
37 | nvidia-container-cli: detection error: open failed: /usr/lib/x86_64-linux-gnu/libnvidia-tls.so.460.56: no such file or directory\\\\n\\\"\"": unknown.
38 | ```
39 | Following that error message, I found [nvidia-docker/issues/1404](https://github.com/NVIDIA/nvidia-docker/issues/1404).
40 |
41 | Again following `klueska`'s [Reply](https://github.com/NVIDIA/nvidia-docker/issues/1404#issuecomment-720657408), this is a mismatch between the library files and the driver version. My GPU driver is 440, yet searching `/usr/lib/` for all `libnvidia*` files indeed turned up a few with a 460 suffix; I then deleted them in the terminal with `sudo rm`.
42 |
43 | > Amusingly, another guy later in that issue made the same mistake I did, assuming that an identical prefix meant the same issue.
44 |
45 | 
46 |
47 | ### nvidia-container-runtime: no such file or directory
48 | **Problem log**
49 | ```
50 | docker: Error response from daemon: OCI runtime create failed: unable to retrieve OCI runtime error (open /run/containerd/io.containerd.runtime.v1.linux/moby/d9f69fa38a697ffbc276caefea82f0e3262683c815f95b783ad835a19461696b/log.json: no such file or directory): fork/exec /usr/bin/nvidia-container-runtime: no such file or directory: : unknown.
51 | ```
52 | **Solution**
53 | ```
54 | sudo apt-get install nvidia-container-runtime
55 | ```
56 | **Reference**
57 | 1. https://www.geek-share.com/detail/2794463648.html
--------------------------------------------------------------------------------
/Tools/Habitat/Habitat Challenge提交指南.md:
--------------------------------------------------------------------------------
1 | # Habitat Challenge提交指南
2 | [[TOC]]
3 | ### Clone the challenge repository
4 | ```
5 | $ git clone https://github.com/facebookresearch/habitat-challenge.git
6 | $ cd habitat-challenge
7 | ```
8 |
9 | ### Install Nvidia Docker
10 | https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker
11 |
12 | ### Configure environment dependencies
13 | Edit the `Dockerfile` and add the dependency-installation commands inside the conda environment named `habitat`. Taking a custom `pytorch` install as an example:
14 |
15 | ```
16 | FROM fairembodied/habitat-challenge:2021
17 |
18 | # install dependencies in the habitat conda environment
19 | RUN /bin/bash -c ". activate habitat; pip install torch"
20 |
21 | ADD agent.py /agent.py
22 | ADD submission.sh /submission.sh
23 | ```
24 |
25 | ### Package the program as a Docker container
26 | ```
27 | $ sudo docker build . --file Objectnav.Dockerfile -t objectnav_submission
28 | ```
29 |
30 | ### Prepare the dataset
31 | Create a symlink:
32 | ```
33 | $ ln -f -s /home/skylark/datasets/habitat/data/scene_datasets/mp3d \
34 | habitat-challenge-data/data/scene_datasets/mp3d
35 | ```
36 |
37 |
38 | ### Evaluate the Docker container locally
39 | ```
40 | $ sudo ./test_locally_objectnav_rgbd.sh --docker-name objectnav_submission
41 | ```
42 | The output looks like:
43 | ```
44 | 2021-05-04 14:51:40,617 Initializing dataset ObjectNav-v1
45 | 2021-05-04 14:51:40,638 initializing sim Sim-v0
46 | 2021-05-04 14:51:41,753 Initializing task ObjectNav-v1
47 | 2021-05-04 14:51:42,593 distance_to_goal: 6.3394588788350426
48 | 2021-05-04 14:51:42,593 success: 0.0
49 | 2021-05-04 14:51:42,593 spl: 0.0
50 | 2021-05-04 14:51:42,593 softspl: 0.01344610551192538
51 | ```
52 |
53 | ### Online submission
54 | Install `EvalAI`:
55 | ```
56 | # Installing EvalAI Command Line Interface
57 | $ pip3 install "evalai>=1.3.5"
58 |
59 | # Set EvalAI account token
60 | $ evalai set_token
61 |
62 | # Push docker image to EvalAI docker registry
63 | # Objectnav
64 | $ evalai push objectnav_submission:latest --phase
65 | ```
66 |
67 | The phase can be one of:
68 | - **Minival phase**: identical to `./test_locally_{pointnav, objectnav}_rgbd.sh`; its purpose is a sanity check, confirming that the results reported by the remote evaluation match what you see locally. Each team may submit at most 100 entries per day in this phase.
69 | Teams that spam the servers will be blocked and disqualified.
70 | - **Test Standard phase**: serves as the public leaderboard for establishing the state of the art; this is what should be used to report results in papers. Each team may submit at most 10 entries per day in this phase.
71 | - **Test Challenge phase**: used to decide the challenge winners. Each team may submit 5 entries in total before this phase closes. Results on this split are not made public until the final results are announced at the Embodied AI workshop at CVPR.
72 |
73 |
74 | ## ObjectNav Baselines and DD-PPO Training Starter Code
75 | ### Install Habitat-sim and Habitat-lab
76 |
77 | ### Download the dataset
78 | Dataset: https://dl.fbaipublicfiles.com/habitat/data/datasets/objectnav/m3d/v1/objectnav_mp3d_v1.zip\
79 | > you should have the train and val splits at habitat-challenge/habitat-challenge-data/data/datasets/objectnav/mp3d/v1/train/ and habitat-challenge/habitat-challenge-data/data/datasets/objectnav/mp3d/v1/val/ respectively.
80 |
81 |
82 | Scene_dataset: create the same symlink as in the dataset-preparation step above.
83 |
84 | ### Download the Habitat 2021 DD-PPO pretrained model
85 | Download the model into the project directory:
86 | ```
87 | $ wget https://dl.fbaipublicfiles.com/habitat/data/baselines/v1/ddppo_objectnav_habitat2021_challenge_baseline_v1.pth
88 | ```
89 |
90 | ### Build the Docker container
91 | ```
92 | $ sudo docker build . --file Objectnav_DDPPO_baseline.Dockerfile -t objectnav_submission
93 | ```
94 |
95 | ### Local evaluation
96 | ```
97 | $ sudo ./test_locally_objectnav_rgbd.sh
98 | ```
--------------------------------------------------------------------------------
/Tools/Tools 1:Qt 转 PyQt5 的 Pycharm 插件.md:
--------------------------------------------------------------------------------
1 | #! https://zhuanlan.zhihu.com/p/259564109
2 | 
3 |
4 | # Tools Ⅰ: How to develop software happily with PyQt5 and Qt Designer in PyCharm
5 |
6 | Reposted from [两个轮子](https://me.csdn.net/qq_40666028)'s CSDN post: https://blog.csdn.net/qq_40666028/article/details/81069878
7 |
8 | Cover image from [Link](https://www.google.com/url?sa=i&url=https%3A%2F%2Fsteamcommunity.com%2Fsharedfiles%2Ffiledetails%2F%3Fid%3D1779931748&psig=AOvVaw0ELHQO3iVoOS3hnQrxBAlv&ust=1601188068117000&source=images&cd=vfe&ved=0CAIQjRxqFwoTCJDhjsOYhuwCFQAAAAAdAAAAABAk)
9 |
10 | > This series is meant as a memo: I have probably looked up the setup steps for some of these small development tools at least ten times, so I might as well write them down myself.
11 |
12 | ## Installing PyQt5 and Qt Designer
13 |
14 | First, install PyQt5 and Qt Designer.
15 |
16 | **PyQt5**
17 |
18 | ```
19 | pip install PyQt5
20 | ```
21 | **Qt Designer**
22 |
23 | Download it from [here](https://build-system.fman.io/qt-designer-download).
24 |
25 |
26 | ## Adding the Qt Designer tool in PyCharm
27 | - In “File -> Settings -> Tools -> External Tools”, click “+” to add an external tool;
28 | - In Program, fill in the path to “designer.exe”,
29 |   e.g. `D:\ProgramData\Anaconda2\Library\bin\designer.exe`;
30 | - In Working directory, fill in `$FileDir$`.
31 |
32 | ## Adding the Pyuic tool in PyCharm
33 | - In “File -> Settings -> Tools -> External Tools”, click “+” to add an external tool;
34 | - In Program, fill in the path to “python.exe”,
35 |   e.g. `D:\ProgramData\Anaconda2\python.exe`;
36 | - In Arguments, fill in
37 |   `-m PyQt5.uic.pyuic $FileName$ -o $FileNameWithoutExtension$.py`;
38 | - In Working directory, fill in `$ProjectFileDir$`.
39 |
40 | 
41 |
42 |
43 | ## Converting .ui files to .py
44 | For a .ui form file produced by Qt Designer, simply right-click it in PyCharm:
45 |
46 | 
47 |
48 | Click pyuic to generate the corresponding form .py file containing the Ui_Windows class.
49 |
50 | From here on you can happily develop your software in Python!
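51 |
52 | As a rough sketch of how the generated class is typically used (the module and class names below are assumptions; they depend on your .ui file):
53 |
54 | ```python
55 | import sys
56 | from PyQt5 import QtWidgets
57 | from Ui_Windows import Ui_MainWindow  # generated module/class names depend on your .ui file
58 |
59 |
60 | class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
61 |     def __init__(self):
62 |         super().__init__()
63 |         self.setupUi(self)  # build the widgets defined in Qt Designer
64 |
65 |
66 | if __name__ == "__main__":
67 |     app = QtWidgets.QApplication(sys.argv)
68 |     window = MainWindow()
69 |     window.show()
70 |     sys.exit(app.exec_())
71 | ```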
--------------------------------------------------------------------------------
/Tools/Tools 4:Python三行转并行——真香.md:
--------------------------------------------------------------------------------
1 | #! https://zhuanlan.zhihu.com/p/269623666
2 | 
3 | # Tools 4: Three lines to parallelize your Python — totally worth it!
4 |
5 | As the saying goes, **twenty cores means twenty times the fun**. Let's start by getting a feel for this multiplied joy:
6 |
7 | 
8 |
9 | ```python
10 | import time
11 | import multiprocessing
12 |
13 |
14 | def job(x, y):
15 |     """
16 |     :param x:
17 |     :param y:
18 |     :return: x * y
19 |     """
20 |     return x * y
21 |
22 | def parallel(z):
23 |     """
24 |     Handles passing multiple arguments: the parameters are packed into a tuple here
25 |     (unpacking them inside job with x, y = param would work just as well).
26 |     :param z: a tuple (x, y)
27 |     :return:
28 |     """
29 |     return job(z[0], z[1])
30 |
31 |
32 | if __name__ == "__main__":
33 |     time1 = time.time()
34 |     pool = multiprocessing.Pool(2)  # if the argument is omitted, every CPU core joins in
35 |     # Put the arguments you would normally loop over into a list
36 |     data_list = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10)]
37 |     # The key is just the single line below!!!
38 |     # map replaces the explicit loop
39 |     res = pool.map(parallel, data_list)
40 |     time2 = time.time()
41 |     print(res)
42 |     pool.close()
43 |     pool.join()
44 |     print('Total time: ' + str(time2 - time1) + 's')
45 | ```
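Incidentally, if you would rather skip the `parallel` wrapper entirely, the standard library's `Pool.starmap` unpacks each argument tuple for you. A minimal sketch under the same setup:

```python
# Same job as above, but letting starmap unpack the (x, y) tuples
import multiprocessing

def job(x, y):
    return x * y

if __name__ == "__main__":
    data_list = [(1, 1), (2, 2), (3, 3)]
    with multiprocessing.Pool(2) as pool:
        res = pool.starmap(job, data_list)  # each tuple becomes job(x, y)
    print(res)  # [1, 4, 9]
```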
46 |
47 |
48 | When I finally got parallelism working, besides marveling at how good it is, I also shed a tear over how little I had known.
49 |
50 | ## Reference
51 | **Thanks to the following two authors for sharing**:
52 | 1. [教你用一行Python代码实现并行](https://www.cnblogs.com/wumingxiaoyao/archive/2004/01/13/8241869.html)
53 | 2. [【python 多进程传参】pool.map() 函数传多参数](https://blog.csdn.net/u013421629/article/details/100284962)
--------------------------------------------------------------------------------
/Tools/Tools 5:Python三行转并行后续——全局变量.md:
--------------------------------------------------------------------------------
1 | #! https://zhuanlan.zhihu.com/p/273508904
2 | 
3 | # Tools 5: Follow-up on three-line Python parallelism — multiprocess global variables
4 |
5 | ## Thanks to the original author [houyanhua1 - CSDN](https://blog.csdn.net/houyanhua1/article/details/78244288)
6 |
7 | Ever since I learned CPU parallelism last time, a whole new world opened up and I wanted to parallelize everything, until I ran into a case where multiple processes needed to maintain the same dictionary.
8 |
9 | I confidently finished a program that built a `dict` and saved it to a `pickle`. When the downstream program then complained that the `pickle` was empty, I was baffled: I had declared the variable `global`, hadn't I?
10 |
11 | 
12 |
13 | In fact, when multiple processes "share" a variable via `global`, a modification made in one process is not reflected in the others — each process works on its own copy (see the minimal demo below).
14 |
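A minimal sketch of the pitfall (the names here are made up for illustration): the child process only updates its own copy of the module-level dict, and the parent never sees the change.

```python
# Demonstrates that a `global` dict is NOT shared across processes
import multiprocessing

results = {}  # module-level "global" variable

def worker(key, value):
    global results
    results[key] = value              # modifies the copy inside the child process only
    print("child sees:", results)     # {'a': 1}

if __name__ == "__main__":
    p = multiprocessing.Process(target=worker, args=("a", 1))
    p.start()
    p.join()
    print("parent sees:", results)    # still {} -- the change never propagates back
```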
15 | So we need the `multiprocessing` library to define a properly shared variable:
16 |
17 | Declare the shared variables before creating the process pool:
18 | ```python
19 | num = multiprocessing.Value("d", 10.0)             # shared scalar ("d" = double)
20 | num = multiprocessing.Array("i", [1, 2, 3, 4, 5])   # shared array ("i" = int)
21 | mydict = multiprocessing.Manager().dict()           # shared dict
22 | mylist = multiprocessing.Manager().list(range(5))   # shared list
23 | ```
24 |
25 | The full versions are as follows:
26 | ## Sharing data between processes (scalar):
27 |
28 | ```python
29 | import multiprocessing
30 |
31 | def func(num):
32 |     num.value = 10.78  # the child process changes the value; the parent sees the change
33 |
34 | if __name__ == "__main__":
35 |     num = multiprocessing.Value("d", 10.0)  # "d" = double; parent and child share this same Value
36 |     print(num.value)
37 |
38 |     p = multiprocessing.Process(target=func, args=(num,))
39 |     p.start()
40 |     p.join()
41 |
42 |     print(num.value)
43 | ```
44 | ## Sharing data between processes (array):
45 | ```python
46 | import multiprocessing
47 |
48 | def func(num):
49 |     num[2] = 9999  # the child process changes the array; the parent sees the change
50 |
51 | if __name__ == "__main__":
52 |     num = multiprocessing.Array("i", [1, 2, 3, 4, 5])  # parent and child share this array
53 |     print(num[:])
54 |
55 |     p = multiprocessing.Process(target=func, args=(num,))
56 |     p.start()
57 |     p.join()
58 |
59 |     print(num[:])
60 | ```
61 | ## Sharing data between processes (dict, list):
62 | ```python
63 | import multiprocessing
64 |
65 | def func(mydict, mylist):
66 |     mydict["index1"] = "aaaaaa"  # the child process changes the dict; the parent sees the change
67 |     mydict["index2"] = "bbbbbb"
68 |     mylist.append(11)  # the child process changes the list; the parent sees the change
69 |     mylist.append(22)
70 |     mylist.append(33)
71 |
72 | if __name__ == "__main__":
73 |     with multiprocessing.Manager() as MG:  # give the manager a short name
74 |         mydict = MG.dict()  # parent and child share this dict
75 |         mylist = MG.list(range(5))  # parent and child share this list
76 |
77 |         p = multiprocessing.Process(target=func, args=(mydict, mylist))
78 |         p.start()
79 |         p.join()
80 |
81 |         print(mylist)
82 |         print(mydict)
83 | ```
84 |
85 | That said, thinking about it afterwards, if this lives inside a `class`, it should be possible to modify the shared dict through something like `self.dict`. (Not verified; a rough sketch is below.)
86 |
87 |
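88 | A rough sketch of that idea (my own guess, not from the original post): it works as long as the attribute holds a Manager-backed dict; a plain `self.d = {}` attribute would not propagate across processes.
89 |
90 | ```python
91 | import multiprocessing
92 |
93 | class Collector:
94 |     def __init__(self):
95 |         self.mydict = multiprocessing.Manager().dict()  # Manager proxy stored as an attribute
96 |
97 |     def worker(self, key, value):
98 |         self.mydict[key] = value  # goes through the manager, so the parent sees it
99 |
100 |     def run(self):
101 |         p = multiprocessing.Process(target=self.worker, args=("index1", "aaaaaa"))
102 |         p.start()
103 |         p.join()
104 |         print(self.mydict)  # {'index1': 'aaaaaa'}
105 |
106 | if __name__ == "__main__":
107 |     Collector().run()
108 | ```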
--------------------------------------------------------------------------------
/Tools/Tools 6:如何用Readthedoc写一份优雅的技术文档.md:
--------------------------------------------------------------------------------
1 | # Tools 6: How to write an elegant technical doc with readthedocs
2 |
3 | ## Register and link GitHub
4 |
5 | ## Start a project
6 |
7 | There are essentially two things to do:
8 | - write a configuration file `conf.py`
9 | - write a landing page `index.rst`
10 |
11 | ## Writing the docs in Markdown
12 | The landing page is written in `.rst`, and although online tools can convert `.md` to `.rst`, writing directly in `.md` is still more convenient. For that we rely on the `recommonmark` package (a minimal `conf.py` sketch follows the install command):
13 | ```
14 | pip3 install recommonmark
15 | ```
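A minimal `conf.py` sketch for enabling Markdown sources, following recommonmark's documented setup (adapt it to your existing configuration):

```python
# conf.py -- minimal additions for Markdown support via recommonmark
extensions = [
    'recommonmark',
]

# let Sphinx accept both reStructuredText and Markdown source files
source_suffix = ['.rst', '.md']
```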
16 |
17 |
18 |
19 | ## Linking modules and code
20 |
21 |
22 | ## References
23 | 1. [Read the Docs 从懵逼到入门](https://blog.csdn.net/lu_embedded/article/details/109006380) - **Recommended!**
--------------------------------------------------------------------------------
/Tools/Tools 7:Python颜色设置.md:
--------------------------------------------------------------------------------
1 | #! https://zhuanlan.zhihu.com/p/368668837
2 | # Tools 7: Python font, background, and plot colors, plus perfectionist-grade Chinese alignment
3 | > To do a good job, one must first sharpen one's tools.
4 |
5 | This post is a memo collecting the small Python color-display tricks I use most often.
6 |
7 | ## Python font and background colors
8 | The font/background color table is **reposted from: https://blog.csdn.net/ever_peng/article/details/91492491**
9 |
10 | After trying the code from the link above, I found the colored backgrounds did not line up. For an obsessive like me, that is unacceptable. `%.10s`, `%-10s`, and `str({}).ljust(10)` all behave the same, and the culprit turns out to be the Chinese characters: for padding purposes each Chinese character counts as one character, yet it prints as wide as two. So I found the following trick specifically for aligning Chinese text (pad with the full-width space `chr(12288)`):
11 |
12 | https://blog.csdn.net/weixin_42280517/article/details/80814677
13 |
14 | Along the way I rewrote the code into an easy-to-copy form.
15 |
16 | ```python
17 | print("\033[1;30m{}\033[0m".format(' 字体颜色:白色'))
18 | print("\033[1;31m{}\033[0m".format(' 字体颜色:红色'))
19 | print("\033[1;32m{}\033[0m".format(' 字体颜色:深黄色'))
20 | print("\033[1;33m{}\033[0m".format(' 字体颜色:浅黄色'))
21 | print("\033[1;34m{}\033[0m".format(' 字体颜色:蓝色'))
22 | print("\033[1;35m{}\033[0m".format(' 字体颜色:淡紫色'))
23 | print("\033[1;36m{}\033[0m".format(' 字体颜色:青色'))
24 | print("\033[1;37m{}\033[0m".format(' 字体颜色:灰色'))
25 | print("\033[1;38m{}\033[0m".format(' 字体颜色:浅灰色'))
26 | print('\n')
27 | print('{0:{1}<9} \033[1;40m{2}\033[0m\n'.format('背景颜色:白色', chr(12288), ' '), end='')
28 | print('{0:{1}<9} \033[1;41m{2}\033[0m\n'.format('背景颜色:红色', chr(12288), ' '), end='')
29 | print('{0:{1}<9} \033[1;42m{2}\033[0m\n'.format('背景颜色:深黄色', chr(12288), ' '), end='')
30 | print('{0:{1}<9} \033[1;43m{2}\033[0m\n'.format('背景颜色:浅黄色', chr(12288), ' '), end='')
31 | print('{0:{1}<9} \033[1;44m{2}\033[0m\n'.format('背景颜色:蓝色', chr(12288), ' '), end='')
32 | print('{0:{1}<9} \033[1;45m{2}\033[0m\n'.format('背景颜色:淡紫色', chr(12288), ' '), end='')
33 | print('{0:{1}<9} \033[1;46m{2}\033[0m\n'.format('背景颜色:青色', chr(12288), ' '), end='')
34 | print('{0:{1}<9} \033[1;47m{2}\033[0m\n'.format('背景颜色:灰色', chr(12288), ' '), end='')
35 | ```
36 |
37 | Output:
38 |
39 | 
40 |
41 |
42 | ## Matplotlib color table
43 |
44 | **Reposted from: https://finthon.com/matplotlib-color-list/**
45 |
46 | 
--------------------------------------------------------------------------------
/Tools/Tools 8:Tex符号大全.md:
--------------------------------------------------------------------------------
1 | # Tools 8: A complete list of TeX formula symbols (copyable text version)
2 |
3 | > To do a good job, one must first sharpen one's tools.
4 |
5 | This post is a memo collecting commonly used TeX formula symbols.
6 |
7 | Reposted and extended from: http://mohu.org/info/symbols/symbols.htm
8 |
9 | The images in the link above are converted here into easy-to-copy text form.
10 |
11 | ## Math-mode accents
12 | $\hat{a}$ \hat{a}
13 | $\check{a}$ \check{a}
14 | $\tilde{a}$ \tilde{a}
15 | $\acute{a}$ \acute{a}
16 | $\grave{a}$ \grave{a}
17 | $\dot{a}$ \dot{a}
18 | $\ddot{a}$ \ddot{a}
19 | $\breve{a}$ \breve{a}
20 | $\bar{a}$ \bar{a}
21 | $\vec{a}$ \vec{a}
22 | $\widehat{A}$ \widehat{A}
23 | $\widetilde{A}$ \widetilde{A}
24 |
25 | ## Greek letters
26 | $\alpha$ \alpha
27 |
--------------------------------------------------------------------------------
/Tools/Tools 9:Zotero使用指南.md:
--------------------------------------------------------------------------------
1 | # Zotero User Guide
2 | ## The world has long suffered under Mendeley
3 | Mendeley is not only slow to sync, messy at file management, and clunky in its interface; this year it even cut off the API for its mobile apps.
4 |
5 | ## Still using Mendeley? Give Zotero a try
6 | 
7 |
8 | Download the latest [beta build](https://www.zotero.org/support/dev_builds) to get the silky-smooth Mendeley import and the built-in PDF viewer.
9 |
10 | ### Fully importing from Mendeley into Zotero
11 |
--------------------------------------------------------------------------------
/Tools/Ubuntu/Ubuntu系统问题.md:
--------------------------------------------------------------------------------
1 | # A Collection of Ubuntu System Issues
2 |
3 | [[TOC]]
4 | ### GPU driver installation
5 | **Solution**
6 | ```
7 | sudo apt-get purge nvidia*
8 |
9 | sudo ubuntu-drivers autoinstall
10 | ```
11 | Reboot the machine, then verify with:
12 | ```
13 | nvidia-smi
14 | ```
15 |
16 |
17 |
18 | ### apt-get install fails with "Errors were encountered while processing:"
19 |
20 | **Error log**
21 | 
22 |
23 | **Solution**
24 | ```
25 | # Rename the info folder
26 | sudo mv /var/lib/dpkg/info /var/lib/dpkg/info.bk
27 | # Create a new, empty info folder
28 | sudo mkdir /var/lib/dpkg/info
29 |
30 | # Run the repair install
31 | sudo apt-get update
32 | sudo apt-get install -f
33 |
34 | # The previous step generates some files under the info folder; move them all into info.bk
35 | sudo mv /var/lib/dpkg/info/* /var/lib/dpkg/info.bk
36 | # Delete the newly created info folder
37 | sudo rm -rf /var/lib/dpkg/info
38 | # Restore the original info folder by renaming it back
39 | sudo mv /var/lib/dpkg/info.bk /var/lib/dpkg/info
40 |
41 | sudo apt-get -o Dpkg::Options::="--force-overwrite" install locales
42 | ```
43 |
44 | **Reference**\
45 | https://blog.csdn.net/qq_42103502/article/details/105808323
--------------------------------------------------------------------------------
/Tools/color.py:
--------------------------------------------------------------------------------
1 | print("\033[1;30m{}\033[0m".format(' 字体颜色:白色'))
2 | print("\033[1;31m{}\033[0m".format(' 字体颜色:红色'))
3 | print("\033[1;32m{}\033[0m".format(' 字体颜色:深黄色'))
4 | print("\033[1;33m{}\033[0m".format(' 字体颜色:浅黄色'))
5 | print("\033[1;34m{}\033[0m".format(' 字体颜色:蓝色'))
6 | print("\033[1;35m{}\033[0m".format(' 字体颜色:淡紫色'))
7 | print("\033[1;36m{}\033[0m".format(' 字体颜色:青色'))
8 | print("\033[1;37m{}\033[0m".format(' 字体颜色:灰色'))
9 | print("\033[1;38m{}\033[0m".format(' 字体颜色:浅灰色'))
10 | print('\n')
11 | print('{0:{1}<9} \033[1;40m \033[0m\n'.format('背景颜色:白色', chr(12288)), end='')
12 | print('{0:{1}<9} \033[1;41m \033[0m\n'.format('背景颜色:红色', chr(12288)), end='')
13 | print('{0:{1}<9} \033[1;42m \033[0m\n'.format('背景颜色:深黄色', chr(12288)), end='')
14 | print('{0:{1}<9} \033[1;43m \033[0m\n'.format('背景颜色:浅黄色', chr(12288)), end='')
15 | print('{0:{1}<9} \033[1;44m \033[0m\n'.format('背景颜色:蓝色', chr(12288)), end='')
16 | print('{0:{1}<9} \033[1;45m \033[0m\n'.format('背景颜色:淡紫色', chr(12288)), end='')
17 | print('{0:{1}<9} \033[1;46m \033[0m\n'.format('背景颜色:青色', chr(12288)), end='')
18 | print('{0:{1}<9} \033[1;47m \033[0m\n'.format('背景颜色:灰色', chr(12288)), end='')
19 | print('{0:{1}<9} \033[1;48m \033[0m\n'.format('背景颜色:浅灰色', chr(12288)), end='')
--------------------------------------------------------------------------------
/Utils/HTML2PDF.py:
--------------------------------------------------------------------------------
1 | # coding = UTF-8
2 | # NeurIPS 2019 paper scraper
3 |
4 | import urllib.request
5 | import re
6 | import os
7 |
8 |
9 | # open the url and read
10 | def getHtml(url):
11 |     page = urllib.request.urlopen(url)
12 |     html = page.read()
13 |     page.close()
14 |     return html
15 |
16 |
17 | # compile the regular expressions and find
18 | # all stuff we need
19 | def getUrl(html):
20 |     reg = r'\"/paper/.*?"'
21 |     url_re = re.compile(reg)
22 |     url_lst = url_re.findall(html.decode('UTF-8'))
23 |     return (url_lst)
24 |
25 |
26 | def getFile(url):
27 |     file_name = url.split('/')[-1]
28 |     u = urllib.request.urlopen(url)
29 |     f = open(file_name, 'wb')
30 |
31 |     block_sz = 8192
32 |     while True:
33 |         buffer = u.read(block_sz)
34 |         if not buffer:
35 |             break
36 |
37 |         f.write(buffer)
38 |     f.close()
39 |     print("Successfully downloaded " + file_name)
40 |
41 |
42 | root_url = 'http://papers.nips.cc'
43 |
44 | raw_url = 'http://papers.nips.cc/book/advances-in-neural-information-processing-systems-32-2019'
45 |
46 | html = getHtml(raw_url)
47 | url_lst = getUrl(html)
48 |
49 | root_dir = '/home/skylark/PycharmRemote/ldf_download'
50 | if not os.path.exists(root_dir):
51 |     os.mkdir(root_dir)
52 | os.chdir(root_dir)
53 |
54 | for url in url_lst[:]:
55 |     url = root_url + url.split('"')[1] + '.pdf'
56 |     getFile(url)
57 |
--------------------------------------------------------------------------------
/Utils/PDFselector.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 |
4 | root_dir = '/home/skylark/PycharmRemote/ldf_download'
5 | new_dir = '/home/skylark/PycharmRemote/NeurIPS'
6 | if not os.path.exists(new_dir):
7 |     os.mkdir(new_dir)
8 |
9 | files = os.listdir(root_dir)
10 |
11 | keywords = ['einforcement', 'obot', 'RL', 'agent', 'Meta', 'policy', 'actor']  # first letters dropped so both capitalizations match
12 |
13 | for file in files:
14 |     if any(keyword in file for keyword in keywords):
15 |         shutil.copy(os.path.join(root_dir, file), new_dir)
16 |
--------------------------------------------------------------------------------
/Utils/basic_plot.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import pandas as pd
4 |
5 |
6 | def plot_mean_std(data):
7 |     """Plot the running mean of `data` with a +/-1 std band."""
8 |     mean_lst = []
9 |     std_lst = []
10 |
11 |     dim_num = len(data)
12 |
13 |     # running mean / std over the first i+1 entries
14 |     for i in range(dim_num):
15 |         data_i = data[:i + 1]
16 |         mean_lst.append(data_i.mean())
17 |         std_lst.append(data_i.std())
18 |
19 |     y_upper = np.array(mean_lst) + np.array(std_lst)
20 |     y_lower = np.array(mean_lst) - np.array(std_lst)
21 |
22 |     x = np.linspace(1, dim_num, dim_num)
23 |     fig, ax = plt.subplots(figsize=(10, 5))
24 |
25 |     plt.plot(x, mean_lst, c='r')
26 |     ax.fill_between(x, y_upper, y_lower, alpha=0.3, color='red')
27 |     plt.xlabel('Num of Episodes')
28 |     plt.ylabel('Reward')
29 |     # plt.ylim(0, 1000)
--------------------------------------------------------------------------------
/img/image-20230825121432059.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Skylark0924/Reinforcement-Learning-in-Robotics/a1abcf4442a9d8869d7c7d0068399fb908559642/img/image-20230825121432059.png
--------------------------------------------------------------------------------