├── .gitignore ├── Cython ├── MapleSim │ └── 2LinkArm │ │ ├── c2LinkArm.cpp │ │ ├── c2LinkArm_run.cpp │ │ ├── mplshlib.h │ │ ├── mpltable.h │ │ ├── py2LinkArm.cpp │ │ ├── py2LinkArm.pyx │ │ ├── runMatplotlib.py │ │ ├── runPyglet.py │ │ └── setup.py ├── SimpleRNG │ ├── SimpleRNG.cpp │ ├── SimpleRNG.h │ ├── SimpleRNG.so │ ├── pySimpleRNG.cpp │ ├── pySimpleRNG.pyx │ └── setup.py ├── Square │ ├── square.c │ ├── square.o │ ├── square.so │ └── test.py └── Test │ ├── cpp_test.cpp │ ├── cpp_test.h │ ├── setup.py │ ├── test.cpp │ ├── test.cpython-35m-x86_64-linux-gnu.so │ └── test.pyx ├── InvKin ├── Arm.py └── ArmPlot.py ├── LICENSE ├── Nengo scripting ├── Nengo 1.4 │ ├── absolute_value.py │ └── low_pass_derivative_filter.py └── Nengo 2 │ ├── arm_control │ ├── Arm.py │ ├── nengo_arm.py │ └── nengo_arm.py.cfg │ └── discrete_filter │ └── point_attractor.py ├── NengoDL ├── Converting your Keras model into a spiking neural network with NengoDL.ipynb ├── mnist_params.npz └── mnist_params_with_fr_loss.npz ├── PyGame ├── arm │ ├── arm1.py │ ├── arm3.py │ ├── arm3wlines.py │ ├── armpart.py │ ├── forearm.png │ ├── hand.png │ └── upperarm.png └── rotations │ ├── pic.png │ ├── rotations-bad-1.py │ ├── rotations-bad-2.py │ ├── rotations-bad-3.py │ └── rotations-good.py ├── README.rst ├── RL ├── Cat vs Mouse exploration │ ├── barrier2.txt │ ├── cellular.py │ ├── egoMouseLook.py │ ├── qlearn.py │ ├── qlearn_mod_random.py │ ├── temp.ppm │ └── waco.txt ├── Egocentric │ ├── cellular.py │ ├── cliffs.txt │ ├── egoCliffBasic.py │ ├── mouse.py │ └── qlearn.py ├── SARSA vs Qlearn cliff │ ├── cellular.py │ ├── cliff.txt │ ├── cliff_Q.py │ ├── cliff_S.py │ ├── grid.txt │ ├── mouse.py │ ├── qlearn.py │ └── sarsa.py └── combination allo and ego │ ├── cellular.py │ ├── egoalloBasic.py │ ├── mouse.py │ ├── qlearn.py │ └── worlds │ ├── barrier.txt │ ├── barrier2.txt │ ├── barrier3.txt │ ├── bigmap.txt │ ├── cliff.txt │ └── cliffs.txt ├── SymPy ├── UR5.ttt ├── remoteApi.so ├── test_timing.py ├── test_timing2.py ├── ur5.py ├── ur5_config │ ├── EE.J │ ├── EE.T │ ├── Mq │ ├── Mq_g │ ├── joint0.T │ ├── joint1.T │ ├── joint2.T │ ├── joint3.T │ ├── joint4.T │ ├── joint5.T │ ├── link0.J │ ├── link0.T │ ├── link1.J │ ├── link1.T │ ├── link1.T_inv │ ├── link2.J │ ├── link2.T │ ├── link2.T_inv │ ├── link3.J │ ├── link3.T │ ├── link3.T_inv │ ├── link4.J │ ├── link4.T │ ├── link4.T_inv │ ├── link5.J │ ├── link5.T │ ├── link6.J │ └── link6.T ├── ur5_ctrl.py ├── vrep.py └── vrepConst.py ├── VE026a └── Academic Robot Leaflet.pdf ├── VREP ├── pendulum_gravity │ ├── quad_arm_2.ttt │ ├── quad_arm_2_ctrl.py │ ├── remoteApi.so │ ├── vrep.py │ └── vrepConst.py └── two_link_arm │ ├── __init__.py │ ├── remoteApi.so │ ├── vrep.py │ ├── vrepConst.py │ ├── vrep_twolink.ttt │ ├── vrep_twolink_controller.py │ └── vrep_twolink_nengo.ttt ├── orientation ├── UR5.ttt ├── orientation_control.py └── position_and_orientation_control.py ├── tensorflow_models └── npg_cartpole │ ├── natural_policy_gradient.py │ ├── plot_data.py │ └── policy_gradient.py ├── tracking_control ├── tracking_control1.py ├── tracking_control2.py ├── tracking_control3.py ├── tracking_control4.py ├── tracking_control5.py ├── tracking_control6.py ├── tracking_control7.py └── tracking_control8.py └── train_AHF ├── gen_animation_plots.py ├── plot_error.py └── train_hf.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | _build 4 | build 5 | dist 6 | *.egg-info 7 | *~ 8 | *.bak 9 | *.swp 10 | 
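The build, dist, and *.egg-info entries above are the artifacts left behind when the Cython extensions in this repository are compiled; each of the Cython subdirectories (MapleSim/2LinkArm, SimpleRNG, Test) carries its own distutils setup.py for that purpose. Assuming Cython and a C++ compiler are installed, each extension is typically built in place from its own directory with

    python setup.py build_ext --inplace

after which the generated shared object (for example SimpleRNG.so or test.cpython-35m-x86_64-linux-gnu.so, both checked in below) can be imported directly from Python.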
-------------------------------------------------------------------------------- /Cython/MapleSim/2LinkArm/py2LinkArm.pyx: -------------------------------------------------------------------------------- 1 | # Written by Travis DeWolf (May, 2013) 2 | import numpy as np 3 | cimport numpy as np 4 | 5 | cdef extern from "c2LinkArm.cpp": 6 | cdef cppclass Sim: 7 | Sim(double dt, double* params) 8 | void reset(double* out, double* ic) 9 | void step(double* out, double* u) 10 | 11 | cdef class pySim: 12 | cdef Sim* thisptr 13 | def __cinit__(self, double dt = .00001, 14 | np.ndarray[double, mode="c"] params=None): 15 | """ 16 | param float dt: simulation timestep, must be < 1e-5 17 | param array params: MapleSim model internal parameters 18 | """ 19 | if params: self.thisptr = new Sim(dt, &params[0]) 20 | else: self.thisptr = new Sim(dt, NULL) 21 | 22 | def __dealloc__(self): 23 | del self.thisptr 24 | 25 | def reset(self, np.ndarray[double, mode="c"] out, 26 | np.ndarray[double, mode="c"] ic=None): 27 | """ 28 | Reset the state of the simulation. 29 | param np.ndarray out: where to store the system output 30 | NOTE: output is of form [time, output] 31 | param np.ndarray ic: the initial conditions of the system 32 | """ 33 | if ic: self.thisptr.reset(&out[0], &ic[0]) 34 | else: self.thisptr.reset(&out[0], NULL) 35 | 36 | def step(self, np.ndarray[double, mode="c"] out, 37 | np.ndarray[double, mode="c"] u): 38 | """ 39 | Step the simulation forward one timestep. 40 | param np.ndarray out: where to store the system output 41 | NOTE: output is of form [time, output] 42 | param np.ndarray u: the control signal 43 | """ 44 | self.thisptr.step(&out[0], &u[0]) 45 | 46 | -------------------------------------------------------------------------------- /Cython/MapleSim/2LinkArm/runMatplotlib.py: -------------------------------------------------------------------------------- 1 | #Written by Travis DeWolf (Sept, 2013) 2 | #Based on code by Jake Vanderplas - http://jakevdp.github.com 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import matplotlib.animation as animation 6 | import py2LinkArm 7 | 8 | class TwoLinkArm: 9 | """ 10 | :param list u: the torque applied to each joint 11 | """ 12 | def __init__(self, u = [.1, 0]): 13 | self.u = np.asarray(u, dtype='float') # control signal 14 | self.state = np.zeros(3) # vector for current state 15 | self.L1=0.37 # length of arm link 1 in m 16 | self.L2=0.27 # length of arm link 2 in m 17 | self.time_elapsed = 0 18 | 19 | self.sim = py2LinkArm.pySim() 20 | self.sim.reset(self.state) 21 | 22 | def position(self): 23 | """Compute x,y position of the hand""" 24 | 25 | x = np.cumsum([0, 26 | self.L1 * np.cos(self.state[1]), 27 | self.L2 * np.cos(self.state[2])]) 28 | y = np.cumsum([0, 29 | self.L1 * np.sin(self.state[1]), 30 | self.L2 * np.sin(self.state[2])]) 31 | return (x, y) 32 | 33 | def step(self, dt): 34 | """Simulate the system and update the state""" 35 | for i in range(1500): 36 | self.sim.step(self.state, self.u) 37 | self.time_elapsed = self.state[0] 38 | 39 | #------------------------------------------------------------ 40 | # set up initial state and global variables 41 | arm = TwoLinkArm() 42 | dt = 1./30 # 30 fps 43 | 44 | #------------------------------------------------------------ 45 | # set up figure and animation 46 | fig = plt.figure(figsize=(4,4)) 47 | ax = fig.add_subplot(111, aspect='equal', autoscale_on=False, 48 | xlim=(-1, 1), ylim=(-1, 1)) 49 | ax.grid() 50 | 51 | line, = ax.plot([], [], 'o-', lw=4, mew=5) 52 | time_text = 
ax.text(0.02, 0.95, '', transform=ax.transAxes) 53 | 54 | def init(): 55 | """initialize animation""" 56 | line.set_data([], []) 57 | time_text.set_text('') 58 | return line, time_text 59 | 60 | def animate(i): 61 | """perform animation step""" 62 | global arm, dt 63 | arm.step(dt) 64 | 65 | line.set_data(*arm.position()) 66 | time_text.set_text('time = %.2f' % arm.time_elapsed) 67 | return line, time_text 68 | 69 | # frames=None for matplotlib 1.3 70 | ani = animation.FuncAnimation(fig, animate, frames=None, 71 | interval=25, blit=True, init_func=init) 72 | 73 | # uncomment the following line to save the video in mp4 format. This 74 | # requires either mencoder or ffmpeg to be installed on your system 75 | #ani.save('2linkarm.mp4', fps=15, extra_args=['-vcodec', 'libx264']) 76 | 77 | plt.show() 78 | -------------------------------------------------------------------------------- /Cython/MapleSim/2LinkArm/runPyglet.py: -------------------------------------------------------------------------------- 1 | # Written by Travis DeWolf (May, 2013) 2 | import numpy as np 3 | import pyglet 4 | import time 5 | 6 | import py2LinkArm 7 | 8 | def run(): 9 | """Runs and plots a 2 link arm simulation generated by MapleSim.""" 10 | 11 | # set up input control signal to simulation 12 | u = np.ones(2) * .1 13 | # set up array to receive output from simulation 14 | out = np.zeros(3) 15 | 16 | # create MapleSim sim class 17 | sim = py2LinkArm.pySim() 18 | # set up initial conditions of the system 19 | sim.reset(out) 20 | 21 | ###### Plotting things 22 | 23 | # segment lengths = (.31, .27)m (defined in MapleSim) 24 | L = np.array([31, 27]) * 3 25 | 26 | # make our window for drawin' 27 | window = pyglet.window.Window() 28 | 29 | # set up some labels to display time 30 | label_time = pyglet.text.Label('time', font_name='Times New Roman', 31 | font_size=36, x=window.width//2, y=window.height - 50, 32 | anchor_x='center', anchor_y='center') 33 | # and joint angles 34 | label_values = pyglet.text.Label('values', font_name='Times New Roman', 35 | font_size=36, x=window.width//2, y=window.height - 100, 36 | anchor_x='center', anchor_y='center') 37 | 38 | def update_arm(dt): 39 | """ Simulate the arm ahead a chunk of time, then find the 40 | (x,y) coordinates of each joint, and update labels.""" 41 | 42 | # simulate arm forward 15ms 43 | for i in range(1000): 44 | sim.step(out, u) 45 | 46 | # find new joint (x,y) coordinates, offset to center of screen-ish 47 | x = np.array([ 0, 48 | L[0]*np.cos(out[1]), 49 | L[0]*np.cos(out[1]) + L[1]*np.cos(out[1]+out[2])]) + window.width/2 50 | y = np.array([ 0, 51 | L[0]*np.sin(out[1]), 52 | L[0]*np.sin(out[1]) + L[1]*np.sin(out[1]+out[2])]) + 100 53 | 54 | # update line endpoint positions for drawing 55 | window.jps = np.array([x, y]).astype('int') 56 | label_time.text = 'time: %.3f'%out[0] 57 | label_values.text = '(01, 02) = (%.2f, %.2f)'%(out[1], out[2]) 58 | 59 | update_arm(0) # call once to set up 60 | 61 | @window.event 62 | def on_draw(): 63 | window.clear() 64 | label_time.draw() 65 | label_values.draw() 66 | for i in range(2): 67 | pyglet.graphics.draw(2, pyglet.gl.GL_LINES, ('v2i', 68 | (window.jps[0][i], window.jps[1][i], 69 | window.jps[0][i+1], window.jps[1][i+1]))) 70 | 71 | # set up update_arm function to be called super often 72 | pyglet.clock.schedule_interval(update_arm, 0.00001) 73 | pyglet.app.run() 74 | 75 | run() 76 | -------------------------------------------------------------------------------- /Cython/MapleSim/2LinkArm/setup.py: 
-------------------------------------------------------------------------------- 1 | # Written by Travis DeWolf (May, 2013) 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Distutils import build_ext 5 | 6 | setup( 7 | cmdclass = {'build_ext': build_ext}, 8 | ext_modules=[Extension("py2LinkArm", 9 | sources=["py2LinkArm.pyx"], 10 | language="c++"),], 11 | ) 12 | -------------------------------------------------------------------------------- /Cython/SimpleRNG/SimpleRNG.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMPLERNG_H 2 | #define SIMPLERNG_H 3 | 4 | // A simple random number generator based on George Marsaglia's MWC (Multiply With Carry) generator. 5 | // This is not intended to take the place of the library's primary generator, Mersenne Twister. 6 | // Its primary benefit is that it is simple to extract its state. 7 | 8 | class SimpleRNG 9 | { 10 | public: 11 | 12 | SimpleRNG(); 13 | 14 | // Seed the random number generator 15 | void SetState(unsigned int u, unsigned int v); 16 | 17 | // Extract the internal state of the generator 18 | void GetState(unsigned int& u, unsigned int& v); 19 | 20 | // A uniform random sample from the open interval (0, 1) 21 | double GetUniform(); 22 | 23 | // A uniform random sample from the set of unsigned integers 24 | unsigned int GetUint(); 25 | 26 | // This stateless version makes it more convenient to get a uniform 27 | // random value and transfer the state in and out in one operation. 28 | double GetUniform(unsigned int& u, unsigned int& v); 29 | 30 | // This stateless version makes it more convenient to get a random unsigned integer 31 | // and transfer the state in and out in one operation. 32 | unsigned int GetUint(unsigned int& u, unsigned int& v); 33 | 34 | // Normal (Gaussian) random sample 35 | double GetNormal(double mean, double standardDeviation); 36 | 37 | // Exponential random sample 38 | double GetExponential(double mean); 39 | 40 | // Gamma random sample 41 | double GetGamma(double shape, double scale); 42 | 43 | // Chi-square sample 44 | double GetChiSquare(double degreesOfFreedom); 45 | 46 | // Inverse-gamma sample 47 | double GetInverseGamma(double shape, double scale); 48 | 49 | // Weibull sample 50 | double GetWeibull(double shape, double scale); 51 | 52 | // Cauchy sample 53 | double GetCauchy(double median, double scale); 54 | 55 | // Student-t sample 56 | double GetStudentT(double degreesOfFreedom); 57 | 58 | // The Laplace distribution is also known as the double exponential distribution. 
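// (The implementation is in SimpleRNG.cpp, which is not reproduced in this
// listing. As a sketch of the relationship noted above: a Laplace sample can be
// drawn by inverse CDF from u ~ Uniform(0,1) as mean + scale*log(2*u) when
// u < 0.5, and mean - scale*log(2*(1-u)) otherwise; whether SimpleRNG uses
// exactly this construction is an assumption.)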
59 | double GetLaplace(double mean, double scale); 60 | 61 | // Log-normal sample 62 | double GetLogNormal(double mu, double sigma); 63 | 64 | // Beta sample 65 | double GetBeta(double a, double b); 66 | 67 | // Poisson sample 68 | int GetPoisson(double lambda); 69 | 70 | private: 71 | unsigned int m_u, m_v; 72 | int PoissonLarge(double lambda); 73 | int PoissonSmall(double lambda); 74 | double LogFactorial(int n); 75 | }; 76 | 77 | 78 | #endif -------------------------------------------------------------------------------- /Cython/SimpleRNG/SimpleRNG.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/Cython/SimpleRNG/SimpleRNG.so -------------------------------------------------------------------------------- /Cython/SimpleRNG/pySimpleRNG.pyx: -------------------------------------------------------------------------------- 1 | cdef extern from "SimpleRNG.h": 2 | cdef cppclass SimpleRNG: 3 | SimpleRNG() 4 | 5 | # Seed the random number generator 6 | void SetState(unsigned int u, unsigned int v) 7 | 8 | # A uniform random sample from the open interval (0, 1) 9 | double GetUniform() 10 | 11 | # A uniform random sample from the set of unsigned integers 12 | unsigned int GetUint() 13 | 14 | # Normal (Gaussian) random sample 15 | double GetNormal(double mean, double standardDeviation) 16 | 17 | # Exponential random sample 18 | double GetExponential(double mean) 19 | 20 | # Gamma random sample 21 | double GetGamma(double shape, double scale) 22 | 23 | # Chi-square sample 24 | double GetChiSquare(double degreesOfFreedom) 25 | 26 | # Inverse-gamma sample 27 | double GetInverseGamma(double shape, double scale) 28 | 29 | # Weibull sample 30 | double GetWeibull(double shape, double scale) 31 | 32 | # Cauchy sample 33 | double GetCauchy(double median, double scale) 34 | 35 | # Student-t sample 36 | double GetStudentT(double degreesOfFreedom) 37 | 38 | # The Laplace distribution is also known as the double exponential distribution. 
39 | double GetLaplace(double mean, double scale) 40 | 41 | # Log-normal sample 42 | double GetLogNormal(double mu, double sigma) 43 | 44 | # Beta sample 45 | double GetBeta(double a, double b) 46 | 47 | # Poisson sample 48 | int GetPoisson(double lam) 49 | 50 | cdef class pySimpleRNG: 51 | cdef SimpleRNG* thisptr # hold a C++ instance 52 | def __cinit__(self): 53 | self.thisptr = new SimpleRNG() 54 | def __dealloc__(self): 55 | del self.thisptr 56 | 57 | # Seed the random number generator 58 | def SetState(self, unsigned int u, unsigned int v): 59 | self.thisptr.SetState(u, v) 60 | 61 | # A uniform random sample from the open interval (0, 1) 62 | def GetUniform(self): 63 | return self.thisptr.GetUniform() 64 | 65 | # A uniform random sample from the set of unsigned integers 66 | def GetUint(self): 67 | return self.thisptr.GetUint() 68 | 69 | # Normal (Gaussian) random sample 70 | def GetNormal(self, double mean=0, double std_dev=1): 71 | return self.thisptr.GetNormal(mean, std_dev) 72 | 73 | # Exponential random sample 74 | def GetExponential(self, double mean): 75 | return self.thisptr.GetExponential(mean) 76 | 77 | # Gamma random sample 78 | def GetGamma(self, double shape, double scale): 79 | return self.thisptr.GetGamma(shape, scale) 80 | 81 | # Chi-square sample 82 | def GetChiSquare(self, double degreesOfFreedom): 83 | return self.thisptr.GetChiSquare(degreesOfFreedom) 84 | 85 | # Inverse-gamma sample 86 | def GetInverseGamma(self, double shape, double scale): 87 | return self.thisptr.GetInverseGamma(shape, scale) 88 | 89 | # Weibull sample 90 | def GetWeibull(self, double shape, double scale): 91 | return self.thisptr.GetWeibull(shape, scale) 92 | 93 | # Cauchy sample 94 | def GetCauchy(self, double median, double scale): 95 | return self.thisptr.GetCauchy(median, scale) 96 | 97 | # Student-t sample 98 | def GetStudentT(self, double degreesOfFreedom): 99 | return self.thisptr.GetStudentT(degreesOfFreedom) 100 | 101 | # The Laplace distribution is also known as the double exponential distribution. 
102 | def GetLaplace(self, double mean, double scale): 103 | return self.thisptr.GetLaplace(mean, scale) 104 | 105 | # Log-normal sample 106 | def GetLogNormal(self, double mu, double sigma): 107 | return self.thisptr.GetLogNormal(mu, sigma) 108 | 109 | # Beta sample 110 | def GetBeta(self, double a, double b): 111 | return self.thisptr.GetBeta(a, b) 112 | 113 | # Poisson sample 114 | def GetPoisson(self, double lam): 115 | return self.thisptr.GetPoisson(lam) 116 | -------------------------------------------------------------------------------- /Cython/SimpleRNG/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | 5 | setup( 6 | name = 'SimpleRNG', 7 | ext_modules=[ 8 | Extension("SimpleRNG", 9 | sources=["pySimpleRNG.pyx", "SimpleRNG.cpp"], # Note, you can link against a c++ library instead of including the source 10 | language="c++"), 11 | ], 12 | cmdclass = {'build_ext': build_ext}, 13 | 14 | ) 15 | -------------------------------------------------------------------------------- /Cython/Square/square.c: -------------------------------------------------------------------------------- 1 | void square(double* array, int n) { 2 | int i; 3 | for (i=0; i<n; i++) { 4 | array[i] = array[i] * array[i]; 5 | } 6 | } -------------------------------------------------------------------------------- /Cython/Test/cpp_test.cpp: -------------------------------------------------------------------------------- 7 | Test::Test(int test1) { 8 | this->test1 = test1; 9 | } 10 | 11 | Test::~Test() { } 12 | 13 | int Test::returnFour() { return 4; } 14 | 15 | int Test::returnFive() { return returnFour() + 1; } 16 | 17 | Test Test::operator+(const Test& other) { 18 | return Test(test1 + other.test1); 19 | } 20 | 21 | Test Test::operator-(const Test& other) { 22 | return Test(test1 - other.test1); 23 | } 24 | -------------------------------------------------------------------------------- /Cython/Test/cpp_test.h: -------------------------------------------------------------------------------- 1 | #ifndef TEST_H 2 | #define TEST_H 3 | 4 | class Test { 5 | public: 6 | int test1; 7 | Test(); 8 | Test(int test1); 9 | ~Test(); 10 | int returnFour(); 11 | int returnFive(); 12 | Test operator+(const Test& other); 13 | Test operator-(const Test& other); 14 | }; 15 | #endif 16 | -------------------------------------------------------------------------------- /Cython/Test/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | 5 | setup( 6 | name = 'Demos', 7 | ext_modules=[ 8 | Extension("test", 9 | # Note, you can link against a c++ library 10 | # instead of including the source 11 | sources=["test.pyx", "cpp_test.cpp"], 12 | language="c++"), 13 | ], 14 | cmdclass = {'build_ext': build_ext}, 15 | ) 16 | -------------------------------------------------------------------------------- /Cython/Test/test.cpython-35m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/Cython/Test/test.cpython-35m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /Cython/Test/test.pyx: -------------------------------------------------------------------------------- 1 | cdef extern from "cpp_test.h": 2 | cdef cppclass Test: 3 | Test() 4 | Test(int test1) 5 | int test1 6 | int returnFive() 7 | Test add "operator+"(Test other) 8 | Test sub "operator-"(Test other) 9 | 10 | cdef class pyTest: 11 | cdef Test* thisptr # 
hold a C++ instance 12 | def __cinit__(self, int test1): 13 | self.thisptr = new Test(test1) 14 | def __dealloc__(self): 15 | del self.thisptr 16 | 17 | def __add__(pyTest left, pyTest other): 18 | cdef Test t = left.thisptr.add(other.thisptr[0]) 19 | cdef pyTest tt = pyTest(t.test1) 20 | return tt 21 | def __sub__(pyTest left, pyTest other): 22 | cdef Test t = left.thisptr.sub(other.thisptr[0]) 23 | cdef pyTest tt = pyTest(t.test1) 24 | return tt 25 | 26 | def __repr__(self): 27 | return "pyTest[%s]" % (self.thisptr.test1) 28 | 29 | def returnFive(self): 30 | return self.thisptr.returnFive() 31 | 32 | def printMe(self): 33 | return "hello world" 34 | -------------------------------------------------------------------------------- /InvKin/Arm.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2013 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | ''' 17 | 18 | import numpy as np 19 | import scipy.optimize 20 | 21 | 22 | class Arm3Link: 23 | 24 | def __init__(self, q=None, q0=None, L=None): 25 | """Set up the basic parameters of the arm. 26 | All lists are in order [shoulder, elbow, wrist]. 27 | 28 | q : np.array 29 | the initial joint angles of the arm 30 | q0 : np.array 31 | the default (resting state) joint configuration 32 | L : np.array 33 | the arm segment lengths 34 | """ 35 | # initial joint angles 36 | self.q = [.3, .3, 0] if q is None else q 37 | # some default arm positions 38 | self.q0 = np.array([np.pi/4, np.pi/4, np.pi/4]) if q0 is None else q0 39 | # arm segment lengths 40 | self.L = np.array([1, 1, 1]) if L is None else L 41 | 42 | self.max_angles = [np.pi, np.pi, np.pi/4] 43 | self.min_angles = [0, 0, -np.pi/4] 44 | 45 | def get_xy(self, q=None): 46 | """Returns the corresponding hand xy coordinates for 47 | a given set of joint angle values [shoulder, elbow, wrist], 48 | and the above defined arm segment lengths, L 49 | 50 | q : np.array 51 | the list of current joint angles 52 | 53 | returns : list 54 | the [x,y] position of the arm 55 | """ 56 | if q is None: 57 | q = self.q 58 | 59 | x = self.L[0]*np.cos(q[0]) + \ 60 | self.L[1]*np.cos(q[0]+q[1]) + \ 61 | self.L[2]*np.cos(np.sum(q)) 62 | 63 | y = self.L[0]*np.sin(q[0]) + \ 64 | self.L[1]*np.sin(q[0]+q[1]) + \ 65 | self.L[2]*np.sin(np.sum(q)) 66 | 67 | return [x, y] 68 | 69 | def inv_kin(self, xy): 70 | """This is just a quick write up to find the inverse kinematics 71 | for a 3-link arm, using the SciPy optimize package minimization 72 | function. 73 | 74 | Given an (x,y) position of the hand, return a set of joint angles (q) 75 | using constraint based minimization, constraint is to match hand (x,y), 76 | minimize the distance of each joint from it's default position (q0). 
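Concretely, the call below solves: minimize sqrt(sum_i w_i * (q_i - q0_i)**2) subject to x(q) - xy[0] = 0 and y(q) - xy[1] = 0, using scipy.optimize.fmin_slsqp, where x(q) and y(q) are the forward kinematics from get_xy and w = [1, 1, 1.3] is the weighting used in distance_to_default below.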
77 | 78 | xy : tuple 79 | the desired xy position of the arm 80 | 81 | returns : list 82 | the optimal [shoulder, elbow, wrist] angle configuration 83 | """ 84 | 85 | def distance_to_default(q, *args): 86 | """Objective function to minimize 87 | Calculates the euclidean distance through joint space to the 88 | default arm configuration. The weight list allows the penalty of 89 | each joint being away from the resting position to be scaled 90 | differently, such that the arm tries to stay closer to resting 91 | state more for higher weighted joints than those with a lower 92 | weight. 93 | 94 | q : np.array 95 | the list of current joint angles 96 | 97 | returns : scalar 98 | euclidean distance to the default arm position 99 | """ 100 | # weights found with trial and error, 101 | # get some wrist bend, but not much 102 | weight = [1, 1, 1.3] 103 | return np.sqrt(np.sum([(qi - q0i)**2 * wi 104 | for qi, q0i, wi in zip(q, self.q0, weight)])) 105 | 106 | def x_constraint(q, xy): 107 | """Returns the corresponding hand xy coordinates for 108 | a given set of joint angle values [shoulder, elbow, wrist], 109 | and the above defined arm segment lengths, L 110 | 111 | q : np.array 112 | the list of current joint angles 113 | xy : np.array 114 | current xy position (not used) 115 | 116 | returns : np.array 117 | the difference between current and desired x position 118 | """ 119 | x = (self.L[0]*np.cos(q[0]) + self.L[1]*np.cos(q[0]+q[1]) + 120 | self.L[2]*np.cos(np.sum(q))) - xy[0] 121 | return x 122 | 123 | def y_constraint(q, xy): 124 | """Returns the corresponding hand xy coordinates for 125 | a given set of joint angle values [shoulder, elbow, wrist], 126 | and the above defined arm segment lengths, L 127 | 128 | q : np.array 129 | the list of current joint angles 130 | xy : np.array 131 | current xy position (not used) 132 | returns : np.array 133 | the difference between current and desired y position 134 | """ 135 | y = (self.L[0]*np.sin(q[0]) + self.L[1]*np.sin(q[0]+q[1]) + 136 | self.L[2]*np.sin(np.sum(q))) - xy[1] 137 | return y 138 | 139 | def joint_limits_upper_constraint(q, xy): 140 | """Used in the function minimization such that the output from 141 | this function must be greater than 0 to be successfully passed. 142 | 143 | q : np.array 144 | the current joint angles 145 | xy : np.array 146 | current xy position (not used) 147 | 148 | returns : np.array 149 | all > 0 if constraint matched 150 | """ 151 | return self.max_angles - q 152 | 153 | def joint_limits_lower_constraint(q, xy): 154 | """Used in the function minimization such that the output from 155 | this function must be greater than 0 to be successfully passed. 
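For example, with the default min_angles = [0, 0, -np.pi/4] and q = [.3, .3, 0], this returns q - min_angles = [0.3, 0.3, ~0.785]; every entry is positive, so that configuration satisfies the lower joint limits.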
156 | 157 | q : np.array 158 | the current joint angles 159 | xy : np.array 160 | current xy position (not used) 161 | 162 | returns : np.array 163 | all > 0 if constraint matched 164 | """ 165 | return q - self.min_angles 166 | 167 | return scipy.optimize.fmin_slsqp( 168 | func=distance_to_default, 169 | x0=self.q, 170 | eqcons=[x_constraint, 171 | y_constraint], 172 | # uncomment to add in min / max angles for the joints 173 | # ieqcons=[joint_limits_upper_constraint, 174 | # joint_limits_lower_constraint], 175 | args=(xy,), 176 | iprint=0) # iprint=0 suppresses output 177 | 178 | 179 | def test(): 180 | # ###########Test it!################## 181 | 182 | arm = Arm3Link() 183 | 184 | # set of desired (x,y) hand positions 185 | x = np.arange(-.75, .75, .05) 186 | y = np.arange(.25, .75, .05) 187 | 188 | # threshold for printing out information, to find trouble spots 189 | thresh = .025 190 | 191 | count = 0 192 | total_error = 0 193 | # test it across the range of specified x and y values 194 | for xi in range(len(x)): 195 | for yi in range(len(y)): 196 | # test the inv_kin function on a range of different targets 197 | xy = [x[xi], y[yi]] 198 | # run the inv_kin function, get the optimal joint angles 199 | q = arm.inv_kin(xy=xy) 200 | # find the (x,y) position of the hand given these angles 201 | actual_xy = arm.get_xy(q) 202 | # calculate the root squared error 203 | error = np.sqrt(np.sum((np.array(xy) - np.array(actual_xy))**2)) 204 | # total the error 205 | total_error += np.nan_to_num(error) 206 | 207 | # if the error was high, print out more information 208 | if np.sum(error) > thresh: 209 | print('-------------------------') 210 | print('Initial joint angles', arm.q) 211 | print('Final joint angles: ', q) 212 | print('Desired hand position: ', xy) 213 | print('Actual hand position: ', actual_xy) 214 | print('Error: ', error) 215 | print('-------------------------') 216 | 217 | count += 1 218 | 219 | print('\n---------Results---------') 220 | print('Total number of trials: ', count) 221 | print('Total error: ', total_error) 222 | print('-------------------------') 223 | 224 | if __name__ == '__main__': 225 | test() 226 | -------------------------------------------------------------------------------- /InvKin/ArmPlot.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2013 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | ''' 17 | 18 | import numpy as np 19 | import pyglet 20 | 21 | import Arm 22 | 23 | 24 | def plot(): 25 | """A function for plotting an arm, and having it calculate the 26 | inverse kinematics such that given the mouse (x, y) position it 27 | finds the appropriate joint angles to reach that point.""" 28 | 29 | # create an instance of the arm 30 | arm = Arm.Arm3Link(L=np.array([300, 200, 100])) 31 | 32 | # make our window for drawin' 33 | window = pyglet.window.Window() 34 | 35 | label = pyglet.text.Label( 36 | 'Mouse (x,y)', font_name='Times New Roman', 37 | font_size=36, x=window.width//2, y=window.height//2, 38 | anchor_x='center', anchor_y='center') 39 | 40 | def get_joint_positions(): 41 | """This method finds the (x,y) coordinates of each joint""" 42 | 43 | x = np.array([ 44 | 0, 45 | arm.L[0]*np.cos(arm.q[0]), 46 | arm.L[0]*np.cos(arm.q[0]) + arm.L[1]*np.cos(arm.q[0]+arm.q[1]), 47 | arm.L[0]*np.cos(arm.q[0]) + arm.L[1]*np.cos(arm.q[0]+arm.q[1]) + 48 | arm.L[2]*np.cos(np.sum(arm.q))]) + window.width/2 49 | 50 | y = np.array([ 51 | 0, 52 | arm.L[0]*np.sin(arm.q[0]), 53 | arm.L[0]*np.sin(arm.q[0]) + arm.L[1]*np.sin(arm.q[0]+arm.q[1]), 54 | arm.L[0]*np.sin(arm.q[0]) + arm.L[1]*np.sin(arm.q[0]+arm.q[1]) + 55 | arm.L[2]*np.sin(np.sum(arm.q))]) 56 | 57 | return np.array([x, y]).astype('int') 58 | 59 | window.jps = get_joint_positions() 60 | 61 | @window.event 62 | def on_draw(): 63 | window.clear() 64 | label.draw() 65 | for i in range(3): 66 | pyglet.graphics.draw( 67 | 2, 68 | pyglet.gl.GL_LINES, 69 | ('v2i', (window.jps[0][i], window.jps[1][i], 70 | window.jps[0][i+1], window.jps[1][i+1]))) 71 | 72 | @window.event 73 | def on_mouse_motion(x, y, dx, dy): 74 | # call the inverse kinematics function of the arm 75 | # to find the joint angles optimal for pointing at 76 | # this position of the mouse 77 | label.text = '(x,y) = (%.3f, %.3f)' % (x, y) 78 | arm.q = arm.inv_kin([x - window.width/2, y]) # get new arm angles 79 | window.jps = get_joint_positions() # get new joint (x,y) positions 80 | 81 | pyglet.app.run() 82 | 83 | plot() 84 | -------------------------------------------------------------------------------- /Nengo scripting/Nengo 1.4/absolute_value.py: -------------------------------------------------------------------------------- 1 | import nef 2 | import random 3 | 4 | # constants / parameter setup etc 5 | N = 50 # number of neurons 6 | D = 3 # number of dimensions 7 | 8 | def make_abs_val(name, neurons, dimensions, intercept=[0]): 9 | def mult_neg_one(x): 10 | return x[0] * -1 11 | 12 | abs_val = nef.Network(name) 13 | 14 | abs_val.make('input', neurons=1, dimensions=dimensions, mode='direct') # create input relay 15 | abs_val.make('output', neurons=1, dimensions=dimensions, mode='direct') # create output relay 16 | 17 | for d in range(dimensions): # create a positive and negative population for each dimension in the input signal 18 | abs_val.make('abs_pos%d'%d, neurons=neurons, dimensions=1, encoders=[[1]], intercept=intercept) 19 | abs_val.make('abs_neg%d'%d, neurons=neurons, dimensions=1, encoders=[[-1]], intercept=intercept) 20 | 21 | abs_val.connect('input', 'abs_pos%d'%d, index_pre=d) 22 | abs_val.connect('input', 'abs_neg%d'%d, index_pre=d) 23 | 24 | abs_val.connect('abs_pos%d'%d, 'output', index_post=d) 25 | abs_val.connect('abs_neg%d'%d, 'output', index_post=d, func=mult_neg_one) 26 | 27 | return abs_val.network 28 | 29 | net = nef.Network('network') 30 | 31 | # Create absolute value subnetwork and add it to net 32 | net.add(make_abs_val(name='abs_val', 
dimensions=D, neurons=N)) 33 | 34 | # Create function input 35 | net.make_input('input', values=[random.random() for d in range(D)]) 36 | 37 | # Connect things up 38 | net.connect('input', 'abs_val.input') 39 | 40 | # Add it all to the Nengo world 41 | net.add_to_nengo() 42 | -------------------------------------------------------------------------------- /Nengo scripting/Nengo 1.4/low_pass_derivative_filter.py: -------------------------------------------------------------------------------- 1 | import nef 2 | 3 | # constants / parameter setup etc 4 | N = 50 # number of neurons 5 | D = 3 # number of dimensions 6 | 7 | def make_abs_val(name, neurons, dimensions, intercept=[0]): 8 | def mult_neg_one(x): 9 | return x[0] * -1 10 | 11 | abs_val = nef.Network(name) 12 | 13 | abs_val.make('input', neurons=1, dimensions=dimensions, mode='direct') # create input relay 14 | abs_val.make('output', neurons=1, dimensions=dimensions, mode='direct') # create output relay 15 | 16 | for d in range(dimensions): # create a positive and negative population for each dimension in the input signal 17 | abs_val.make('abs_pos%d'%d, neurons=neurons, dimensions=1, encoders=[[1]], intercept=intercept) 18 | abs_val.make('abs_neg%d'%d, neurons=neurons, dimensions=1, encoders=[[-1]], intercept=intercept) 19 | 20 | abs_val.connect('input', 'abs_pos%d'%d, index_pre=d) 21 | abs_val.connect('input', 'abs_neg%d'%d, index_pre=d) 22 | 23 | abs_val.connect('abs_pos%d'%d, 'output', index_post=d) 24 | abs_val.connect('abs_neg%d'%d, 'output', index_post=d, func=mult_neg_one) 25 | 26 | return abs_val.network 27 | 28 | def make_dlowpass(name, neurons, dimensions, radius=10, tau_inhib=0.005, inhib_scale=10): 29 | 30 | dlowpass = nef.Network(name) 31 | 32 | dlowpass.make('input', neurons=1, dimensions=dimensions, mode='direct') # create input relay 33 | output = dlowpass.make('output', neurons=dimensions*neurons, dimensions=dimensions) # create output relay 34 | 35 | # now we track the derivative of sum, and only let output relay the input 36 | # if the derivative is below a given threshold 37 | dlowpass.make('derivative', neurons=radius*neurons, dimensions=2, radius=radius) # create population to calculate the derivative 38 | dlowpass.connect('derivative', 'derivative', index_pre=0, index_post=1, pstc=0.1) # set up recurrent connection 39 | dlowpass.add(make_abs_val(name='abs_val', neurons=neurons, dimensions=1, intercept=(.2,1))) # create a subnetwork to calculate the absolute value 40 | 41 | # connect it up! 
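# (wiring summary: 'input' is relayed straight through to 'output'; 'derivative'
# holds the current value in dimension 0 and a slower recurrent copy (pstc=0.1)
# in dimension 1, so dim 0 minus dim 1 approximates the signal's rate of change;
# the absolute value of that difference then drives the inhibitory termination
# added to 'output' below, so the output is suppressed while the input is
# changing quickly and passes through once the input settles)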
42 | dlowpass.connect('input', 'output') # set up communication channel 43 | dlowpass.connect('input', 'derivative', index_post=0) 44 | 45 | def sub(x): 46 | return [x[0] - x[1]] 47 | dlowpass.connect('derivative', 'abs_val.input', func=sub) 48 | 49 | # set up inhibitory matrix 50 | inhib_matrix = [[-inhib_scale]] * neurons * dimensions 51 | output.addTermination('inhibition', inhib_matrix, tau_inhib, False) 52 | dlowpass.connect('abs_val.output', output.getTermination('inhibition')) 53 | 54 | return dlowpass.network 55 | 56 | # Create network 57 | net = nef.Network('net') 58 | 59 | # Create / add low pass derivative filter 60 | net.add(make_dlowpass(name='dlowpass', neurons=N, dimensions=D)) 61 | 62 | # Make function input 63 | net.make_input('input_function', values=[0]*D) 64 | 65 | # Connect up function input to filter 66 | net.connect('input_function', 'dlowpass.input') 67 | 68 | # Add it all to Nengo 69 | net.add_to_nengo() 70 | 71 | -------------------------------------------------------------------------------- /Nengo scripting/Nengo 2/arm_control/Arm.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2015 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | ''' 17 | import numpy as np 18 | 19 | class Arm2Link: 20 | """A base class for arm simulators""" 21 | 22 | def __init__(self, dt=1e-4): 23 | """ 24 | dt float: the timestep for simulation 25 | singularity_thresh float: the point at which to singular values 26 | from the matrix SVD to zero. 27 | """ 28 | self.DOF = 2 29 | self.dt = dt 30 | 31 | # length of arm links 32 | self.l1 = 1.5 33 | self.l2 = 1.3 34 | 35 | # mass of links 36 | m1=1.98 37 | m2=1.32 38 | 39 | # compute non changing constants 40 | self.K1 = (1/3.*m1+m2)*self.l1**2. + 1/3.*m2*self.l2**2.; 41 | self.K2 = m2*self.l1*self.l2; 42 | self.K3 = 1/3.*m2*self.l2**2.; 43 | self.K4 = 1/2.*m2*self.l1*self.l2; 44 | 45 | self.reset() 46 | 47 | def apply_torque(self, u, dt=None): 48 | """Takes in a torque and timestep and updates the 49 | arm simulation accordingly. 50 | 51 | u np.array: the control signal to apply 52 | dt float: the timestep 53 | """ 54 | if dt is None: 55 | dt = self.dt 56 | 57 | # equations solved for angles 58 | C2 = np.cos(self.q1) 59 | S2 = np.sin(self.q1) 60 | M11 = (self.K1 + self.K2*C2) 61 | M12 = (self.K3 + self.K4*C2) 62 | M21 = M12 63 | M22 = self.K3 64 | H1 = -self.K2*S2*self.dq0*self.dq1 - 1/2.*self.K2*S2*self.dq1**2. 65 | H2 = 1/2.*self.K2*S2*self.dq0**2. 66 | 67 | ddq1 = (H2*M11 - H1*M21 - M11*u[1]+ M21*u[0]) / (M12**2. 
- M11*M22) 68 | ddq0 = (-H2 + u[1]- M22*ddq1) / M21 69 | self.dq1 = self.dq1 + ddq1*dt 70 | self.dq0 = self.dq0 + ddq0*dt 71 | self.q1 = self.q1 + self.dq1*dt 72 | self.q0 = self.q0 + self.dq0*dt 73 | self.x = self.position(ee_only=True) 74 | 75 | def position(self, q=None, ee_only=False): 76 | """Compute x,y position of the hand 77 | 78 | q list: a list of the joint angles, 79 | if None use current system state 80 | ee_only boolean: if true only return the 81 | position of the end-effector 82 | """ 83 | q0 = self.q0 if q is None else q[0] 84 | q1 = self.q1 if q is None else q[1] 85 | 86 | x = np.cumsum([0, 87 | self.l1 * np.cos(q0), 88 | self.l2 * np.cos(q0+q1)]) 89 | y = np.cumsum([0, 90 | self.l1 * np.sin(q0), 91 | self.l2 * np.sin(q0+q1)]) 92 | 93 | if ee_only: 94 | return np.array([x[-1], y[-1]]) 95 | 96 | return (x, y) 97 | 98 | def reset(self, q=[], dq=[]): 99 | """Resets the state of the arm 100 | q list: a list of the joint angles 101 | dq list: a list of the joint velocities 102 | """ 103 | if isinstance(q, np.ndarray): q = q.tolist() 104 | if isinstance(dq, np.ndarray): dq = dq.tolist() 105 | 106 | self.q0 = np.pi/2; self.q1 = np.pi/2 107 | self.dq0 = 0; self.dq1 = 0 108 | self.x = self.position(ee_only=True) 109 | -------------------------------------------------------------------------------- /Nengo scripting/Nengo 2/arm_control/nengo_arm.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2015 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | ''' 17 | 18 | import nengo 19 | import numpy as np 20 | 21 | import Arm; reload(Arm) 22 | 23 | run_in_GUI = True 24 | 25 | # set the initial position of the arm 26 | arm = Arm.Arm2Link(dt=1e-3) 27 | arm.reset(q=[np.pi/5.5, np.pi/1.7], dq=[0, 0]) 28 | 29 | """Generate the Nengo model that will control the arm.""" 30 | model = nengo.Network() 31 | 32 | with model: 33 | 34 | # create input nodes 35 | def arm_func(t, x): 36 | u = x[:2] 37 | arm.apply_torque(u) 38 | data = np.hstack([arm.q0, arm.q1, arm.dq0, arm.dq1, arm.x]) # data returned from node to model 39 | 40 | # visualization code ----------------------------------------------------- 41 | scale = 15 42 | len0 = arm.l1 * scale 43 | len1 = arm.l2 * scale 44 | 45 | angles = data[:3] 46 | angle_offset = np.pi/2 47 | x1 = 50 48 | y1 = 50 49 | x2 = x1 + len0 * np.sin(angle_offset-angles[0]) 50 | y2 = y1 - len0 * np.cos(angle_offset-angles[0]) 51 | x3 = x2 + len1 * np.sin(angle_offset-angles[0] - angles[1]) 52 | y3 = y2 - len1 * np.cos(angle_offset-angles[0] - angles[1]) 53 | 54 | arm_func._nengo_html_ = ''' 55 | 56 | 57 | 58 | 59 | 60 | '''.format(**locals()) 61 | # end of visualization code --------------------------------------------- 62 | 63 | return data 64 | arm_node = nengo.Node(output=arm_func, size_in=2) 65 | 66 | # specify torque input to arm 67 | input_node = nengo.Node(output=[1, .1]) 68 | 69 | # to send a target to an ensemble which then connections to the arm 70 | ens = nengo.Ensemble(n_neurons=500, dimensions=2, radius=20) 71 | nengo.Connection(input_node, ens[:2]) # to send target info to ensemble 72 | 73 | # connect ens to arm 74 | nengo.Connection(ens, arm_node)#, function=some_function) 75 | # -------------------------------------------------------- 76 | 77 | if run_in_GUI: 78 | # to run in GUI, comment out next 4 lines for running without GUI 79 | import nengo_gui 80 | nengo_gui.GUI(model=model, filename=__file__, locals=locals(), 81 | interactive=False, allow_file_change=False).start() 82 | import sys 83 | sys.exit() 84 | else: 85 | # to run in command line 86 | with model: 87 | probe_input = nengo.Probe(input_node) 88 | probe_arm = nengo.Probe(arm_node[arm.DOF*2]) 89 | 90 | print 'building model...' 91 | sim = nengo.Simulator(model, dt=.001) 92 | print 'build complete.' 
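# (arm_node outputs [q0, q1, dq0, dq1, x, y], so probe_arm above records index
# arm.DOF*2 = 4, i.e. the x coordinate of the hand returned by arm.position)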
93 | 94 | sim.run(10) 95 | 96 | t = sim.trange() 97 | x = sim.data[probe_arm] 98 | y = sim.data[probe_arm] 99 | 100 | # plot collected data 101 | import matplotlib.pyplot as plt 102 | 103 | plt.subplot(311) 104 | plt.plot(t, x) 105 | plt.xlabel('time') 106 | plt.ylabel('probe_arm0') 107 | 108 | plt.subplot(312) 109 | plt.plot(t, y) 110 | plt.xlabel('time') 111 | plt.ylabel('probe_arm1') 112 | 113 | plt.subplot(313) 114 | plt.plot(x, y) 115 | plt.xlabel('probe_arm0') 116 | plt.ylabel('probe_arm1') 117 | 118 | plt.tight_layout() 119 | plt.show() 120 | 121 | -------------------------------------------------------------------------------- /Nengo scripting/Nengo 2/arm_control/nengo_arm.py.cfg: -------------------------------------------------------------------------------- 1 | _viz_0 = nengo_gui.components.Slider(input_node) 2 | _viz_config[_viz_0].label_visible = True 3 | _viz_config[_viz_0].width = 0.09976389908641264 4 | _viz_config[_viz_0].x = 0.02887899643772116 5 | _viz_config[_viz_0].y = 1.074374958770512 6 | _viz_config[_viz_0].max_value = 10 7 | _viz_config[_viz_0].min_value = -10 8 | _viz_config[_viz_0].height = 0.22526245089808372 9 | _viz_1 = nengo_gui.components.HTMLView(arm_node) 10 | _viz_config[_viz_1].label_visible = True 11 | _viz_config[_viz_1].width = 0.3500876195626132 12 | _viz_config[_viz_1].height = 0.20246260768977564 13 | _viz_config[_viz_1].y = 1.0595697196368319 14 | _viz_config[_viz_1].x = 0.516872761670474 15 | _viz_ace_editor = nengo_gui.components.AceEditor() 16 | _viz_net_graph = nengo_gui.components.NetGraph() 17 | _viz_sim_control = nengo_gui.components.SimControl() 18 | _viz_config[_viz_sim_control].kept_time = 4 19 | _viz_config[_viz_sim_control].shown_time = 0.5 20 | _viz_config[arm_node].pos=(0.7230494983245134, 0.7045113565355365) 21 | _viz_config[arm_node].size=(0.02962962962962963, 0.024691358024691357) 22 | _viz_config[ens].pos=(0.3530067978378699, 0.7011896136614068) 23 | _viz_config[ens].size=(0.1, 0.1) 24 | _viz_config[input_node].pos=(0.035197911307066954, 0.7062281487555275) 25 | _viz_config[input_node].size=(0.1, 0.1) 26 | _viz_config[model].pos=(0.2154839563527499, -0.3199287566807065) 27 | _viz_config[model].size=(0.934421272707967, 0.934421272707967) 28 | _viz_config[model].expanded=True 29 | _viz_config[model].has_layout=True -------------------------------------------------------------------------------- /Nengo scripting/Nengo 2/discrete_filter/point_attractor.py: -------------------------------------------------------------------------------- 1 | """ Implementing ddy = alpha * (beta * (y* - y) - dy) 2 | 3 | NOTE: when connecting to the input, use synapse=None so that double 4 | filtering of the input signal doesn't happen. 
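In state-space form, with state [y, dy] and input y*, the same system reads d/dt [y, dy] = A [y, dy] + B y*, with A = [[0, 1], [-alpha*beta, -alpha]] and B = [[0], [alpha*beta]]; these are the matrices built in generate() below. The analog branch then folds the continuous lowpass synapse into them (tau*A + I, tau*B), while the discrete branch first discretizes with the matrix exponential and then compensates for the discrete lowpass filter.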
""" 5 | 6 | import numpy as np 7 | from scipy.linalg import expm 8 | 9 | import nengo 10 | 11 | def generate(net=None, n_neurons=200, alpha=1000.0, beta=1000.0/4.0, 12 | dt=0.001, analog=False): 13 | tau = 0.1 # synaptic time constant 14 | synapse = nengo.Lowpass(tau) 15 | 16 | # the A matrix for our point attractor 17 | A = np.array([[0.0, 1.0], 18 | [-alpha*beta, -alpha]]) 19 | 20 | # the B matrix for our point attractor 21 | B = np.array([[0.0], [alpha*beta]]) 22 | 23 | # if you have the nengolib library you can do it this way 24 | # from nengolib.synapses import ss2sim 25 | # C = np.eye(2) 26 | # D = np.zeros((2, 2)) 27 | # linsys = ss2sim((A, B, C, D), synapse=synapse, dt=None if analog else dt) 28 | # A = linsys.A 29 | # B = linsys.B 30 | 31 | if analog: 32 | # account for continuous lowpass filter 33 | A = tau * A + np.eye(2) 34 | B = tau * B 35 | else: 36 | # discretize state matrices 37 | Ad = expm(A*dt) 38 | Bd = np.dot(np.linalg.inv(A), np.dot((Ad - np.eye(2)), B)) 39 | # account for discrete lowpass filter 40 | a = np.exp(-dt/tau) 41 | A = 1.0 / (1.0 - a) * (Ad - a * np.eye(2)) 42 | B = 1.0 / (1.0 - a) * Bd 43 | 44 | if net is None: 45 | net = nengo.Network(label='Point Attractor') 46 | config = nengo.Config(nengo.Connection, nengo.Ensemble) 47 | config[nengo.Connection].synapse = nengo.Lowpass(tau) 48 | 49 | with config, net: 50 | net.ydy = nengo.Ensemble(n_neurons=n_neurons, dimensions=2, 51 | # set it up so neurons are tuned to one dimensions only 52 | encoders=nengo.dists.Choice([[1, 0], [-1, 0], [0, 1], [0, -1]])) 53 | # set up Ax part of point attractor 54 | nengo.Connection(net.ydy, net.ydy, transform=A) 55 | 56 | # hook up input 57 | net.input = nengo.Node(size_in=1, size_out=1) 58 | # set up Bu part of point attractor 59 | nengo.Connection(net.input, net.ydy, transform=B) 60 | 61 | # hook up output 62 | net.output = nengo.Node(size_in=1, size_out=1) 63 | # add in forcing function 64 | nengo.Connection(net.ydy[0], net.output, synapse=None) 65 | 66 | return net 67 | 68 | 69 | if __name__ == '__main__': 70 | import matplotlib.pyplot as plt 71 | plt.figure(figsize=(5, 12)) 72 | 73 | dt = 1e-3 74 | time = 5 # number of seconds to run simulation 75 | alphas = [10.0, 100.0, 1000.0] 76 | for ii, alpha in enumerate(alphas): 77 | probe_results = [] 78 | model = nengo.Network() 79 | beta = alpha / 4.0 80 | for option in [True, False]: 81 | with model: 82 | def goal_func(t): 83 | return float(int(t)) / time * 2 - 1 84 | goal = nengo.Node(output=goal_func) 85 | pa = generate(n_neurons=1000, analog=option, 86 | alpha=alpha, beta=beta, dt=dt) 87 | nengo.Connection(goal, pa.input, synapse=None) 88 | 89 | probe_ans = nengo.Probe(goal) 90 | probe = nengo.Probe(pa.output, synapse=.01) 91 | 92 | sim = nengo.Simulator(model, dt=dt) 93 | sim.run(time) 94 | probe_results.append(np.copy(sim.data[probe])) 95 | 96 | plt.subplot(len(alphas), 1, ii+1) 97 | lines_c = plt.plot(sim.trange(), probe_results[0], 'b') 98 | lines_d = plt.plot(sim.trange(), probe_results[1], 'g') 99 | line_ans = plt.plot(sim.trange(), sim.data[probe_ans][:, 0], 'r--') 100 | plt.legend([lines_c[0], lines_d[0], line_ans], 101 | ['continuous', 'discrete', 'desired']) 102 | plt.title('alpha=%.2f, beta=%.2f' % (alpha, beta)) 103 | 104 | plt.tight_layout() 105 | plt.show() 106 | -------------------------------------------------------------------------------- /NengoDL/mnist_params.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/NengoDL/mnist_params.npz -------------------------------------------------------------------------------- /NengoDL/mnist_params_with_fr_loss.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/NengoDL/mnist_params_with_fr_loss.npz -------------------------------------------------------------------------------- /PyGame/arm/arm1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pygame 3 | import pygame.locals 4 | 5 | from armpart import ArmPart 6 | 7 | black = (0, 0, 0) 8 | white = (255, 255, 255) 9 | 10 | pygame.init() 11 | 12 | width = 500 13 | height = 500 14 | display = pygame.display.set_mode((width, height)) 15 | fpsClock = pygame.time.Clock() 16 | 17 | upperarm = ArmPart('upperarm.png', scale=.7) 18 | 19 | base = (width / 2, height / 2) 20 | 21 | while 1: 22 | 23 | display.fill(white) 24 | 25 | ua_image, ua_rect = upperarm.rotate(.01) 26 | ua_rect.center += np.asarray(base) 27 | ua_rect.center -= np.array([np.cos(upperarm.rotation) * upperarm.offset, 28 | -np.sin(upperarm.rotation) * upperarm.offset]) 29 | 30 | display.blit(ua_image, ua_rect) 31 | pygame.draw.circle(display, black, base, 30) 32 | 33 | # check for quit 34 | for event in pygame.event.get(): 35 | if event.type == pygame.locals.QUIT: 36 | pygame.quit() 37 | sys.exit() 38 | 39 | pygame.display.update() 40 | fpsClock.tick(30) 41 | -------------------------------------------------------------------------------- /PyGame/arm/arm3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pygame 3 | import pygame.locals 4 | 5 | from armpart import ArmPart 6 | 7 | black = (0, 0, 0) 8 | white = (255, 255, 255) 9 | 10 | pygame.init() 11 | 12 | width = 750 13 | height = 750 14 | display = pygame.display.set_mode((width, height)) 15 | fpsClock = pygame.time.Clock() 16 | 17 | upperarm = ArmPart('upperarm.png', scale=.7) 18 | forearm = ArmPart('forearm.png', scale=.8) 19 | hand = ArmPart('hand.png', scale=1.0) 20 | 21 | origin = (width / 2, height / 2) 22 | 23 | while 1: 24 | 25 | display.fill(white) 26 | 27 | # rotate our joints 28 | ua_image, ua_rect = upperarm.rotate(.03) 29 | fa_image, fa_rect = forearm.rotate(-.02) 30 | h_image, h_rect = hand.rotate(.03) 31 | 32 | # generate (x,y) positions of each of the joints 33 | joints_x = np.cumsum([0, 34 | upperarm.scale * np.cos(upperarm.rotation), 35 | forearm.scale * np.cos(forearm.rotation), 36 | hand.length * np.cos(hand.rotation)]) + origin[0] 37 | joints_y = np.cumsum([0, 38 | upperarm.scale * np.sin(upperarm.rotation), 39 | forearm.scale * np.sin(forearm.rotation), 40 | hand.length * np.sin(hand.rotation)]) * -1 + origin[1] 41 | joints = [(int(x), int(y)) for x,y in zip(joints_x, joints_y)] 42 | 43 | def transform(rect, base, arm_part): 44 | rect.center += np.asarray(base) 45 | rect.center += np.array([np.cos(arm_part.rotation) * arm_part.offset, 46 | -np.sin(arm_part.rotation) * arm_part.offset]) 47 | 48 | transform(ua_rect, joints[0], upperarm) 49 | transform(fa_rect, joints[1], forearm) 50 | transform(h_rect, joints[2], hand) 51 | # transform the hand a bit more because it's weird 52 | h_rect.center += np.array([np.cos(hand.rotation), 53 | -np.sin(hand.rotation)]) * -10 54 | 55 | display.blit(ua_image, ua_rect) 56 | display.blit(fa_image, 
fa_rect) 57 | display.blit(h_image, h_rect) 58 | 59 | # check for quit 60 | for event in pygame.event.get(): 61 | if event.type == pygame.locals.QUIT: 62 | pygame.quit() 63 | sys.exit() 64 | 65 | pygame.display.update() 66 | fpsClock.tick(30) 67 | -------------------------------------------------------------------------------- /PyGame/arm/arm3wlines.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pygame 3 | import pygame.locals 4 | 5 | from armpart import ArmPart 6 | 7 | black = (0, 0, 0) 8 | white = (255, 255, 255) 9 | arm_color = (50, 50, 50, 200) # fourth value specifies transparency 10 | 11 | pygame.init() 12 | 13 | width = 750 14 | height = 750 15 | display = pygame.display.set_mode((width, height)) 16 | fpsClock = pygame.time.Clock() 17 | 18 | upperarm = ArmPart('upperarm.png', scale=.7) 19 | forearm = ArmPart('forearm.png', scale=.8) 20 | hand = ArmPart('hand.png', scale=1.0) 21 | 22 | line_width = 15 23 | line_upperarm = pygame.Surface((upperarm.scale, line_width), pygame.SRCALPHA, 32) 24 | line_forearm = pygame.Surface((forearm.scale, line_width), pygame.SRCALPHA, 32) 25 | line_hand = pygame.Surface((hand.scale, line_width), pygame.SRCALPHA, 32) 26 | 27 | line_upperarm.fill(arm_color) 28 | line_forearm.fill(arm_color) 29 | line_hand.fill(arm_color) 30 | 31 | origin = (width / 2, height / 2) 32 | 33 | def transform(rect, base, arm_part): 34 | rect.center += np.asarray(base) 35 | rect.center += np.array([np.cos(arm_part.rotation) * arm_part.offset, 36 | -np.sin(arm_part.rotation) * arm_part.offset]) 37 | def transform_lines(rect, base, arm_part): 38 | transform(rect, base, arm_part) 39 | rect.center += np.array([-rect.width / 2.0, -rect.height / 2.0]) 40 | 41 | while 1: 42 | 43 | display.fill(white) 44 | 45 | # rotate our joints 46 | ua_image, ua_rect = upperarm.rotate(.03) 47 | fa_image, fa_rect = forearm.rotate(-.02) 48 | h_image, h_rect = hand.rotate(.03) 49 | 50 | # generate (x,y) positions of each of the joints 51 | joints_x = np.cumsum([0, 52 | upperarm.scale * np.cos(upperarm.rotation), 53 | forearm.scale * np.cos(forearm.rotation), 54 | hand.length * np.cos(hand.rotation)]) + origin[0] 55 | joints_y = np.cumsum([0, 56 | upperarm.scale * np.sin(upperarm.rotation), 57 | forearm.scale * np.sin(forearm.rotation), 58 | hand.length * np.sin(hand.rotation)]) * -1 + origin[1] 59 | joints = [(int(x), int(y)) for x,y in zip(joints_x, joints_y)] 60 | 61 | transform(ua_rect, joints[0], upperarm) 62 | transform(fa_rect, joints[1], forearm) 63 | transform(h_rect, joints[2], hand) 64 | # transform the hand a bit more because it's weird 65 | h_rect.center += np.array([np.cos(hand.rotation), 66 | -np.sin(hand.rotation)]) * -10 67 | 68 | display.blit(ua_image, ua_rect) 69 | display.blit(fa_image, fa_rect) 70 | display.blit(h_image, h_rect) 71 | 72 | # rotate arm lines 73 | line_ua = pygame.transform.rotozoom(line_upperarm, 74 | np.degrees(upperarm.rotation), 1) 75 | line_fa = pygame.transform.rotozoom(line_forearm, 76 | np.degrees(forearm.rotation), 1) 77 | line_h = pygame.transform.rotozoom(line_hand, 78 | np.degrees(hand.rotation), 1) 79 | # translate arm lines 80 | lua_rect = line_ua.get_rect() 81 | transform_lines(lua_rect, joints[0], upperarm) 82 | 83 | lfa_rect = line_fa.get_rect() 84 | transform_lines(lfa_rect, joints[1], forearm) 85 | 86 | lh_rect = line_h.get_rect() 87 | transform_lines(lh_rect, joints[2], hand) 88 | 89 | display.blit(line_ua, lua_rect) 90 | display.blit(line_fa, lfa_rect) 91 | 
display.blit(line_h, lh_rect) 92 | 93 | # draw circles at joints for pretty 94 | pygame.draw.circle(display, black, joints[0], 30) 95 | pygame.draw.circle(display, arm_color, joints[0], 12) 96 | pygame.draw.circle(display, black, joints[1], 20) 97 | pygame.draw.circle(display, arm_color, joints[1], 7) 98 | pygame.draw.circle(display, black, joints[2], 15) 99 | pygame.draw.circle(display, arm_color, joints[2], 5) 100 | 101 | # check for quit 102 | for event in pygame.event.get(): 103 | if event.type == pygame.locals.QUIT: 104 | pygame.quit() 105 | sys.exit() 106 | 107 | pygame.display.update() 108 | fpsClock.tick(30) 109 | -------------------------------------------------------------------------------- /PyGame/arm/armpart.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pygame 3 | 4 | class ArmPart: 5 | """ 6 | A class for storing relevant arm segment information. 7 | """ 8 | def __init__(self, pic, scale=1.0): 9 | self.base = pygame.image.load(pic) 10 | # some handy constants 11 | self.length = self.base.get_rect()[2] 12 | self.scale = self.length * scale 13 | self.offset = self.scale / 2.0 14 | 15 | self.rotation = 0.0 # in radians 16 | 17 | def rotate(self, rotation): 18 | """ 19 | Rotates and re-centers the arm segment. 20 | """ 21 | self.rotation += rotation 22 | # rotate our image 23 | image = pygame.transform.rotozoom(self.base, np.degrees(self.rotation), 1) 24 | # reset the center 25 | rect = image.get_rect() 26 | rect.center = (0, 0) 27 | 28 | return image, rect 29 | 30 | 31 | -------------------------------------------------------------------------------- /PyGame/arm/forearm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/PyGame/arm/forearm.png -------------------------------------------------------------------------------- /PyGame/arm/hand.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/PyGame/arm/hand.png -------------------------------------------------------------------------------- /PyGame/arm/upperarm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/PyGame/arm/upperarm.png -------------------------------------------------------------------------------- /PyGame/rotations/pic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/PyGame/rotations/pic.png -------------------------------------------------------------------------------- /PyGame/rotations/rotations-bad-1.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pygame.locals 3 | import sys 4 | white = (255, 255, 255) 5 | 6 | pygame.init() 7 | 8 | display = pygame.display.set_mode((300, 300)) 9 | fpsClock = pygame.time.Clock() 10 | 11 | image = pygame.image.load('pic.png') 12 | 13 | while 1: 14 | 15 | display.fill(white) 16 | 17 | image = pygame.transform.rotate(image, 1) 18 | rect = image.get_rect() 19 | 20 | display.blit(image, rect) 21 | 22 | # check for quit 23 | for event in pygame.event.get(): 24 | if event.type == pygame.locals.QUIT: 25 | pygame.quit() 26 | sys.exit() 27 | 28 | pygame.display.update() 
29 | fpsClock.tick(30) 30 | -------------------------------------------------------------------------------- /PyGame/rotations/rotations-bad-2.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pygame.locals 3 | import sys 4 | white = (255, 255, 255) 5 | 6 | pygame.init() 7 | 8 | display = pygame.display.set_mode((300, 300)) 9 | fpsClock = pygame.time.Clock() 10 | 11 | image = pygame.image.load('pic.png') 12 | 13 | while 1: 14 | 15 | display.fill(white) 16 | 17 | image = pygame.transform.rotate(image, 1) 18 | rect = image.get_rect() 19 | rect.center = (150, 150) 20 | 21 | display.blit(image, rect) 22 | 23 | # check for quit 24 | for event in pygame.event.get(): 25 | if event.type == pygame.locals.QUIT: 26 | pygame.quit() 27 | sys.exit() 28 | 29 | pygame.display.update() 30 | fpsClock.tick(30) 31 | -------------------------------------------------------------------------------- /PyGame/rotations/rotations-bad-3.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pygame.locals 3 | import sys 4 | white = (255, 255, 255) 5 | 6 | pygame.init() 7 | 8 | display = pygame.display.set_mode((300, 300)) 9 | fpsClock = pygame.time.Clock() 10 | 11 | image = pygame.image.load('pic.png') 12 | radians = 0 13 | 14 | while 1: 15 | 16 | display.fill(white) 17 | 18 | radians += .5 19 | 20 | rotated_image = pygame.transform.rotate(image, radians) 21 | rect = rotated_image.get_rect() 22 | rect.center = (150, 150) 23 | 24 | display.blit(rotated_image, rect) 25 | 26 | # check for quit 27 | for event in pygame.event.get(): 28 | if event.type == pygame.locals.QUIT: 29 | pygame.quit() 30 | sys.exit() 31 | 32 | pygame.display.update() 33 | fpsClock.tick(30) 34 | -------------------------------------------------------------------------------- /PyGame/rotations/rotations-good.py: -------------------------------------------------------------------------------- 1 | import pygame 2 | import pygame.locals 3 | import sys 4 | white = (255, 255, 255) 5 | 6 | pygame.init() 7 | 8 | display = pygame.display.set_mode((300, 300)) 9 | fpsClock = pygame.time.Clock() 10 | 11 | image = pygame.image.load('pic.png') 12 | degrees = 0 13 | 14 | while 1: 15 | 16 | display.fill(white) 17 | 18 | degrees += 1 19 | 20 | rotated = pygame.transform.rotozoom(image, degrees, 1) 21 | rect = rotated.get_rect() 22 | rect.center = (150, 150) 23 | 24 | display.blit(rotated, rect) 25 | 26 | # check for quit 27 | for event in pygame.event.get(): 28 | if event.type == pygame.locals.QUIT: 29 | pygame.quit() 30 | sys.exit() 31 | 32 | pygame.display.update() 33 | fpsClock.tick(30) 34 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ============================================ 2 | Studywolf code blog 3 | ============================================ 4 | 5 | This repository holds projects, both in progress and completed, 6 | that have been presented on http://www.studywolf.com. 7 | 8 | The Control folder has been moved to its own repo, at studywolf/control. 9 | 10 | The DMPs folder has been moved to its own repo, at studywolf/pydmps, and can be installed with pip. 
11 | -------------------------------------------------------------------------------- /RL/Cat vs Mouse exploration/barrier2.txt: -------------------------------------------------------------------------------- 1 | XXXXXXXXXXXXXX 2 | X X 3 | X XXXXXXXXXX X 4 | X X 5 | XXXXXXXXXXXXXX 6 | -------------------------------------------------------------------------------- /RL/Cat vs Mouse exploration/egoMouseLook.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import shelve 4 | 5 | import pdb 6 | 7 | import cellular 8 | reload(cellular) 9 | import qlearn_mod_random as qlearn # to use the alternative exploration method 10 | #import qlearn # to use standard exploration method 11 | reload(qlearn) 12 | 13 | directions = 8 14 | 15 | lookdist = 2 16 | lookcells = [] 17 | for i in range(-lookdist,lookdist+1): 18 | for j in range(-lookdist,lookdist+1): 19 | if (abs(i) + abs(j) <= lookdist) and (i != 0 or j != 0): 20 | lookcells.append((i,j)) 21 | 22 | def pickRandomLocation(): 23 | while 1: 24 | x = random.randrange(world.width) 25 | y = random.randrange(world.height) 26 | cell = world.getCell(x, y) 27 | if not (cell.wall or len(cell.agents) > 0): 28 | return cell 29 | 30 | 31 | class Cell(cellular.Cell): 32 | wall = False 33 | 34 | def colour(self): 35 | if self.wall: 36 | return 'black' 37 | else: 38 | return 'white' 39 | 40 | def load(self, data): 41 | if data == 'X': 42 | self.wall = True 43 | else: 44 | self.wall = False 45 | 46 | 47 | class Cat(cellular.Agent): 48 | cell = None 49 | score = 0 50 | colour = 'orange' 51 | 52 | def update(self): 53 | cell = self.cell 54 | if cell != mouse.cell: 55 | self.goTowards(mouse.cell) 56 | while cell == self.cell: 57 | self.goInDirection(random.randrange(directions)) 58 | 59 | 60 | class Cheese(cellular.Agent): 61 | colour = 'yellow' 62 | 63 | def update(self): 64 | pass 65 | 66 | 67 | class Mouse(cellular.Agent): 68 | colour = 'gray' 69 | 70 | def __init__(self): 71 | self.ai = None 72 | self.ai = qlearn.QLearn(actions=range(directions), 73 | alpha=0.1, gamma=0.9, epsilon=0.1) 74 | self.eaten = 0 75 | self.fed = 0 76 | self.lastState = None 77 | self.lastAction = None 78 | 79 | def update(self): 80 | state = self.calcState() 81 | reward = -1 82 | 83 | if self.cell == cat.cell: 84 | self.eaten += 1 85 | reward = -100 86 | if self.lastState is not None: 87 | self.ai.learn(self.lastState, self.lastAction, reward, state) 88 | self.lastState = None 89 | 90 | self.cell = pickRandomLocation() 91 | return 92 | 93 | if self.cell == cheese.cell: 94 | self.fed += 1 95 | reward = 50 96 | cheese.cell = pickRandomLocation() 97 | 98 | if self.lastState is not None: 99 | self.ai.learn(self.lastState, self.lastAction, reward, state) 100 | 101 | state = self.calcState() 102 | action = self.ai.chooseAction(state) 103 | self.lastState = state 104 | self.lastAction = action 105 | 106 | self.goInDirection(action) 107 | 108 | def calcState(self): 109 | def cellvalue(cell): 110 | if cat.cell is not None and (cell.x == cat.cell.x and 111 | cell.y == cat.cell.y): 112 | return 3 113 | elif cheese.cell is not None and (cell.x == cheese.cell.x and 114 | cell.y == cheese.cell.y): 115 | return 2 116 | else: 117 | return 1 if cell.wall else 0 118 | 119 | return tuple([cellvalue(self.world.getWrappedCell(self.cell.x + j, self.cell.y + i)) 120 | for i,j in lookcells]) 121 | 122 | mouse = Mouse() 123 | cat = Cat() 124 | cheese = Cheese() 125 | 126 | world = cellular.World(Cell, directions=directions, 
filename='waco.txt') 127 | world.age = 0 128 | 129 | world.addAgent(cheese, cell=pickRandomLocation()) 130 | world.addAgent(cat) 131 | world.addAgent(mouse) 132 | 133 | epsilonx = (0,100000) 134 | epsilony = (0.1,0) 135 | epsilonm = (epsilony[1] - epsilony[0]) / (epsilonx[1] - epsilonx[0]) 136 | 137 | endAge = world.age + 150000 138 | 139 | while world.age < endAge: 140 | world.update() 141 | 142 | '''if world.age % 100 == 0: 143 | mouse.ai.epsilon = (epsilony[0] if world.age < epsilonx[0] else 144 | epsilony[1] if world.age > epsilonx[1] else 145 | epsilonm*(world.age - epsilonx[0]) + epsilony[0])''' 146 | 147 | if world.age % 10000 == 0: 148 | print "{:d}, e: {:0.2f}, W: {:d}, L: {:d}"\ 149 | .format(world.age, mouse.ai.epsilon, mouse.fed, mouse.eaten) 150 | mouse.eaten = 0 151 | mouse.fed = 0 152 | 153 | world.display.activate(size=30) 154 | world.display.delay = 1 155 | while 1: 156 | world.update() 157 | -------------------------------------------------------------------------------- /RL/Cat vs Mouse exploration/qlearn.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class QLearn: 5 | def __init__(self, actions, epsilon=0.1, alpha=0.2, gamma=0.9): 6 | self.q = {} 7 | 8 | self.epsilon = epsilon 9 | self.alpha = alpha 10 | self.gamma = gamma 11 | self.actions = actions 12 | 13 | def getQ(self, state, action): 14 | return self.q.get((state, action), 0.0) 15 | # return self.q.get((state, action), 1.0) 16 | 17 | def learnQ(self, state, action, reward, value): 18 | oldv = self.q.get((state, action), None) 19 | if oldv is None: 20 | self.q[(state, action)] = reward 21 | else: 22 | self.q[(state, action)] = oldv + self.alpha * (value - oldv) 23 | 24 | def chooseAction(self, state): 25 | if random.random() < self.epsilon: 26 | action = random.choice(self.actions) 27 | else: 28 | q = [self.getQ(state, a) for a in self.actions] 29 | maxQ = max(q) 30 | count = q.count(maxQ) 31 | if count > 1: 32 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 33 | i = random.choice(best) 34 | else: 35 | i = q.index(maxQ) 36 | 37 | action = self.actions[i] 38 | return action 39 | 40 | def learn(self, state1, action1, reward, state2): 41 | maxqnew = max([self.getQ(state2, a) for a in self.actions]) 42 | self.learnQ(state1, action1, reward, reward + self.gamma*maxqnew) 43 | 44 | import math 45 | def ff(f,n): 46 | fs = "{:f}".format(f) 47 | if len(fs) < n: 48 | return ("{:"+n+"s}").format(fs) 49 | else: 50 | return fs[:n] 51 | -------------------------------------------------------------------------------- /RL/Cat vs Mouse exploration/qlearn_mod_random.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class QLearn: 5 | def __init__(self, actions, epsilon=0.1, alpha=0.2, gamma=0.9): 6 | self.q = {} 7 | 8 | self.epsilon = epsilon 9 | self.alpha = alpha 10 | self.gamma = gamma 11 | self.actions = actions 12 | 13 | def getQ(self, state, action): 14 | return self.q.get((state, action), 0.0) 15 | # return self.q.get((state, action), 1.0) 16 | 17 | def learnQ(self, state, action, reward, value): 18 | oldv = self.q.get((state, action), None) 19 | if oldv is None: 20 | self.q[(state, action)] = reward 21 | else: 22 | self.q[(state, action)] = oldv + self.alpha * (value - oldv) 23 | 24 | def chooseAction(self, state, return_q=False): 25 | q = [self.getQ(state, a) for a in self.actions] 26 | maxQ = max(q) 27 | 28 | if random.random() < self.epsilon: 29 | #action = 
random.choice(self.actions) 30 | minQ = min(q); mag = max(abs(minQ), abs(maxQ)) 31 | q = [q[i] + random.random() * mag - .5 * mag for i in range(len(self.actions))] # add random values to all the actions, recalculate maxQ 32 | maxQ = max(q) 33 | 34 | count = q.count(maxQ) 35 | if count > 1: 36 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 37 | i = random.choice(best) 38 | else: 39 | i = q.index(maxQ) 40 | 41 | action = self.actions[i] 42 | 43 | if return_q: # if they want it, give it! 44 | return action, q 45 | return action 46 | 47 | def learn(self, state1, action1, reward, state2): 48 | maxqnew = max([self.getQ(state2, a) for a in self.actions]) 49 | self.learnQ(state1, action1, reward, reward + self.gamma*maxqnew) 50 | 51 | import math 52 | def ff(f,n): 53 | fs = "{:f}".format(f) 54 | if len(fs) < n: 55 | return ("{:"+n+"s}").format(fs) 56 | else: 57 | return fs[:n] 58 | -------------------------------------------------------------------------------- /RL/Cat vs Mouse exploration/temp.ppm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/RL/Cat vs Mouse exploration/temp.ppm -------------------------------------------------------------------------------- /RL/Cat vs Mouse exploration/waco.txt: -------------------------------------------------------------------------------- 1 | XXXXXXXXXXXXXX 2 | X X 3 | X XXX X XX X 4 | X X XX XXX X 5 | X XX X X 6 | X X X X 7 | X X XXX X XXXX 8 | X X X X X 9 | X XX XXX XX 10 | X X X 11 | XXXXXXXXXXXXXX 12 | -------------------------------------------------------------------------------- /RL/Egocentric/cliffs.txt: -------------------------------------------------------------------------------- 1 | ............. 2 | .XXXXXXXXXXX. 3 | . . 4 | .S G. 5 | . . 6 | .XXXXXXXXXXX. 7 | ............. 
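The world files in these RL directories (waco.txt, barrier2.txt, cliffs.txt, and the rest) are plain-text grids, and each script assigns meaning to the characters in its own Cell.load() method: in egoCliffBasic.py below, '.' marks a wall, 'X' a cliff, 'S' the start cell, and 'G' the goal, while the cat-and-mouse scripts treat 'X' as a wall. As a rough standalone sketch of that convention (not part of the repo; load_grid and the legend dict are made-up names, and the real loading is handled by cellular.World, whose source isn't included here):

def load_grid(filename, legend):
    # legend maps grid characters to labels, e.g.
    # {'.': 'wall', 'X': 'cliff', 'S': 'start', 'G': 'goal'}
    cells = {}
    with open(filename) as f:
        for y, row in enumerate(f.read().splitlines()):
            for x, char in enumerate(row):
                cells[(x, y)] = legend.get(char, 'open')
    return cells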
8 | -------------------------------------------------------------------------------- /RL/Egocentric/egoCliffBasic.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import cellular 4 | import qlearn 5 | 6 | startCell = None 7 | 8 | class Cell(cellular.Cell): 9 | def __init__(self): 10 | self.cliff = False 11 | self.goal = False 12 | self.wall = False 13 | 14 | def colour(self): 15 | if self.cliff: 16 | return 'red' 17 | if self.goal: 18 | return 'green' 19 | if self.wall: 20 | return 'black' 21 | else: 22 | return 'white' 23 | 24 | def load(self, data): 25 | global startCell 26 | if data == 'S': 27 | startCell = self 28 | if data == '.': 29 | self.wall = True 30 | if data == 'X': 31 | self.cliff = True 32 | if data == 'G': 33 | self.goal = True 34 | 35 | 36 | class Agent(cellular.Agent): 37 | def __init__(self): 38 | self.actions = range(directions) 39 | self.egoAI = qlearn.QLearn( 40 | actions=self.actions, epsilon=0.05, alpha=0.1, gamma=.05) 41 | self.lastAction = None 42 | self.hitWall = False 43 | self.score = 0 44 | self.intentional_deaths = 0 45 | self.unintentional_deaths = 0 46 | self.intentional = True 47 | 48 | def colour(self): 49 | return 'blue' 50 | 51 | def update(self): 52 | 53 | reward = self.calcReward() 54 | egoState = self.calcEgoState() 55 | 56 | if self.lastAction is not None: 57 | self.egoAI.learn(self.lastEgoState, self.lastAction, 58 | reward, egoState) 59 | 60 | if self.cell.goal == True: 61 | self.score += 1 62 | self.restart() 63 | 64 | elif self.cell.cliff == True: 65 | if self.intentional: 66 | self.intentional_deaths += 1 67 | else: 68 | self.unintentional_deaths += 1 69 | 70 | self.restart() 71 | 72 | else: 73 | 74 | action, self.intentional = self.egoAI.chooseAction(egoState) 75 | 76 | self.lastEgoState = egoState 77 | self.lastAction = action 78 | 79 | self.hitWall = not self.goInDirection(action) 80 | 81 | # # get new state and print q-values 82 | # egoState = self.calcEgoState() 83 | # reward = self.calcReward() 84 | # q = [self.egoAI.getQ(egoState, a) for a in self.egoAI.actions] 85 | # print " %.3f "%q[0] 86 | # print " %.3f %.3f %.3f "%(q[3], reward, q[1]) 87 | # print " %.3f "%q[2] 88 | # print "" 89 | 90 | def calcEgoState(self): 91 | return tuple([self.cellvalue(c) for c in self.cell.neighbours]) 92 | 93 | def calcReward(self): 94 | if self.cell.cliff == True: 95 | return cliffReward 96 | elif self.cell.goal == True: 97 | return goalReward 98 | elif self.hitWall == True: 99 | # print 'hitwall reward' 100 | return hitWallReward 101 | else: 102 | return normalReward 103 | 104 | def cellvalue(self, cell): 105 | return (3 if cell.goal else 106 | 2 if cell.wall else 107 | 1 if cell.cliff else 108 | 0) 109 | 110 | def restart(self): 111 | self.cell = startCell 112 | self.lastAction = None 113 | self.lastEgoState = None 114 | 115 | cliffReward = -10 116 | goalReward = 500 117 | hitWallReward = -5 118 | normalReward = -1 119 | 120 | directions = 4 121 | world = cellular.World(Cell, directions=directions, filename='cliffs.txt') 122 | 123 | if startCell is None: 124 | print "You must indicate where the agent starts by putting a 'S' in the map file" 125 | sys.exit() 126 | agent = Agent() 127 | world.addAgent(agent, cell=startCell) 128 | 129 | pretraining = 0 130 | for i in range(pretraining): 131 | if i % 1000 == 0: 132 | print i, agent.score 133 | agent.score = 0 134 | world.update() 135 | 136 | ### display 137 | print cellular.Display 138 | world.display.activate(size=30) 139 | world.display.delay = 1 
140 | while 1: 141 | world.update() 142 | 143 | if world.age % 10000 == 0: 144 | print "{:d}, W: {:d}, L: {:d}, A: {:d}"\ 145 | .format(world.age, agent.score, agent.intentional_deaths, agent.unintentional_deaths) 146 | agent.score = 0 147 | agent.intentional_deaths = 0 148 | agent.unintentional_deaths = 0 149 | -------------------------------------------------------------------------------- /RL/Egocentric/mouse.py: -------------------------------------------------------------------------------- 1 | import cellular 2 | import qlearn 3 | import time 4 | import random 5 | import shelve 6 | 7 | directions = 8 8 | 9 | def pickRandomLocation(): 10 | while 1: 11 | x = random.randrange(world.width) 12 | y = random.randrange(world.height) 13 | cell = world.getCell(x, y) 14 | if not (cell.wall or len(cell.agents) > 0): 15 | return cell 16 | 17 | 18 | class Cell(cellular.Cell): 19 | wall = False 20 | 21 | def colour(self): 22 | if self.wall: 23 | return 'black' 24 | else: 25 | return 'white' 26 | 27 | def load(self, data): 28 | if data == 'X': 29 | self.wall = True 30 | else: 31 | self.wall = False 32 | 33 | 34 | class Cat(cellular.Agent): 35 | cell = None 36 | score = 0 37 | colour = 'orange' 38 | 39 | def update(self): 40 | cell = self.cell 41 | if cell != mouse.cell: 42 | self.goTowards(mouse.cell) 43 | while cell == self.cell: 44 | self.goInDirection(random.randrange(directions)) 45 | 46 | 47 | class Cheese(cellular.Agent): 48 | colour = 'yellow' 49 | 50 | def update(self): 51 | pass 52 | 53 | 54 | class Mouse(cellular.Agent): 55 | colour = 'gray' 56 | 57 | def __init__(self): 58 | self.ai = None 59 | self.ai = qlearn.QLearn(actions=range(directions), 60 | alpha=0.1, gamma=0.9, epsilon=0.1) 61 | self.eaten = 0 62 | self.fed = 0 63 | self.lastState = None 64 | self.lastAction = None 65 | 66 | def update(self): 67 | state = self.calcState() 68 | reward = -1 69 | 70 | if self.cell == cat.cell: 71 | self.eaten += 1 72 | reward = -100 73 | if self.lastState is not None: 74 | self.ai.learn(self.lastState, self.lastAction, reward, state) 75 | self.lastState = None 76 | 77 | self.cell = pickRandomLocation() 78 | return 79 | 80 | if self.cell == cheese.cell: 81 | self.fed += 1 82 | reward = 50 83 | cheese.cell = pickRandomLocation() 84 | 85 | if self.lastState is not None: 86 | self.ai.learn(self.lastState, self.lastAction, reward, state) 87 | 88 | state = self.calcState() 89 | action = self.ai.chooseAction(state) 90 | self.lastState = state 91 | self.lastAction = action 92 | 93 | self.goInDirection(action) 94 | 95 | def calcState(self): 96 | if cat.cell is not None: 97 | return self.cell.x, self.cell.y, cat.cell.x, cat.cell.y, cheese.cell.x, cheese.cell.y 98 | else: 99 | return self.cell.x, self.cell.y, cheese.cell.x, cheese.cell.y 100 | 101 | 102 | mouse = Mouse() 103 | cat = Cat() 104 | cheese = Cheese() 105 | 106 | world = cellular.World(Cell, directions=directions, filename='barrier2.txt') 107 | world.age = 0 108 | 109 | world.addAgent(cheese, cell=pickRandomLocation()) 110 | world.addAgent(cat) 111 | world.addAgent(mouse) 112 | 113 | epsilonx = (0,100000) 114 | epsilony = (0.1,0) 115 | epsilonm = (epsilony[1] - epsilony[0]) / (epsilonx[1] - epsilonx[0]) 116 | 117 | endAge = world.age + 150000 118 | while world.age < endAge: 119 | world.update() 120 | 121 | if world.age % 100 == 0: 122 | mouse.ai.epsilon = (epsilony[0] if world.age < epsilonx[0] else 123 | epsilony[1] if world.age > epsilonx[1] else 124 | epsilonm*(world.age - epsilonx[0]) + epsilony[0]) 125 | 126 | if world.age % 10000 == 0: 127 | 
print "{:d}, e: {:0.2f}, W: {:d}, L: {:d}"\ 128 | .format(world.age, mouse.ai.epsilon, mouse.fed, mouse.eaten) 129 | mouse.eaten = 0 130 | mouse.fed = 0 131 | 132 | mouse.ai.epsilon = 0.0 # change this to 0 to turn off exploration after learning 133 | 134 | world.display.activate(size=30) 135 | world.display.delay = 1 136 | while 1: 137 | world.update() 138 | -------------------------------------------------------------------------------- /RL/Egocentric/qlearn.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class QLearn: 5 | def __init__(self, actions, epsilon=0.1, alpha=0.2, gamma=0.9): 6 | self.q = {} 7 | 8 | self.epsilon = epsilon 9 | self.alpha = alpha 10 | self.gamma = gamma 11 | self.actions = actions 12 | 13 | def getQ(self, state, action): 14 | return self.q.get((state, action), 0.0) 15 | 16 | def learn(self, state, action, reward, state2): 17 | 18 | if self.q.has_key((state, action)) == False: 19 | self.q[(state, action)] = reward 20 | 21 | else: 22 | 23 | maxqnew = max([self.getQ(state2, a) for a in self.actions]) 24 | value = reward + self.gamma * maxqnew 25 | self.q[(state, action)] += self.alpha * (value - self.q[(state, action)]) 26 | 27 | def chooseAction(self, state): 28 | 29 | intentional = True 30 | if random.random() < self.epsilon: 31 | action = random.choice(self.actions) 32 | intentional = False 33 | 34 | else: 35 | q = [self.getQ(state, a) for a in self.actions] 36 | maxQ = max(q) 37 | count = q.count(maxQ) 38 | if count > 1: 39 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 40 | i = random.choice(best) 41 | else: 42 | i = q.index(maxQ) 43 | 44 | action = self.actions[i] 45 | return action, intentional 46 | 47 | -------------------------------------------------------------------------------- /RL/SARSA vs Qlearn cliff/cliff.txt: -------------------------------------------------------------------------------- 1 | .............. 2 | .SXXXXXXXXXXG. 3 | . . 4 | . . 5 | . . 6 | .............. 
7 | -------------------------------------------------------------------------------- /RL/SARSA vs Qlearn cliff/cliff_Q.py: -------------------------------------------------------------------------------- 1 | import cellular 2 | import qlearn 3 | import time 4 | import sys 5 | 6 | startCell = None 7 | 8 | 9 | class Cell(cellular.Cell): 10 | def __init__(self): 11 | self.cliff = False 12 | self.goal = False 13 | self.wall = False 14 | 15 | def colour(self): 16 | if self.cliff: 17 | return 'red' 18 | if self.goal: 19 | return 'green' 20 | if self.wall: 21 | return 'black' 22 | else: 23 | return 'white' 24 | 25 | def load(self, data): 26 | global startCell 27 | if data == 'S': 28 | startCell = self 29 | if data == '.': 30 | self.wall = True 31 | if data == 'X': 32 | self.cliff = True 33 | if data == 'G': 34 | self.goal = True 35 | 36 | 37 | class Agent(cellular.Agent): 38 | def __init__(self): 39 | self.ai = qlearn.QLearn( 40 | actions=range(directions), epsilon=0.1, alpha=0.1, gamma=0.9) 41 | self.lastAction = None 42 | self.score = 0 43 | 44 | def colour(self): 45 | return 'blue' 46 | 47 | def update(self): 48 | reward = self.calcReward() 49 | state = self.calcState() 50 | action = self.ai.chooseAction(state) 51 | if self.lastAction is not None: 52 | self.ai.learn(self.lastState, self.lastAction, reward, state) 53 | self.lastState = state 54 | self.lastAction = action 55 | 56 | here = self.cell 57 | if here.goal or here.cliff: 58 | self.cell = startCell 59 | self.lastAction = None 60 | else: 61 | self.goInDirection(action) 62 | 63 | def calcState(self): 64 | return self.cell.x, self.cell.y 65 | 66 | def calcReward(self): 67 | here = self.cell 68 | if here.cliff: 69 | return cliffReward 70 | elif here.goal: 71 | self.score += 1 72 | return goalReward 73 | else: 74 | return normalReward 75 | 76 | 77 | normalReward = -1 78 | cliffReward = -100 79 | goalReward = 0 80 | 81 | directions = 4 82 | world = cellular.World(Cell, directions=directions, filename='cliff.txt') 83 | 84 | if startCell is None: 85 | print "You must indicate where the agent starts by putting a 'S' in the map file" 86 | sys.exit() 87 | agent = Agent() 88 | world.addAgent(agent, cell=startCell) 89 | 90 | pretraining = 0 91 | for i in range(pretraining): 92 | if i % 1000 == 0: 93 | print i, agent.score 94 | agent.score = 0 95 | world.update() 96 | 97 | world.display.activate(size=30) 98 | world.display.delay = 1 99 | while 1: 100 | world.update() 101 | -------------------------------------------------------------------------------- /RL/SARSA vs Qlearn cliff/cliff_S.py: -------------------------------------------------------------------------------- 1 | import cellular 2 | import sarsa 3 | import time 4 | import sys 5 | 6 | startCell = None 7 | 8 | 9 | class Cell(cellular.Cell): 10 | def __init__(self): 11 | self.cliff = False 12 | self.goal = False 13 | self.wall = False 14 | 15 | def colour(self): 16 | if self.cliff: 17 | return 'red' 18 | if self.goal: 19 | return 'green' 20 | if self.wall: 21 | return 'black' 22 | else: 23 | return 'white' 24 | 25 | def load(self, data): 26 | global startCell 27 | if data == 'S': 28 | startCell = self 29 | if data == '.': 30 | self.wall = True 31 | if data == 'X': 32 | self.cliff = True 33 | if data == 'G': 34 | self.goal = True 35 | 36 | 37 | class Agent(cellular.Agent): 38 | def __init__(self): 39 | self.ai = sarsa.Sarsa( 40 | actions=range(directions), epsilon=0.1, alpha=0.1, gamma=0.9) 41 | self.lastAction = None 42 | self.score = 0 43 | 44 | def colour(self): 45 | return 'blue' 46 | 47 | 
def update(self): 48 | reward = self.calcReward() 49 | state = self.calcState() 50 | action = self.ai.chooseAction(state) 51 | if self.lastAction is not None: 52 | self.ai.learn( 53 | self.lastState, self.lastAction, reward, state, action) 54 | self.lastState = state 55 | self.lastAction = action 56 | 57 | here = self.cell 58 | if here.goal or here.cliff: 59 | self.cell = startCell 60 | self.lastAction = None 61 | else: 62 | self.goInDirection(action) 63 | 64 | def calcState(self): 65 | return self.cell.x, self.cell.y 66 | 67 | def calcReward(self): 68 | here = self.cell 69 | if here.cliff: 70 | return cliffReward 71 | elif here.goal: 72 | self.score += 1 73 | return goalReward 74 | else: 75 | return normalReward 76 | 77 | 78 | normalReward = -1 79 | cliffReward = -100 80 | goalReward = 0 81 | 82 | directions = 4 83 | world = cellular.World(Cell, directions=directions, filename='cliff.txt') 84 | 85 | if startCell is None: 86 | print "You must indicate where the agent starts by putting a 'S' in the map file" 87 | sys.exit() 88 | agent = Agent() 89 | world.addAgent(agent, cell=startCell) 90 | 91 | pretraining = 0 92 | for i in range(pretraining): 93 | if i % 1000 == 0: 94 | print i, agent.score 95 | agent.score = 0 96 | world.update() 97 | 98 | world.display.activate(size=30) 99 | world.display.delay = 1 100 | while 1: 101 | world.update() 102 | -------------------------------------------------------------------------------- /RL/SARSA vs Qlearn cliff/grid.txt: -------------------------------------------------------------------------------- 1 | XXXXXXXXXXXXXX 2 | X X 3 | X X X X X X XX 4 | X X 5 | X X X X X X XX 6 | X X 7 | X X X X X X XX 8 | X X 9 | X X X X X X XX 10 | X X 11 | XXXXXXXXXXXXXX 12 | -------------------------------------------------------------------------------- /RL/SARSA vs Qlearn cliff/mouse.py: -------------------------------------------------------------------------------- 1 | import cellular 2 | import qlearn 3 | import time 4 | import random 5 | import shelve 6 | 7 | directions = 8 8 | 9 | def pickRandomLocation(): 10 | while 1: 11 | x = random.randrange(world.width) 12 | y = random.randrange(world.height) 13 | cell = world.getCell(x, y) 14 | if not (cell.wall or len(cell.agents) > 0): 15 | return cell 16 | 17 | 18 | class Cell(cellular.Cell): 19 | wall = False 20 | 21 | def colour(self): 22 | if self.wall: 23 | return 'black' 24 | else: 25 | return 'white' 26 | 27 | def load(self, data): 28 | if data == 'X': 29 | self.wall = True 30 | else: 31 | self.wall = False 32 | 33 | 34 | class Cat(cellular.Agent): 35 | cell = None 36 | score = 0 37 | colour = 'orange' 38 | 39 | def update(self): 40 | cell = self.cell 41 | if cell != mouse.cell: 42 | self.goTowards(mouse.cell) 43 | while cell == self.cell: 44 | self.goInDirection(random.randrange(directions)) 45 | 46 | 47 | class Cheese(cellular.Agent): 48 | colour = 'yellow' 49 | 50 | def update(self): 51 | pass 52 | 53 | 54 | class Mouse(cellular.Agent): 55 | colour = 'gray' 56 | 57 | def __init__(self): 58 | self.ai = None 59 | self.ai = qlearn.QLearn(actions=range(directions), 60 | alpha=0.1, gamma=0.9, epsilon=0.1) 61 | self.eaten = 0 62 | self.fed = 0 63 | self.lastState = None 64 | self.lastAction = None 65 | 66 | def update(self): 67 | state = self.calcState() 68 | reward = -1 69 | 70 | if self.cell == cat.cell: 71 | self.eaten += 1 72 | reward = -100 73 | if self.lastState is not None: 74 | self.ai.learn(self.lastState, self.lastAction, reward, state) 75 | self.lastState = None 76 | 77 | self.cell = pickRandomLocation() 
78 | return 79 | 80 | if self.cell == cheese.cell: 81 | self.fed += 1 82 | reward = 50 83 | cheese.cell = pickRandomLocation() 84 | 85 | if self.lastState is not None: 86 | self.ai.learn(self.lastState, self.lastAction, reward, state) 87 | 88 | state = self.calcState() 89 | action = self.ai.chooseAction(state) 90 | self.lastState = state 91 | self.lastAction = action 92 | 93 | self.goInDirection(action) 94 | 95 | def calcState(self): 96 | if cat.cell is not None: 97 | return self.cell.x, self.cell.y, cat.cell.x, cat.cell.y, cheese.cell.x, cheese.cell.y 98 | else: 99 | return self.cell.x, self.cell.y, cheese.cell.x, cheese.cell.y 100 | 101 | 102 | mouse = Mouse() 103 | cat = Cat() 104 | cheese = Cheese() 105 | 106 | world = cellular.World(Cell, directions=directions, filename='barrier2.txt') 107 | world.age = 0 108 | 109 | world.addAgent(cheese, cell=pickRandomLocation()) 110 | world.addAgent(cat) 111 | world.addAgent(mouse) 112 | 113 | epsilonx = (0,100000) 114 | epsilony = (0.1,0) 115 | epsilonm = (epsilony[1] - epsilony[0]) / (epsilonx[1] - epsilonx[0]) 116 | 117 | endAge = world.age + 150000 118 | while world.age < endAge: 119 | world.update() 120 | 121 | if world.age % 100 == 0: 122 | mouse.ai.epsilon = (epsilony[0] if world.age < epsilonx[0] else 123 | epsilony[1] if world.age > epsilonx[1] else 124 | epsilonm*(world.age - epsilonx[0]) + epsilony[0]) 125 | 126 | if world.age % 10000 == 0: 127 | print "{:d}, e: {:0.2f}, W: {:d}, L: {:d}"\ 128 | .format(world.age, mouse.ai.epsilon, mouse.fed, mouse.eaten) 129 | mouse.eaten = 0 130 | mouse.fed = 0 131 | 132 | mouse.ai.epsilon = 0.0 # change this to 0 to turn off exploration after learning 133 | 134 | world.display.activate(size=30) 135 | world.display.delay = 1 136 | while 1: 137 | world.update() 138 | -------------------------------------------------------------------------------- /RL/SARSA vs Qlearn cliff/qlearn.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class QLearn: 5 | def __init__(self, actions, epsilon=0.1, alpha=0.2, gamma=0.9): 6 | self.q = {} 7 | 8 | self.epsilon = epsilon 9 | self.alpha = alpha 10 | self.gamma = gamma 11 | self.actions = actions 12 | 13 | def getQ(self, state, action): 14 | return self.q.get((state, action), 0.0) 15 | # return self.q.get((state, action), 1.0) 16 | 17 | def learnQ(self, state, action, reward, value): 18 | oldv = self.q.get((state, action), None) 19 | if oldv is None: 20 | self.q[(state, action)] = reward 21 | else: 22 | self.q[(state, action)] = oldv + self.alpha * (value - oldv) 23 | 24 | def chooseAction(self, state): 25 | if random.random() < self.epsilon: 26 | action = random.choice(self.actions) 27 | else: 28 | q = [self.getQ(state, a) for a in self.actions] 29 | maxQ = max(q) 30 | count = q.count(maxQ) 31 | if count > 1: 32 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 33 | i = random.choice(best) 34 | else: 35 | i = q.index(maxQ) 36 | 37 | action = self.actions[i] 38 | return action 39 | 40 | def learn(self, state1, action1, reward, state2): 41 | maxqnew = max([self.getQ(state2, a) for a in self.actions]) 42 | self.learnQ(state1, action1, reward, reward + self.gamma*maxqnew) 43 | 44 | def printQ(self): 45 | keys = self.q.keys() 46 | states = list(set([a for a,b in keys])) 47 | actions = list(set([b for a,b in keys])) 48 | 49 | dstates = ["".join([str(int(t)) for t in list(tup)]) for tup in states] 50 | print (" "*4) + " ".join(["%8s" % ("("+s+")") for s in dstates]) 51 | for a in actions: 52 | print 
("%3d " % (a)) + \ 53 | " ".join(["%8.2f" % (self.getQ(s,a)) for s in states]) 54 | 55 | def printV(self): 56 | keys = self.q.keys() 57 | states = [a for a,b in keys] 58 | statesX = list(set([x for x,y in states])) 59 | statesY = list(set([y for x,y in states])) 60 | 61 | print (" "*4) + " ".join(["%4d" % (s) for s in statesX]) 62 | for y in statesY: 63 | maxQ = [max([self.getQ((x,y),a) for a in self.actions]) 64 | for x in statesX] 65 | print ("%3d " % (y)) + " ".join([ff(q,4) for q in maxQ]) 66 | 67 | import math 68 | def ff(f,n): 69 | fs = "{:f}".format(f) 70 | if len(fs) < n: 71 | return ("{:"+n+"s}").format(fs) 72 | else: 73 | return fs[:n] 74 | # s = -1 if f < 0 else 1 75 | # ss = "-" if s < 0 else "" 76 | # b = math.floor(math.log10(s*f)) + 1 77 | # if b >= n: 78 | # return ("{:" + n + "d}").format(math.round(f)) 79 | # elif b <= 0: 80 | # return (ss + ".{:" + (n-1) + "d}").format(math.round(f * 10**(n-1))) 81 | # else: 82 | # return ("{:"+b+"d}.{:"+(n-b-1)+" 83 | -------------------------------------------------------------------------------- /RL/SARSA vs Qlearn cliff/sarsa.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class Sarsa: 5 | def __init__(self, actions, epsilon=0.1, alpha=0.2, gamma=0.9): 6 | self.q = {} 7 | 8 | self.epsilon = epsilon 9 | self.alpha = alpha 10 | self.gamma = gamma 11 | self.actions = actions 12 | 13 | def getQ(self, state, action): 14 | return self.q.get((state, action), 0.0) 15 | 16 | def learnQ(self, state, action, reward, value): 17 | oldv = self.q.get((state, action), None) 18 | if oldv is None: 19 | self.q[(state, action)] = reward 20 | else: 21 | self.q[(state, action)] = oldv + self.alpha * (value - oldv) 22 | 23 | def chooseAction(self, state): 24 | if random.random() < self.epsilon: 25 | action = random.choice(self.actions) 26 | else: 27 | q = [self.getQ(state, a) for a in self.actions] 28 | maxQ = max(q) 29 | count = q.count(maxQ) 30 | if count > 1: 31 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 32 | i = random.choice(best) 33 | else: 34 | i = q.index(maxQ) 35 | 36 | action = self.actions[i] 37 | return action 38 | 39 | def learn(self, state1, action1, reward, state2, action2): 40 | qnext = self.getQ(state2, action2) 41 | self.learnQ(state1, action1, reward, reward + self.gamma * qnext) 42 | -------------------------------------------------------------------------------- /RL/combination allo and ego/mouse.py: -------------------------------------------------------------------------------- 1 | import cellular 2 | import qlearn 3 | import time 4 | import random 5 | import shelve 6 | 7 | directions = 8 8 | 9 | def pickRandomLocation(): 10 | while 1: 11 | x = random.randrange(world.width) 12 | y = random.randrange(world.height) 13 | cell = world.getCell(x, y) 14 | if not (cell.wall or len(cell.agents) > 0): 15 | return cell 16 | 17 | 18 | class Cell(cellular.Cell): 19 | wall = False 20 | 21 | def colour(self): 22 | if self.wall: 23 | return 'black' 24 | else: 25 | return 'white' 26 | 27 | def load(self, data): 28 | if data == 'X': 29 | self.wall = True 30 | else: 31 | self.wall = False 32 | 33 | 34 | class Cat(cellular.Agent): 35 | cell = None 36 | score = 0 37 | colour = 'orange' 38 | 39 | def update(self): 40 | cell = self.cell 41 | if cell != mouse.cell: 42 | self.goTowards(mouse.cell) 43 | while cell == self.cell: 44 | self.goInDirection(random.randrange(directions)) 45 | 46 | 47 | class Cheese(cellular.Agent): 48 | colour = 'yellow' 49 | 50 | def 
update(self): 51 | pass 52 | 53 | 54 | class Mouse(cellular.Agent): 55 | colour = 'gray' 56 | 57 | def __init__(self): 58 | self.ai = None 59 | self.ai = qlearn.QLearn(actions=range(directions), 60 | alpha=0.1, gamma=0.9, epsilon=0.1) 61 | self.eaten = 0 62 | self.fed = 0 63 | self.lastState = None 64 | self.lastAction = None 65 | 66 | def update(self): 67 | state = self.calcState() 68 | reward = -1 69 | 70 | if self.cell == cat.cell: 71 | self.eaten += 1 72 | reward = -100 73 | if self.lastState is not None: 74 | self.ai.learn(self.lastState, self.lastAction, reward, state) 75 | self.lastState = None 76 | 77 | self.cell = pickRandomLocation() 78 | return 79 | 80 | if self.cell == cheese.cell: 81 | self.fed += 1 82 | reward = 50 83 | cheese.cell = pickRandomLocation() 84 | 85 | if self.lastState is not None: 86 | self.ai.learn(self.lastState, self.lastAction, reward, state) 87 | 88 | state = self.calcState() 89 | action = self.ai.chooseAction(state) 90 | self.lastState = state 91 | self.lastAction = action 92 | 93 | self.goInDirection(action) 94 | 95 | def calcState(self): 96 | if cat.cell is not None: 97 | return self.cell.x, self.cell.y, cat.cell.x, cat.cell.y, cheese.cell.x, cheese.cell.y 98 | else: 99 | return self.cell.x, self.cell.y, cheese.cell.x, cheese.cell.y 100 | 101 | 102 | mouse = Mouse() 103 | cat = Cat() 104 | cheese = Cheese() 105 | 106 | world = cellular.World(Cell, directions=directions, filename='barrier2.txt') 107 | world.age = 0 108 | 109 | world.addAgent(cheese, cell=pickRandomLocation()) 110 | world.addAgent(cat) 111 | world.addAgent(mouse) 112 | 113 | epsilonx = (0,100000) 114 | epsilony = (0.1,0) 115 | epsilonm = (epsilony[1] - epsilony[0]) / (epsilonx[1] - epsilonx[0]) 116 | 117 | endAge = world.age + 150000 118 | while world.age < endAge: 119 | world.update() 120 | 121 | if world.age % 100 == 0: 122 | mouse.ai.epsilon = (epsilony[0] if world.age < epsilonx[0] else 123 | epsilony[1] if world.age > epsilonx[1] else 124 | epsilonm*(world.age - epsilonx[0]) + epsilony[0]) 125 | 126 | if world.age % 10000 == 0: 127 | print "{:d}, e: {:0.2f}, W: {:d}, L: {:d}"\ 128 | .format(world.age, mouse.ai.epsilon, mouse.fed, mouse.eaten) 129 | mouse.eaten = 0 130 | mouse.fed = 0 131 | 132 | mouse.ai.epsilon = 0.0 # change this to 0 to turn off exploration after learning 133 | 134 | world.display.activate(size=30) 135 | world.display.delay = 1 136 | while 1: 137 | world.update() 138 | -------------------------------------------------------------------------------- /RL/combination allo and ego/qlearn.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | class QLearn: 5 | def __init__(self, actions, epsilon=0.1, alpha=0.2, gamma=0.9): 6 | self.q = {} 7 | 8 | self.epsilon = epsilon 9 | self.alpha = alpha 10 | self.gamma = gamma 11 | self.actions = actions 12 | 13 | def getQ(self, state, action): 14 | return self.q.get((state, action), 0.0) 15 | 16 | def learn(self, state, action, reward, state2): 17 | 18 | if self.q.has_key((state, action)) == False: 19 | update = reward 20 | self.q[(state, action)] = update 21 | 22 | else: 23 | 24 | maxqnew = max([self.getQ(state2, a) for a in self.actions]) 25 | value = reward + self.gamma * maxqnew 26 | update = self.alpha * (value - self.q[(state, action)]) 27 | self.q[(state, action)] += update 28 | 29 | return update 30 | 31 | 32 | def chooseAction(self, state): 33 | 34 | intentional = True 35 | if random.random() < self.epsilon: 36 | action = random.choice(self.actions) 37 | 
intentional = False 38 | 39 | else: 40 | q = [self.getQ(state, a) for a in self.actions] 41 | maxQ = max(q) 42 | count = q.count(maxQ) 43 | if count > 1: 44 | best = [i for i in range(len(self.actions)) if q[i] == maxQ] 45 | i = random.choice(best) 46 | else: 47 | i = q.index(maxQ) 48 | 49 | action = self.actions[i] 50 | return action, intentional 51 | 52 | -------------------------------------------------------------------------------- /RL/combination allo and ego/worlds/barrier.txt: -------------------------------------------------------------------------------- 1 | XXXXXXXXXX 2 | X X 3 | X XXX X 4 | X X 5 | X X 6 | X X 7 | X X 8 | X X 9 | X X 10 | XXXXXXXXXX 11 | -------------------------------------------------------------------------------- /RL/combination allo and ego/worlds/barrier2.txt: -------------------------------------------------------------------------------- 1 | XXXXXXXXXXXXXX 2 | X X 3 | X XXXXXXXXXX X 4 | X X 5 | XXXXXXXXXXXXXX 6 | -------------------------------------------------------------------------------- /RL/combination allo and ego/worlds/barrier3.txt: -------------------------------------------------------------------------------- 1 | .............. 2 | .S . 3 | . . 4 | .XXXXXXXXX . 5 | . . 6 | . . 7 | . . 8 | . XXXXXXXXX. 9 | . . 10 | . . 11 | . . 12 | . XXXXXXXXX. 13 | . . 14 | . . 15 | . G . 16 | . XXXXXXXXX. 17 | .............. 18 | -------------------------------------------------------------------------------- /RL/combination allo and ego/worlds/bigmap.txt: -------------------------------------------------------------------------------- 1 | ........................ 2 | .S . 3 | . XXXXXX . 4 | . XXXX . 5 | . . 6 | . X XXXXX . 7 | . . 8 | . XXXXXXXXX . 9 | . . 10 | . XXXXXXXXXXX . 11 | . . 12 | . XXXXXXXXX . 13 | . . 14 | . . 15 | . . 16 | . XXXXXXXXX . 17 | . X . 18 | . X . 19 | .XXXX X . 20 | . X XXXXXXXXX . 21 | . X X . 22 | .XXXXXXXX X X . 23 | . G XXXX . 24 | . XXXXXXXXX X . 25 | ........................ 26 | -------------------------------------------------------------------------------- /RL/combination allo and ego/worlds/cliff.txt: -------------------------------------------------------------------------------- 1 | .............. 2 | .SXXXXXXXXXXG. 3 | . . 4 | . . 5 | . . 6 | .............. 7 | -------------------------------------------------------------------------------- /RL/combination allo and ego/worlds/cliffs.txt: -------------------------------------------------------------------------------- 1 | ............. 2 | .XXXXXXXXXXX. 3 | . . 4 | .S G. 5 | . . 6 | .XXXXXXXXXXX. 7 | ............. 
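The mouse.py scripts in the RL directories above all anneal the exploration rate with the same piecewise-linear schedule built from epsilonx, epsilony, and epsilonm: epsilon starts at 0.1, decays linearly to 0 over the first 100000 world updates, and stays at 0 afterwards. Written out as a small function (an equivalent restatement, not code from the repo):

def epsilon_schedule(age, epsilonx=(0, 100000), epsilony=(0.1, 0.0)):
    # piecewise-linear decay used for mouse.ai.epsilon in the scripts above
    if age < epsilonx[0]:
        return epsilony[0]
    if age > epsilonx[1]:
        return epsilony[1]
    slope = (epsilony[1] - epsilony[0]) / (epsilonx[1] - epsilonx[0])
    return slope * (age - epsilonx[0]) + epsilony[0]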
8 | -------------------------------------------------------------------------------- /SymPy/UR5.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/UR5.ttt -------------------------------------------------------------------------------- /SymPy/remoteApi.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/remoteApi.so -------------------------------------------------------------------------------- /SymPy/test_timing.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2016 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | ''' 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | import seaborn 20 | import timeit 21 | 22 | # Test 1 ---------------------------------------------------------------------- 23 | print('\nTest function 1: ') 24 | time_sympy1 = timeit.timeit( 25 | stmt='f(np.random.random(), np.random.random())', 26 | setup='import numpy as np;\ 27 | import sympy as sp;\ 28 | q0 = sp.Symbol("q0");\ 29 | l0 = sp.Symbol("l0");\ 30 | a = sp.cos(q0) * l0;\ 31 | f = sp.lambdify((q0, l0), a, "numpy")') 32 | print('Sympy lambdify function 1 time: ', time_sympy1) 33 | 34 | time_hardcoded1 = timeit.timeit( 35 | stmt='np.cos(np.random.random())*np.random.random()', 36 | setup='import numpy as np') 37 | print('Hard coded function 1 time: ', time_hardcoded1) 38 | 39 | # Test 2 ---------------------------------------------------------------------- 40 | print('\nTest function 2: ') 41 | time_sympy2 = timeit.timeit( 42 | stmt='f(np.random.random(), np.random.random(), np.random.random(),\ 43 | np.random.random(), np.random.random(), np.random.random())', 44 | setup='import numpy as np;\ 45 | import sympy as sp;\ 46 | q0 = sp.Symbol("q0");\ 47 | q1 = sp.Symbol("q1");\ 48 | q2 = sp.Symbol("q2");\ 49 | l0 = sp.Symbol("l0");\ 50 | l1 = sp.Symbol("l1");\ 51 | l2 = sp.Symbol("l2");\ 52 | a = l1*sp.sin(q0 - l0*sp.sin(q1)*sp.cos(q2) - l2*sp.sin(q2) -\ 53 | l0*sp.sin(q1) + q0*l0)*sp.cos(q0) + l2*sp.sin(q0);\ 54 | f = sp.lambdify((q0,q1,q2,l0,l1,l2), a, "numpy")') 55 | print('Sympy lambdify function 2 time: ', time_sympy2) 56 | 57 | time_hardcoded2 = timeit.timeit( 58 | stmt='l1*np.sin(q0 - l0*np.sin(q1)*np.cos(q2) - l2*np.sin(q2) -\ 59 | l0*np.sin(q1) + q0*l0)*np.cos(q0) + l2*np.sin(q0)', 60 | setup='import numpy as np;\ 61 | q0 = np.random.random();\ 62 | q1 = np.random.random();\ 63 | q2 = np.random.random();\ 64 | l0 = np.random.random();\ 65 | l1 = np.random.random();\ 66 | l2 = np.random.random()') 67 | print('Hard coded function 2 time: ', time_hardcoded2) 68 | 69 | ind = np.arange(2) 70 | width = 0.35 71 | fig, ax = plt.subplots(figsize=(5, 3)) 72 | plt.bar(ind, [time_sympy1, 
time_sympy2], width, color='b') 73 | plt.bar(ind + width, [time_hardcoded1, time_hardcoded2], width, color='r') 74 | plt.xlim([0, ind[-1]+width*2]) 75 | plt.ylabel('Simulation time (s)') 76 | ax.set_xticks(ind + width) 77 | ax.set_xticklabels(['Function 1', 'Function 2']) 78 | ax.legend(['Sympy', 'Hard-coded'], loc='best') 79 | 80 | plt.show() 81 | -------------------------------------------------------------------------------- /SymPy/test_timing2.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn 3 | import timeit 4 | 5 | 6 | print('\nTest function 1: ') 7 | time_sympy1 = timeit.timeit( 8 | stmt='f(np.random.random(), np.random.random())', 9 | setup='import numpy as np;\ 10 | from sympy.utilities.autowrap import autowrap;\ 11 | import sympy as sp;\ 12 | q0 = sp.Symbol("q0");\ 13 | l0 = sp.Symbol("l0");\ 14 | a = sp.cos(q0) * l0;\ 15 | f = autowrap(a, backend="cython", args=(q0, l0))') 16 | print('Sympy autowrap function 1 time: ', time_sympy1) 17 | 18 | time_hardcoded1 = timeit.timeit( 19 | stmt='np.cos(np.random.random())*np.random.random()', 20 | setup='import numpy as np') 21 | print('Hard coded function 1 time: ', time_hardcoded1) 22 | 23 | print('\nTest function 2: ') 24 | time_sympy2 = timeit.timeit( 25 | stmt='f(np.random.random(), np.random.random(), np.random.random(),\ 26 | np.random.random(), np.random.random(), np.random.random())', 27 | setup='import numpy as np;\ 28 | from sympy.utilities.autowrap import autowrap;\ 29 | import sympy as sp;\ 30 | q0 = sp.Symbol("q0");\ 31 | q1 = sp.Symbol("q1");\ 32 | q2 = sp.Symbol("q2");\ 33 | l0 = sp.Symbol("l0");\ 34 | l1 = sp.Symbol("l1");\ 35 | l2 = sp.Symbol("l2");\ 36 | a = l1*sp.sin(q0 - l0*sp.sin(q1)*sp.cos(q2) - l2*sp.sin(q2) -\ 37 | l0*sp.sin(q1) + q0*l0)*sp.cos(q0) + l2*sp.sin(q0);\ 38 | f = autowrap(a, backend="cython", args=(q0, q1, q2, l0, l1, l2))') 39 | print('Sympy autowrap function 2 time: ', time_sympy2) 40 | 41 | time_hardcoded2 = timeit.timeit( 42 | stmt='q0 = np.random.random();\ 43 | q1 = np.random.random();\ 44 | q2 = np.random.random();\ 45 | l0 = np.random.random();\ 46 | l1 = np.random.random();\ 47 | l2 = np.random.random();\ 48 | l1*np.sin(q0 - l0*np.sin(q1)*np.cos(q2) - l2*np.sin(q2) -\ 49 | l0*np.sin(q1) + q0*l0)*np.cos(q0) + l2*np.sin(q0)', 50 | setup='import numpy as np') 51 | print('Hard coded function 2 time: ', time_hardcoded2) 52 | 53 | plt.figure(figsize=(5, 3)) 54 | plt.bar([.5, 3.5], [time_sympy1, time_sympy2], color='blue', width=1) 55 | plt.bar([1.5, 4.5], [time_hardcoded1, time_hardcoded2], color='red', width=1) 56 | plt.xticks([1.25, 4.25], ['Function 1', 'Function 2']) 57 | plt.ylabel('Simulation time (s)') 58 | plt.xlim([.5, 5.5]) 59 | plt.legend(['Sympy with Cython', 'Hard-coded'], loc=2) 60 | plt.tight_layout() 61 | plt.show() 62 | -------------------------------------------------------------------------------- /SymPy/ur5_config/EE.J: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/EE.J -------------------------------------------------------------------------------- /SymPy/ur5_config/EE.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/EE.T -------------------------------------------------------------------------------- 
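test_timing.py and test_timing2.py above compare sympy.lambdify and sympy.utilities.autowrap (Cython backend) against hard-coded NumPy versions of the same expressions. Before timing a generated function it can be reassuring to spot-check that it agrees numerically with the hand-written version; a minimal sketch using test function 1 from test_timing.py (not part of the repo):

import numpy as np
import sympy as sp

q0, l0 = sp.symbols('q0 l0')
expr = sp.cos(q0) * l0  # test function 1
f = sp.lambdify((q0, l0), expr, 'numpy')

q, l = np.random.random(2)
assert np.isclose(f(q, l), np.cos(q) * l)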
/SymPy/ur5_config/Mq: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/Mq -------------------------------------------------------------------------------- /SymPy/ur5_config/Mq_g: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/Mq_g -------------------------------------------------------------------------------- /SymPy/ur5_config/joint0.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/joint0.T -------------------------------------------------------------------------------- /SymPy/ur5_config/joint1.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/joint1.T -------------------------------------------------------------------------------- /SymPy/ur5_config/joint2.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/joint2.T -------------------------------------------------------------------------------- /SymPy/ur5_config/joint3.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/joint3.T -------------------------------------------------------------------------------- /SymPy/ur5_config/joint4.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/joint4.T -------------------------------------------------------------------------------- /SymPy/ur5_config/joint5.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/joint5.T -------------------------------------------------------------------------------- /SymPy/ur5_config/link0.J: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link0.J -------------------------------------------------------------------------------- /SymPy/ur5_config/link0.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link0.T -------------------------------------------------------------------------------- /SymPy/ur5_config/link1.J: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link1.J -------------------------------------------------------------------------------- /SymPy/ur5_config/link1.T: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link1.T -------------------------------------------------------------------------------- /SymPy/ur5_config/link1.T_inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link1.T_inv -------------------------------------------------------------------------------- /SymPy/ur5_config/link2.J: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link2.J -------------------------------------------------------------------------------- /SymPy/ur5_config/link2.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link2.T -------------------------------------------------------------------------------- /SymPy/ur5_config/link2.T_inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link2.T_inv -------------------------------------------------------------------------------- /SymPy/ur5_config/link3.J: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link3.J -------------------------------------------------------------------------------- /SymPy/ur5_config/link3.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link3.T -------------------------------------------------------------------------------- /SymPy/ur5_config/link3.T_inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link3.T_inv -------------------------------------------------------------------------------- /SymPy/ur5_config/link4.J: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link4.J -------------------------------------------------------------------------------- /SymPy/ur5_config/link4.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link4.T -------------------------------------------------------------------------------- /SymPy/ur5_config/link4.T_inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link4.T_inv -------------------------------------------------------------------------------- /SymPy/ur5_config/link5.J: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link5.J 
-------------------------------------------------------------------------------- /SymPy/ur5_config/link5.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link5.T -------------------------------------------------------------------------------- /SymPy/ur5_config/link6.J: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link6.J -------------------------------------------------------------------------------- /SymPy/ur5_config/link6.T: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/SymPy/ur5_config/link6.T -------------------------------------------------------------------------------- /VE026a/Academic Robot Leaflet.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/VE026a/Academic Robot Leaflet.pdf -------------------------------------------------------------------------------- /VREP/pendulum_gravity/quad_arm_2.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/VREP/pendulum_gravity/quad_arm_2.ttt -------------------------------------------------------------------------------- /VREP/pendulum_gravity/quad_arm_2_ctrl.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2016 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | ''' 17 | import numpy as np 18 | import vrep 19 | 20 | # close any open connections 21 | vrep.simxFinish(-1) 22 | # Connect to the V-REP continuous server 23 | clientID = vrep.simxStart('127.0.0.1', 19997, True, True, 500, 5) 24 | 25 | try: 26 | if clientID != -1: # if we connected successfully 27 | print ('Connected to remote API server') 28 | 29 | # --------------------- Setup the simulation 30 | 31 | vrep.simxSynchronous(clientID,True) 32 | 33 | joint_names = ['Motor1'] # base, joints 1 and 2 34 | # joint target velocities discussed below 35 | joint_target_velocities = np.ones(len(joint_names)) * 10000.0 36 | 37 | # get the handles for each joint and set up streaming 38 | joint_handles = [vrep.simxGetObjectHandle(clientID, 39 | name, vrep.simx_opmode_blocking)[1] for name in joint_names] 40 | print 'Joint names: ', joint_names 41 | print 'Joint handles: ', joint_handles 42 | 43 | # get handle for target and set up streaming 44 | _, target_handle = vrep.simxGetObjectHandle(clientID, 45 | 'target', vrep.simx_opmode_blocking) 46 | # get handle for hand 47 | _, hand_handle= vrep.simxGetObjectHandle(clientID, 48 | 'hand', vrep.simx_opmode_blocking) 49 | 50 | # define a set of targets 51 | center = np.array([0, .15, 0.8]) 52 | dist = .1 53 | num_targets = 5 54 | target_positions = np.array([ 55 | [dist*np.cos(theta), dist*np.cos(theta), dist*np.sin(theta)] \ 56 | + center for theta in np.linspace(0, np.pi, num_targets)]) 57 | 58 | # define variables to share with nengo 59 | q = np.zeros(len(joint_handles)) 60 | dq = np.zeros(len(joint_handles)) 61 | 62 | # --------------------- Start the simulation 63 | 64 | dt = .001 65 | vrep.simxSetFloatingParameter(clientID, 66 | vrep.sim_floatparam_simulation_time_step, 67 | dt, # specify a simulation time step 68 | vrep.simx_opmode_oneshot) 69 | 70 | # start our simulation in lockstep with our code 71 | vrep.simxStartSimulation(clientID, 72 | vrep.simx_opmode_blocking) 73 | 74 | count = 0 75 | track_hand = [] 76 | track_target = [] 77 | target_index = 0 78 | change_target_time = dt*10000 79 | 80 | # NOTE: main loop starts here --------------------------------------------- 81 | while count < len(target_positions) * change_target_time: # run this many seconds 82 | 83 | # every so often move the target 84 | if (count % change_target_time) < dt: 85 | vrep.simxSetObjectPosition(clientID, 86 | target_handle, 87 | -1, # set absolute, not relative position 88 | target_positions[target_index], 89 | vrep.simx_opmode_blocking) 90 | target_index += 1 91 | 92 | # get the (x,y,z) position of the target 93 | _, target_xyz = vrep.simxGetObjectPosition(clientID, 94 | target_handle, 95 | -1, # retrieve absolute, not relative, position 96 | vrep.simx_opmode_blocking) 97 | if _ !=0 : raise Exception() 98 | track_target.append(np.copy(target_xyz)) # store for plotting 99 | target_xyz = np.asarray(target_xyz) 100 | 101 | for ii,joint_handle in enumerate(joint_handles): 102 | old_q = np.copy(q) 103 | # get the joint angles 104 | _, q[ii] = vrep.simxGetJointPosition(clientID, 105 | joint_handle, 106 | vrep.simx_opmode_blocking) 107 | if _ !=0 : raise Exception() 108 | 109 | # get the joint velocity 110 | _, dq[ii] = vrep.simxGetObjectFloatParameter(clientID, 111 | joint_handle, 112 | 2012, # parameter ID for angular velocity of the joint 113 | vrep.simx_opmode_blocking) 114 | if _ !=0 : raise Exception() 115 | 116 | # update end-effector position 117 | L = np.array([0, .35]) # segment lengths associated with each joint 118 | s = np.sin(q) 119 | c = np.cos(q) 120 | 121 | 
xyz = np.array([ 122 | 0, 123 | L[1]*c[0], 124 | L[1]*s[0]]) 125 | xyz += np.array([0, 0, 0.425]) 126 | 127 | track_hand.append(np.copy(xyz)) # store for plotting 128 | 129 | # Plot where we think the hand is 130 | vrep.simxSetObjectPosition(clientID, 131 | hand_handle, 132 | -1, # set absolute, not relative position 133 | xyz, 134 | vrep.simx_opmode_blocking) 135 | 136 | # set up compensation for the mass 137 | gravity = np.array([0, 0, -9.81, 0, 0, 0,]) 138 | Mbar = np.diag([.1, .1, .1, .05, .05, .05]) 139 | Mbar_g = np.dot(Mbar, gravity) 140 | m = 1.0 141 | i = 0.5 142 | Mblock = np.diag([m,m,m,i,i,i]) 143 | Mblock_g = np.dot(Mblock, gravity) 144 | 145 | # calculate Jacobian for motor1-2 joint 146 | # xyz_m12 = [ 147 | # 0, 148 | # L[1]*c[0], 149 | # L[1]*s[0]] 150 | Jm12_joint = np.zeros((6,1)) 151 | Jm12_joint[:3,0] = [ 152 | 0, 153 | -L[1]*s[0], 154 | L[1]*c[0]] 155 | # TODO: confirm that the axis of rotation was added to the right column 156 | Jm12_joint[3,0] = 1.0 157 | 158 | Mm12_joint_g = np.dot(Jm12_joint.T, Mblock_g) 159 | 160 | # calculate the effects of gravity 161 | Mq_g = Mm12_joint_g 162 | 163 | u = -Mq_g 164 | u *= -1 # because the joints on the arm are backwards 165 | print 'u: ', u 166 | 167 | for ii,joint_handle in enumerate(joint_handles): 168 | # the way we're going to do force control is by setting 169 | # the target velocities of each joint super high and then 170 | # controlling the max torque allowed (yeah, i know) 171 | 172 | # get the current joint torque 173 | _, torque = \ 174 | vrep.simxGetJointForce(clientID, 175 | joint_handle, 176 | vrep.simx_opmode_blocking) 177 | if _ !=0 : raise Exception() 178 | 179 | # if force has changed signs, 180 | # we need to change the target velocity sign 181 | if np.sign(torque) * np.sign(u[ii]) <= 0: 182 | joint_target_velocities[ii] = \ 183 | joint_target_velocities[ii] * -1 184 | vrep.simxSetJointTargetVelocity(clientID, 185 | joint_handle, 186 | joint_target_velocities[ii], # target velocity 187 | vrep.simx_opmode_blocking) 188 | if _ !=0 : raise Exception() 189 | 190 | # and now modulate the force 191 | vrep.simxSetJointForce(clientID, 192 | joint_handle, 193 | abs(u[ii]), # force to apply 194 | vrep.simx_opmode_blocking) 195 | if _ !=0 : raise Exception() 196 | 197 | # move simulation ahead one time step 198 | vrep.simxSynchronousTrigger(clientID) 199 | count += dt 200 | else: 201 | raise Exception('Failed connecting to remote API server') 202 | except Exception, e: 203 | print e 204 | finally: 205 | # stop the simulation 206 | vrep.simxStopSimulation(clientID, vrep.simx_opmode_blocking) 207 | 208 | # Before closing the connection to V-REP, 209 | # make sure that the last command sent out had time to arrive. 210 | vrep.simxGetPingTime(clientID) 211 | 212 | # Now close the connection to V-REP: 213 | vrep.simxFinish(clientID) 214 | print 'connection closed...' 
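# --------------------------------------------------------------------------
# Editor's sketch (not part of the original controller): a quick check that
# the Jacobian-transpose gravity projection used in the loop above reduces
# to the textbook point-mass result for this single revolute joint. It
# assumes the same values as the script: link length L[1] = 0.35, mass
# m = 1.0, gravity along -z in world coordinates; the angular rows of the
# Jacobian drop out because gravity has no rotational component. The two
# function names below are illustrative, not part of the original code.
def gravity_torque_via_jacobian(q, L=0.35, m=1.0):
    J = np.array([0.0, -L * np.sin(q), L * np.cos(q)])  # linear Jacobian column
    g = np.array([0.0, 0.0, -9.81])                      # world-frame gravity
    return np.dot(J, m * g)                              # tau_g = J^T (m g)

def gravity_torque_closed_form(q, L=0.35, m=1.0):
    return -m * 9.81 * L * np.cos(q)                     # -m g L cos(q)

# both give the same gravity torque, e.g. at q = 0.3 rad
assert np.isclose(gravity_torque_via_jacobian(0.3), gravity_torque_closed_form(0.3))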
215 | 216 | import matplotlib as mpl 217 | from mpl_toolkits.mplot3d import Axes3D 218 | import matplotlib.pyplot as plt 219 | 220 | track_hand = np.array(track_hand) 221 | track_target = np.array(track_target) 222 | 223 | fig = plt.figure() 224 | ax = fig.gca(projection='3d') 225 | # plot start point of hand 226 | ax.plot([track_hand[0,0]], [track_hand[0,1]], [track_hand[0,2]], 'bx', mew=10) 227 | # plot trajectory of hand 228 | ax.plot(track_hand[:,0], track_hand[:,1], track_hand[:,2]) 229 | # plot trajectory of target 230 | ax.plot(track_target[:,0], track_target[:,1], track_target[:,2], 'rx', mew=10) 231 | 232 | ax.set_xlim([-1, 1]) 233 | ax.set_ylim([-.5, .5]) 234 | ax.set_zlim([0, 1]) 235 | ax.legend() 236 | 237 | plt.show() 238 | -------------------------------------------------------------------------------- /VREP/pendulum_gravity/remoteApi.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/VREP/pendulum_gravity/remoteApi.so -------------------------------------------------------------------------------- /VREP/two_link_arm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/VREP/two_link_arm/__init__.py -------------------------------------------------------------------------------- /VREP/two_link_arm/remoteApi.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/VREP/two_link_arm/remoteApi.so -------------------------------------------------------------------------------- /VREP/two_link_arm/vrep_twolink.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/VREP/two_link_arm/vrep_twolink.ttt -------------------------------------------------------------------------------- /VREP/two_link_arm/vrep_twolink_controller.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2016 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | ''' 17 | import numpy as np 18 | import vrep 19 | 20 | try: 21 | # close any open connections 22 | vrep.simxFinish(-1) 23 | # Connect to the V-REP continuous server 24 | clientID = vrep.simxStart('127.0.0.1', 19997, True, True, 500, 5) 25 | 26 | if clientID != -1: # if we connected successfully 27 | print ('Connected to remote API server') 28 | 29 | # --------------------- Setup the simulation 30 | 31 | vrep.simxSynchronous(clientID,True) 32 | 33 | joint_names = ['shoulder', 'elbow'] 34 | # joint target velocities discussed below 35 | joint_target_velocities = np.ones(len(joint_names)) * 10000.0 36 | 37 | # get the handles for each joint and set up streaming 38 | joint_handles = [vrep.simxGetObjectHandle(clientID, 39 | name, vrep.simx_opmode_blocking)[1] for name in joint_names] 40 | 41 | # get handle for target and set up streaming 42 | _, target_handle = vrep.simxGetObjectHandle(clientID, 43 | 'target', vrep.simx_opmode_blocking) 44 | 45 | dt = .001 46 | vrep.simxSetFloatingParameter(clientID, 47 | vrep.sim_floatparam_simulation_time_step, 48 | dt, # specify a simulation time step 49 | vrep.simx_opmode_oneshot) 50 | 51 | # --------------------- Start the simulation 52 | 53 | # start our simulation in lockstep with our code 54 | vrep.simxStartSimulation(clientID, 55 | vrep.simx_opmode_blocking) 56 | 57 | count = 0 58 | track_hand = [] 59 | track_target = [] 60 | while count < 1: # run for 1 simulated second 61 | 62 | # get the (x,y,z) position of the target 63 | _, target_xyz = vrep.simxGetObjectPosition(clientID, 64 | target_handle, 65 | -1, # retrieve absolute, not relative, position 66 | vrep.simx_opmode_blocking) 67 | if _ !=0 : raise Exception() 68 | track_target.append(np.copy(target_xyz)) # store for plotting 69 | target_xyz = np.asarray(target_xyz) 70 | 71 | q = np.zeros(len(joint_handles)) 72 | dq = np.zeros(len(joint_handles)) 73 | for ii,joint_handle in enumerate(joint_handles): 74 | # get the joint angles 75 | _, q[ii] = vrep.simxGetJointPosition(clientID, 76 | joint_handle, 77 | vrep.simx_opmode_blocking) 78 | if _ !=0 : raise Exception() 79 | # get the joint velocity 80 | _, dq[ii] = vrep.simxGetObjectFloatParameter(clientID, 81 | joint_handle, 82 | 2012, # parameter ID for angular velocity of the joint 83 | vrep.simx_opmode_blocking) 84 | if _ !=0 : raise Exception() 85 | 86 | L = np.array([.42, .225]) # arm segment lengths 87 | 88 | xyz = np.array([L[0] * np.cos(q[0]) + L[1] * np.cos(q[0]+q[1]), 89 | 0, 90 | # have to add .1 offset to z position 91 | L[0] * np.sin(q[0]) + L[1] * np.sin(q[0]+q[1]) + .15]) 92 | track_hand.append(np.copy(xyz)) # store for plotting 93 | # calculate the Jacobian for the hand 94 | JEE = np.zeros((3,2)) 95 | JEE[0,1] = L[1] * -np.sin(q[0]+q[1]) 96 | JEE[2,1] = L[1] * np.cos(q[0]+q[1]) 97 | JEE[0,0] = L[0] * -np.sin(q[0]) + JEE[0,1] 98 | JEE[2,0] = L[0] * np.cos(q[0]) + JEE[2,1] 99 | 100 | # get the Jacobians for the centres-of-mass for the arm segments 101 | JCOM1 = np.zeros((6,2)) 102 | JCOM1[0,0] = L[0] * -np.sin(q[0]) # COM is in a weird place 103 | JCOM1[2,0] = L[0] * np.cos(q[0]) # because of offset 104 | JCOM1[4,0] = 1.0 105 | 106 | JCOM2 = np.zeros((6,2)) 107 | JCOM2[0,1] = L[1] * -np.sin(q[0]+q[1]) # COM is in a weird place 108 | JCOM2[2,1] = L[1] * np.cos(q[0]+q[1]) # because of offset 109 | JCOM2[4,1] = 1.0 110 | JCOM2[0,0] = L[0] * -np.sin(q[0]) + JCOM2[0,1] 111 | JCOM2[2,0] = L[0] * np.cos(q[0]) + JCOM2[2,1] 112 | JCOM2[4,0] = 1.0 113 | 114 | m1 = 1.171 # from VREP 115 | M1 = np.diag([m1, m1, m1, .008, .008, .001]) 116 | m2 = 
.329 # from VREP 117 | M2 = np.diag([m2, m2, m2, 5e-4, 5e-4, 1e-4]) 118 | 119 | # generate the mass matrix in joint space 120 | Mq = np.dot(JCOM1.T, np.dot(M1, JCOM1)) + \ 121 | np.dot(JCOM2.T, np.dot(M2, JCOM2)) 122 | 123 | # compensate for gravity 124 | gravity = np.array([0, 0, -9.81, 0, 0, 0,]) 125 | Mq_g = np.dot(JCOM1.T, np.dot(M1, gravity)) + \ 126 | np.dot(JCOM2.T, np.dot(M2, gravity)) 127 | 128 | Mx_inv = np.dot(JEE, np.dot(np.linalg.inv(Mq), JEE.T)) 129 | Mu,Ms,Mv = np.linalg.svd(Mx_inv) 130 | # cut off any singular values that could cause control problems 131 | for i in range(len(Ms)): 132 | Ms[i] = 0 if Ms[i] < 1e-5 else 1./float(Ms[i]) 133 | # numpy returns U,S,V.T, so have to transpose both here 134 | Mx = np.dot(Mv.T, np.dot(np.diag(Ms), Mu.T)) 135 | 136 | # calculate desired movement in operational (hand) space 137 | kp = 100 138 | kv = np.sqrt(kp) 139 | u_xyz = np.dot(Mx, kp * (target_xyz - xyz)) 140 | 141 | u = np.dot(JEE.T, u_xyz) - np.dot(Mq, kv * dq) - Mq_g 142 | u *= -1 # because the joints on the arm are backwards 143 | 144 | for ii,joint_handle in enumerate(joint_handles): 145 | # the way we're going to do force control is by setting 146 | # the target velocities of each joint super high and then 147 | # controlling the max torque allowed (yeah, i know) 148 | 149 | # get the current joint torque 150 | _, torque = \ 151 | vrep.simxGetJointForce(clientID, 152 | joint_handle, 153 | vrep.simx_opmode_blocking) 154 | if _ !=0 : raise Exception() 155 | 156 | # if force has changed signs, 157 | # we need to change the target velocity sign 158 | if np.sign(torque) * np.sign(u[ii]) < 0: 159 | joint_target_velocities[ii] = \ 160 | joint_target_velocities[ii] * -1 161 | vrep.simxSetJointTargetVelocity(clientID, 162 | joint_handle, 163 | joint_target_velocities[ii], # target velocity 164 | vrep.simx_opmode_blocking) 165 | if _ !=0 : raise Exception() 166 | 167 | # and now modulate the force 168 | vrep.simxSetJointForce(clientID, 169 | joint_handle, 170 | abs(u[ii]), # force to apply 171 | vrep.simx_opmode_blocking) 172 | if _ !=0 : raise Exception() 173 | 174 | # move simulation ahead one time step 175 | vrep.simxSynchronousTrigger(clientID) 176 | count += dt 177 | 178 | # stop the simulation 179 | vrep.simxStopSimulation(clientID, vrep.simx_opmode_blocking) 180 | 181 | # Before closing the connection to V-REP, 182 | #make sure that the last command sent out had time to arrive. 183 | vrep.simxGetPingTime(clientID) 184 | 185 | # Now close the connection to V-REP: 186 | vrep.simxFinish(clientID) 187 | else: 188 | raise Exception('Failed connecting to remote API server') 189 | 190 | finally: 191 | 192 | # stop the simulation 193 | vrep.simxStopSimulation(clientID, vrep.simx_opmode_blocking) 194 | 195 | # Before closing the connection to V-REP, 196 | # make sure that the last command sent out had time to arrive. 
197 | vrep.simxGetPingTime(clientID) 198 | 199 | # Now close the connection to V-REP: 200 | vrep.simxFinish(clientID) 201 | print('connection closed...') 202 | 203 | import matplotlib as mpl 204 | from mpl_toolkits.mplot3d import Axes3D 205 | import matplotlib.pyplot as plt 206 | 207 | track_hand = np.array(track_hand) 208 | track_target = np.array(track_target) 209 | 210 | fig = plt.figure() 211 | ax = fig.gca(projection='3d') 212 | # plot start point of hand 213 | ax.plot([track_hand[0,0]], [track_hand[0,1]], [track_hand[0,2]], 'bx', mew=10) 214 | # plot trajectory of hand 215 | ax.plot(track_hand[:,0], track_hand[:,1], track_hand[:,2]) 216 | # plot trajectory of target 217 | ax.plot(track_target[:,0], track_target[:,1], track_target[:,2], 'rx', mew=10) 218 | 219 | ax.set_xlim([-1, 1]) 220 | ax.set_ylim([-.5, .5]) 221 | ax.set_zlim([0, 1]) 222 | ax.legend() 223 | 224 | plt.show() 225 | -------------------------------------------------------------------------------- /VREP/two_link_arm/vrep_twolink_nengo.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/VREP/two_link_arm/vrep_twolink_nengo.ttt -------------------------------------------------------------------------------- /orientation/UR5.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/studywolf/blog/62fc3d3aaac242bde5a0643e16aee03a6a061380/orientation/UR5.ttt -------------------------------------------------------------------------------- /orientation/orientation_control.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2018 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | ''' 17 | import numpy as np 18 | 19 | from abr_control.arms import ur5 as arm 20 | from abr_control.interfaces import VREP 21 | from abr_control.utils import transformations 22 | 23 | 24 | # initialize our robot config 25 | robot_config = arm.Config(use_cython=True) 26 | 27 | # create our interface 28 | interface = VREP(robot_config, dt=.005) 29 | interface.connect() 30 | 31 | # specify which parameters [x, y, z, alpha, beta, gamma] to control 32 | # NOTE: needs to be an array to properly select elements of J and u_task 33 | ctrlr_dof = np.array([False, False, False, True, True, True]) 34 | 35 | # control gains 36 | kp = 300 37 | ko = 300 38 | kv = np.sqrt(kp+ko) 39 | 40 | orientations = [ 41 | [0, 0, 0], 42 | [np.pi/4, np.pi/4, np.pi/4], 43 | [-np.pi/4, -np.pi/4, np.pi/2], 44 | [0, 0, 0], 45 | ] 46 | 47 | try: 48 | print('\nSimulation starting...\n') 49 | count = 0 50 | index = 0 51 | while 1: 52 | # get arm feedback 53 | feedback = interface.get_feedback() 54 | hand_xyz = robot_config.Tx('EE', feedback['q']) 55 | 56 | if (count % 200) == 0: 57 | # change target once every simulated second 58 | if index >= len(orientations): 59 | break 60 | interface.set_orientation('target', orientations[index]) 61 | index += 1 62 | 63 | target = np.hstack([ 64 | interface.get_xyz('target'), 65 | interface.get_orientation('target')]) 66 | 67 | # set the block to be the same orientation as end-effector 68 | R_e = robot_config.R('EE', feedback['q']) 69 | EA_e = transformations.euler_from_matrix(R_e, axes='rxyz') 70 | interface.set_orientation('object', EA_e) 71 | 72 | # calculate the Jacobian for the end effectora 73 | # and isolate relevate dimensions 74 | J = robot_config.J('EE', q=feedback['q'])[ctrlr_dof] 75 | 76 | # calculate the inertia matrix in task space 77 | M = robot_config.M(q=feedback['q']) 78 | 79 | # calculate the inertia matrix in task space 80 | M_inv = np.linalg.inv(M) 81 | Mx_inv = np.dot(J, np.dot(M_inv, J.T)) 82 | if np.linalg.det(Mx_inv) != 0: 83 | # do the linalg inverse if matrix is non-singular 84 | # because it's faster and more accurate 85 | Mx = np.linalg.inv(Mx_inv) 86 | else: 87 | # using the rcond to set singular values < thresh to 0 88 | # singular values < (rcond * max(singular_values)) set to 0 89 | Mx = np.linalg.pinv(Mx_inv, rcond=.005) 90 | 91 | u_task = np.zeros(6) # [x, y, z, alpha, beta, gamma] 92 | 93 | # calculate position error 94 | u_task[:3] = -kp * (hand_xyz - target[:3]) 95 | 96 | # Method 1 ------------------------------------------------------------ 97 | # 98 | # transform the orientation target into a quaternion 99 | q_d = transformations.unit_vector( 100 | transformations.quaternion_from_euler( 101 | target[3], target[4], target[5], axes='rxyz')) 102 | 103 | # get the quaternion for the end effector 104 | q_e = transformations.unit_vector( 105 | transformations.quaternion_from_matrix( 106 | robot_config.R('EE', feedback['q']))) 107 | # calculate the rotation from the end-effector to target orientation 108 | q_r = transformations.quaternion_multiply( 109 | q_d, transformations.quaternion_conjugate(q_e)) 110 | u_task[3:] = q_r[1:] * np.sign(q_r[0]) 111 | 112 | 113 | # Method 2 ------------------------------------------------------------ 114 | # From (Caccavale et al, 1997) Section IV - Quaternion feedback ------- 115 | # 116 | # get rotation matrix for the end effector orientation 117 | # R_EE = robot_config.R('EE', feedback['q']) 118 | # # get rotation matrix for the target orientation 119 | # R_d = transformations.euler_matrix( 120 | # target[3], 
target[4], target[5], axes='rxyz')[:3, :3] 121 | # R_ed = np.dot(R_EE.T, R_target) # eq 24 122 | # q_e = transformations.quaternion_from_matrix(R_ed) 123 | # q_e = transformations.unit_vector(q_e) 124 | # u_task[3:] = np.dot(R_EE, q_e[1:]) # eq 34 125 | 126 | 127 | # Method 3 ------------------------------------------------------------ 128 | # From (Caccavale et al, 1997) Section V - Angle/axis feedback -------- 129 | # 130 | # R_EE = robot_config.R('EE', feedback['q']) 131 | # # get rotation matrix for the target orientation 132 | # R_d = transformations.euler_matrix( 133 | # target[3], target[4], target[5], axes='rxyz')[:3, :3] 134 | # R = np.dot(R_target, R_EE.T) # eq 44 135 | # q_e = transformations.quaternion_from_matrix(R) 136 | # q_e = transformations.unit_vector(q_e) 137 | # u_task[3:] = 2 * q_e[0] * q_e[1:] # eq 45 138 | 139 | 140 | # Method 4 ------------------------------------------------------------- 141 | # From (Yuan, 1988) and (Nakanishi et al, 2008) ------------------------ 142 | # NOTE: This implementation is not working properly -------------------- 143 | # 144 | # # transform the orientation target into a quaternion 145 | # q_d = transformations.unit_vector( 146 | # transformations.quaternion_from_euler( 147 | # target[3], target[4], target[5], axes='rxyz')) 148 | # # get the quaternion for the end effector 149 | # q_e = transformations.unit_vector( 150 | # transformations.quaternion_from_matrix( 151 | # robot_config.R('EE', feedback['q']))) 152 | # 153 | # # given r = [r1, r2, r3] 154 | # # r^x = [[0, -r3, r2], [r3, 0, -r1], [-r2, r1, 0]] 155 | # S = np.array([ 156 | # [0.0, -q_d[2], q_d[1]], 157 | # [q_d[2], 0.0, -q_d[0]], 158 | # [-q_d[1], q_d[0], 0.0]]) 159 | # 160 | # # calculate the difference between q_e and q_d 161 | # # from (Nakanishi et al, 2008). NOTE: the sign of the last term 162 | # # is different from (Yuan, 1998) to account for Euler angles in 163 | # # world coordinates instead of local coordinates. 164 | # # dq = (w_d * [x, y, z] - w * [x_d, y_d, z_d] - 165 | # # [x_d, y_d, z_d]^x * [x, y, z]) 166 | # # the sign of quaternion that moves between q_e and q_d 167 | # u_task[3:] = -(q_d[0] * q_e[1:] - q_e[0] * q_d[1:] + 168 | # np.dot(S, q_e[1:])) 169 | # 170 | # --------------------------------------------------------------------- 171 | 172 | u_task[3:] *= ko # scale orientation signal by orientation gain 173 | # remove uncontrolled dimensions from u_task 174 | u_task = u_task[ctrlr_dof] 175 | 176 | # transform from operational space to torques 177 | # add in velocity and gravity compensation in joint space 178 | u = (np.dot(J.T, np.dot(Mx, u_task)) - 179 | kv * np.dot(M, feedback['dq']) - 180 | robot_config.g(q=feedback['q'])) 181 | 182 | # apply the control signal, step the sim forward 183 | interface.send_forces(u) 184 | count += 1 185 | 186 | finally: 187 | interface.disconnect() 188 | -------------------------------------------------------------------------------- /orientation/position_and_orientation_control.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2018 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 
8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 16 | ''' 17 | import numpy as np 18 | 19 | from abr_control.arms import ur5 as arm 20 | from abr_control.interfaces import VREP 21 | from abr_control.utils import transformations 22 | 23 | 24 | # initialize our robot config 25 | robot_config = arm.Config(use_cython=True) 26 | 27 | # create our interface 28 | interface = VREP(robot_config, dt=.005) 29 | interface.connect() 30 | 31 | # specify which parameters [x, y, z, alpha, beta, gamma] to control 32 | # NOTE: needs to be an array to properly select elements of J and u_task 33 | ctrlr_dof = np.array([True, True, True, True, True, True]) 34 | 35 | # control gains 36 | kp = 300 37 | ko = 300 38 | kv = np.sqrt(kp+ko) * 1.5 39 | 40 | orientations = [ 41 | [0, 0, 0], 42 | [np.pi/4, np.pi/4, np.pi/4], 43 | [-np.pi/4, -np.pi/4, np.pi/2], 44 | [0, 0, 0], 45 | ] 46 | positions =[ 47 | [0.15, -0.1, 0.6], 48 | [-.15, 0.0, .7], 49 | [.2, .2, .6], 50 | [0.15, -0.1, 0.6] 51 | ] 52 | 53 | try: 54 | print('\nSimulation starting...\n') 55 | count = 0 56 | index = 0 57 | while 1: 58 | # get arm feedback 59 | feedback = interface.get_feedback() 60 | hand_xyz = robot_config.Tx('EE', feedback['q']) 61 | 62 | if (count % 200) == 0: 63 | # change target once every simulated second 64 | if index >= len(orientations): 65 | break 66 | interface.set_orientation('target', orientations[index]) 67 | interface.set_xyz('target', positions[index]) 68 | index += 1 69 | 70 | target = np.hstack([ 71 | interface.get_xyz('target'), 72 | interface.get_orientation('target')]) 73 | 74 | # set the block to be the same orientation as end-effector 75 | R_e = robot_config.R('EE', feedback['q']) 76 | EA_e = transformations.euler_from_matrix(R_e, axes='rxyz') 77 | interface.set_orientation('object', EA_e) 78 | 79 | # calculate the Jacobian for the end effectora 80 | # and isolate relevate dimensions 81 | J = robot_config.J('EE', q=feedback['q'])[ctrlr_dof] 82 | 83 | # calculate the inertia matrix in task space 84 | M = robot_config.M(q=feedback['q']) 85 | 86 | # calculate the inertia matrix in task space 87 | M_inv = np.linalg.inv(M) 88 | Mx_inv = np.dot(J, np.dot(M_inv, J.T)) 89 | if np.linalg.det(Mx_inv) != 0: 90 | # do the linalg inverse if matrix is non-singular 91 | # because it's faster and more accurate 92 | Mx = np.linalg.inv(Mx_inv) 93 | else: 94 | # using the rcond to set singular values < thresh to 0 95 | # singular values < (rcond * max(singular_values)) set to 0 96 | Mx = np.linalg.pinv(Mx_inv, rcond=.005) 97 | 98 | u_task = np.zeros(6) # [x, y, z, alpha, beta, gamma] 99 | 100 | # calculate position error 101 | u_task[:3] = -kp * (hand_xyz - target[:3]) 102 | 103 | # Method 1 ------------------------------------------------------------ 104 | # 105 | # transform the orientation target into a quaternion 106 | q_d = transformations.unit_vector( 107 | transformations.quaternion_from_euler( 108 | target[3], target[4], target[5], axes='rxyz')) 109 | 110 | # get the quaternion for the end effector 111 | q_e = transformations.unit_vector( 112 | transformations.quaternion_from_matrix( 113 | robot_config.R('EE', feedback['q']))) 114 | # calculate the rotation from the end-effector to target orientation 115 | q_r 
= transformations.quaternion_multiply( 116 | q_d, transformations.quaternion_conjugate(q_e)) 117 | u_task[3:] = q_r[1:] * np.sign(q_r[0]) 118 | 119 | 120 | # Method 2 ------------------------------------------------------------ 121 | # From (Caccavale et al, 1997) Section IV - Quaternion feedback ------- 122 | # 123 | # get rotation matrix for the end effector orientation 124 | # R_EE = robot_config.R('EE', feedback['q']) 125 | # # get rotation matrix for the target orientation 126 | # R_d = transformations.euler_matrix( 127 | # target[3], target[4], target[5], axes='rxyz')[:3, :3] 128 | # R_ed = np.dot(R_EE.T, R_target) # eq 24 129 | # q_e = transformations.quaternion_from_matrix(R_ed) 130 | # q_e = transformations.unit_vector(q_e) 131 | # u_task[3:] = np.dot(R_EE, q_e[1:]) # eq 34 132 | 133 | 134 | # Method 3 ------------------------------------------------------------ 135 | # From (Caccavale et al, 1997) Section V - Angle/axis feedback -------- 136 | # 137 | # R_EE = robot_config.R('EE', feedback['q']) 138 | # # get rotation matrix for the target orientation 139 | # R_d = transformations.euler_matrix( 140 | # target[3], target[4], target[5], axes='rxyz')[:3, :3] 141 | # R = np.dot(R_target, R_EE.T) # eq 44 142 | # q_e = transformations.quaternion_from_matrix(R) 143 | # q_e = transformations.unit_vector(q_e) 144 | # u_task[3:] = 2 * q_e[0] * q_e[1:] # eq 45 145 | 146 | 147 | # Method 4 ------------------------------------------------------------- 148 | # From (Yuan, 1988) and (Nakanishi et al, 2008) ------------------------ 149 | # NOTE: This implementation is not working properly -------------------- 150 | # 151 | # # transform the orientation target into a quaternion 152 | # q_d = transformations.unit_vector( 153 | # transformations.quaternion_from_euler( 154 | # target[3], target[4], target[5], axes='rxyz')) 155 | # # get the quaternion for the end effector 156 | # q_e = transformations.unit_vector( 157 | # transformations.quaternion_from_matrix( 158 | # robot_config.R('EE', feedback['q']))) 159 | # 160 | # # given r = [r1, r2, r3] 161 | # # r^x = [[0, -r3, r2], [r3, 0, -r1], [-r2, r1, 0]] 162 | # S = np.array([ 163 | # [0.0, -q_d[2], q_d[1]], 164 | # [q_d[2], 0.0, -q_d[0]], 165 | # [-q_d[1], q_d[0], 0.0]]) 166 | # 167 | # # calculate the difference between q_e and q_d 168 | # # from (Nakanishi et al, 2008). NOTE: the sign of the last term 169 | # # is different from (Yuan, 1998) to account for Euler angles in 170 | # # world coordinates instead of local coordinates. 
171 | # # dq = (w_d * [x, y, z] - w * [x_d, y_d, z_d] - 172 | # # [x_d, y_d, z_d]^x * [x, y, z]) 173 | # # the sign of quaternion that moves between q_e and q_d 174 | # u_task[3:] = -(q_d[0] * q_e[1:] - q_e[0] * q_d[1:] + 175 | # np.dot(S, q_e[1:])) 176 | # 177 | # --------------------------------------------------------------------- 178 | 179 | u_task[3:] *= ko # scale orientation signal by orientation gain 180 | # remove uncontrolled dimensions from u_task 181 | u_task = u_task[ctrlr_dof] 182 | 183 | # transform from operational space to torques 184 | # add in velocity and gravity compensation in joint space 185 | u = (np.dot(J.T, np.dot(Mx, u_task)) - 186 | kv * np.dot(M, feedback['dq']) - 187 | robot_config.g(q=feedback['q'])) 188 | 189 | # apply the control signal, step the sim forward 190 | interface.send_forces(u) 191 | count += 1 192 | 193 | finally: 194 | interface.disconnect() 195 | -------------------------------------------------------------------------------- /tensorflow_models/npg_cartpole/plot_data.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2018 Travis DeWolf 2 | 3 | This program is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program. If not, see . 
15 | """ 16 | 17 | import numpy as np 18 | import matplotlib.pyplot as plt 19 | 20 | def bootstrapci(data, func, n=3000, p=0.95): 21 | index = int(n*(1-p)/2) 22 | samples = np.random.choice(data, size=(n, len(data))) 23 | r = [func(s) for s in samples] 24 | r.sort() 25 | return r[index], r[-index] 26 | 27 | fig = plt.figure(figsize=(7, 3.5)) 28 | for name, color in zip( 29 | ['policy_gradient', 'natural_policy_gradient'], ['b', 'g']): 30 | 31 | # load in data 32 | all_max_rewards = [] 33 | all_total_episodes =[] 34 | for ii in range(10): 35 | data = np.load('data/%s_%i.npz' % (name, ii)) 36 | all_max_rewards.append(data['max_rewards']) 37 | all_total_episodes.append(data['total_episodes']) 38 | all_max_rewards = np.array(all_max_rewards) 39 | all_total_episodes = np.array(all_total_episodes) 40 | 41 | # calculate mean 42 | mean = np.mean(all_max_rewards, axis=0) 43 | # calculate 95% confidence intervals 44 | sample = [] 45 | upper_bound = [] 46 | lower_bound = [] 47 | for ii in range(all_max_rewards.shape[1]): 48 | data = all_max_rewards[:, ii] 49 | ci = bootstrapci(data, np.mean) 50 | sample.append(np.mean(data)) 51 | lower_bound.append(ci[0]) 52 | upper_bound.append(ci[1]) 53 | 54 | plt.plot( 55 | range(all_max_rewards.shape[1]), mean, color=color, lw=2) 56 | plt.fill_between( 57 | range(all_max_rewards.shape[1]), upper_bound, lower_bound, 58 | color=color, alpha=.5) 59 | 60 | plt.xlabel('Batch number') 61 | plt.ylabel('Max reward from batch') 62 | plt.legend(['Policy gradient', 'Natural policy gradient'], loc=4) 63 | plt.tight_layout() 64 | plt.show() 65 | -------------------------------------------------------------------------------- /tensorflow_models/npg_cartpole/policy_gradient.py: -------------------------------------------------------------------------------- 1 | """ Copyright (C) 2018 Travis DeWolf 2 | 3 | This program is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program. If not, see . 15 | 16 | 17 | Learning cart-pole with a policy network and value network. 18 | The policy network learns the actions to take, and the value network learns 19 | the expected reward from a given state, for use in calculating the advantage 20 | function. 21 | 22 | The value network is updated with a 2 norm loss * .5 without the sqrt on the 23 | difference from the calculated expected cost. The value network is used to 24 | calculated the advantage function at each point in time. 25 | 26 | The policy network is updated by calculating the policy gradient and 27 | advantage function. 
28 | 29 | Adapted from KVFrans: 30 | github.com/kvfrans/openai-cartpole/blob/master/cartpole-policygradient.py 31 | """ 32 | 33 | import tensorflow as tf 34 | import numpy as np 35 | import random 36 | import gym 37 | 38 | def policy_gradient(): 39 | with tf.variable_scope("policy"): 40 | params = tf.get_variable("policy_parameters",[4,2]) 41 | state = tf.placeholder("float",[None,4]) 42 | actions = tf.placeholder("float",[None,2]) 43 | advantages = tf.placeholder("float",[None,1]) 44 | linear = tf.matmul(state,params) 45 | probabilities = tf.nn.softmax(linear) 46 | 47 | # calculate the probability of the chosen action given the state 48 | good_probabilities = tf.reduce_sum( 49 | tf.multiply(probabilities, actions), reduction_indices=[1]) 50 | # calculate the policy gradient 51 | eligibility = tf.multiply(tf.log(good_probabilities), 52 | tf.squeeze(advantages)) 53 | loss = -tf.reduce_sum(eligibility) 54 | optimizer = tf.train.AdamOptimizer(0.01).minimize(loss) 55 | 56 | return probabilities, state, actions, advantages, optimizer 57 | 58 | def value_gradient(): 59 | with tf.variable_scope("value"): 60 | state = tf.placeholder("float", [None, 4]) 61 | newvals = tf.placeholder("float", [None]) 62 | w1 = tf.get_variable("w1", [4, 10]) 63 | b1 = tf.get_variable("b1", [10]) 64 | h1 = tf.nn.relu(tf.matmul(state, w1) + b1) 65 | w2 = tf.get_variable("w2", [10, 1]) 66 | b2 = tf.get_variable("b2", [1]) 67 | calculated = tf.matmul(h1, w2) + b2 68 | 69 | # minimize the difference between predicted and actual 70 | diffs = calculated - newvals 71 | loss = tf.nn.l2_loss(diffs) 72 | optimizer = tf.train.AdamOptimizer(0.1).minimize(loss) 73 | 74 | return calculated, state, newvals, optimizer, loss 75 | 76 | def run_episode(env, policy_grad, value_grad, sess): 77 | # unpack the policy network (generates control policy) 78 | (pl_calculated, pl_state, pl_actions, 79 | pl_advantages, pl_optimizer) = policy_grad 80 | # unpack the value network (estimates expected reward) 81 | (vl_calculated, vl_state, vl_newvals, 82 | vl_optimizer, vl_loss) = value_grad 83 | 84 | # set up the environment 85 | observation = env.reset() 86 | 87 | episode_reward = 0 88 | total_rewards = [] 89 | states = [] 90 | actions = [] 91 | advantages = [] 92 | transitions = [] 93 | update_vals = [] 94 | 95 | n_episodes = 0 96 | n_timesteps = 200 97 | for k in range(n_timesteps): 98 | # calculate policy 99 | obs_vector = np.expand_dims(observation, axis=0) 100 | probs = sess.run(pl_calculated, feed_dict={pl_state: obs_vector}) 101 | 102 | # stochastically generate action using the policy output 103 | action = 0 if random.uniform(0,1) < probs[0][0] else 1 104 | # record the transition 105 | states.append(observation) 106 | actionblank = np.zeros(2) 107 | actionblank[action] = 1 108 | actions.append(actionblank) 109 | # take the action in the environment 110 | old_observation = observation 111 | observation, reward, done, info = env.step(action) 112 | transitions.append((old_observation, action, reward)) 113 | episode_reward += reward 114 | 115 | # if the pole falls or time is up 116 | if done or k == n_timesteps - 1: 117 | for index, trans in enumerate(transitions): 118 | obs, action, reward = trans 119 | 120 | # calculate discounted monte-carlo return 121 | future_reward = 0 122 | future_transitions = len(transitions) - index 123 | decrease = 1 124 | for index2 in range(future_transitions): 125 | future_reward += transitions[(index2) + index][2] * decrease 126 | decrease = decrease * 0.97 127 | obs_vector = np.expand_dims(obs, axis=0) 128 | # 
compare the calculated expected reward to the average 129 | # expected reward, as estimated by the value network 130 | currentval = sess.run(vl_calculated, 131 | feed_dict={vl_state: obs_vector})[0][0] 132 | 133 | # advantage: how much better was this action than normal 134 | advantages.append(future_reward - currentval) 135 | 136 | # update the value function towards new return 137 | update_vals.append(future_reward) 138 | 139 | n_episodes += 1 140 | # reset variables for next episode in batch 141 | total_rewards.append(episode_reward) 142 | episode_reward = 0.0 143 | transitions = [] 144 | 145 | if done: 146 | # if the pole fell, reset environment 147 | observation = env.reset() 148 | else: 149 | # if out of time, close environment 150 | env.close() 151 | 152 | print('total_rewards: ', total_rewards) 153 | 154 | # update value function 155 | sess.run(vl_optimizer, 156 | feed_dict={vl_state: states, 157 | vl_newvals: update_vals}) 158 | 159 | # update control policy 160 | advantages_vector = np.expand_dims(advantages, axis=1) 161 | sess.run(pl_optimizer, feed_dict={pl_state: states, 162 | pl_advantages: advantages_vector, 163 | pl_actions: actions}) 164 | 165 | return total_rewards, n_episodes 166 | 167 | # generate the networks 168 | policy_grad = policy_gradient() 169 | value_grad = value_gradient() 170 | 171 | # run the training from scratch 10 times, record results 172 | for ii in range(10): 173 | 174 | env = gym.make('CartPole-v0') 175 | env = gym.wrappers.Monitor( 176 | env=env, 177 | directory='cartpole-hill/', 178 | force=True, 179 | video_callable=False) 180 | sess = tf.InteractiveSession() 181 | sess.run(tf.global_variables_initializer()) 182 | 183 | max_rewards = [] 184 | total_episodes = [] 185 | # each batch is 200 time steps worth of episodes 186 | n_training_batches = 300 187 | import time 188 | times = [] 189 | for i in range(n_training_batches): 190 | start_time = time.time() 191 | if i % 100 == 0: 192 | print(i) 193 | reward, n_episodes = run_episode(env, policy_grad, value_grad, sess) 194 | max_rewards.append(np.max(reward)) 195 | total_episodes.append(n_episodes) 196 | times.append(time.time() - start_time) 197 | print('average time: %.3f' % (np.sum(times) / n_training_batches)) 198 | 199 | np.savez_compressed('data/policy_gradient_%i' % ii, 200 | max_rewards=max_rewards, total_episodes=total_episodes) 201 | 202 | sess.close() 203 | -------------------------------------------------------------------------------- /tracking_control/tracking_control1.py: -------------------------------------------------------------------------------- 1 | """ An implementation of the plant and controller from example 1 of 2 | (Slotine & Sastry, 1983) 3 | """ 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import seaborn 8 | 9 | 10 | class plant: 11 | def __init__(self, dt=.001, theta1=[0.0, 0.0], theta2=[0.0, 0.0]): 12 | """ 13 | dt float: simulation time step 14 | theta1 list/np.array: [position, velocity] 15 | theta2 list/np.array: [position, velocity] 16 | """ 17 | self.dt = dt 18 | self.theta1 = np.asarray(theta1) 19 | self.theta2 = np.asarray(theta2) 20 | 21 | def step(self, u): 22 | """ apply u and move one time step forward """ 23 | ddtheta1 = ( 24 | 3*self.theta1[0] + self.theta2[0]**2 + 25 | 2*self.theta1[0] * self.theta2[1] * np.cos(self.theta2[0]) + u[0]) 26 | ddtheta2 = ( 27 | self.theta1[0]**3 - np.cos(self.theta1[0]) * self.theta2[0] + u[1]) 28 | 29 | self.theta1 += np.array([self.theta1[1], ddtheta1]) * self.dt 30 | self.theta2 += np.array([self.theta2[1], 
ddtheta2]) * self.dt 31 | 32 | @property 33 | def state(self): 34 | return self.theta1, self.theta2 35 | 36 | 37 | class controller: 38 | def __init__(self): 39 | pass 40 | 41 | def control(self, t, theta1, theta2): 42 | """ 43 | t float: the current time (desired trajectory is a function of time) 44 | thetas np.array: plant state 45 | """ 46 | # calculate the value of s for generating our gains 47 | s1 = theta1[1] + theta1[0] - 2*t*(t + 2) 48 | s2 = theta2[1] + theta2[0] - t*(t + 2) 49 | 50 | # gains for u1 51 | beta11 = -3 52 | beta12 = -1 53 | beta13 = -2 if s1 * theta1[0] * theta2[1] > 0 else 2 54 | k11 = -1 55 | k12 = 5 # k12 > 4 56 | 57 | # gains for u2 58 | beta21 = -1 59 | beta22 = -1 if s2 * theta2[0] > 0 else 1 60 | k21 = -1 61 | k22 = 3 # k22 > 2 62 | 63 | u1 = ( 64 | beta11*theta1[0] + beta12*theta2[0]**2 + 65 | beta13*theta1[0]*theta2[1] + k11*(theta1[1] - 4*t) - 66 | k12*np.sign(s1)) 67 | u2 = ( 68 | beta21*theta1[0]**3 + beta22*theta2[0] + 69 | k21*(theta2[1] - 2*t) - k22*np.sign(s2)) 70 | 71 | return np.array([u1, u2]) 72 | 73 | 74 | T = 1.0 75 | dt = 0.001 76 | timeline = np.arange(0.0, T, dt) 77 | 78 | ctrlr = controller() 79 | 80 | plant_uncontrolled = plant(dt=dt) 81 | theta1_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 82 | theta2_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 83 | 84 | plant_controlled = plant(dt=dt) 85 | theta1_controlled_track = np.zeros((timeline.shape[0], 2)) 86 | theta2_controlled_track = np.zeros((timeline.shape[0], 2)) 87 | 88 | for ii, t in enumerate(timeline): 89 | 90 | if ii % int(1.0/dt) == 0: 91 | print('t: ', t) 92 | (theta1_uncontrolled_track[ii], 93 | theta2_uncontrolled_track[ii]) = plant_uncontrolled.state 94 | (theta1_controlled_track[ii], 95 | theta2_controlled_track[ii]) = plant_controlled.state 96 | 97 | u = ctrlr.control(t, 98 | theta1_controlled_track[ii], 99 | theta2_controlled_track[ii]) 100 | 101 | plant_uncontrolled.step(np.array([0.0, 0.0])) 102 | plant_controlled.step(u) 103 | 104 | plt.subplot(2, 1, 1) 105 | plt.plot(timeline, theta1_uncontrolled_track[:, 0], lw=2) 106 | plt.plot(timeline, theta1_controlled_track[:, 0], lw=2) 107 | plt.plot(timeline, 2*timeline**2, 'r--', lw=2) 108 | plt.legend(['uncontrolled', 'controlled', 'target']) 109 | 110 | plt.subplot(2, 1, 2) 111 | plt.plot(timeline, theta2_uncontrolled_track[:, 0], lw=2) 112 | plt.plot(timeline, theta2_controlled_track[:, 0], lw=2) 113 | plt.plot(timeline, timeline**2, 'r--', lw=2) 114 | plt.legend(['uncontrolled', 'controlled', 'target']) 115 | 116 | plt.tight_layout() 117 | plt.show() 118 | -------------------------------------------------------------------------------- /tracking_control/tracking_control2.py: -------------------------------------------------------------------------------- 1 | """ An implementation based on the plant and controller from example 1 of 2 | (Slotine & Sastry, 1983), with a changed target trajectory. 
3 | """ 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import seaborn 8 | 9 | 10 | class plant: 11 | def __init__(self, dt=.001, theta1=[0.0, 0.0], theta2=[0.0, 0.0]): 12 | """ 13 | dt float: simulation time step 14 | theta1 list/np.array: [position, velocity] 15 | theta2 list/np.array: [position, velocity] 16 | """ 17 | self.dt = dt 18 | self.theta1 = np.asarray(theta1) 19 | self.theta2 = np.asarray(theta2) 20 | 21 | def step(self, u): 22 | """ apply u and move one time step forward """ 23 | ddtheta1 = ( 24 | 3*self.theta1[0] + self.theta2[0]**2 + 25 | 2*self.theta1[0] * self.theta2[1] * np.cos(self.theta2[0]) + u[0]) 26 | ddtheta2 = ( 27 | self.theta1[0]**3 - np.cos(self.theta1[0]) * self.theta2[0] + u[1]) 28 | 29 | self.theta1 += np.array([self.theta1[1], ddtheta1]) * self.dt 30 | self.theta2 += np.array([self.theta2[1], ddtheta2]) * self.dt 31 | 32 | @property 33 | def state(self): 34 | return self.theta1, self.theta2 35 | 36 | 37 | class controller: 38 | def __init__(self): 39 | pass 40 | 41 | def control(self, t, theta1, theta2): 42 | """ 43 | t float: the current time (desired trajectory is a function of time) 44 | thetas np.array: plant state 45 | """ 46 | # calculate the value of s for generating our gains 47 | s1 = theta1[1] + theta1[0] - 10*(np.cos(t) - np.sin(t)) 48 | s2 = theta2[1] + theta2[0] - 10*(np.cos(t)**2 - 2*np.cos(t)*np.sin(t)) 49 | 50 | # gains for u1 51 | beta11 = -3 52 | beta12 = -1 53 | beta13 = -2 if s1 * theta1[0] * theta2[1] > 0 else 2 54 | k11 = -1 55 | k12 = 10 # k12 > 4 56 | 57 | # gains for u2 58 | beta21 = -1 59 | beta22 = -1 if s2 * theta2[0] > 0 else 1 60 | k21 = -1 61 | k22 = 40 # k22 > 2 62 | 63 | u1 = ( 64 | beta11*theta1[0] + beta12*theta2[0]**2 + 65 | beta13*theta1[0]*theta2[1] + k11*(theta1[1] + 10*np.sin(t)) - 66 | k12*np.sign(s1)) 67 | u2 = ( 68 | beta21*theta1[0]**3 + beta22*theta2[0] + 69 | k21*(theta2[1] + 20*np.cos(t)*np.sin(t)) - k22*np.sign(s2)) 70 | 71 | return np.array([u1, u2]) 72 | 73 | 74 | T = 10.0 75 | dt = 0.001 76 | timeline = np.arange(0.0, T, dt) 77 | 78 | ctrlr = controller() 79 | 80 | plant_uncontrolled = plant(dt=dt, theta1=[0.0, 0.0], theta2=[0.0, 0.0]) 81 | theta1_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 82 | theta2_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 83 | 84 | plant_controlled = plant(dt=dt) 85 | theta1_controlled_track = np.zeros((timeline.shape[0], 2)) 86 | theta2_controlled_track = np.zeros((timeline.shape[0], 2)) 87 | 88 | for ii, t in enumerate(timeline): 89 | 90 | if ii % int(1.0/dt) == 0: 91 | print('t: ', t) 92 | (theta1_uncontrolled_track[ii], 93 | theta2_uncontrolled_track[ii]) = plant_uncontrolled.state 94 | (theta1_controlled_track[ii], 95 | theta2_controlled_track[ii]) = plant_controlled.state 96 | 97 | u = ctrlr.control(t, 98 | theta1_controlled_track[ii], 99 | theta2_controlled_track[ii]) 100 | 101 | plant_uncontrolled.step(np.array([0.0, 0.0])) 102 | plant_controlled.step(u) 103 | 104 | plt.subplot(2, 1, 1) 105 | plt.plot(timeline, theta1_uncontrolled_track[:, 0], lw=2) 106 | plt.plot(timeline, theta1_controlled_track[:, 0], lw=2) 107 | plt.plot(timeline, 10*np.cos(timeline), 'r--', lw=2) 108 | plt.legend(['uncontrolled', 'controlled', 'target']) 109 | 110 | plt.subplot(2, 1, 2) 111 | plt.plot(timeline, theta2_uncontrolled_track[:, 0], lw=2) 112 | plt.plot(timeline, theta2_controlled_track[:, 0], lw=2) 113 | plt.plot(timeline, 10*np.cos(timeline)**2, 'r--', lw=2) 114 | plt.legend(['uncontrolled', 'controlled', 'target']) 115 | 116 | plt.tight_layout() 117 | 
plt.show() 118 | -------------------------------------------------------------------------------- /tracking_control/tracking_control3.py: -------------------------------------------------------------------------------- 1 | """ An implementation based on the plant and controller from example 1 of 2 | (Slotine & Sastry, 1983), with a changed target trajectory. 3 | 4 | Here the target trajectories are 10*cos(t) and 10*(cos(t))**2, 5 | and the exact plant parameters are not known. 6 | """ 7 | 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import seaborn 11 | 12 | 13 | class plant: 14 | def __init__(self, dt=.001, theta1=[0.0, 0.0], theta2=[0.0, 0.0]): 15 | """ 16 | dt float: simulation time step 17 | theta1 list/np.array: [position, velocity] 18 | theta2 list/np.array: [position, velocity] 19 | """ 20 | self.dt = dt 21 | self.theta1 = np.asarray(theta1) 22 | self.theta2 = np.asarray(theta2) 23 | 24 | def step(self, u): 25 | """ apply u and move one time step forward """ 26 | ddtheta1 = ( 27 | 3*self.theta1[0] + self.theta2[0]**2 + 28 | 2*self.theta1[0] * self.theta2[1] * np.cos(self.theta2[0]) + u[0]) 29 | ddtheta2 = ( 30 | self.theta1[0]**3 - np.cos(self.theta1[0]) * self.theta2[0] + u[1]) 31 | 32 | self.theta1 += np.array([self.theta1[1], ddtheta1]) * self.dt 33 | self.theta2 += np.array([self.theta2[1], ddtheta2]) * self.dt 34 | 35 | @property 36 | def state(self): 37 | return self.theta1, self.theta2 38 | 39 | 40 | class controller: 41 | def __init__(self): 42 | pass 43 | 44 | def control(self, t, theta1, theta2): 45 | """ 46 | t float: the current time (desired trajectory is a function of time) 47 | thetas np.array: plant state 48 | """ 49 | # calculate the value of s for generating our gains 50 | s1 = theta1[1] + theta1[0] - 10*(np.cos(t) - np.sin(t)) 51 | s2 = theta2[1] + theta2[0] - 10*(np.cos(t)**2 - 2*np.cos(t)*np.sin(t)) 52 | 53 | noise = 20 54 | 55 | # gains for u1 56 | beta11 = -3 - noise if s1*theta1[0] > 0 else -3 + noise 57 | beta12 = -1 - noise if s1*theta2[0]**2 > 0 else -1 + noise 58 | beta13 = -2 - noise if s1*theta1[0]*theta2[1] > 0 else 2 + noise 59 | k11 = -1 60 | k12 = 10 # k12 > 4 61 | 62 | # gains for u2 63 | beta21 = -1 - noise if s2*theta1[0] > 0 else -1 + noise 64 | beta22 = -1 - noise if s2*theta2[0] > 0 else 1 + noise 65 | k21 = -1 66 | k22 = 40 # k22 > 2 67 | 68 | u1 = ( 69 | beta11*theta1[0] + beta12*theta2[0]**2 + 70 | beta13*theta1[0]*theta2[1] + k11*(theta1[1] + 10*np.sin(t)) - 71 | k12*np.sign(s1)) 72 | u2 = ( 73 | beta21*theta1[0]**3 + beta22*theta2[0] + 74 | k21*(theta2[1] + 20*np.cos(t)*np.sin(t)) - k22*np.sign(s2)) 75 | 76 | return np.array([u1, u2]) 77 | 78 | 79 | T = 10.0 80 | dt = 0.001 81 | timeline = np.arange(0.0, T, dt) 82 | 83 | ctrlr = controller() 84 | 85 | plant_uncontrolled = plant(dt=dt, theta1=[0.0, 0.0], theta2=[0.0, 0.0]) 86 | theta1_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 87 | theta2_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 88 | 89 | plant_controlled = plant(dt=dt) 90 | theta1_controlled_track = np.zeros((timeline.shape[0], 2)) 91 | theta2_controlled_track = np.zeros((timeline.shape[0], 2)) 92 | 93 | for ii, t in enumerate(timeline): 94 | 95 | if ii % int(1.0/dt) == 0: 96 | print('t: ', t) 97 | (theta1_uncontrolled_track[ii], 98 | theta2_uncontrolled_track[ii]) = plant_uncontrolled.state 99 | (theta1_controlled_track[ii], 100 | theta2_controlled_track[ii]) = plant_controlled.state 101 | 102 | u = ctrlr.control(t, 103 | theta1_controlled_track[ii], 104 | theta2_controlled_track[ii]) 105 | 106 | 
plant_uncontrolled.step(np.array([0.0, 0.0])) 107 | plant_controlled.step(u) 108 | 109 | plt.subplot(2, 1, 1) 110 | plt.plot(timeline, theta1_uncontrolled_track[:, 0], lw=2) 111 | plt.plot(timeline, theta1_controlled_track[:, 0], lw=2) 112 | plt.plot(timeline, 10*np.cos(timeline), 'r--', lw=2) 113 | plt.legend(['uncontrolled', 'controlled', 'target']) 114 | 115 | plt.subplot(2, 1, 2) 116 | plt.plot(timeline, theta2_uncontrolled_track[:, 0], lw=2) 117 | plt.plot(timeline, theta2_controlled_track[:, 0], lw=2) 118 | plt.plot(timeline, 10*np.cos(timeline)**2, 'r--', lw=2) 119 | plt.legend(['uncontrolled', 'controlled', 'target']) 120 | 121 | plt.tight_layout() 122 | plt.show() 123 | -------------------------------------------------------------------------------- /tracking_control/tracking_control4.py: -------------------------------------------------------------------------------- 1 | """ An implementation based on the 2-link arm plant and controller from 2 | (Slotine & Sastry, 1983). 3 | 4 | This version uses the dynamics presented as a function of u, not T. 5 | """ 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import seaborn 10 | 11 | 12 | class plant: 13 | def __init__(self, dt=.001, theta1=[0.0, 0.0], theta2=[0.0, 0.0]): 14 | """ 15 | dt float: simulation time step 16 | theta1 list/np.array: [position, velocity] 17 | theta2 list/np.array: [position, velocity] 18 | """ 19 | self.dt = dt 20 | self.theta1 = np.asarray(theta1) 21 | self.theta2 = np.asarray(theta2) 22 | 23 | def step(self, u): 24 | """ apply torques T and move one time step forward """ 25 | 26 | denom = (16.0/9.0 - np.cos(self.theta2[0])**2) 27 | ddtheta1 = ( 28 | (2.0/3.0*np.sin(self.theta2[0])*self.theta2[1]*(2*self.theta1[1] + 29 | self.theta2[1]) + (2.0/3.0 + np.cos(self.theta2[0])) * 30 | np.sin(self.theta2[0])*self.theta1[1]**2 + u[0]) / denom) 31 | ddtheta2 = ( 32 | (-(2.0/3.0 + np.cos(self.theta2[0]))*np.sin(self.theta2[0]) * 33 | self.theta2[1] * (2*self.theta1[1] + self.theta2[1]) - 34 | 2*(5.0/3.0 + np.cos(self.theta2[0])) * np.sin(self.theta2[0]) * 35 | self.theta1[1]**2 + u[1]) / denom) 36 | 37 | self.theta1 += np.array([self.theta1[1], ddtheta1]) * self.dt 38 | self.theta2 += np.array([self.theta2[1], ddtheta2]) * self.dt 39 | 40 | @property 41 | def state(self): 42 | return self.theta1, self.theta2 43 | 44 | 45 | class controller: 46 | def __init__(self): 47 | pass 48 | 49 | def control(self, t, theta1, theta2, theta1d, theta2d): 50 | """ 51 | t float: the current time (desired trajectory is a function of time) 52 | thetas np.array: plant state 53 | """ 54 | # calculate the value of s for generating our gains 55 | s1 = (theta1[0] - theta1d[0]) + 5*(theta1[1] - theta1d[1]) 56 | s2 = (theta2[0] - theta2d[0]) + 5*(theta2[1] - theta2d[1]) 57 | 58 | # gains for u1 59 | b11 = -0.7 if s1*theta2[1]*(2*theta1[1] + theta2[1]) > 0 else 0.7 60 | b12 = -1.2 if s1*(theta1[1]**2) > 0 else 1.2 61 | k11 = -9 if s1*(theta1[1] - theta1d[1]) > 0 else -3.8 62 | 63 | # gains for u2 64 | b21 = -1.2 if s2*theta2[1]*(2*theta1[1] + theta2[1]) > 0 else 1.2 65 | b22 = -4.4 if s2*(theta1[1]**2) > 0 else 4.4 66 | k21 = -9 if s2*(theta2[1] - theta2d[1]) > 0 else -3.8 67 | 68 | # shared gains 69 | k2 = 3.15 70 | 71 | u1 = ( 72 | b11*theta2[1]*(2*theta1[1] + theta2[1]) + b12*theta1[1]**2 + 73 | k11*(theta1[1] - theta1d[1]) - k2*np.sign(s1)) 74 | u2 = ( 75 | b21*theta2[1]*(2*theta1[1] + theta2[1]) + b22*theta1[1]**2 + 76 | k21*(theta2[1] - theta2d[1]) - k2*np.sign(s2)) 77 | 78 | return np.array([u1, u2]) 79 | 80 | 81 | T = 5 
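# --------------------------------------------------------------------------
# Editor's note (illustrative sketch, not part of the original script): the
# controller above is a sliding mode / variable structure design in the
# style of (Slotine & Sastry, 1983). Each joint gets a sliding surface
#     s_i = (theta_i - theta_i_desired) + 5 * (dtheta_i - dtheta_i_desired)
# (once s_i = 0, the position error decays exponentially with a 5 s time
# constant), and each gain multiplying an uncertain dynamics term switches
# between two bounding values based on the sign of s_i times that term, so
# that the term's worst-case contribution to s_i * ds_i/dt stays non-positive;
# the -k2 * sign(s_i) term then drives the state onto the surface. A minimal
# sketch of the switching rule used by b11, b12, b21, b22, k11 and k21 above
# (the helper name is illustrative only):

def switched_gain(s, term, gain_if_same_sign, gain_if_opposite_sign):
    """Pick the bounding gain based on the sign of s * term (illustrative)."""
    return gain_if_same_sign if s * term > 0 else gain_if_opposite_sign

# e.g. b11 = switched_gain(s1, theta2[1]*(2*theta1[1] + theta2[1]), -0.7, 0.7)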
82 | dt = 0.001 83 | timeline = np.arange(0.0, T, dt) 84 | 85 | ctrlr = controller() 86 | 87 | theta1 = [-90.0, 0.0] 88 | theta2 = [170.0, 0.0] 89 | plant_uncontrolled = plant(dt=dt, theta1=theta1, theta2=theta2) 90 | theta1_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 91 | theta2_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 92 | 93 | plant_controlled = plant(dt=dt, theta1=theta1, theta2=theta2) 94 | theta1_controlled_track = np.zeros((timeline.shape[0], 2)) 95 | theta2_controlled_track = np.zeros((timeline.shape[0], 2)) 96 | 97 | theta1d_1 = lambda t: -90 + 52.5*(1 - np.cos(1.26*t)) if t <= 2.5 else 15 98 | theta1d_2 = lambda t: 52.5*1.26*np.sin(1.26*t) if t <= 2.5 else 0.0 99 | 100 | theta2d_1 = lambda t: 170 - 60*(1 - np.cos(1.26*t)) if t <= 2.5 else 50 101 | theta2d_2 = lambda t: -60*1.26*np.sin(1.26*t) if t < 2.5 else 0.0 102 | 103 | thetad_track = np.zeros((timeline.shape[0], 2)) 104 | 105 | for ii, t in enumerate(timeline): 106 | 107 | if ii % int(1.0/dt) == 0: 108 | print('t: ', t) 109 | theta1d = [theta1d_1(t), theta1d_2(t)] 110 | theta2d = [theta2d_1(t), theta2d_2(t)] 111 | thetad_track[ii] = [theta1d[0], theta2d[0]] 112 | 113 | (theta1_uncontrolled_track[ii], 114 | theta2_uncontrolled_track[ii]) = plant_uncontrolled.state 115 | (theta1_controlled_track[ii], 116 | theta2_controlled_track[ii]) = plant_controlled.state 117 | 118 | u = ctrlr.control(t, 119 | theta1_controlled_track[ii], 120 | theta2_controlled_track[ii], 121 | theta1d, theta2d) 122 | 123 | plant_uncontrolled.step(np.array([0.0, 0.0])) 124 | plant_controlled.step(u) 125 | 126 | plt.subplot(2, 1, 1) 127 | plt.plot(timeline, theta1_uncontrolled_track[:, 0], lw=2) 128 | plt.plot(timeline, theta1_controlled_track[:, 0], lw=2) 129 | plt.plot(timeline, thetad_track[:, 0], 'r--', lw=2) 130 | plt.legend(['uncontrolled', 'controlled', 'target']) 131 | 132 | plt.subplot(2, 1, 2) 133 | plt.plot(timeline, theta2_uncontrolled_track[:, 0], lw=2) 134 | plt.plot(timeline, theta2_controlled_track[:, 0], lw=2) 135 | plt.plot(timeline, thetad_track[:, 1], 'r--', lw=2) 136 | plt.legend(['uncontrolled', 'controlled', 'target']) 137 | 138 | plt.tight_layout() 139 | plt.show() 140 | -------------------------------------------------------------------------------- /tracking_control/tracking_control5.py: -------------------------------------------------------------------------------- 1 | """ An implementation based on the 2-link arm plant and controller from 2 | (Slotine & Sastry, 1983). 
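This version computes the joint torques T from the control signal u and drives the plant with those torques.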
3 | """ 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import seaborn 8 | 9 | 10 | class plant: 11 | def __init__(self, dt=.001, theta1=[0.0, 0.0], theta2=[0.0, 0.0]): 12 | """ 13 | dt float: simulation time step 14 | theta1 list/np.array: [position, velocity] 15 | theta2 list/np.array: [position, velocity] 16 | """ 17 | self.dt = dt 18 | self.theta1 = np.asarray(theta1) 19 | self.theta2 = np.asarray(theta2) 20 | 21 | def step(self, T): 22 | """ apply torques T and move one time step forward """ 23 | 24 | Tprime_1 = (2*T[0] + np.sin(self.theta2[0])*self.theta2[1] * 25 | (2*self.theta1[1] + self.theta2[1])) 26 | Tprime_2 = 2*T[1] - np.sin(self.theta2[0])*self.theta2[1] 27 | denom = (16.0/9.0 - np.cos(self.theta2[0])**2) 28 | 29 | ddtheta1 = ( 30 | (2.0/3.0 * Tprime_1 - (2.0/3.0 + 31 | np.cos(self.theta2[0])) * Tprime_2) / denom) 32 | ddtheta2 = ( 33 | (-(2.0/3.0 + np.cos(self.theta2[0])) * Tprime_1 + 34 | 2*(5.0/3.0 + np.cos(self.theta2[0])) * Tprime_2) / denom) 35 | 36 | self.theta1 += np.array([self.theta1[1], ddtheta1]) * self.dt 37 | self.theta2 += np.array([self.theta2[1], ddtheta2]) * self.dt 38 | 39 | @property 40 | def state(self): 41 | return self.theta1, self.theta2 42 | 43 | 44 | class controller: 45 | def __init__(self): 46 | pass 47 | 48 | def control(self, t, theta1, theta2, theta1d, theta2d): 49 | """ 50 | t float: the current time (desired trajectory is a function of time) 51 | thetas np.array: plant state 52 | """ 53 | # calculate the value of s for generating our gains 54 | s1 = (theta1[0] - theta1d[0]) + 5*(theta1[1] - theta1d[1]) 55 | s2 = (theta2[0] - theta2d[0]) + 5*(theta2[1] - theta2d[1]) 56 | 57 | # gains for u1 58 | b11 = -0.7 if s1*theta2[1]*(2*theta1[1] + theta2[1]) > 0 else 0.7 59 | b12 = -1.2 if s1*(theta1[1]**2) > 0 else 1.2 60 | k11 = -9 if s1*(theta1[1] - theta1d[1]) > 0 else -3.8 61 | 62 | # gains for u2 63 | b21 = -1.2 if s2*theta2[1]*(2*theta1[1] + theta2[1]) > 0 else 1.2 64 | b22 = -4.4 if s2*(theta1[1]**2) > 0 else 4.4 65 | k21 = -9 if s2*(theta2[1] - theta2d[1]) > 0 else -3.8 66 | 67 | # shared gains 68 | k2 = 3.15 69 | 70 | u1 = ( 71 | b11*theta2[1]*(2*theta1[1] + theta2[1]) + b12*theta1[1]**2 + 72 | k11*(theta1[1] - theta1d[1]) - k2*np.sign(s1)) 73 | u2 = ( 74 | b21*theta2[1]*(2*theta1[1] + theta2[1]) + b22*theta1[1]**2 + 75 | k21*(theta2[1] - theta2d[1]) - k2*np.sign(s2)) 76 | 77 | T2 = ((u2 + (1 + 3.0/2.0*np.cos(theta2[0]))*u1) / 78 | (10.0/3.0 - np.cos(theta2[0]))) 79 | T1 = 3.0/4.0*(u1 + (4.0/3.0 + 2*np.cos(theta2[0]))*T2) 80 | 81 | return np.array([T1, T2]) 82 | 83 | T = 5 84 | dt = 0.001 85 | timeline = np.arange(0.0, T, dt) 86 | 87 | ctrlr = controller() 88 | 89 | theta1 = [-90.0, 0.0] 90 | theta2 = [170.0, 0.0] 91 | plant_uncontrolled = plant(dt=dt, theta1=theta1, theta2=theta2) 92 | theta1_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 93 | theta2_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 94 | 95 | plant_controlled = plant(dt=dt, theta1=theta1, theta2=theta2) 96 | theta1_controlled_track = np.zeros((timeline.shape[0], 2)) 97 | theta2_controlled_track = np.zeros((timeline.shape[0], 2)) 98 | 99 | theta1d_1 = lambda t: -90 + 52.5*(1 - np.cos(1.26*t)) if t <= 2.5 else 15 100 | theta1d_2 = lambda t: 52.5*1.26*np.sin(1.26*t) if t <= 2.5 else 0.0 101 | 102 | theta2d_1 = lambda t: 170 - 60*(1 - np.cos(1.26*t)) if t <= 2.5 else 50 103 | theta2d_2 = lambda t: -60*1.26*np.sin(1.26*t) if t < 2.5 else 0.0 104 | 105 | thetad_track = np.zeros((timeline.shape[0], 2)) 106 | 107 | for ii, t in enumerate(timeline): 108 |
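    # each iteration: update the target angles and velocities, record the state
    # of both plants, compute the control signal from the controlled plant's
    # state, then step both plants forward (the uncontrolled plant gets zero torque)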
109 | if ii % int(1.0/dt) == 0: 110 | print('t: ', t) 111 | theta1d = [theta1d_1(t), theta1d_2(t)] 112 | theta2d = [theta2d_1(t), theta2d_2(t)] 113 | thetad_track[ii] = [theta1d[0], theta2d[0]] 114 | 115 | (theta1_uncontrolled_track[ii], 116 | theta2_uncontrolled_track[ii]) = plant_uncontrolled.state 117 | (theta1_controlled_track[ii], 118 | theta2_controlled_track[ii]) = plant_controlled.state 119 | 120 | u = ctrlr.control(t, 121 | theta1_controlled_track[ii], 122 | theta2_controlled_track[ii], 123 | theta1d, theta2d) 124 | 125 | plant_uncontrolled.step(np.array([0.0, 0.0])) 126 | plant_controlled.step(u) 127 | 128 | plt.subplot(2, 1, 1) 129 | plt.plot(timeline, theta1_uncontrolled_track[:, 0], lw=2) 130 | plt.plot(timeline, theta1_controlled_track[:, 0], lw=2) 131 | plt.plot(timeline, thetad_track[:, 0], 'r--', lw=2) 132 | plt.legend(['uncontrolled', 'controlled', 'target']) 133 | 134 | plt.subplot(2, 1, 2) 135 | plt.plot(timeline, theta2_uncontrolled_track[:, 0], lw=2) 136 | plt.plot(timeline, theta2_controlled_track[:, 0], lw=2) 137 | plt.plot(timeline, thetad_track[:, 1], 'r--', lw=2) 138 | plt.legend(['uncontrolled', 'controlled', 'target']) 139 | 140 | plt.tight_layout() 141 | plt.show() 142 | -------------------------------------------------------------------------------- /tracking_control/tracking_control6.py: -------------------------------------------------------------------------------- 1 | """ An implementation based on the 2-link arm plant and controller from 2 | (Slotine & Sastry, 1983), with a longer run time and different 3 | desired trajectory. 4 | """ 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import seaborn 9 | 10 | 11 | class plant: 12 | def __init__(self, dt=.001, theta1=[0.0, 0.0], theta2=[0.0, 0.0]): 13 | """ 14 | dt float: simulation time step 15 | theta1 list/np.array: [position, velocity] 16 | theta2 list/np.array: [position, velocity] 17 | """ 18 | self.dt = dt 19 | self.theta1 = np.asarray(theta1) 20 | self.theta2 = np.asarray(theta2) 21 | 22 | def step(self, T): 23 | """ apply torques T and move one time step forward """ 24 | 25 | Tprime_1 = (2*T[0] + np.sin(self.theta2[0])*self.theta2[1] * 26 | (2*self.theta1[1] + self.theta2[1])) 27 | Tprime_2 = 2*T[1] - np.sin(self.theta2[0])*self.theta2[1] 28 | denom = (16.0/9.0 - np.cos(self.theta2[0])**2) 29 | 30 | ddtheta1 = ( 31 | (2.0/3.0 * Tprime_1 - (2.0/3.0 + 32 | np.cos(self.theta2[0])) * Tprime_2) / denom) 33 | ddtheta2 = ( 34 | (-(2.0/3.0 + np.cos(self.theta2[0])) * Tprime_1 + 35 | 2*(5.0/3.0 + np.cos(self.theta2[0])) * Tprime_2) / denom) 36 | 37 | self.theta1 += np.array([self.theta1[1], ddtheta1]) * self.dt 38 | self.theta2 += np.array([self.theta2[1], ddtheta2]) * self.dt 39 | 40 | @property 41 | def state(self): 42 | return self.theta1, self.theta2 43 | 44 | 45 | class controller: 46 | def __init__(self): 47 | pass 48 | 49 | def control(self, t, theta1, theta2, theta1d, theta2d): 50 | """ 51 | t float: the current time (desired trajectory is a function of time) 52 | thetas np.array: plant state 53 | """ 54 | # calculate the value of s for generating our gains 55 | s1 = (theta1[0] - theta1d[0]) + 5*(theta1[1] - theta1d[1]) 56 | s2 = (theta2[0] - theta2d[0]) + 5*(theta2[1] - theta2d[1]) 57 | 58 | # gains for u1 59 | b11 = -0.7 if s1*theta2[1]*(2*theta1[1] + theta2[1]) > 0 else 0.7 60 | b12 = -1.2 if s1*(theta1[1]**2) > 0 else 1.2 61 | k11 = -9 if s1*(theta1[1] - theta1d[1]) > 0 else -3.8 62 | 63 | # gains for u2 64 | b21 = -1.2 if s2*theta2[1]*(2*theta1[1] + theta2[1]) > 0 else 1.2 65 |
b22 = -4.4 if s2*(theta1[1]**2) > 0 else 4.4 66 | k21 = -9 if s2*(theta2[1] - theta2d[1]) > 0 else -3.8 67 | 68 | # shared gains 69 | k2 = 3.15 70 | 71 | u1 = ( 72 | b11*theta2[1]*(2*theta1[1] + theta2[1]) + b12*theta1[1]**2 + 73 | k11*(theta1[1] - theta1d[1]) - k2*np.sign(s1)) 74 | u2 = ( 75 | b21*theta2[1]*(2*theta1[1] + theta2[1]) + b22*theta1[1]**2 + 76 | k21*(theta2[1] - theta2d[1]) - k2*np.sign(s2)) 77 | 78 | T2 = ((u2 + (1 + 3.0/2.0*np.cos(theta2[0]))*u1) / 79 | (10.0/3.0 - np.cos(theta2[0]))) 80 | T1 = 3.0/4.0*(u1 + (4.0/3.0 + 2*np.cos(theta2[0]))*T2) 81 | 82 | return np.array([T1, T2]) 83 | 84 | T = 30 85 | dt = 0.001 86 | timeline = np.arange(0.0, T, dt) 87 | 88 | ctrlr = controller() 89 | 90 | theta1 = [-80.0, 0.0] 91 | theta2 = [170.0, 0.0] 92 | plant_uncontrolled = plant(dt=dt, theta1=theta1, theta2=theta2) 93 | theta1_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 94 | theta2_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 95 | 96 | plant_controlled = plant(dt=dt, theta1=theta1, theta2=theta2) 97 | theta1_controlled_track = np.zeros((timeline.shape[0], 2)) 98 | theta2_controlled_track = np.zeros((timeline.shape[0], 2)) 99 | 100 | theta1d_1 = lambda t: -50 + 52.5*(1 - np.cos(1.26*t)) if t <= 30 else 50 101 | theta1d_2 = lambda t: 52.5*1.26*np.sin(1.26*t) if t <= 30 else 0.0 102 | 103 | theta2d_1 = lambda t: 170 - 60*(1 - np.sin(1.26*t)) if t <= 30 else 170 104 | theta2d_2 = lambda t: 60*1.26*np.cos(1.26*t) if t < 30 else 0.0 105 | 106 | thetad_track = np.zeros((timeline.shape[0], 2)) 107 | 108 | for ii, t in enumerate(timeline): 109 | 110 | if ii % int(1.0/dt) == 0: 111 | print('t: ', t) 112 | theta1d = [theta1d_1(t), theta1d_2(t)] 113 | theta2d = [theta2d_1(t), theta2d_2(t)] 114 | thetad_track[ii] = [theta1d[0], theta2d[0]] 115 | 116 | (theta1_uncontrolled_track[ii], 117 | theta2_uncontrolled_track[ii]) = plant_uncontrolled.state 118 | (theta1_controlled_track[ii], 119 | theta2_controlled_track[ii]) = plant_controlled.state 120 | 121 | u = ctrlr.control(t, 122 | theta1_controlled_track[ii], 123 | theta2_controlled_track[ii], 124 | theta1d, theta2d) 125 | 126 | plant_uncontrolled.step(np.array([0.0, 0.0])) 127 | plant_controlled.step(u) 128 | 129 | plt.figure(figsize=(4, 8)) 130 | plt.subplot(4, 1, 1) 131 | plt.plot(timeline, theta1_uncontrolled_track[:, 0], lw=2) 132 | plt.plot(timeline, theta1_controlled_track[:, 0], lw=2) 133 | plt.plot(timeline, thetad_track[:, 0], 'r--', lw=2) 134 | plt.legend(['uncontrolled', 'controlled', 'target']) 135 | 136 | plt.subplot(4, 1, 2) 137 | plt.plot(timeline, theta2_uncontrolled_track[:, 0], lw=2) 138 | plt.plot(timeline, theta2_controlled_track[:, 0], lw=2) 139 | plt.plot(timeline, thetad_track[:, 1], 'r--', lw=2) 140 | plt.legend(['uncontrolled', 'controlled', 'target']) 141 | 142 | ax = plt.subplot(2, 1, 2) 143 | ax.set_aspect('equal') 144 | print(theta1_controlled_track[:, 0].shape) 145 | def kin(q0, q1): 146 | # convert to hand (x,y) coordinates 147 | # L0 = L1 = 1 148 | q0 *= np.pi / 180.0 149 | q1 *= np.pi / 180.0 150 | return np.cos(q0) + np.cos(q0+q1), np.sin(q0) + np.sin(q0+q1) 151 | x,y = kin(theta1_controlled_track[:, 0], theta2_controlled_track[:, 0]) 152 | xd, yd = kin(thetad_track[:, 0], thetad_track[:, 1]) 153 | plt.plot(x, y) 154 | plt.plot(xd, yd, 'r--', lw=2) 155 | 156 | plt.tight_layout() 157 | plt.show() 158 | -------------------------------------------------------------------------------- /tracking_control/tracking_control7.py: 
-------------------------------------------------------------------------------- 1 | """ An implementation of the 2-link arm plant and controller from 2 | (Slotine & Sastry, 1983), with noise added to the system feedback. 3 | """ 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import seaborn 8 | 9 | 10 | class plant: 11 | def __init__(self, dt=.001, theta1=[0.0, 0.0], theta2=[0.0, 0.0]): 12 | """ 13 | dt float: simulation time step 14 | theta1 list/np.array: [position, velocity] 15 | theta2 list/np.array: [position, velocity] 16 | """ 17 | self.dt = dt 18 | self.theta1 = np.asarray(theta1) 19 | self.theta2 = np.asarray(theta2) 20 | 21 | def step(self, T): 22 | """ apply torques T and move one time step forward """ 23 | 24 | Tprime_1 = (2*T[0] + np.sin(self.theta2[0])*self.theta2[1] * 25 | (2*self.theta1[1] + self.theta2[1])) 26 | Tprime_2 = 2*T[1] - np.sin(self.theta2[0])*self.theta2[1] 27 | denom = (16.0/9.0 - np.cos(self.theta2[0])**2) 28 | 29 | ddtheta1 = ( 30 | (2.0/3.0 * Tprime_1 - (2.0/3.0 + 31 | np.cos(self.theta2[0])) * Tprime_2) / denom) 32 | ddtheta2 = ( 33 | (-(2.0/3.0 + np.cos(self.theta2[0])) * Tprime_1 + 34 | 2*(5.0/3.0 + np.cos(self.theta2[0])) * Tprime_2) / denom) 35 | 36 | self.theta1 += np.array([self.theta1[1], ddtheta1]) * self.dt 37 | self.theta2 += np.array([self.theta2[1], ddtheta2]) * self.dt 38 | 39 | @property 40 | def state(self): 41 | return self.theta1, self.theta2 42 | 43 | 44 | class controller: 45 | def __init__(self): 46 | pass 47 | 48 | def control(self, t, theta1, theta2, theta1d, theta2d): 49 | """ 50 | t float: the current time (desired trajectory is a function of time) 51 | thetas np.array: plant state 52 | """ 53 | 54 | # add noise in to measurements 55 | noise = [1, .5] 56 | theta1 = np.array(theta1) + np.random.random(2)*2*noise - noise 57 | theta2 = np.array(theta2) + np.random.random(2)*2*noise - noise 58 | 59 | # calculate the value of s for generating our gains 60 | s1 = (theta1[0] - theta1d[0]) + 5*(theta1[1] - theta1d[1]) 61 | s2 = (theta2[0] - theta2d[0]) + 5*(theta2[1] - theta2d[1]) 62 | 63 | # gains for u1 64 | b11 = -0.7 if s1*theta2[1]*(2*theta1[1] + theta2[1]) > 0 else 0.7 65 | b12 = -1.2 if s1*(theta1[1]**2) > 0 else 1.2 66 | k11 = -9 if s1*(theta1[1] - theta1d[1]) > 0 else -3.8 67 | 68 | # gains for u2 69 | b21 = -1.2 if s2*theta2[1]*(2*theta1[1] + theta2[1]) > 0 else 1.2 70 | b22 = -4.4 if s2*(theta1[1]**2) > 0 else 4.4 71 | k21 = -9 if s2*(theta2[1] - theta2d[1]) > 0 else -3.8 72 | 73 | # shared gains 74 | k2 = 3.15 75 | 76 | u1 = ( 77 | b11*theta2[1]*(2*theta1[1] + theta2[1]) + b12*theta1[1]**2 + 78 | k11*(theta1[1] - theta1d[1]) - k2*np.sign(s1)) 79 | u2 = ( 80 | b21*theta2[1]*(2*theta1[1] + theta2[1]) + b22*theta1[1]**2 + 81 | k21*(theta2[1] - theta2d[1]) - k2*np.sign(s2)) 82 | 83 | T2 = ((u2 + (1 + 3.0/2.0*np.cos(theta2[0]))*u1) / 84 | (10.0/3.0 - np.cos(theta2[0]))) 85 | T1 = 3.0/4.0*(u1 + (4.0/3.0 + 2*np.cos(theta2[0]))*T2) 86 | 87 | return np.array([T1, T2]) 88 | 89 | T = 30 90 | dt = 0.001 91 | timeline = np.arange(0.0, T, dt) 92 | 93 | ctrlr = controller() 94 | 95 | theta1 = [-80.0, 0.0] 96 | theta2 = [170.0, 0.0] 97 | plant_uncontrolled = plant(dt=dt, theta1=theta1, theta2=theta2) 98 | theta1_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 99 | theta2_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 100 | 101 | plant_controlled = plant(dt=dt, theta1=theta1, theta2=theta2) 102 | theta1_controlled_track = np.zeros((timeline.shape[0], 2)) 103 | theta2_controlled_track =
np.zeros((timeline.shape[0], 2)) 104 | 105 | theta1d_1 = lambda t: -50 + 52.5*(1 - np.cos(1.26*t)) if t <= 30 else 50 106 | theta1d_2 = lambda t: 52.5*1.26*np.sin(1.26*t) if t <= 30 else 0.0 107 | 108 | theta2d_1 = lambda t: 170 - 60*(1 - np.sin(1.26*t)) if t <= 30 else 170 109 | theta2d_2 = lambda t: 60*1.26*np.cos(1.26*t) if t < 30 else 0.0 110 | 111 | thetad_track = np.zeros((timeline.shape[0], 2)) 112 | 113 | for ii, t in enumerate(timeline): 114 | 115 | if ii % int(1.0/dt) == 0: 116 | print('t: ', t) 117 | theta1d = [theta1d_1(t), theta1d_2(t)] 118 | theta2d = [theta2d_1(t), theta2d_2(t)] 119 | thetad_track[ii] = [theta1d[0], theta2d[0]] 120 | 121 | (theta1_uncontrolled_track[ii], 122 | theta2_uncontrolled_track[ii]) = plant_uncontrolled.state 123 | (theta1_controlled_track[ii], 124 | theta2_controlled_track[ii]) = plant_controlled.state 125 | 126 | u = ctrlr.control(t, 127 | theta1_controlled_track[ii], 128 | theta2_controlled_track[ii], 129 | theta1d, theta2d) 130 | 131 | plant_uncontrolled.step(np.array([0.0, 0.0])) 132 | plant_controlled.step(u) 133 | 134 | plt.figure(figsize=(4, 8)) 135 | plt.subplot(4, 1, 1) 136 | plt.plot(timeline, theta1_uncontrolled_track[:, 0], lw=2) 137 | plt.plot(timeline, theta1_controlled_track[:, 0], lw=2) 138 | plt.plot(timeline, thetad_track[:, 0], 'r--', lw=2) 139 | plt.legend(['uncontrolled', 'controlled', 'target']) 140 | 141 | plt.subplot(4, 1, 2) 142 | plt.plot(timeline, theta2_uncontrolled_track[:, 0], lw=2) 143 | plt.plot(timeline, theta2_controlled_track[:, 0], lw=2) 144 | plt.plot(timeline, thetad_track[:, 1], 'r--', lw=2) 145 | plt.legend(['uncontrolled', 'controlled', 'target']) 146 | 147 | ax = plt.subplot(2, 1, 2) 148 | ax.set_aspect('equal') 149 | print(theta1_controlled_track[:, 0].shape) 150 | def kin(q0, q1): 151 | # convert to hand (x,y) coordinates 152 | # L0 = L1 = 1 153 | q0 *= np.pi / 180.0 154 | q1 *= np.pi / 180.0 155 | return np.cos(q0) + np.cos(q0+q1), np.sin(q0) + np.sin(q0+q1) 156 | x,y = kin(theta1_controlled_track[:, 0], theta2_controlled_track[:, 0]) 157 | xd, yd = kin(thetad_track[:, 0], thetad_track[:, 1]) 158 | plt.plot(x, y) 159 | plt.plot(xd, yd, 'r--', lw=2) 160 | 161 | plt.tight_layout() 162 | plt.show() 163 | -------------------------------------------------------------------------------- /tracking_control/tracking_control8.py: -------------------------------------------------------------------------------- 1 | """ An implementation of the 2-link arm plant and controller 2 | from (Slotine & Sastry, 1983). This version implements 3 | the plant with the varying load, mu. 
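The controller gains used here are larger than in the unloaded versions.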
4 | """ 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import seaborn 9 | 10 | 11 | class plant: 12 | def __init__(self, dt=.001, theta1=[0.0, 0.0], theta2=[0.0, 0.0]): 13 | """ 14 | dt float: simulation time step 15 | theta1 list/np.array: [position, velocity] 16 | theta2 list/np.array: [position, velocity] 17 | """ 18 | self.dt = dt 19 | self.theta1 = np.asarray(theta1) 20 | self.theta2 = np.asarray(theta2) 21 | 22 | def step(self, T): 23 | """ apply torques T and move one time step forward """ 24 | 25 | Tprime_1 = (2*T[0] + np.sin(self.theta2[0])*self.theta2[1] * 26 | (2*self.theta1[1] + self.theta2[1])) 27 | Tprime_2 = 2*T[1] - np.sin(self.theta2[0])*self.theta2[1] 28 | denom = (16.0/9.0 - np.cos(self.theta2[0])**2) 29 | 30 | mu = 20 # load on the arm 31 | ddtheta2 = ( 32 | (2*T[1] - np.sin(self.theta2[0])*self.theta1[1]**2*(1 + 2*mu)) / 33 | ((4.0/3.0 + np.cos(self.theta2[0]) + 2*mu*(1 + np.cos(self.theta2[0])) + 34 | 2*mu))) 35 | ddtheta1 = ( 36 | (2*T[0] + np.sin(self.theta1[0])*self.theta2[1]**2*(2*self.theta1[1] + self.theta2[1]) * 37 | (1 + 2*mu) - ddtheta2*(2.0/3.0 + np.cos(self.theta2[0]) + 2*mu*(1 + 38 | np.cos(self.theta2[0])))) / 39 | (2*(5.0/3.0 + np.cos(self.theta2[0])) + 4*mu*(1 + np.cos(self.theta2[0])))) 40 | 41 | self.theta1 += np.array([self.theta1[1], ddtheta1]) * self.dt 42 | self.theta2 += np.array([self.theta2[1], ddtheta2]) * self.dt 43 | 44 | @property 45 | def state(self): 46 | return self.theta1, self.theta2 47 | 48 | 49 | class controller: 50 | def __init__(self): 51 | pass 52 | 53 | def control(self, t, theta1, theta2, theta1d, theta2d): 54 | """ 55 | t float: the current time (desired trajectory is a function of time) 56 | thetas np.array: plant state 57 | """ 58 | 59 | # calculate the value of s for generating our gains 60 | s1 = (theta1[0] - theta1d[0]) + 5*(theta1[1] - theta1d[1]) 61 | s2 = (theta2[0] - theta2d[0]) + 5*(theta2[1] - theta2d[1]) 62 | 63 | # gains for u1 64 | b11 = -1.2 if s1*theta2[1]*(2*theta1[1] + theta2[1]) > 0 else 1.2 65 | b12 = -2.1 if s1*(theta1[1]**2) > 0 else 2.1 66 | k11 = -15.2 if s1*(theta1[1] - theta1d[1]) > 0 else -2.4 67 | 68 | # gains for u2 69 | b21 = -2.1 if s2*theta2[1]*(2*theta1[1] + theta2[1]) > 0 else 2.1 70 | b22 = -6.4 if s2*(theta1[1]**2) > 0 else 6.4 71 | k21 = -15.2 if s2*(theta2[1] - theta2d[1]) > 0 else -2.4 72 | 73 | # shared gains 74 | k2 = 10.0 75 | 76 | u1 = ( 77 | b11*theta2[1]*(2*theta1[1] + theta2[1]) + b12*theta1[1]**2 + 78 | k11*(theta1[1] - theta1d[1]) - k2*np.sign(s1)) 79 | u2 = ( 80 | b21*theta2[1]*(2*theta1[1] + theta2[1]) + b22*theta1[1]**2 + 81 | k21*(theta2[1] - theta2d[1]) - k2*np.sign(s2)) 82 | 83 | T2 = ((u2 + (1 + 3.0/2.0*np.cos(theta2[0]))*u1) / 84 | (10.0/3.0 - np.cos(theta2[0]))) 85 | T1 = 3.0/4.0*(u1 + (4.0/3.0 + 2*np.cos(theta2[0]))*T2) 86 | 87 | return np.array([T1, T2]) 88 | 89 | T = 150 90 | dt = 0.001 91 | timeline = np.arange(0.0, T, dt) 92 | 93 | ctrlr = controller() 94 | 95 | theta1 = [-50.0, 0.0] 96 | theta2 = [100.0, 0.0] 97 | plant_uncontrolled = plant(dt=dt, theta1=theta1, theta2=theta2) 98 | theta1_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 99 | theta2_uncontrolled_track = np.zeros((timeline.shape[0], 2)) 100 | 101 | plant_controlled = plant(dt=dt, theta1=theta1, theta2=theta2) 102 | theta1_controlled_track = np.zeros((timeline.shape[0], 2)) 103 | theta2_controlled_track = np.zeros((timeline.shape[0], 2)) 104 | 105 | switch_time = 30 106 | theta1d_1 = lambda t: -50 + 52.5*(1 - np.cos(1.26*t)) if t <= switch_time else 50 107 | theta1d_2 = lambda t: 52.5*1.26*np.sin(1.26*t) if t <= switch_time else 0.0
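# desired trajectories: theta*d_1 gives the target joint angle (in degrees) and
# theta*d_2 the target angular velocity; both hold constant values once
# t passes switch_time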
108 | 109 | theta2d_1 = lambda t: 170 - 60*(1 - np.sin(1.26*t)) if t <= switch_time else 170 110 | theta2d_2 = lambda t: 60*1.26*np.cos(1.26*t) if t < switch_time else 0.0 111 | 112 | thetad_track = np.zeros((timeline.shape[0], 2)) 113 | 114 | for ii, t in enumerate(timeline): 115 | 116 | if ii % int(1.0/dt) == 0: 117 | print('t: ', t) 118 | theta1d = [theta1d_1(t), theta1d_2(t)] 119 | theta2d = [theta2d_1(t), theta2d_2(t)] 120 | thetad_track[ii] = [theta1d[0], theta2d[0]] 121 | 122 | (theta1_uncontrolled_track[ii], 123 | theta2_uncontrolled_track[ii]) = plant_uncontrolled.state 124 | (theta1_controlled_track[ii], 125 | theta2_controlled_track[ii]) = plant_controlled.state 126 | 127 | u = ctrlr.control(t, 128 | theta1_controlled_track[ii], 129 | theta2_controlled_track[ii], 130 | theta1d, theta2d) 131 | 132 | plant_uncontrolled.step(np.array([0.0, 0.0])) 133 | plant_controlled.step(u) 134 | 135 | plt.figure(figsize=(4, 8)) 136 | plt.subplot(4, 1, 1) 137 | plt.plot(timeline, theta1_uncontrolled_track[:, 0], lw=2) 138 | plt.plot(timeline, theta1_controlled_track[:, 0], lw=2) 139 | plt.plot(timeline, thetad_track[:, 0], 'r--', lw=2) 140 | plt.legend(['uncontrolled', 'controlled', 'target']) 141 | 142 | plt.subplot(4, 1, 2) 143 | plt.plot(timeline, theta2_uncontrolled_track[:, 0], lw=2) 144 | plt.plot(timeline, theta2_controlled_track[:, 0], lw=2) 145 | plt.plot(timeline, thetad_track[:, 1], 'r--', lw=2) 146 | plt.legend(['uncontrolled', 'controlled', 'target']) 147 | 148 | ax = plt.subplot(2, 1, 2) 149 | ax.set_aspect('equal') 150 | print(theta1_controlled_track[:, 0].shape) 151 | def kin(q0, q1): 152 | # convert to hand (x,y) coordinates 153 | # L0 = L1 = 1 154 | q0 *= np.pi / 180.0 155 | q1 *= np.pi / 180.0 156 | return np.cos(q0) + np.cos(q0+q1), np.sin(q0) + np.sin(q0+q1) 157 | x,y = kin(theta1_controlled_track[:, 0], theta2_controlled_track[:, 0]) 158 | xd, yd = kin(thetad_track[:, 0], thetad_track[:, 1]) 159 | plt.plot(x, y) 160 | plt.plot(xd, yd, 'r--', lw=2) 161 | 162 | plt.tight_layout() 163 | plt.show() 164 | -------------------------------------------------------------------------------- /train_AHF/gen_animation_plots.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2016 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see . 
16 | ''' 17 | 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | import glob 21 | import sys 22 | 23 | from plot_error import gen_data_plot 24 | 25 | folder = "weights" if len(sys.argv) < 2 else sys.argv[1] 26 | total_num = 100 if len(sys.argv) < 3 else int(sys.argv[2]) 27 | 28 | files = sorted(glob.glob('%s/rnn*'%folder)) 29 | step = len(files) // total_num 30 | count = 0 31 | for ii in range(1, len(files), step): 32 | name = '%s/plots/%05i.png'%(folder, count) 33 | print('generating plot %i...' % count) 34 | gen_data_plot(folder, ii, show_plot=False, verbose=False, save_plot=name) 35 | count += 1 36 | # now generate 10 more plots of the last frame so the 37 | # gif seems to pause a bit at the end 38 | for jj in range(10): 39 | name = '%s/plots/%05i.png'%(folder, count) 40 | print('generating plot %i...' % count) 41 | gen_data_plot(folder, ii, show_plot=False, verbose=False, save_plot=name) 42 | count += 1 43 | -------------------------------------------------------------------------------- /train_AHF/plot_error.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright (C) 2016 Travis DeWolf 3 | 4 | This program is free software: you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation, either version 3 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program. If not, see <http://www.gnu.org/licenses/>. 16 | ''' 17 | 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | import glob 21 | import sys 22 | import seaborn 23 | 24 | 25 | def gen_data_plot(folder="weights", index=None, show_plot=True, 26 | save_plot=None, save_paths=False, verbose=True): 27 | 28 | files = sorted(glob.glob('%s/rnn*' % folder)) 29 | files = files[:index] if index is not None else files 30 | 31 | # plot the values over time 32 | vals = [] 33 | 34 | for ii, name in enumerate(files): 35 | if verbose: 36 | print(name) 37 | name = name.split('err')[1] 38 | name = name.split('.npz')[0] 39 | vals.append(float(name)) 40 | 41 | vals = np.array(vals) 42 | 43 | plt.figure(figsize=(10, 3)) 44 | ax = plt.subplot2grid((1, 3), (0, 0), colspan=2) 45 | ax.loglog(vals) 46 | ax.loglog(range(len(vals)), np.ones(len(vals)) * min(vals), 'r--') 47 | ax.loglog(range(len(vals)), np.ones(len(vals)) * min(vals), 'r--') 48 | plt.xlim([0, len(files)]) 49 | plt.ylim([10**-5, 10]) 50 | plt.title('AHF training error') 51 | plt.xlabel('Training iterations') 52 | plt.ylabel('Error') 53 | plt.yscale('log') 54 | 55 | # load in the weights and see how well they control the arm 56 | dt = 1e-2 57 | sig_len = 40 58 | 59 | # HACK: append system path to have access to the arm code 60 | # NOTE: Change this path to wherever your plant model is kept!
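# (the path below is specific to the original author's machine; the Arm class
# is used here only through arm.DOF, arm.L, arm.init_q, and Arm(dt=dt))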
61 | sys.path.append("../../../studywolf_control/studywolf_control/") 62 | # from arms.two_link.arm_python import Arm as Arm 63 | from arms.three_link.arm import Arm as Arm 64 | if verbose: 65 | print('Plant is: %s' % str(Arm)) 66 | arm = Arm(dt=dt) 67 | 68 | from hessianfree import RNNet 69 | from hessianfree.nonlinearities import (Tanh, Linear) 70 | from train_hf_3link import PlantArm, gen_targets 71 | 72 | rec_coeff = [1, 1] 73 | rec_type = "sparse" 74 | eps = 1e-6 75 | 76 | num_states = arm.DOF * 2 77 | targets = gen_targets(arm, sig_len=sig_len) 78 | init_state = np.zeros((len(targets), num_states), dtype=np.float32) 79 | init_state[:, :arm.DOF] = arm.init_q # set up the initial joint angles 80 | plant = PlantArm(arm, targets=targets, 81 | init_state=init_state, eps=eps) 82 | 83 | index = -1 if index is None else index 84 | W = np.load(files[index])['arr_0'] 85 | 86 | # make sure this network is the same as the one you trained! 87 | net_size = 96 88 | if '32' in folder: 89 | net_size = 32 90 | rnn = RNNet(shape=[num_states * 2, 91 | net_size, 92 | net_size, 93 | num_states, 94 | num_states], 95 | layers=[Linear(), Tanh(), Tanh(), Linear(), plant], 96 | debug=False, 97 | rec_layers=[1, 2], 98 | conns={0: [1, 2], 1: [2], 2: [3], 3: [4]}, 99 | W_rec_params={"coeff": rec_coeff, "init_type": rec_type}, 100 | load_weights=W, 101 | use_GPU=False) 102 | 103 | rnn.forward(plant, rnn.W) 104 | states = np.asarray(plant.get_vecs()[0][:, :, num_states:]) 105 | targets = np.asarray(plant.get_vecs()[1]) 106 | 107 | def kin(q): 108 | x = np.sum([arm.L[ii] * np.cos(np.sum(q[:, :ii+1], axis=1)) 109 | for ii in range(arm.DOF)], axis=0) 110 | y = np.sum([arm.L[ii] * np.sin(np.sum(q[:, :ii+1], axis=1)) 111 | for ii in range(arm.DOF)], axis=0) 112 | return x,y 113 | 114 | ax = plt.subplot2grid((1, 3), (0, 2)) 115 | # plot start point 116 | initx, inity = kin(init_state) 117 | ax.plot(initx, inity, 'x', mew=10) 118 | for jj in range(0, len(targets)): 119 | # plot target 120 | targetx, targety = kin(targets[jj]) 121 | ax.plot(targetx, targety, 'rx', mew=1) 122 | # plat path 123 | pathx, pathy = kin(states[jj, :, :]) 124 | path = np.hstack([pathx[:, None], pathy[:, None]]) 125 | if save_paths is True: 126 | np.savez_compressed('end-effector position%.3i.npz' % int(jj/8), 127 | array1=path) 128 | ax.plot(path[:, 0], path[:, 1]) 129 | 130 | plt.tight_layout() 131 | # plt.xlim([-.1, .1]) 132 | # plt.ylim([.25, .45]) 133 | plt.title('Hand trajectory') 134 | plt.xlabel('x') 135 | plt.ylabel('y') 136 | 137 | if save_plot is not None: 138 | plt.savefig(save_plot) 139 | if show_plot is True: 140 | plt.show() 141 | plt.close() 142 | 143 | if __name__ == '__main__': 144 | 145 | if len(sys.argv) < 2: 146 | folder = "weights" 147 | else: 148 | folder = sys.argv[1] 149 | if len(sys.argv) < 3: 150 | index = None 151 | else: 152 | index = int(sys.argv[2]) 153 | 154 | gen_data_plot(folder=folder, index=index) 155 | --------------------------------------------------------------------------------