├── 3DposeEstimator ├── data │ └── demo-sequence.h5 ├── demo.py ├── pose2D.py ├── pose2Dto3D.py ├── pose3D.py └── skeletalModel.py ├── SLSynthesis ├── accumulators.py ├── cleaning.py ├── data │ ├── info │ ├── info_spk │ ├── info_today │ ├── recs.h5 │ ├── stop.list │ ├── text │ └── training.list ├── demo_evaluation.py ├── demo_training.py ├── demo_training_FF_20200603-clear.py ├── demo_training_FF_20200603.py ├── demo_training_FF_20200604-clear.py ├── demo_training_FF_20200604.py ├── dtw.py ├── model_FF_05.py ├── model_FF_20200603.py ├── model_FF_20200604.py ├── modeling.py ├── pipeline_demo_05_repository.py ├── saveDeltas.py ├── saveMu.py ├── saveNormSigmaPrintScale.py ├── temp │ └── mu ├── training.py └── utils.py ├── read.me └── wacv2020 ├── README ├── accumulators.py ├── data └── demo │ ├── info │ └── spk │ ├── keypoints │ ├── json.zip │ └── keypoints-01-raw.h5 │ ├── lists │ ├── all.list │ ├── test.list │ └── train.list │ └── text │ └── text ├── dtw.py ├── model_FF_05.py ├── modeling.py ├── pipeline_demo_01_json2h5.py ├── pipeline_demo_02_filter.py ├── pipeline_demo_03_deltas.py ├── pipeline_demo_04_norm.py ├── pipeline_demo_05_mu.py ├── pipeline_demo_06-train-model_FF_05.py ├── pipeline_demo_07-evaluation-model_FF_05.py ├── pipeline_demo_XX_invDeltas.py ├── pose2D.py ├── pose2Dto3D.py ├── pose3D.py ├── skeletalModel.py ├── training.py ├── utils.py ├── visualization ├── invDeltas.m ├── runMe.m └── skeletonDisp.m └── walkDir.py /3DposeEstimator/data/demo-sequence.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gopeith/SignLanguageProcessing/a312367087b5ebf01a435ef513db3fe80d8438a2/3DposeEstimator/data/demo-sequence.h5 -------------------------------------------------------------------------------- /3DposeEstimator/demo.py: -------------------------------------------------------------------------------- 1 | # standard 2 | import h5py 3 | 4 | # 3rd party 5 | import numpy 6 | 7 | # our own 8 | import skeletalModel 9 | import pose2D 10 | import pose2Dto3D 11 | import pose3D 12 | 13 | 14 | def save(fname, lst): 15 | 16 | T, dim = lst[0].shape 17 | f = open(fname, "w") 18 | for t in range(T): 19 | for i in range(dim): 20 | for j in range(len(lst)): 21 | f.write("%e\t" % lst[j][t, i]) 22 | f.write("\n") 23 | f.close() 24 | 25 | 26 | if __name__ == "__main__": 27 | 28 | dtype = "float32" 29 | randomNubersGenerator = numpy.random.RandomState(1234) 30 | 31 | # This demo shows converting a result of 2D pose estimation into a 3D pose. 32 | 33 | # Getting our structure of skeletal model. 34 | # For customizing the structure see a definition of getSkeletalModelStructure. 35 | structure = skeletalModel.getSkeletalModelStructure() 36 | 37 | # Getting 2D data 38 | # The sequence is an N-tuple of 39 | # (1sf point - x, 1st point - y, 1st point - likelihood, 2nd point - x, ...) 40 | # a missing point should have x=0, y=0, likelihood=0 41 | f = h5py.File("data/demo-sequence.h5", "r") 42 | inputSequence_2D = numpy.array(f.get("20161025_pocasi")) 43 | f.close() 44 | 45 | # Decomposition of the single matrix into three matrices: x, y, w (=likelihood) 46 | X = inputSequence_2D 47 | Xx = X[0:X.shape[0], 0:(X.shape[1]):3] 48 | Xy = X[0:X.shape[0], 1:(X.shape[1]):3] 49 | Xw = X[0:X.shape[0], 2:(X.shape[1]):3] 50 | 51 | # Normalization of the picture (x and y axis has the same scale) 52 | Xx, Xy = pose2D.normalization(Xx, Xy) 53 | save("data/demo1.txt", [Xx, Xy, Xw]) 54 | 55 | # Delete all skeletal models which have a lot of missing parts. 
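    # (pose2D.prune keeps a frame only if the mean confidence of the watched
    # joints -- here indices 0-7, i.e. the head, shoulders, and arms -- reaches
    # the threshold 0.3; all other frames are zeroed out, i.e. marked as
    # missing for the interpolation step below.)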
56 |     Xx, Xy, Xw = pose2D.prune(Xx, Xy, Xw, (0, 1, 2, 3, 4, 5, 6, 7), 0.3, dtype)
57 |     save("data/demo2.txt", [Xx, Xy, Xw])
58 | 
59 |     # Preliminary filtering: weighted linear interpolation of missing points.
60 |     Xx, Xy, Xw = pose2D.interpolation(Xx, Xy, Xw, 0.99, dtype)
61 |     save("data/demo3.txt", [Xx, Xy, Xw])
62 | 
63 |     # Initial 3D pose estimation
64 |     lines0, rootsx0, rootsy0, rootsz0, anglesx0, anglesy0, anglesz0, Yx0, Yy0, Yz0 = pose2Dto3D.initialization(
65 |         Xx,
66 |         Xy,
67 |         Xw,
68 |         structure,
69 |         0.001, # sigma (amplitude of the uniform noise added to the initial estimate)
70 |         randomNubersGenerator,
71 |         dtype
72 |     )
73 |     save("data/demo4.txt", [Yx0, Yy0, Yz0])
74 | 
75 |     # Backpropagation-based filtering
76 |     Yx, Yy, Yz = pose3D.backpropagationBasedFiltering(
77 |         lines0,
78 |         rootsx0,
79 |         rootsy0,
80 |         rootsz0,
81 |         anglesx0,
82 |         anglesy0,
83 |         anglesz0,
84 |         Xx,
85 |         Xy,
86 |         Xw,
87 |         structure,
88 |         dtype,
89 |     )
90 |     save("data/demo5.txt", [Yx, Yy, Yz])
91 | 
92 | 
93 | 
--------------------------------------------------------------------------------
/3DposeEstimator/pose2D.py:
--------------------------------------------------------------------------------
1 | # standard
2 | import math
3 | 
4 | # 3rd party
5 | import numpy
6 | 
7 | 
8 | def normalization(Xx, Xy):
9 |     T, n = Xx.shape
10 |     sum0 = T * n
11 |     sum1Xx = numpy.sum(numpy.sum(Xx))
12 |     sum2Xx = numpy.sum(numpy.sum(Xx * Xx))
13 |     sum1Xy = numpy.sum(numpy.sum(Xy))
14 |     sum2Xy = numpy.sum(numpy.sum(Xy * Xy))
15 |     mux = sum1Xx / sum0
16 |     muy = sum1Xy / sum0
17 |     sum0 = 2 * sum0
18 |     sum1 = sum1Xx + sum1Xy
19 |     sum2 = sum2Xx + sum2Xy
20 |     mu = sum1 / sum0
21 |     sigma2 = (sum2 / sum0) - mu * mu
22 |     if sigma2 < 1e-10:
23 |         sigma2 = 1e-10
24 |     sigma = math.sqrt(sigma2)
25 |     return (Xx - mux) / sigma, (Xy - muy) / sigma
26 | 
27 | 
28 | 
29 | def prune(Xx, Xy, Xw, watchThis, threshold, dtype):
30 |     T = Xw.shape[0]
31 |     N = Xw.shape[1]
32 |     Yx = numpy.zeros((T, N), dtype=dtype)
33 |     Yy = numpy.zeros((T, N), dtype=dtype)
34 |     Yw = numpy.zeros((T, N), dtype=dtype)
35 |     for t in range(T):
36 |         sum0 = 0
37 |         sum1 = 0.0
38 |         for i in watchThis:
39 |             sum0 = sum0 + 1
40 |             sum1 = sum1 + Xw[t, i]
41 |         Ew = sum1 / sum0
42 |         if Ew >= threshold:
43 |             for i in range(N):
44 |                 Yx[t, i] = Xx[t, i]
45 |                 Yy[t, i] = Xy[t, i]
46 |                 Yw[t, i] = Xw[t, i]
47 |     return Yx, Yy, Yw
48 | 
49 | 
50 | def interpolation(Xx, Xy, Xw, threshold, dtype):
51 |     T = Xw.shape[0]
52 |     N = Xw.shape[1]
53 |     Yx = numpy.zeros((T, N), dtype=dtype)
54 |     Yy = numpy.zeros((T, N), dtype=dtype)
55 |     for t in range(T):
56 |         for i in range(N):
57 |             a1 = Xx[t, i]
58 |             a2 = Xy[t, i]
59 |             p = Xw[t, i]
60 |             sumpa1 = p * a1
61 |             sumpa2 = p * a2
62 |             sump = p
63 |             delta = 0
64 |             while sump < threshold:
65 |                 change = False
66 |                 delta = delta + 1
67 |                 t2 = t + delta
68 |                 if t2 < T:
69 |                     a1 = Xx[t2, i]
70 |                     a2 = Xy[t2, i]
71 |                     p = Xw[t2, i]
72 |                     sumpa1 = sumpa1 + p * a1
73 |                     sumpa2 = sumpa2 + p * a2
74 |                     sump = sump + p
75 |                     change = True
76 |                 t2 = t - delta
77 |                 if t2 >= 0:
78 |                     a1 = Xx[t2, i]
79 |                     a2 = Xy[t2, i]
80 |                     p = Xw[t2, i]
81 |                     sumpa1 = sumpa1 + p * a1
82 |                     sumpa2 = sumpa2 + p * a2
83 |                     sump = sump + p
84 |                     change = True
85 |                 if not change:
86 |                     break
87 |             if sump <= 0.0:
88 |                 sump = 1e-10
89 |             Yx[t, i] = sumpa1 / sump
90 |             Yy[t, i] = sumpa2 / sump
91 |     return Yx, Yy, Xw
92 | 
--------------------------------------------------------------------------------
/3DposeEstimator/pose2Dto3D.py:
--------------------------------------------------------------------------------
1 | # standard
2 | import math
3 | 
4 | # 3rd party
5 | import
numpy 6 | 7 | # our own 8 | import skeletalModel 9 | 10 | 11 | # add uniformly distributed noise 12 | def addNoise(x, rng, epsilon): 13 | e = numpy.asarray(rng.uniform(low=-epsilon, high=epsilon, size=x.shape), dtype="float32") 14 | return x + e 15 | 16 | 17 | def norm(x): 18 | result = 0.0 19 | for i in x: 20 | result = result + i * i 21 | return math.sqrt(result) 22 | 23 | 24 | # A simple percentile computation. 25 | # Warning: it sorts input list. 26 | def perc(lst, p): 27 | lst.sort() 28 | return lst[int(p * (len(lst) - 1))] 29 | 30 | 31 | # some substitution 32 | def computeB(ax, ay, az, tx, ty, L): 33 | hyps = [ 34 | [tx - ax, ty - ay, 0] 35 | ] 36 | foo = L**2 - (tx - ax)**2 - (ty - ay)**2; 37 | if foo >= 0: 38 | hyps.append([tx - ax, ty - ay, -math.sqrt(foo)]) 39 | hyps.append([tx - ax, ty - ay, +math.sqrt(foo)]) 40 | foo1 = ax**2 - 2*ax*tx + ay**2 - 2*ay*ty + tx**2 + ty**2 41 | foo2 = (1/foo1)**(1/2) 42 | foo3 = ay**3/foo1 + L*ay*foo2 - L*ty*foo2 + (ax**2*ay)/foo1 + (ay*tx**2)/foo1 + (ay*ty**2)/foo1 - (2*ay**2*ty)/foo1 - (2*ax*ay*tx)/foo1 43 | foo4 = ay**3/foo1 - L*ay*foo2 + L*ty*foo2 + (ax**2*ay)/foo1 + (ay*tx**2)/foo1 + (ay*ty**2)/foo1 - (2*ay**2*ty)/foo1 - (2*ax*ay*tx)/foo1 44 | xx1 = -(ax*ty - ay*tx - ax*foo3 + tx*foo3)/(ay - ty) 45 | xx2 = -(ax*ty - ay*tx - ax*foo4 + tx*foo4)/(ay - ty) 46 | xy1 = ay**3/foo1 + L*ay*foo2 - L*ty*foo2 + (ax**2*ay)/foo1 + (ay*tx**2)/foo1 + (ay*ty**2)/foo1 - (2*ay**2*ty)/foo1 - (2*ax*ay*tx)/foo1 47 | xy2 = ay**3/foo1 - L*ay*foo2 + L*ty*foo2 + (ax**2*ay)/foo1 + (ay*tx**2)/foo1 + (ay*ty**2)/foo1 - (2*ay**2*ty)/foo1 - (2*ax*ay*tx)/foo1 48 | if 0 * xx1 * xx2 * xy1 * xy2 == 0: 49 | hyps.append([xx1 - ax, xy1 - ay, 0]) 50 | hyps.append([xx2 - ax, xy2 - ay, 0]) 51 | angle = [1.0, 1.0, 0.0] 52 | Lmin = None 53 | for hypangle in hyps: 54 | normHypangle = norm(hypangle) + 1e-10 55 | xi = [ 56 | ax + L * hypangle[0] / normHypangle, 57 | ay + L * hypangle[1] / normHypangle, 58 | az + L * hypangle[2] / normHypangle, 59 | ] 60 | Li = (xi[0] - tx)**2 + (xi[1] - ty)**2; 61 | if Lmin is None or Lmin > Li: 62 | Lmin = Li 63 | angle = hypangle 64 | return angle 65 | 66 | 67 | # The initial 3D eastiamtion. 
It returns: 68 | # lines - logarithm of bone lengths 69 | # rootsx, rootsy, rootsz - 3D coordinates of head (root in the graph) positions 70 | # anglesx, anglesy, anglesz - absolute angles of limbs (3D coordinates) 71 | # Yx, Yy, Yz - resultant 3D coordinates 72 | def initialization(Xx, Xy, Xw, structure, sigma, randomNubersGenerator, dtype): 73 | T = Xx.shape[0] 74 | n = Xx.shape[1] 75 | 76 | nLines, nPoints = skeletalModel.structureStats(structure) 77 | 78 | lines = numpy.zeros((nLines, ), dtype=dtype) 79 | 80 | rootsx = Xx[0:T, 0] 81 | rootsy = Xy[0:T, 0] 82 | rootsz = numpy.zeros((T, ), dtype=dtype) 83 | 84 | rootsx = addNoise(rootsx, randomNubersGenerator, sigma) 85 | rootsy = addNoise(rootsy, randomNubersGenerator, sigma) 86 | rootsz = addNoise(rootsz, randomNubersGenerator, sigma) 87 | 88 | anglesx = numpy.zeros((T, len(structure)), dtype=dtype) 89 | anglesy = numpy.zeros((T, len(structure)), dtype=dtype) 90 | anglesz = numpy.zeros((T, len(structure)), dtype=dtype) 91 | 92 | Yx = numpy.zeros((T, n), dtype=dtype) 93 | Yy = numpy.zeros((T, n), dtype=dtype) 94 | Yz = numpy.zeros((T, n), dtype=dtype) 95 | Yx[0:T, 0] = rootsx 96 | Yy[0:T, 0] = rootsy 97 | Yz[0:T, 0] = rootsz 98 | 99 | Ls = {} 100 | for iBone in range(len(structure)): 101 | a, b, line = structure[iBone] 102 | if not line in Ls: 103 | Ls[line] = [] 104 | for t in range(T): 105 | ax = Xx[t, a] 106 | ay = Xy[t, a] 107 | bx = Xx[t, b] 108 | by = Xy[t, b] 109 | wa = Xw[t, a] 110 | wb = Xw[t, b] 111 | w = min([wa, wb]); 112 | L = norm([ax - bx, ay - by]) 113 | Ls[line].append(L) 114 | for i in range(len(lines)): 115 | lines[i] = math.log(perc(Ls[i], 0.5)) 116 | 117 | for iBone in range(len(structure)): 118 | a, b, line = structure[iBone] 119 | L = math.exp(lines[line]); 120 | for t in range(T): 121 | ax = Yx[t, a] 122 | ay = Yy[t, a] 123 | az = Yz[t, a] 124 | 125 | tx = Xx[t, b] 126 | ty = Xy[t, b] 127 | 128 | anglex, angley, anglez = computeB(ax, ay, az, tx, ty, L) 129 | if not 0.0 * anglex == 0.0: # no inf or nan 130 | anglex = 0.0 131 | if not 0.0 * angley == 0.0: # no inf or nan 132 | angley = 0.0 133 | if not 0.0 * anglez == 0.0: # no inf or nan 134 | anglez = 0.0 135 | if anglex == 0.0 and angley == 0.0 and anglez == 0.0: 136 | anglex = 1.0 137 | angley = 1.0 138 | anglez = 1.0 139 | if anglez < 0.0: 140 | anglez = -anglez 141 | 142 | anglez = anglez + 0.001 143 | 144 | normAngle = math.sqrt(anglex * anglex + angley * angley + anglez * anglez) + 1e-10 145 | anglesx[t, iBone] = anglex / normAngle 146 | anglesy[t, iBone] = angley / normAngle 147 | anglesz[t, iBone] = anglez / normAngle 148 | 149 | for t in range(T): 150 | Yx[t, b] = Yx[t, a] + L * anglesx[t, iBone] 151 | Yy[t, b] = Yy[t, a] + L * anglesy[t, iBone] 152 | Yz[t, b] = Yz[t, a] + L * anglesz[t, iBone] 153 | 154 | rootsx = rootsx.reshape((rootsx.shape[0], 1)) 155 | rootsy = rootsy.reshape((rootsy.shape[0], 1)) 156 | rootsz = rootsz.reshape((rootsz.shape[0], 1)) 157 | 158 | return lines, rootsx, rootsy, rootsz, anglesx, anglesy, anglesz, Yx, Yy, Yz 159 | -------------------------------------------------------------------------------- /3DposeEstimator/pose3D.py: -------------------------------------------------------------------------------- 1 | # standard 2 | 3 | # 3rd party 4 | import numpy 5 | import tensorflow as tf 6 | 7 | # our own 8 | import skeletalModel 9 | 10 | 11 | def backpropagationBasedFiltering( 12 | lines0_values, # initial (logarithm of) bones lenghts 13 | rootsx0_values, # head position 14 | rootsy0_values, 15 | rootsz0_values, 16 | 
anglesx0_values, # angles of limbs
17 |     anglesy0_values,
18 |     anglesz0_values,
19 |     tarx_values, # target
20 |     tary_values,
21 |     w_values, # weights of estimated points (likelihood of estimation)
22 |     structure,
23 |     dtype,
24 |     learningRate=0.1,
25 |     nCycles=1000,
26 |     regulatorRates=[0.001, 0.1],
27 | ):
28 | 
29 |     T = rootsx0_values.shape[0]
30 |     nBones, nPoints = skeletalModel.structureStats(structure)
31 |     nLimbs = len(structure)
32 | 
33 |     # vector of (logarithms of) bone lengths
34 |     # shape: (nBones, )
35 |     lines = tf.Variable(lines0_values, dtype=dtype)
36 |     # x coordinates of head
37 |     # shape: (T, 1)
38 |     rootsx = tf.Variable(rootsx0_values, dtype=dtype)
39 |     # y coordinates of head
40 |     rootsy = tf.Variable(rootsy0_values, dtype=dtype)
41 |     # z coordinates of head
42 |     rootsz = tf.Variable(rootsz0_values, dtype=dtype)
43 |     # x coordinate of angles
44 |     # shape: (T, nLimbs)
45 |     anglesx = tf.Variable(anglesx0_values, dtype=dtype)
46 |     # y coordinate of angles
47 |     anglesy = tf.Variable(anglesy0_values, dtype=dtype)
48 |     # z coordinate of angles
49 |     anglesz = tf.Variable(anglesz0_values, dtype=dtype)
50 | 
51 |     # target
52 |     # shape: (T, nPoints)
53 |     tarx = tf.placeholder(dtype=dtype)
54 |     tary = tf.placeholder(dtype=dtype)
55 |     # likelihood from the previous pose estimator
56 |     # shape: (T, nPoints)
57 |     w = tf.placeholder(dtype=dtype)
58 | 
59 |     # resultant coordinates; lists for now, concatenated into matrices later
60 |     # shape: (T, nPoints)
61 |     x = [None for i in range(nPoints)]
62 |     y = [None for i in range(nPoints)]
63 |     z = [None for i in range(nPoints)]
64 | 
65 |     # head first
66 |     x[0] = rootsx
67 |     y[0] = rootsy
68 |     z[0] = rootsz
69 | 
70 |     # now the other limbs
71 |     i = 0
72 |     # for numerical stability of the angle normalization
73 |     epsilon = 1e-10
74 |     for a, b, l in structure:
75 |         # limb length
76 |         L = tf.exp(lines[l])
77 |         # angle
78 |         Ax = anglesx[0:T, i:(i + 1)]
79 |         Ay = anglesy[0:T, i:(i + 1)]
80 |         Az = anglesz[0:T, i:(i + 1)]
81 |         # angle norm
82 |         normA = tf.sqrt(tf.square(Ax) + tf.square(Ay) + tf.square(Az)) + epsilon
83 |         # new joint position
84 |         x[b] = x[a] + L * Ax / normA
85 |         y[b] = y[a] + L * Ay / normA
86 |         z[b] = z[a] + L * Az / normA
87 |         i = i + 1
88 | 
89 |     # making matrices from the lists
90 |     x = tf.concat(x, axis=1)
91 |     y = tf.concat(y, axis=1)
92 |     z = tf.concat(z, axis=1)
93 | 
94 |     # weighted MSE
95 |     loss = tf.reduce_sum(w * tf.square(x - tarx) + w * tf.square(y - tary)) / (T * nPoints)
96 | 
97 |     # regularizers
98 |     # reg1 is the sum of bone lengths
99 |     reg1 = tf.reduce_sum(tf.exp(lines))
100 |     # reg2 is the mean squared frame-to-frame displacement (trajectory length)
101 |     dx = x[0:(T - 1), 0:nPoints] - x[1:T, 0:nPoints]
102 |     dy = y[0:(T - 1), 0:nPoints] - y[1:T, 0:nPoints]
103 |     dz = z[0:(T - 1), 0:nPoints] - z[1:T, 0:nPoints]
104 |     reg2 = tf.reduce_sum(tf.square(dx) + tf.square(dy) + tf.square(dz)) / ((T - 1) * nPoints)
105 | 
106 |     optimizeThis = loss + regulatorRates[0] * reg1 + regulatorRates[1] * reg2
107 | 
108 |     # the backpropagation loop
109 |     optimizer = tf.train.GradientDescentOptimizer(learningRate)
110 |     train = optimizer.minimize(optimizeThis)
111 |     init = tf.variables_initializer(tf.global_variables())
112 |     sess = tf.Session()
113 |     sess.run(init)
114 |     for iCycle in range(nCycles):
115 |         sess.run(train, {tarx: tarx_values, tary: tary_values, w: w_values})
116 |         print("iCycle = %3d, loss = %e" % (iCycle, sess.run([loss], {tarx: tarx_values, tary: tary_values, w: w_values})[0]))
117 | 
118 |     # returning the final coordinates
119 |     return sess.run([x, y, z], {})
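
# For reference, the forward kinematics computed by the graph above, rewritten
# in plain NumPy -- an illustrative sketch only, not used elsewhere in this
# repository: each bone's end joint is its start joint plus the bone length
# times the normalized angle vector.
def forwardKinematics(lines, rootsx, rootsy, rootsz, anglesx, anglesy, anglesz, structure, epsilon=1e-10):
    T = rootsx.shape[0]
    nBones, nPoints = skeletalModel.structureStats(structure)
    x = numpy.zeros((T, nPoints), dtype=rootsx.dtype)
    y = numpy.zeros((T, nPoints), dtype=rootsx.dtype)
    z = numpy.zeros((T, nPoints), dtype=rootsx.dtype)
    # the root (head) comes first
    x[:, 0:1] = rootsx
    y[:, 0:1] = rootsy
    z[:, 0:1] = rootsz
    # then the remaining joints, bone by bone (parents before children)
    for i, (a, b, l) in enumerate(structure):
        L = numpy.exp(lines[l])
        Ax, Ay, Az = anglesx[:, i], anglesy[:, i], anglesz[:, i]
        normA = numpy.sqrt(Ax * Ax + Ay * Ay + Az * Az) + epsilon
        x[:, b] = x[:, a] + L * Ax / normA
        y[:, b] = y[:, a] + L * Ay / normA
        z[:, b] = z[:, a] + L * Az / normA
    return x, y, z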
120 | 
121 | 
122 | 
123 | if __name__ == "__main__":
124 |     # a small debugging example (not part of the pipeline)
125 | 
126 |     #
127 |     # (0)
128 |     #  |
129 |     #  |
130 |     #  0
131 |     #  |
132 |     #  |
133 |     # (2)--1--(1)--1--(3)
134 |     #
135 |     structure = (
136 |         (0, 1, 0),
137 |         (1, 2, 1),
138 |         (1, 3, 1),
139 |     )
140 | 
141 |     T = 3
142 |     nBones, nPoints = skeletalModel.structureStats(structure)
143 |     nLimbs = len(structure)
144 | 
145 |     dtype = "float32"
146 | 
147 |     lines0_values = numpy.zeros((nBones, ), dtype=dtype)
148 |     rootsx0_values = numpy.ones((T, 1), dtype=dtype)
149 |     rootsy0_values = numpy.ones((T, 1), dtype=dtype)
150 |     rootsz0_values = numpy.ones((T, 1), dtype=dtype)
151 |     anglesx0_values = numpy.ones((T, nLimbs), dtype=dtype)
152 |     anglesy0_values = numpy.ones((T, nLimbs), dtype=dtype)
153 |     anglesz0_values = numpy.ones((T, nLimbs), dtype=dtype)
154 | 
155 |     w_values = numpy.ones((T, nPoints), dtype=dtype)
156 | 
157 |     tarx_values = numpy.ones((T, nPoints), dtype=dtype)
158 |     tary_values = numpy.ones((T, nPoints), dtype=dtype)
159 | 
160 |     x_values, y_values, z_values = backpropagationBasedFiltering(
161 |         lines0_values,
162 |         rootsx0_values,
163 |         rootsy0_values,
164 |         rootsz0_values,
165 |         anglesx0_values,
166 |         anglesy0_values,
167 |         anglesz0_values,
168 |         tarx_values,
169 |         tary_values,
170 |         w_values,
171 |         structure,
172 |         dtype,
173 |     )
174 | 
175 | 
--------------------------------------------------------------------------------
/3DposeEstimator/skeletalModel.py:
--------------------------------------------------------------------------------
1 | # This function gives our structure of the skeletal model
2 | 
3 | 
4 | def getSkeletalModelStructure():
5 |     # Definition of the skeleton model structure:
6 |     # The structure is an n-tuple of
7 |     #
8 |     #   (index of a start point, index of an end point, index of a bone)
9 |     #
10 |     # E.g., this simple skeletal model
11 |     #
12 |     #             (0)
13 |     #              |
14 |     #              |
15 |     #              0
16 |     #              |
17 |     #              |
18 |     #     (2)--1--(1)--1--(3)
19 |     #      |               |
20 |     #      |               |
21 |     #      2               2
22 |     #      |               |
23 |     #      |               |
24 |     #     (4)             (5)
25 |     #
26 |     # has this structure:
27 |     #
28 |     #   (
29 |     #     (0, 1, 0),
30 |     #     (1, 2, 1),
31 |     #     (1, 3, 1),
32 |     #     (2, 4, 2),
33 |     #     (3, 5, 2),
34 |     #   )
35 |     #
36 |     # Warning 1: The structure has to be a tree.
37 |     #
38 |     # Warning 2: The order isn't arbitrary. It runs from the root to the leaves.
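    # (Concretely: a bone's start joint must already have been placed by an
    # earlier bone. pose2Dto3D.initialization and pose3D.backpropagationBasedFiltering
    # both rely on this ordering when they walk the structure bone by bone.)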
39 | # 40 | 41 | return ( 42 | # head 43 | (0, 1, 0), 44 | 45 | # left shoulder 46 | (1, 2, 1), 47 | 48 | # left arm 49 | (2, 3, 2), 50 | (3, 4, 3), 51 | 52 | # right shoulder 53 | (1, 5, 1), 54 | 55 | # right arm 56 | (5, 6, 2), 57 | (6, 7, 3), 58 | 59 | # left hand - wrist 60 | (7, 8, 4), 61 | 62 | # left hand - palm 63 | (8, 9, 5), 64 | (8, 13, 9), 65 | (8, 17, 13), 66 | (8, 21, 17), 67 | (8, 25, 21), 68 | 69 | # left hand - 1st finger 70 | (9, 10, 6), 71 | (10, 11, 7), 72 | (11, 12, 8), 73 | 74 | # left hand - 2nd finger 75 | (13, 14, 10), 76 | (14, 15, 11), 77 | (15, 16, 12), 78 | 79 | # left hand - 3rd finger 80 | (17, 18, 14), 81 | (18, 19, 15), 82 | (19, 20, 16), 83 | 84 | # left hand - 4th finger 85 | (21, 22, 18), 86 | (22, 23, 19), 87 | (23, 24, 20), 88 | 89 | # left hand - 5th finger 90 | (25, 26, 22), 91 | (26, 27, 23), 92 | (27, 28, 24), 93 | 94 | # right hand - wrist 95 | (4, 29, 4), 96 | 97 | # right hand - palm 98 | (29, 30, 5), 99 | (29, 34, 9), 100 | (29, 38, 13), 101 | (29, 42, 17), 102 | (29, 46, 21), 103 | 104 | # right hand - 1st finger 105 | (30, 31, 6), 106 | (31, 32, 7), 107 | (32, 33, 8), 108 | 109 | # right hand - 2nd finger 110 | (34, 35, 10), 111 | (35, 36, 11), 112 | (36, 37, 12), 113 | 114 | # right hand - 3rd finger 115 | (38, 39, 14), 116 | (39, 40, 15), 117 | (40, 41, 16), 118 | 119 | # right hand - 4th finger 120 | (42, 43, 18), 121 | (43, 44, 19), 122 | (44, 45, 20), 123 | 124 | # right hand - 5th finger 125 | (46, 47, 22), 126 | (47, 48, 23), 127 | (48, 49, 24), 128 | ) 129 | 130 | 131 | # Computing number of joints and limbs 132 | def structureStats(structure): 133 | ps = {} 134 | ls = {} 135 | for a, b, l in structure: 136 | ps[a] = "gotcha" 137 | ps[b] = "gotcha" 138 | ls[l] = "gotcha" 139 | return len(ls), len(ps) 140 | -------------------------------------------------------------------------------- /SLSynthesis/accumulators.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import tensorflow 4 | import tensorflow as tf 5 | 6 | import keras.backend as K 7 | from keras.legacy import interfaces 8 | from keras.optimizers import Optimizer 9 | 10 | 11 | def convert_to_accumulate_gradient_optimizer(orig_optimizer, update_params_frequency, accumulate_sum_or_mean=True): 12 | if update_params_frequency < 1: 13 | raise ValueError('update_params_frequency must be >= 1') 14 | print('update_params_frequency: %s' % update_params_frequency) 15 | print('accumulate_sum_or_mean: %s' % accumulate_sum_or_mean) 16 | orig_get_gradients = orig_optimizer.get_gradients 17 | orig_get_updates = orig_optimizer.get_updates 18 | accumulated_iterations = K.variable(0, dtype='int64', name='accumulated_iterations') 19 | orig_optimizer.accumulated_iterations = accumulated_iterations 20 | 21 | def updated_get_gradients(self, loss, params): 22 | return self.accumulate_gradient_accumulators 23 | 24 | def updated_get_updates(self, loss, params): 25 | self.accumulate_gradient_accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 26 | updates_accumulated_iterations = K.update_add(accumulated_iterations, 1) 27 | new_grads = orig_get_gradients(loss, params) 28 | if not accumulate_sum_or_mean: 29 | new_grads = [g / K.cast(update_params_frequency, K.dtype(g)) for g in new_grads] 30 | self.updated_grads = [K.update_add(p, g) for p, g in zip(self.accumulate_gradient_accumulators, new_grads)] 31 | def update_function(): 32 | with tensorflow.control_dependencies(orig_get_updates(loss, params)): 33 | reset_grads = 
[K.update(p, K.zeros(K.int_shape(p), dtype=K.dtype(p))) for p in self.accumulate_gradient_accumulators] 34 | return tensorflow.group(*(reset_grads + [updates_accumulated_iterations])) 35 | def just_store_function(): 36 | return tensorflow.group(*[updates_accumulated_iterations]) 37 | 38 | update_switch = K.equal((updates_accumulated_iterations) % update_params_frequency, 0) 39 | 40 | with tensorflow.control_dependencies(self.updated_grads): 41 | self.updates = [K.switch(update_switch, update_function, just_store_function)] 42 | return self.updates 43 | 44 | orig_optimizer.get_gradients = updated_get_gradients.__get__(orig_optimizer, type(orig_optimizer)) 45 | orig_optimizer.get_updates = updated_get_updates.__get__(orig_optimizer, type(orig_optimizer)) 46 | 47 | 48 | class AdamAccumulate(Optimizer): 49 | 50 | def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False, accum_iters=1, **kwargs): 51 | if accum_iters < 1: 52 | raise ValueError('accum_iters must be >= 1') 53 | super(AdamAccumulate, self).__init__(**kwargs) 54 | with K.name_scope(self.__class__.__name__): 55 | self.iterations = K.variable(0, dtype='int64', name='iterations') 56 | self.lr_ = K.variable(value=lr, name='lr') 57 | self.beta_1 = K.variable(beta_1, name='beta_1') 58 | self.beta_2 = K.variable(beta_2, name='beta_2') 59 | self.decay = K.variable(decay, name='decay') 60 | if epsilon is None: 61 | epsilon = K.epsilon() 62 | self.epsilon = epsilon 63 | self.initial_decay = decay 64 | self.amsgrad = amsgrad 65 | self.accum_iters = K.variable(accum_iters, K.dtype(self.iterations)) 66 | self.accum_iters_float = K.cast(self.accum_iters, K.floatx()) 67 | 68 | @interfaces.legacy_get_updates_support 69 | def get_updates(self, loss, params): 70 | grads = self.get_gradients(loss, params) 71 | self.updates = [K.update_add(self.iterations, 1)] 72 | 73 | lr = self.lr_ 74 | 75 | completed_updates = K.cast(tf.floordiv(self.iterations, self.accum_iters), K.floatx()) 76 | 77 | if self.initial_decay > 0: 78 | lr = lr * (1. / (1. + self.decay * completed_updates)) 79 | 80 | t = completed_updates + 1 81 | 82 | lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))) 83 | 84 | # self.iterations incremented after processing a batch 85 | # batch: 1 2 3 4 5 6 7 8 9 86 | # self.iterations: 0 1 2 3 4 5 6 7 8 87 | # update_switch = 1: x x (if accum_iters=4) 88 | update_switch = K.equal((self.iterations + 1) % self.accum_iters, 0) 89 | update_switch = K.cast(update_switch, K.floatx()) 90 | 91 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 92 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 93 | gs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 94 | 95 | if self.amsgrad: 96 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 97 | else: 98 | vhats = [K.zeros((1, )) for _ in params] 99 | 100 | self.weights = [self.iterations] + ms + vs + vhats 101 | 102 | for p, g, m, v, vhat, tg in zip(params, grads, ms, vs, vhats, gs): 103 | 104 | sum_grad = tg + g 105 | #reest_grad = sum_grad 106 | avg_grad = sum_grad / self.accum_iters_float 107 | reest_grad = avg_grad 108 | 109 | m_t = (self.beta_1 * m) + (1. - self.beta_1) * reest_grad 110 | v_t = (self.beta_2 * v) + (1. 
- self.beta_2) * K.square(reest_grad) 111 | 112 | if self.amsgrad: 113 | vhat_t = K.maximum(vhat, v_t) 114 | p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) 115 | self.updates.append(K.update(vhat, (1 - update_switch) * vhat + update_switch * vhat_t)) 116 | else: 117 | p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) 118 | 119 | self.updates.append(K.update(m, (1 - update_switch) * m + update_switch * m_t)) 120 | self.updates.append(K.update(v, (1 - update_switch) * v + update_switch * v_t)) 121 | self.updates.append(K.update(tg, (1 - update_switch) * sum_grad)) 122 | new_p = p_t 123 | 124 | # Apply constraints. 125 | if getattr(p, 'constraint', None) is not None: 126 | new_p = p.constraint(new_p) 127 | 128 | self.updates.append(K.update(p, (1 - update_switch) * p + update_switch * new_p)) 129 | return self.updates 130 | 131 | def get_config(self): 132 | config = {'lr': float(K.get_value(self.lr_)), 133 | 'beta_1': float(K.get_value(self.beta_1)), 134 | 'beta_2': float(K.get_value(self.beta_2)), 135 | 'decay': float(K.get_value(self.decay)), 136 | 'epsilon': self.epsilon, 137 | 'amsgrad': self.amsgrad} 138 | base_config = super(AdamAccumulate, self).get_config() 139 | return dict(list(base_config.items()) + list(config.items())) 140 | -------------------------------------------------------------------------------- /SLSynthesis/cleaning.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | import re 4 | import os 5 | import shutil 6 | import codecs 7 | import math 8 | import copy 9 | import h5py 10 | import random 11 | 12 | import numpy 13 | 14 | 15 | def computeDist(a, b): 16 | La = a.shape[0] 17 | Lb = b.shape[0] 18 | dim = a.shape[1] 19 | sumaa = numpy.sum(a * a, axis=1, keepdims=True) 20 | sumbb = numpy.sum(b * b, axis=1, keepdims=True) 21 | D2 = numpy.dot(sumaa, numpy.ones((1, Lb), dtype=a.dtype)) - 2 * numpy.dot(a, b.T) + numpy.dot(numpy.ones((La, 1), dtype=a.dtype), sumbb.T) 22 | return D2 23 | 24 | 25 | def loadMatrixFloatText(fname, dtype="float32"): 26 | M = [] 27 | f = open(fname) 28 | for line in f: 29 | M.append([float(x) for x in line.strip().split()]) 30 | f.close() 31 | M = numpy.asarray(M, dtype=dtype) 32 | return M 33 | 34 | 35 | if __name__ == "__main__": 36 | fnameIn = "data/keypoints-filt-deltas-norm.h5" 37 | mu = loadMatrixFloatText("temp/mu", dtype="float32") 38 | hfIn = h5py.File(fnameIn, "r") 39 | f = open("data/clear.list", "w") 40 | for key in hfIn: 41 | x = numpy.array(hfIn.get(key)) 42 | delta = x - mu 43 | d = numpy.mean(numpy.sum(delta * delta, axis=1)) 44 | print(d) 45 | if d < 150.0: 46 | f.write("%s\n" % key) 47 | f.close() 48 | hfIn.close() 49 | -------------------------------------------------------------------------------- /SLSynthesis/data/info: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /SLSynthesis/data/info_spk: -------------------------------------------------------------------------------- 1 | rec00000 m 2 | rec00001 m 3 | rec00002 m 4 | rec00003 s 5 | rec00004 h 6 | rec00005 h 7 | rec00006 v 8 | rec00007 h 9 | rec00008 t 10 | rec00009 s 11 | rec00010 s 12 | rec00011 s 13 | rec00012 m 14 | rec00013 m 15 | rec00014 s 16 | rec00015 m 17 | rec00016 h 18 | -------------------------------------------------------------------------------- /SLSynthesis/data/info_today: -------------------------------------------------------------------------------- 1 | 
rec00000 utery 2 | rec00001 pondeli 3 | rec00002 ctvrtek 4 | rec00003 utery 5 | rec00004 utery 6 | rec00005 streda 7 | rec00006 ctvrtek 8 | rec00007 pondeli 9 | rec00008 streda 10 | rec00009 sobota 11 | rec00010 sobota 12 | rec00011 patek 13 | rec00012 ctvrtek 14 | rec00013 ctvrtek 15 | rec00014 patek 16 | rec00015 streda 17 | rec00016 utery 18 | -------------------------------------------------------------------------------- /SLSynthesis/data/recs.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gopeith/SignLanguageProcessing/a312367087b5ebf01a435ef513db3fe80d8438a2/SLSynthesis/data/recs.h5 -------------------------------------------------------------------------------- /SLSynthesis/data/stop.list: -------------------------------------------------------------------------------- 1 | rec00014 2 | -------------------------------------------------------------------------------- /SLSynthesis/data/text: -------------------------------------------------------------------------------- 1 | rec00000 0 323 50 324 325 673 71 1875 207 678 256 329 13 3476 496 207 676 8779 1110 5 327 177 682 526 2089 5 333 13 286 10155 1110 71 685 177 1109 334 55 43 336 1356 325 688 339 71 342 343 261 339 1024 345 690 1835 349 350 55 56 2 | rec00001 0 323 50 324 325 673 327 325 326 71 1875 207 677 4210 71 342 71 498 5 686 177 8779 71 1027 1110 681 676 5 2089 5 333 13 4347 375 86 713 732 1110 4212 256 7569 55 43 336 394 325 429 339 13 340 341 71 342 2049 1757 155 339 55 56 0 101 50 348 40 1024 345 13 346 347 1835 349 350 1906 55 56 3 | rec00002 0 324 325 3745 13 286 675 676 326 71 498 5 686 327 678 395 712 713 4210 55 43 336 57 325 1087 341 71 342 343 1757 154 339 55 56 0 345 13 346 347 50 820 619 349 350 55 56 4 | rec00003 0 323 50 673 145 324 71 1875 207 677 678 71 686 675 676 4662 105 6580 4664 50 35 160 327 328 177 71 674 1630 55 43 336 57 325 805 339 13 340 341 71 342 343 383 339 55 56 0 345 13 216 346 347 690 71 1115 55 56 5 | rec00004 0 323 50 324 325 326 151 152 2291 325 673 5 327 35 328 1110 71 342 6145 55 43 336 154 325 57 339 13 286 676 145 1279 71 342 343 1757 383 55 56 0 101 50 40 1024 345 348 349 350 55 56 6 | rec00005 0 323 50 324 325 673 676 145 326 327 4662 332 5 1422 4210 55 43 336 1757 154 145 1844 383 339 71 342 2049 1757 429 339 71 11653 2049 1757 155 339 55 56 0 261 11291 345 13 346 347 50 820 619 349 350 55 56 7 | rec00006 0 323 50 675 326 325 324 151 152 11047 1109 334 55 43 336 344 325 2292 339 71 342 343 1007 71 3938 145 344 339 55 56 0 101 50 345 13 216 346 347 349 350 55 56 8 | rec00007 0 323 50 2097 325 326 330 35 160 327 328 332 256 545 334 333 466 127 71 685 50 334 2398 5 101 1521 105 329 256 682 55 43 336 50 192 684 325 313 339 71 685 160 50 676 177 1047 339 13 43 4347 146 50 336 192 337 325 338 339 55 56 0 345 13 346 347 50 820 619 349 350 55 56 9 | rec00008 0 323 50 2097 325 326 330 35 328 332 55 43 336 35 50 1112 192 1775 5 1275 339 71 342 343 261 339 71 3938 35 9020 24057 9242 145 71 1775 339 55 56 0 101 50 40 345 216 346 347 348 349 350 55 56 10 | rec00009 0 323 50 675 4211 334 676 329 151 152 246 484 673 5 678 526 2040 121 329 13 286 71 342 8779 4210 55 43 336 155 325 366 339 13 340 341 71 342 343 394 55 56 0 345 13 346 347 50 820 619 1367 350 55 56 11 | rec00010 0 323 50 324 325 673 675 71 674 325 326 71 1875 207 678 256 329 676 682 71 1876 5 13 1631 327 177 1016 105 1362 333 246 685 1878 1110 5 11047 1109 334 55 43 336 13 286 2292 325 338 339 71 1630 5 13 1631 338 325 1366 71 342 343 1775 13 3091 5 689 343 337 
339 55 56 0 101 50 345 13 216 346 347 349 350 55 56 12 | rec00011 0 177 13 2098 71 8934 166 216 1016 13227 336 11144 145 375 1757 1333 339 13 31292 31293 1507 145 1757 1364 1013 1015 177 312 4210 38 35 328 13 1920 13 445 2054 5 5144 946 5 71 1873 246 1114 5422 186 6045 4871 146 71 273 1630 5 1631 4968 160 145 261 984 5591 55 56 0 323 50 673 145 324 71 674 675 326 246 685 105 4210 676 8779 2089 5 333 13 286 1110 681 676 13 4347 375 2966 713 4212 333 35 160 2735 13306 55 43 336 1757 57 145 1757 154 339 71 342 343 1757 366 55 56 0 101 50 40 345 13 216 346 347 348 349 350 55 56 13 | rec00012 0 323 1684 2624 14373 676 50 673 105 545 334 3212 13 6736 496 207 55 43 336 35 9242 71 1757 429 325 394 339 327 145 71 155 339 13 340 341 71 342 50 343 394 101 466 50 40 345 216 346 347 348 349 350 55 56 14 | rec00013 0 323 50 2097 325 326 2089 71 342 61 2291 6912 334 327 329 256 177 682 55 43 336 1047 325 383 6569 339 71 342 343 684 55 56 0 345 13 346 347 50 820 619 349 350 55 56 15 | rec00014 0 323 50 324 325 673 71 674 325 326 2089 5 333 50 335 334 732 71 2290 4296 330 35 327 328 332 5254 4662 55 43 336 35 50 1112 192 57 325 805 339 71 342 343 154 339 55 56 0 101 50 345 13 216 346 347 349 350 55 56 16 | rec00015 0 50 324 325 673 71 484 5 685 1028 324 325 326 327 71 686 676 678 395 86 713 4210 55 43 336 366 325 261 339 13 340 341 71 342 343 394 71 11653 145 383 339 55 56 0 101 50 40 1024 345 348 1835 2673 350 55 56 17 | rec00016 0 323 50 326 1274 2290 2097 675 327 332 71 1630 5 13 1631 676 177 545 334 55 43 336 1007 325 344 339 71 1006 5 674 1630 5 1631 145 688 339 13 340 341 71 342 343 805 339 55 56 0 40 1024 345 50 101 348 11607 35 1198 323 2673 350 55 56 18 | -------------------------------------------------------------------------------- /SLSynthesis/data/training.list: -------------------------------------------------------------------------------- 1 | rec00009 2 | rec00013 3 | rec00006 4 | rec00003 5 | rec00016 6 | rec00002 7 | rec00010 8 | rec00001 9 | rec00005 10 | rec00008 11 | rec00011 12 | rec00007 13 | rec00004 14 | rec00012 15 | rec00015 16 | rec00000 17 | -------------------------------------------------------------------------------- /SLSynthesis/demo_evaluation.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import math 5 | 6 | import numpy 7 | 8 | import tensorflow as tf 9 | import keras 10 | from keras.models import Sequential, Model 11 | from keras.layers import Dense, Add, Dropout 12 | from keras import backend as K 13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract 14 | from keras.utils import to_categorical 15 | 16 | from modeling import * 17 | from data import * 18 | from training import * 19 | import accumulators 20 | from dtw import dtw, fakedtw 21 | from utils import * 22 | 23 | from model_FF_05 import * 24 | 25 | # loading model and evalueating on given set 26 | 27 | 28 | list_eval = loadList("data/stop.list") 29 | 30 | fname_model_weights = "models/%s.h5" % model_ID 31 | 32 | model4output.load_weights(fname_model_weights) 33 | 34 | errors, example, outputs = eval_dataset( 35 | list_eval, 36 | loader, 37 | model4output, 38 | model4example, 39 | funs_eval=len(ys)*[compute_errorMSEDTW] 40 | ) 41 | 42 | print(errors) 43 | -------------------------------------------------------------------------------- /SLSynthesis/demo_training.py: 
-------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import math 5 | 6 | import numpy 7 | 8 | import tensorflow as tf 9 | import keras 10 | from keras.models import Sequential, Model 11 | from keras.layers import Dense, Add, Dropout 12 | from keras import backend as K 13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract 14 | from keras.utils import to_categorical 15 | 16 | from modeling import * 17 | from data import * 18 | from training import * 19 | import accumulators 20 | from dtw import dtw, fakedtw 21 | from utils import * 22 | 23 | from model_FF_05 import * 24 | 25 | 26 | # file with the best (on stop set) results 27 | fname_model_weights = "models/%s.h5" % model_ID 28 | 29 | # file with an example of the chosen output 30 | fname_example = "temp/%s-example.txt" % model_ID 31 | 32 | list_training = loadList("data/training.list") 33 | list_stop = loadList("data/stop.list") 34 | 35 | 36 | n_files_in_batch = 128 37 | if n_files_in_batch > len(list_training): 38 | n_files_in_batch = len(list_training) # carefull 39 | 40 | # how often is model evaluated using the stop set. 41 | # 1 means after every reestimation 42 | n_batches = 1 43 | 44 | # when stop training 45 | max_steps = 100000 46 | 47 | optimizer = accumulators.AdamAccumulate(lr=0.001, decay=0.0, accum_iters=n_files_in_batch) 48 | 49 | wMSE = 1.0 # weight for MSE loss 50 | wATT = 1.0 / 98.0 # weight for our attention based loss loss 51 | 52 | training( 53 | model4training=model4training, 54 | model4output=model4output, 55 | model4example=model4example, 56 | datamod4training=synch, 57 | loss=len(ys)*[MSE]+len(ys_att)*[lossAttMSE], 58 | wloss=len(ys)*[wMSE]+len(ys_att)*[wATT], 59 | loader=loader, 60 | list_training=list_training, 61 | list_stop=list_stop, 62 | mask4print="%.4f", 63 | optimizer=optimizer, 64 | testing_interval=n_batches*n_files_in_batch, 65 | fname_example=fname_example, 66 | fname_model_weights=fname_model_weights, 67 | funs_eval=len(ys)*[compute_errorMSEDTW], 68 | max_steps=max_steps, 69 | ) 70 | 71 | hfTar.close() 72 | -------------------------------------------------------------------------------- /SLSynthesis/demo_training_FF_20200603-clear.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import math 5 | 6 | import numpy 7 | 8 | import tensorflow as tf 9 | import keras 10 | from keras.models import Sequential, Model 11 | from keras.layers import Dense, Add, Dropout 12 | from keras import backend as K 13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract 14 | from keras.utils import to_categorical 15 | 16 | from modeling import * 17 | from data import * 18 | from training import * 19 | import accumulators 20 | from dtw import dtw, fakedtw 21 | from utils import * 22 | 23 | from model_FF_20200603 import * 24 | 25 | 26 | # file with the best (on stop set) results 27 | fname_model_weights = "models/%s-clear.h5" % model_ID 28 | 29 | # file with an example of the chosen output 30 | fname_example = "temp/%s-clear-example.txt" % model_ID 31 | 32 | list_training = loadList("data/training.list") 33 | list_stop = loadList("data/stop.list") 34 | 35 | # cleaning sets 36 | list_clear = loadList("data/clear.list") 37 | list_training = [key for key in list_training if key in 
list_clear] 38 | list_stop = [key for key in list_stop if key in list_clear] 39 | 40 | random.shuffle(list_training) 41 | #random.shuffle(list_stop) 42 | 43 | # for debuging 44 | #list_stop = list_stop[0:64] 45 | #list_stop = list_stop[0:1] 46 | 47 | n_files_in_batch = 512 48 | if n_files_in_batch > len(list_training): 49 | n_files_in_batch = len(list_training) # carefull 50 | 51 | # how often is model evaluated using the stop set. 52 | # 1 means after every reestimation 53 | n_batches = 1 54 | 55 | # when stop training 56 | max_steps = 10 * len(list_training) 57 | 58 | optimizer = accumulators.AdamAccumulate(lr=0.001, decay=0.0, accum_iters=n_files_in_batch) 59 | 60 | wMSE = 1.0 # weight for MSE loss 61 | wATT = 1.0 / 98.0 # weight for our attention based loss loss 62 | 63 | training( 64 | model4training=model4training, 65 | model4output=model4output, 66 | model4example=model4example, 67 | datamod4training=synch, 68 | loss=len(ys)*[MSE]+len(ys_att)*[lossAttMSE], 69 | wloss=len(ys)*[wMSE]+len(ys_att)*[wATT], 70 | loader=loader, 71 | list_training=list_training, 72 | list_stop=list_stop, 73 | mask4print="%.4f", 74 | optimizer=optimizer, 75 | testing_interval=n_batches*n_files_in_batch, 76 | fname_example=fname_example, 77 | fname_model_weights=fname_model_weights, 78 | funs_eval=len(ys)*[compute_errorMSEDTW], 79 | max_steps=max_steps, 80 | ) 81 | 82 | hfTar.close() 83 | -------------------------------------------------------------------------------- /SLSynthesis/demo_training_FF_20200603.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import math 5 | 6 | import numpy 7 | 8 | import tensorflow as tf 9 | import keras 10 | from keras.models import Sequential, Model 11 | from keras.layers import Dense, Add, Dropout 12 | from keras import backend as K 13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract 14 | from keras.utils import to_categorical 15 | 16 | from modeling import * 17 | from data import * 18 | from training import * 19 | import accumulators 20 | from dtw import dtw, fakedtw 21 | from utils import * 22 | 23 | from model_FF_20200603 import * 24 | 25 | 26 | # file with the best (on stop set) results 27 | fname_model_weights = "models/%s.h5" % model_ID 28 | 29 | # file with an example of the chosen output 30 | fname_example = "temp/%s-example.txt" % model_ID 31 | 32 | list_training = loadList("data/training.list") 33 | list_stop = loadList("data/stop.list") 34 | 35 | # not cleaning sets 36 | #list_clear = loadList("data/clear.list") 37 | #list_training = [key for key in list_training if key in list_clear] 38 | #list_stop = [key for key in list_stop if key in list_clear] 39 | 40 | random.shuffle(list_training) 41 | #random.shuffle(list_stop) 42 | 43 | # for debuging 44 | #list_stop = list_stop[0:64] 45 | #list_stop = list_stop[0:1] 46 | 47 | n_files_in_batch = 512 48 | if n_files_in_batch > len(list_training): 49 | n_files_in_batch = len(list_training) # carefull 50 | 51 | # how often is model evaluated using the stop set. 
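# (measured in processed files: the stop set is evaluated every
# n_batches * n_files_in_batch files, i.e. every 512 files with the
# values used below)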
52 | # 1 means after every reestimation 53 | n_batches = 1 54 | 55 | # when stop training 56 | max_steps = 10 * len(list_training) 57 | 58 | optimizer = accumulators.AdamAccumulate(lr=0.001, decay=0.0, accum_iters=n_files_in_batch) 59 | 60 | wMSE = 1.0 # weight for MSE loss 61 | wATT = 1.0 / 98.0 # weight for our attention based loss loss 62 | 63 | training( 64 | model4training=model4training, 65 | model4output=model4output, 66 | model4example=model4example, 67 | datamod4training=synch, 68 | loss=len(ys)*[MSE]+len(ys_att)*[lossAttMSE], 69 | wloss=len(ys)*[wMSE]+len(ys_att)*[wATT], 70 | loader=loader, 71 | list_training=list_training, 72 | list_stop=list_stop, 73 | mask4print="%.4f", 74 | optimizer=optimizer, 75 | testing_interval=n_batches*n_files_in_batch, 76 | fname_example=fname_example, 77 | fname_model_weights=fname_model_weights, 78 | funs_eval=len(ys)*[compute_errorMSEDTW], 79 | max_steps=max_steps, 80 | ) 81 | 82 | hfTar.close() 83 | -------------------------------------------------------------------------------- /SLSynthesis/demo_training_FF_20200604-clear.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import math 5 | 6 | import numpy 7 | 8 | import tensorflow as tf 9 | import keras 10 | from keras.models import Sequential, Model 11 | from keras.layers import Dense, Add, Dropout 12 | from keras import backend as K 13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract 14 | from keras.utils import to_categorical 15 | 16 | from modeling import * 17 | from data import * 18 | from training import * 19 | import accumulators 20 | from dtw import dtw, fakedtw 21 | from utils import * 22 | 23 | from model_FF_20200604 import * 24 | 25 | 26 | # file with the best (on stop set) results 27 | fname_model_weights = "models/%s-clear.h5" % model_ID 28 | 29 | # file with an example of the chosen output 30 | fname_example = "temp/%s-clear-example.txt" % model_ID 31 | 32 | list_training = loadList("data/training.list") 33 | list_stop = loadList("data/stop.list") 34 | 35 | # cleaning sets 36 | list_clear = loadList("data/clear.list") 37 | list_training = [key for key in list_training if key in list_clear] 38 | list_stop = [key for key in list_stop if key in list_clear] 39 | 40 | random.shuffle(list_training) 41 | random.shuffle(list_stop) 42 | 43 | # for debuging 44 | list_stop = list_stop[0:256] 45 | #list_stop = list_stop[0:1] 46 | 47 | n_files_in_batch = 256 48 | if n_files_in_batch > len(list_training): 49 | n_files_in_batch = len(list_training) # carefull 50 | 51 | # how often is model evaluated using the stop set. 
52 | # 1 means after every reestimation 53 | n_batches = 1 54 | 55 | # when stop training 56 | max_steps = 10 * len(list_training) 57 | 58 | optimizer = accumulators.AdamAccumulate(lr=0.001, decay=0.0, accum_iters=n_files_in_batch) 59 | 60 | wMSE = 0.2 # weight for MSE loss 61 | wATT = 2.0 / 98.0 # weight for our attention based loss loss 62 | 63 | training( 64 | model4training=model4training, 65 | model4output=model4output, 66 | model4example=model4example, 67 | datamod4training=synch, 68 | loss=len(ys)*[MSE]+len(ys_att)*[lambda y_true, y_pred: lossAttMSE(y_true, y_pred, thr=0.2)], 69 | wloss=len(ys)*[wMSE]+len(ys_att)*[wATT], 70 | loader=loader, 71 | list_training=list_training, 72 | list_stop=list_stop, 73 | mask4print="%.4f", 74 | optimizer=optimizer, 75 | testing_interval=n_batches*n_files_in_batch, 76 | fname_example=fname_example, 77 | fname_model_weights=fname_model_weights, 78 | funs_eval=len(ys)*[compute_errorMSEDTW], 79 | max_steps=max_steps, 80 | ) 81 | 82 | hfTar.close() 83 | -------------------------------------------------------------------------------- /SLSynthesis/demo_training_FF_20200604.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import math 5 | 6 | import numpy 7 | 8 | import tensorflow as tf 9 | import keras 10 | from keras.models import Sequential, Model 11 | from keras.layers import Dense, Add, Dropout 12 | from keras import backend as K 13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract 14 | from keras.utils import to_categorical 15 | 16 | from modeling import * 17 | from data import * 18 | from training import * 19 | import accumulators 20 | from dtw import dtw, fakedtw 21 | from utils import * 22 | 23 | from model_FF_20200604 import * 24 | 25 | 26 | # file with the best (on stop set) results 27 | fname_model_weights = "models/%s.h5" % model_ID 28 | 29 | # file with an example of the chosen output 30 | fname_example = "temp/%s-example.txt" % model_ID 31 | 32 | list_training = loadList("data/training.list") 33 | list_stop = loadList("data/stop.list") 34 | 35 | # cleaning sets 36 | #list_clear = loadList("data/clear.list") 37 | #list_training = [key for key in list_training if key in list_clear] 38 | #list_stop = [key for key in list_stop if key in list_clear] 39 | 40 | random.shuffle(list_training) 41 | random.shuffle(list_stop) 42 | 43 | # for debuging 44 | list_stop = list_stop[0:256] 45 | #list_stop = list_stop[0:1] 46 | 47 | n_files_in_batch = 256 48 | if n_files_in_batch > len(list_training): 49 | n_files_in_batch = len(list_training) # carefull 50 | 51 | # how often is model evaluated using the stop set. 
52 | # 1 means after every reestimation 53 | n_batches = 1 54 | 55 | # when stop training 56 | max_steps = 10 * len(list_training) 57 | 58 | optimizer = accumulators.AdamAccumulate(lr=0.001, decay=0.0, accum_iters=n_files_in_batch) 59 | 60 | wMSE = 0.2 # weight for MSE loss 61 | wATT = 2.0 / 98.0 # weight for our attention based loss loss 62 | 63 | training( 64 | model4training=model4training, 65 | model4output=model4output, 66 | model4example=model4example, 67 | datamod4training=synch, 68 | loss=len(ys)*[MSE]+len(ys_att)*[lambda y_true, y_pred: lossAttMSE(y_true, y_pred, thr=0.2)], 69 | wloss=len(ys)*[wMSE]+len(ys_att)*[wATT], 70 | loader=loader, 71 | list_training=list_training, 72 | list_stop=list_stop, 73 | mask4print="%.4f", 74 | optimizer=optimizer, 75 | testing_interval=n_batches*n_files_in_batch, 76 | fname_example=fname_example, 77 | fname_model_weights=fname_model_weights, 78 | funs_eval=len(ys)*[compute_errorMSEDTW], 79 | max_steps=max_steps, 80 | ) 81 | 82 | hfTar.close() 83 | -------------------------------------------------------------------------------- /SLSynthesis/dtw.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import numpy 4 | 5 | 6 | def min(l): 7 | m = None 8 | am = None 9 | for i in range(len(l)): 10 | if m is None or m > l[i]: 11 | m = l[i] 12 | am = i 13 | return m, am 14 | 15 | 16 | def min3(a, b, c): 17 | if a > b: 18 | if b > c: 19 | return c, 2 20 | return b, 1 21 | if a > c: 22 | return c, 2 23 | return a, 0 24 | 25 | 26 | def isOut(pa, pb, La, Lb, t=0.1): 27 | qa = pa / float(La) 28 | qb = pb / float(Lb) 29 | return qa - qb > t or qb - qa > t 30 | 31 | 32 | def dtw(a, b, maxDelta=1.0, D=None): 33 | dtype = a.dtype 34 | 35 | inf = 1e+30 36 | 37 | La = a.shape[0] 38 | Lb = b.shape[0] 39 | dim = a.shape[1] 40 | 41 | phi = inf + numpy.zeros((La, Lb), dtype=dtype) 42 | psi = -1 + numpy.zeros((La, Lb), dtype="int64") 43 | 44 | if D is None: 45 | # computing distance matrix 46 | sumaa = numpy.sum(a * a, axis=1, keepdims=True) 47 | sumbb = numpy.sum(b * b, axis=1, keepdims=True) 48 | D2 = numpy.dot(sumaa, numpy.ones((1, Lb), dtype=dtype)) - 2 * numpy.dot(a, b.T) + numpy.dot(numpy.ones((La, 1), dtype=dtype), sumbb.T) 49 | D = D2 50 | 51 | LbLa = float(Lb) / float(La) 52 | 53 | for ia in range(La): 54 | #for ib in range(Lb): 55 | ibMin = int(LbLa * (ia + 1) - maxDelta * Lb - 1) 56 | ibMax = int(LbLa * (ia + 1) + maxDelta * Lb - 1) 57 | if ibMin < 0: 58 | ibMin = 0 59 | if ibMax > Lb - 1: 60 | ibMax = Lb - 1 61 | for ib in range(ibMin, ibMax + 1): 62 | if ia == 0 and ib == 0: 63 | phi[ia, ib] = D[ia, ib] 64 | else: 65 | #delta = ((ia + 1.0) / float(La)) - ((ib + 1.0) / float(Lb)) 66 | #if delta > maxDelta or delta < -maxDelta: 67 | # continue 68 | if ia - 1 >= 0 and ib - 1 >= 0: 69 | if phi[ia, ib] > phi[ia - 1, ib - 1]: 70 | phi[ia, ib] = phi[ia - 1, ib - 1] 71 | psi[ia, ib] = 3 72 | if ia - 1 >= 0: 73 | if phi[ia, ib] > phi[ia - 1, ib]: 74 | phi[ia, ib] = phi[ia - 1, ib] 75 | psi[ia, ib] = 1 76 | if ib - 1 >= 0: 77 | if phi[ia, ib] > phi[ia, ib - 1]: 78 | phi[ia, ib] = phi[ia, ib - 1] 79 | psi[ia, ib] = 2 80 | if phi[ia, ib] < inf: 81 | phi[ia, ib] = phi[ia, ib] + D[ia, ib] 82 | 83 | 84 | point = (La - 1, Lb - 1) 85 | patha = [point[0]] 86 | pathb = [point[1]] 87 | while True: 88 | stepType = psi[point[0], point[1]] 89 | if stepType <= 0: 90 | break 91 | if stepType == 1: 92 | point = (point[0] - 1, point[1]) 93 | if stepType == 2: 94 | point = (point[0], point[1] - 1) 95 | if stepType == 
3: 96 | point = (point[0] - 1, point[1] - 1) 97 | patha.insert(0, point[0]) 98 | pathb.insert(0, point[1]) 99 | 100 | return phi[La - 1, Lb - 1], patha, pathb 101 | 102 | 103 | 104 | def fakedtw(a, b, maxDelta="whatever"): 105 | if a.shape[0] > b.shape[0]: 106 | T = a.shape[0] 107 | else: 108 | T = b.shape[0] 109 | patha = numpy.zeros((T, ), dtype="int64") 110 | pathb = numpy.zeros((T, ), dtype="int64") 111 | for t in range(T): 112 | patha[t] = int(((a.shape[0] - 1) * t) / (T - 1)) 113 | pathb[t] = int(((b.shape[0] - 1) * t) / (T - 1)) 114 | return None, patha, pathb 115 | 116 | 117 | -------------------------------------------------------------------------------- /SLSynthesis/model_FF_05.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import math 5 | 6 | import numpy 7 | 8 | import tensorflow as tf 9 | import keras 10 | from keras.models import Sequential, Model 11 | from keras.layers import Dense, Add, Dropout 12 | from keras import backend as K 13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract 14 | from keras.utils import to_categorical 15 | 16 | from modeling import * 17 | from data import * 18 | from dtw import dtw, fakedtw 19 | from utils import * 20 | from training import * 21 | 22 | 23 | # filenames for weights, etc. are derived from this ID 24 | model_ID = "FF_05" 25 | 26 | dtype = "float32" 27 | 28 | n_steps = 7 # number of frames for each word 29 | dim_y = 98 # ouput size 30 | dim_emb = 64 # embeddings size 31 | dim_rep = 1024 # "repository" size 32 | dim_h = 512 # hidden layers size 33 | 34 | Nh = 5 # number of modules in the translator 35 | 36 | if True: 37 | # avoiding using multiple CPUs (not always succesfully) and memory leaks in tensorflow 38 | conf = K.tf.ConfigProto( 39 | device_count={'CPU': 1}, 40 | intra_op_parallelism_threads=1, 41 | inter_op_parallelism_threads=1, 42 | ) 43 | K.set_session(K.tf.Session(config=conf)) 44 | 45 | # make experiment deterministic 46 | random.seed(123) 47 | numpy.random.seed(123) 48 | 49 | # This function returns a dictionary with inputs and targets. 50 | # Keys "inputs" and "targets" are mandatory. 51 | # Values must be lists not tuples. The lists will be appended and modified when synchronization is performed. 52 | def loader(key): 53 | video = numpy.array(hfTar.get(key)) 54 | text_int = text[key] 55 | text_float = to_categorical(text_int, num_classes=dim_text) 56 | 57 | return { 58 | "inputs": [ 59 | text_float, 60 | info_spk[key], 61 | info_today[key], 62 | ], 63 | "targets": [ 64 | video for y in ys + ys_att 65 | ], 66 | "key": key, 67 | } 68 | 69 | 70 | # A function for tied layers. 
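# Layers are cached in the dictionary Ls under the name Lname: calling
# get_dense twice with the same name returns the same Dense2 instance, so
# those call sites share (tie) one set of weights. When the global list ws is
# non-empty, initial weights are popped from it, which is how weights loaded
# from a file (see fname_weights below) are handed to the layers.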
71 | def get_dense(Lname, dim, stop, activation=lambda x:x, use_bias=True, init_weights=None): 72 | # create or use already created layer 73 | if not Lname in Ls: 74 | if use_bias: 75 | weights=[ws.pop(0), ws.pop(0)] if len(ws) > 0 else init_weights 76 | else: 77 | weights=[ws.pop(0)] if len(ws) > 0 else init_weights 78 | Ls[Lname] = Dense2(dim, activation=activation, use_bias=use_bias, weights=weights) 79 | L = Ls[Lname] 80 | L.stop = stop 81 | return L 82 | 83 | 84 | # "embedding" layer that code input language A 85 | def embA(x, stop=False, mod=""): 86 | x = ConcatenateMatrixAndVectors()([x] + addinf) 87 | L = get_dense("Aemb" + mod, dim_emb, stop, use_bias=False) 88 | x = L(x) 89 | x = LayerNormalization()(x) 90 | return x 91 | 92 | 93 | def translation_from_A_to_B(x, stop=False, mod="", Nh=0): 94 | 95 | for i in range(Nh): 96 | x_old = x 97 | x = Splicing([-1, 0, 1])(x) 98 | x = ConcatenateMatrixAndVectors()([x] + addinf) 99 | L = get_dense(("A_a[%d]" % i) + mod, dim_h, stop, activation=keras.activations.relu, use_bias=True) 100 | x = L(x) 101 | x = Dropout(rate=pDropout)(x) 102 | L = get_dense(("A_b[%d]" % i) + mod, dim_emb, stop, use_bias=True) 103 | x = L(x) 104 | x = Add()([x, x_old]) 105 | x = LayerNormalization()(x) 106 | 107 | return x 108 | 109 | 110 | # "deembedding" layer that decode output, ie. language B 111 | def invEmbB(x, stop=False, mod=""): 112 | x = ConcatenateMatrixAndVectors()([x] + addinf) 113 | L = get_dense("An_a" + mod, n_steps * dim_emb, stop, use_bias=True) 114 | x = L(x) 115 | x = MakeSteps2(nSteps=n_steps, dimy=dim_emb)(x) 116 | x = Splicing([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])(x) 117 | x = ConcatenateMatrixAndVectors()([x] + addinf) 118 | L = get_dense("An_b" + mod, dim_rep, stop, activation=keras.activations.tanh, use_bias=True) 119 | x = L(x) 120 | L = get_dense("An_c" + mod, dim_y, stop, use_bias=True) 121 | x = L(x) 122 | x = ConstantBias(mu_values)(x) 123 | return x 124 | 125 | 126 | # dictionary for computed DTW synchronization 127 | saved_paths = {} 128 | 129 | # this function synchronizes outputs and targets 130 | # The synchronization is not provided every gradient computation but only 30% of time. 
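# (recomputing the DTW alignment at every gradient step would be expensive,
# so previously computed paths are cached in saved_paths and reused)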
131 | # 70% of time the function reuses synchronization save in the dictionary saved_paths 132 | def synch(data): 133 | if data["key"] in saved_paths: 134 | paths = saved_paths[data["key"]] 135 | used_saved_paths = (random.randint(0, 100) <= 30) 136 | else: 137 | paths = [None for y_synch in ys_synch] 138 | used_saved_paths = True 139 | 140 | if not used_saved_paths: 141 | inputs_values = data["inputs"] 142 | inputs_values = [addDim(x) for x in inputs_values] # 2D -> 3D 143 | ys_values = model4synch.predict_on_batch(inputs_values) 144 | if not type(ys_values) is list: 145 | ys_values = [ys_values] 146 | else: 147 | T_y = n_steps * data["inputs"][0].shape[0] 148 | ys_values = [numpy.zeros((T_y, 1), dtype=dtype) for y_synch in ys_synch] 149 | 150 | for idx in range(len(ys_values)): 151 | i_out = idx 152 | i_tar = idx 153 | y_values = ys_values[i_out] 154 | tar_values = data["targets"][i_tar] 155 | 156 | if used_saved_paths: 157 | if paths[idx] is None: 158 | d, path_y, path_tar = fakedtw( 159 | y_values, 160 | tar_values, 161 | 0.1, 162 | ) 163 | else: 164 | d, path_y, path_tar = paths[idx] 165 | else: 166 | d, path_y, path_tar = dtw( 167 | y_values, 168 | tar_values, 169 | 0.1, 170 | ) 171 | 172 | paths[idx] = (d, path_y, path_tar) 173 | 174 | data["inputs"].append(numpy.asarray(path_y, dtype="int64")) 175 | data["targets"][i_tar] = data["targets"][i_tar][path_tar] 176 | 177 | saved_paths[data["key"]] = paths 178 | 179 | return data 180 | 181 | 182 | text, dim_text = loadText("data/text") 183 | 184 | hfTar = h5py.File("data/recs.h5", "r") 185 | 186 | info_spk, dim_spk = loadInfo("data/info_spk", dtype=dtype) 187 | info_today, dim_today = loadInfo("data/info_today", dtype=dtype) 188 | 189 | reg = keras.regularizers.l2(0.001) 190 | 191 | input_text = Input(batch_shape=(None, None, dim_text), dtype=dtype, name="textf") 192 | input_spk = Input(batch_shape=(None, None, dim_spk), dtype=dtype, name="spk") 193 | input_today = Input(batch_shape=(None, None, dim_today), dtype=dtype, name="today") 194 | 195 | inputs = [input_text, input_spk, input_today] 196 | 197 | # reshaping 3D inputs into 2D arrays 198 | x_text = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_text))(input_text) 199 | x_spk = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_spk))(input_spk) 200 | x_today = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_today))(input_today) 201 | addinf = [x_spk, x_today] 202 | 203 | 204 | # ws is a list of weights that is important when a new layer is added etc. 205 | #fname_weights = "models/FF-05.h5" 206 | fname_weights = None 207 | if fname_weights is None: 208 | # empty list means random initialization 209 | ws = [] 210 | else: 211 | # load weight from file 212 | ws = loadAllDenses2(fname_weights) 213 | 214 | # dropout rate 215 | pDropout = 0.1 216 | 217 | # mean of outputs 218 | mu_values = loadMatrixFloatText("temp/mu", dtype=dtype) 219 | 220 | # dictionary of layers. Important for tied layers. 
221 | Ls = {}
222 | 
223 | # the following model translates from language A into language B
224 | 
225 | a = x_text
226 | ae = embA(a)
227 | ybe = translation_from_A_to_B(ae, Nh=Nh)
228 | yb = invEmbB(ybe, False)
229 | 
230 | # outputs
231 | ys = [
232 |     yb,
233 | ]
234 | 
235 | # outputs trained by means of an attention mechanism
236 | ys_att = [
237 |     #yb,
238 | ]
239 | 
240 | # outputs which will be synchronized
241 | ys_synch = ys
242 | 
243 | inputs_dtw = [Input(batch_shape=(None, None), dtype="int64", name="dtw%d" % i) for i in range(len(ys))]
244 | 
245 | # special layer synchronizing outputs (targets are synchronized outside the models)
246 | ys_dtw = [UseDTW()([ys[i], inputs_dtw[i]]) for i in range(len(ys_synch))]
247 | 
248 | # back into the 3D shapes
249 | make_it_3d = Lambda(lambda x: reshape_3d(x))
250 | ys_3d = [make_it_3d(y) for y in ys]
251 | ys_dtw_3d = [make_it_3d(y) for y in ys_dtw]
252 | ys_att_3d = [make_it_3d(y) for y in ys_att]
253 | 
254 | # these outputs will be trained
255 | model4training = Model(inputs=inputs+inputs_dtw, outputs=ys_dtw_3d+ys_att_3d)
256 | 
257 | # these outputs will be evaluated
258 | model4output = Model(inputs=inputs, outputs=ys)
259 | 
260 | # these outputs serve for DTW synchronization
261 | model4synch = Model(inputs=inputs, outputs=ys_synch)
262 | 
263 | # this output will be saved for monitoring
264 | model4example = Model(inputs=inputs, outputs=[ys[0]])
265 | 
--------------------------------------------------------------------------------
/SLSynthesis/model_FF_20200603.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import re
3 | import random
4 | import math
5 | 
6 | import numpy
7 | 
8 | import tensorflow as tf
9 | import keras
10 | from keras.models import Sequential, Model
11 | from keras.layers import Dense, Add, Dropout
12 | from keras import backend as K
13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract
14 | from keras.utils import to_categorical
15 | 
16 | from modeling import *
17 | from data import *
18 | from dtw import dtw, fakedtw
19 | from utils import *
20 | from training import *
21 | 
22 | 
23 | # filenames for weights, etc. are derived from this ID
24 | model_ID = "FF_20200603"
25 | 
26 | dtype = "float32"
27 | 
28 | n_steps = 7 # number of frames for each word
29 | dim_y = 98 # output size
30 | dim_emb = 256 # embedding size
31 | dim_rep = 1024 # "repository" size
32 | dim_h = 1024 # hidden layer size
33 | 
34 | if False:
35 |     # avoid using multiple CPUs (not always successful) and memory leaks in tensorflow
36 |     conf = K.tf.ConfigProto(
37 |         device_count={'CPU': 1},
38 |         intra_op_parallelism_threads=1,
39 |         inter_op_parallelism_threads=1,
40 |     )
41 |     K.set_session(K.tf.Session(config=conf))
42 | 
43 | # make the experiment deterministic
44 | random.seed(123)
45 | numpy.random.seed(123)
46 | 
47 | # This function returns a dictionary with inputs and targets.
48 | # Keys "inputs" and "targets" are mandatory.
49 | # Values must be lists, not tuples. The lists will be appended and modified when synchronization is performed.
50 | def loader(key):
51 |     video = numpy.array(hfTar.get(key))
52 |     text_int = text[key]
53 |     text_float = to_categorical(text_int, num_classes=dim_text)
54 | 
55 |     return {
56 |         "inputs": [
57 |             text_float,
58 |             info_spk[key],
59 |             #info_today[key],
60 |         ],
61 |         "targets": [
62 |             video for y in ys + ys_att
63 |         ],
64 |         "key": key,
65 |     }
66 | 
67 | 
68 | # A function for tied layers.
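# (Added commentary, an interpretation of the code below.) The `stop` flag is
# stored on the returned layer and is read by Dense2, which wraps its kernel
# (and bias) in K.stop_gradient when the flag is True, so gradients do not flow
# into the tied weights from that call site. Independently, `trainable=False`
# combined with `init_weights` (as used for the repository layer "An_c" in this
# file) keeps a layer frozen at its initial values.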
69 | def get_dense(Lname, dim, stop, activation=lambda x:x, use_bias=True, init_weights=None, trainable=True):
70 |     # create or use an already created layer
71 |     if not Lname in Ls:
72 |         if use_bias:
73 |             weights=[ws.pop(0), ws.pop(0)] if len(ws) > 0 else init_weights
74 |         else:
75 |             weights=[ws.pop(0)] if len(ws) > 0 else init_weights
76 |         Ls[Lname] = Dense2(dim, activation=activation, use_bias=use_bias, weights=weights, trainable=trainable)
77 |     L = Ls[Lname]
78 |     L.stop = stop
79 |     return L
80 | 
81 | 
82 | # "embedding" layer that encodes the input language A
83 | def embA(x, stop=False, mod=""):
84 |     x = ConcatenateMatrixAndVectors()([x] + addinf)
85 |     L = get_dense("Aemb" + mod, dim_emb, stop, use_bias=False)
86 |     x = L(x)
87 |     x = LayerNormalization()(x)
88 |     return x
89 | 
90 | 
91 | def translation_from_A_to_B(x, stop=False, mod="", Nh=0):
92 | 
93 |     for i in range(Nh):
94 |         x_old = x
95 |         x = Splicing([-1, 0, 1])(x)
96 |         x = ConcatenateMatrixAndVectors()([x] + addinf)
97 |         L = get_dense(("A_a[%d]" % i) + mod, dim_h, stop, activation=keras.activations.relu, use_bias=True)
98 |         x = L(x)
99 |         x = Dropout(rate=pDropout)(x)
100 |         L = get_dense(("A_b[%d]" % i) + mod, dim_emb, stop, use_bias=True)
101 |         x = L(x)
102 |         x = Add()([x, x_old])
103 |         x = LayerNormalization()(x)
104 | 
105 |     return x
106 | 
107 | 
108 | # "deembedding" layer that decodes the output, i.e. language B
109 | def invEmbB(x, stop=False, mod=""):
110 |     x = ConcatenateMatrixAndVectors()([x] + addinf)
111 |     L = get_dense("An_a" + mod, n_steps * dim_emb, stop, use_bias=True)
112 |     x = L(x)
113 |     x = MakeSteps2(nSteps=n_steps, dimy=dim_emb)(x)
114 |     x = Splicing([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])(x)
115 |     x = ConcatenateMatrixAndVectors()([x] + addinf)
116 |     L = get_dense("An_b" + mod, dim_rep, stop, activation=keras.activations.softmax, use_bias=True)
117 |     x = L(x)
118 |     L = get_dense("An_c" + mod, dim_y, stop, use_bias=False, init_weights=[rep_values], trainable=False)
119 |     x = L(x)
120 |     #x = ConstantBias(mu_values)(x)
121 |     return x
122 | 
123 | 
124 | # dictionary for the computed DTW synchronization
125 | saved_paths = {}
126 | 
127 | # this function synchronizes outputs and targets
128 | # The synchronization is not performed at every gradient computation but only 30% of the time.
129 | # 70% of the time, the function reuses the synchronization saved in the dictionary saved_paths
130 | def synch(data):
131 |     if data["key"] in saved_paths:
132 |         paths = saved_paths[data["key"]]
133 |         used_saved_paths = (random.randint(0, 100) <= 30)
134 |     else:
135 |         paths = [None for y_synch in ys_synch]
136 |         used_saved_paths = True
137 | 
138 |     used_saved_paths = True # override: the model-based DTW branch below is never taken in this variant; fakedtw paths are computed once and then reused
139 | 
140 |     if not used_saved_paths:
141 |         inputs_values = data["inputs"]
142 |         inputs_values = [addDim(x) for x in inputs_values] # 2D -> 3D
143 |         ys_values = model4synch.predict_on_batch(inputs_values)
144 |         if not type(ys_values) is list:
145 |             ys_values = [ys_values]
146 |     else:
147 |         T_y = n_steps * data["inputs"][0].shape[0]
148 |         ys_values = [numpy.zeros((T_y, 1), dtype=dtype) for y_synch in ys_synch]
149 | 
150 |     for idx in range(len(ys_values)):
151 |         i_out = idx
152 |         i_tar = idx
153 |         y_values = ys_values[i_out]
154 |         tar_values = data["targets"][i_tar]
155 | 
156 |         if used_saved_paths:
157 |             if paths[idx] is None:
158 |                 d, path_y, path_tar = fakedtw(
159 |                     y_values,
160 |                     tar_values,
161 |                     0.3,
162 |                 )
163 |             else:
164 |                 d, path_y, path_tar = paths[idx]
165 |         else:
166 |             d, path_y, path_tar = dtw(
167 |                 y_values,
168 |                 tar_values,
169 |                 0.3,
170 |             )
171 | 
172 |         paths[idx] = (d, path_y, path_tar)
173 | 
174 |         data["inputs"].append(numpy.asarray(path_y, dtype="int64"))
175 |         data["targets"][i_tar] = data["targets"][i_tar][path_tar]
176 | 
177 |     saved_paths[data["key"]] = paths
178 | 
179 |     return data
180 | 
181 | 
182 | text, dim_text = loadText("data/text")
183 | 
184 | print("dim_text = %d" % dim_text)
185 | 
186 | hfTar = h5py.File("data/keypoints-filt-deltas-norm.h5", "r")
187 | 
188 | info_spk, dim_spk = loadInfo("data/info_spk", dtype=dtype)
189 | #info_today, dim_today = loadInfo("data/info_today", dtype=dtype)
190 | 
191 | reg = keras.regularizers.l2(0.001)
192 | 
193 | input_text = Input(batch_shape=(None, None, dim_text), dtype=dtype, name="textf")
194 | input_spk = Input(batch_shape=(None, None, dim_spk), dtype=dtype, name="spk")
195 | #input_today = Input(batch_shape=(None, None, dim_today), dtype=dtype, name="today")
196 | 
197 | inputs = [input_text, input_spk]#, input_today]
198 | 
199 | # reshaping 3D inputs into 2D arrays
200 | x_text = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_text))(input_text)
201 | x_spk = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_spk))(input_spk)
202 | #x_today = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_today))(input_today)
203 | addinf = [x_spk]#, x_today]
204 | 
205 | 
206 | # ws is a list of weights; it matters when a new layer is added, etc.
207 | #fname_weights = "models/FF_20200603-clear.h5"
208 | fname_weights = None
209 | if fname_weights is None:
210 |     # an empty list means random initialization
211 |     ws = []
212 | else:
213 |     # load weights from a file
214 |     ws = loadAllDenses2(fname_weights)
215 | 
216 | # dropout rate
217 | pDropout = 0.1
218 | 
219 | # mean of outputs
220 | mu_values = loadMatrixFloatText("temp/mu", dtype=dtype)
221 | rep_values = loadMatrixFloatText("temp/repository-%d" % dim_rep, dtype=dtype)
222 | 
223 | # dictionary of layers. Important for tied layers.
224 | Ls = {}
225 | 
226 | # the following model translates from language A into language B
227 | 
228 | a = x_text
229 | ae = embA(a)
230 | ybe_0 = ae
231 | ybe_1 = translation_from_A_to_B(ybe_0, Nh=1, mod="1")
232 | #ybe_2 = translation_from_A_to_B(ybe_1, Nh=1, mod="2")
233 | #ybe_3 = translation_from_A_to_B(ybe_2, Nh=1, mod="3")
234 | 
235 | #yb_0 = invEmbB(ybe_0, False, mod="0")
236 | yb_1 = invEmbB(ybe_1, False, mod="1")
237 | #yb_2 = invEmbB(ybe_2, False, mod="2")
238 | #yb_3 = invEmbB(ybe_3, False, mod="3")
239 | 
240 | # outputs
241 | ys = [
242 |     #yb_3,
243 |     #yb_2,
244 |     yb_1,
245 |     #yb_0,
246 | ]
247 | 
248 | # outputs trained by means of an attention mechanism
249 | ys_att = [
250 |     yb_1,
251 | ]
252 | 
253 | # outputs which will be synchronized
254 | ys_synch = ys
255 | 
256 | inputs_dtw = [Input(batch_shape=(None, None), dtype="int64", name="dtw%d" % i) for i in range(len(ys))]
257 | 
258 | # special layer synchronizing outputs (targets are synchronized outside the models)
259 | ys_dtw = [UseDTW()([ys[i], inputs_dtw[i]]) for i in range(len(ys_synch))]
260 | 
261 | # back into the 3D shapes
262 | make_it_3d = Lambda(lambda x: reshape_3d(x))
263 | ys_3d = [make_it_3d(y) for y in ys]
264 | ys_dtw_3d = [make_it_3d(y) for y in ys_dtw]
265 | ys_att_3d = [make_it_3d(y) for y in ys_att]
266 | 
267 | # these outputs will be trained
268 | model4training = Model(inputs=inputs+inputs_dtw, outputs=ys_dtw_3d+ys_att_3d)
269 | 
270 | # these outputs will be evaluated
271 | model4output = Model(inputs=inputs, outputs=ys)
272 | 
273 | # these outputs serve for DTW synchronization
274 | model4synch = Model(inputs=inputs, outputs=ys_synch)
275 | 
276 | # this output will be saved for monitoring
277 | model4example = Model(inputs=inputs, outputs=[ys[0]])
278 | 
--------------------------------------------------------------------------------
/SLSynthesis/model_FF_20200604.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import re
3 | import random
4 | import math
5 | 
6 | import numpy
7 | 
8 | import tensorflow as tf
9 | import keras
10 | from keras.models import Sequential, Model
11 | from keras.layers import Dense, Add, Dropout
12 | from keras import backend as K
13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract
14 | from keras.utils import to_categorical
15 | 
16 | from modeling import *
17 | from data import *
18 | from dtw import dtw, fakedtw
19 | from utils import *
20 | from training import *
21 | 
22 | 
23 | # filenames for weights, etc. are derived from this ID
24 | model_ID = "FF_20200604"
25 | 
26 | dtype = "float32"
27 | 
28 | n_steps = 7 # number of frames for each word
29 | dim_y = 98 # output size
30 | dim_emb = 256 # embedding size
31 | dim_rep = 1024 # "repository" size
32 | dim_h = 1024 # hidden layer size
33 | 
34 | #Q = 1.0 / math.sqrt(float(dim_emb))
35 | Q = None
36 | 
37 | if False:
38 |     # avoid using multiple CPUs (not always successful) and memory leaks in tensorflow
39 |     conf = K.tf.ConfigProto(
40 |         device_count={'CPU': 1},
41 |         intra_op_parallelism_threads=1,
42 |         inter_op_parallelism_threads=1,
43 |     )
44 |     K.set_session(K.tf.Session(config=conf))
45 | 
46 | # make the experiment deterministic
47 | random.seed(123)
48 | numpy.random.seed(123)
49 | 
50 | # This function returns a dictionary with inputs and targets.
51 | # Keys "inputs" and "targets" are mandatory.
52 | # Values must be lists, not tuples. The lists will be appended and modified when synchronization is performed.
53 | def loader(key):
54 |     video = numpy.array(hfTar.get(key))
55 |     text_int = text[key]
56 |     text_float = to_categorical(text_int, num_classes=dim_text)
57 | 
58 |     return {
59 |         "inputs": [
60 |             text_float,
61 |             info_spk[key],
62 |             #info_today[key],
63 |         ],
64 |         "targets": [
65 |             video for y in ys + ys_att
66 |         ],
67 |         "key": key,
68 |     }
69 | 
70 | 
71 | # A function for tied layers.
72 | def get_dense(Lname, dim, stop, activation=lambda x:x, use_bias=True, init_weights=None, trainable=True):
73 |     # create or use an already created layer
74 |     if not Lname in Ls:
75 |         if use_bias:
76 |             weights=[ws.pop(0), ws.pop(0)] if len(ws) > 0 else init_weights
77 |         else:
78 |             weights=[ws.pop(0)] if len(ws) > 0 else init_weights
79 |         Ls[Lname] = Dense2(dim, activation=activation, use_bias=use_bias, weights=weights, trainable=trainable)
80 |     L = Ls[Lname]
81 |     L.stop = stop
82 |     return L
83 | 
84 | 
85 | # "embedding" layer that encodes the input language A
86 | def embA(x, stop=False, mod=""):
87 |     x = ConcatenateMatrixAndVectors()([x] + addinf)
88 |     L = get_dense("Aemb" + mod, dim_emb, stop, use_bias=False)
89 |     x = L(x)
90 |     x = LayerNormalization()(x)
91 |     if not Q is None:
92 |         x = Lambda(lambda x: Q * x)(x)
93 |     return x
94 | 
95 | 
96 | def translation_from_A_to_B(x, stop=False, mod="", Nh=0):
97 | 
98 |     for i in range(Nh):
99 |         x_old = x
100 |         x = Splicing([-1, 0, 1])(x)
101 |         x = ConcatenateMatrixAndVectors()([x] + addinf)
102 |         L = get_dense(("A_a[%d]" % i) + mod, dim_h, stop, activation=keras.activations.relu, use_bias=True)
103 |         x = L(x)
104 |         x = Dropout(rate=pDropout)(x)
105 |         L = get_dense(("A_b[%d]" % i) + mod, dim_emb, stop, use_bias=True)
106 |         x = L(x)
107 |         x = Add()([x, x_old])
108 |         x = LayerNormalization()(x)
109 |         if not Q is None:
110 |             x = Lambda(lambda x: Q * x)(x)
111 | 
112 |     return x
113 | 
114 | 
115 | # "deembedding" layer that decodes the output, i.e. language B
116 | def invEmbB(x, stop=False, mod=""):
117 |     x = ConcatenateMatrixAndVectors()([x] + addinf)
118 |     L = get_dense("An_a" + mod, n_steps * dim_emb, stop, use_bias=True)
119 |     x = L(x)
120 |     x = MakeSteps2(nSteps=n_steps, dimy=dim_emb)(x)
121 |     x = Splicing([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])(x)
122 |     x = ConcatenateMatrixAndVectors()([x] + addinf)
123 |     L = get_dense("An_b" + mod, dim_rep, stop, activation=keras.activations.softmax, use_bias=True)
124 |     x = L(x)
125 |     L = get_dense("An_c" + mod, dim_y, stop, use_bias=False, init_weights=[rep_values], trainable=False)
126 |     x = L(x)
127 |     #x = ConstantBias(mu_values)(x)
128 |     return x
129 | 
130 | 
131 | # dictionary for the computed DTW synchronization
132 | saved_paths = {}
133 | 
134 | # this function synchronizes outputs and targets
135 | # The synchronization is not performed at every gradient computation but only 30% of the time.
136 | # 70% of the time, the function reuses the synchronization saved in the dictionary saved_paths
137 | def synch(data):
138 |     if data["key"] in saved_paths:
139 |         paths = saved_paths[data["key"]]
140 |         used_saved_paths = (random.randint(0, 100) <= 30)
141 |     else:
142 |         paths = [None for y_synch in ys_synch]
143 |         used_saved_paths = True
144 | 
145 |     used_saved_paths = False # override: the synchronization is always recomputed with the model-based DTW in this variant
146 | 
147 |     if not used_saved_paths:
148 |         inputs_values = data["inputs"]
149 |         inputs_values = [addDim(x) for x in inputs_values] # 2D -> 3D
150 |         ys_values = model4synch.predict_on_batch(inputs_values)
151 |         if not type(ys_values) is list:
152 |             ys_values = [ys_values]
153 |     else:
154 |         T_y = n_steps * data["inputs"][0].shape[0]
155 |         ys_values = [numpy.zeros((T_y, 1), dtype=dtype) for y_synch in ys_synch]
156 | 
157 |     for idx in range(len(ys_values)):
158 |         i_out = idx
159 |         i_tar = idx
160 |         y_values = ys_values[i_out]
161 |         tar_values = data["targets"][i_tar]
162 | 
163 |         if used_saved_paths:
164 |             if paths[idx] is None:
165 |                 d, path_y, path_tar = fakedtw(
166 |                     y_values,
167 |                     tar_values,
168 |                     0.3,
169 |                 )
170 |             else:
171 |                 d, path_y, path_tar = paths[idx]
172 |         else:
173 |             d, path_y, path_tar = dtw(
174 |                 y_values,
175 |                 tar_values,
176 |                 0.3,
177 |             )
178 | 
179 |         paths[idx] = (d, path_y, path_tar)
180 | 
181 |         data["inputs"].append(numpy.asarray(path_y, dtype="int64"))
182 |         data["targets"][i_tar] = data["targets"][i_tar][path_tar]
183 | 
184 |     saved_paths[data["key"]] = paths
185 | 
186 |     return data
187 | 
188 | 
189 | text, dim_text = loadText("data/text")
190 | 
191 | print("dim_text = %d" % dim_text)
192 | 
193 | hfTar = h5py.File("data/keypoints-filt-deltas-norm.h5", "r")
194 | 
195 | info_spk, dim_spk = loadInfo("data/info_spk", dtype=dtype)
196 | #info_today, dim_today = loadInfo("data/info_today", dtype=dtype)
197 | 
198 | reg = keras.regularizers.l2(0.001)
199 | 
200 | input_text = Input(batch_shape=(None, None, dim_text), dtype=dtype, name="textf")
201 | input_spk = Input(batch_shape=(None, None, dim_spk), dtype=dtype, name="spk")
202 | #input_today = Input(batch_shape=(None, None, dim_today), dtype=dtype, name="today")
203 | 
204 | inputs = [input_text, input_spk]#, input_today]
205 | 
206 | # reshaping 3D inputs into 2D arrays
207 | x_text = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_text))(input_text)
208 | x_spk = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_spk))(input_spk)
209 | #x_today = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_today))(input_today)
210 | addinf = [x_spk]#, x_today]
211 | 
212 | 
213 | # ws is a list of weights; it matters when a new layer is added, etc.
214 | #fname_weights = "models/FF_20200604-clear.h5"
215 | fname_weights = None
216 | if fname_weights is None:
217 |     # an empty list means random initialization
218 |     ws = []
219 | else:
220 |     # load weights from a file
221 |     ws = loadAllDenses2(fname_weights)
222 | 
223 | # dropout rate
224 | pDropout = 0.1
225 | 
226 | # mean of outputs
227 | mu_values = loadMatrixFloatText("temp/mu", dtype=dtype)
228 | rep_values = loadMatrixFloatText("temp/repository-%d" % dim_rep, dtype=dtype)
229 | 
230 | # dictionary of layers. Important for tied layers.
231 | Ls = {}
232 | 
233 | # the following model translates from language A into language B
234 | 
235 | a = x_text
236 | ae = embA(a)
237 | ybe_0 = ae
238 | #ybe_1 = translation_from_A_to_B(ybe_0, Nh=1, mod="1")
239 | #ybe_2 = translation_from_A_to_B(ybe_1, Nh=1, mod="2")
240 | #ybe_3 = translation_from_A_to_B(ybe_2, Nh=1, mod="3")
241 | 
242 | yb_0 = invEmbB(ybe_0, False, mod="0")
243 | #yb_1 = invEmbB(ybe_1, False, mod="1")
244 | #yb_2 = invEmbB(ybe_2, False, mod="2")
245 | #yb_3 = invEmbB(ybe_3, False, mod="3")
246 | 
247 | # outputs
248 | ys = [
249 |     #yb_3,
250 |     #yb_2,
251 |     #yb_1,
252 |     yb_0,
253 | ]
254 | 
255 | # outputs trained by means of an attention mechanism
256 | ys_att = [
257 |     #yb_3,
258 |     #yb_2,
259 |     #yb_1,
260 |     yb_0,
261 | ]
262 | 
263 | # outputs which will be synchronized
264 | ys_synch = ys
265 | 
266 | inputs_dtw = [Input(batch_shape=(None, None), dtype="int64", name="dtw%d" % i) for i in range(len(ys))]
267 | 
268 | # special layer synchronizing outputs (targets are synchronized outside the models)
269 | ys_dtw = [UseDTW()([ys[i], inputs_dtw[i]]) for i in range(len(ys_synch))]
270 | 
271 | # back into the 3D shapes
272 | make_it_3d = Lambda(lambda x: reshape_3d(x))
273 | ys_3d = [make_it_3d(y) for y in ys]
274 | ys_dtw_3d = [make_it_3d(y) for y in ys_dtw]
275 | ys_att_3d = [make_it_3d(y) for y in ys_att]
276 | 
277 | # these outputs will be trained
278 | model4training = Model(inputs=inputs+inputs_dtw, outputs=ys_dtw_3d+ys_att_3d)
279 | 
280 | # these outputs will be evaluated
281 | model4output = Model(inputs=inputs, outputs=ys)
282 | 
283 | # these outputs serve for DTW synchronization
284 | model4synch = Model(inputs=inputs, outputs=ys_synch)
285 | 
286 | # this output will be saved for monitoring
287 | model4example = Model(inputs=inputs, outputs=[ys[0]])
288 | 
--------------------------------------------------------------------------------
/SLSynthesis/modeling.py:
--------------------------------------------------------------------------------
1 | import math
2 | 
3 | import numpy
4 | 
5 | import tensorflow as tf
6 | 
7 | import keras
8 | from keras.models import Sequential, Model
9 | from keras.layers import Dense, Concatenate, Layer, Input, concatenate
10 | from keras import backend as K
11 | 
12 | from dtw import dtw
13 | 
14 | 
15 | def applyTimeShift(x, shift):
16 |     xShape = K.shape(x)
17 |     nMissing = K.minimum(abs(shift), xShape[0])
18 |     if shift == 0:
19 |         y = x
20 |     if shift > 0:
21 |         yMain = x[shift:xShape[0], 0:xShape[1]]
22 |         xT = x[(xShape[0] - 1):xShape[0], 0:xShape[1]]
23 |         yAppendix = K.tile(xT, (nMissing, 1))
24 |         y = K.concatenate([yMain, yAppendix], axis=0)
25 |     if shift < 0:
26 |         yMain = x[0:K.maximum((xShape[0] + shift), 0), 0:xShape[1]]
27 |         x0 = x[0:1, 0:xShape[1]]
28 |         yAppendix = K.tile(x0, (nMissing, 1))
29 |         y = K.concatenate([yAppendix, yMain], axis=0)
30 |     return y
31 | 
32 | 
33 | def applyTimeShifts(x, shifts, weights=None):
34 |     y = []
35 |     i_weight = 0
36 |     for shift in shifts:
37 |         if weights is None:
38 |             y.append(applyTimeShift(x, shift))
39 |         else:
40 |             y.append(weights[i_weight] * applyTimeShift(x, shift))
41 |         i_weight = i_weight + 1
42 |     y = K.concatenate(y, axis=1)
43 |     return y
44 | 
45 | 
46 | class Splicing(Layer):
47 | 
48 |     def __init__(self, shifts, shiftWeights=None, **kwargs):
49 |         self.shifts = shifts
50 |         self.shiftWeights = shiftWeights
51 |         super(Splicing, self).__init__(trainable=False, **kwargs)
52 | 
53 |     def build(self, input_shape):
54 |         super(Splicing, self).build(input_shape)
55 | 
56 |     def call(self, x):
57 |         return 
applyTimeShifts(x, self.shifts, self.shiftWeights) 58 | 59 | def compute_output_shape(self, inputs_shape): 60 | return (None, len(self.shifts) * inputs_shape[1]) 61 | 62 | 63 | class ConcatenateMatrixAndVectors(Layer): 64 | 65 | def __init__(self, **kwargs): 66 | super(ConcatenateMatrixAndVectors, self).__init__(trainable=False, **kwargs) 67 | 68 | def build(self, input_shape): 69 | super(ConcatenateMatrixAndVectors, self).build(input_shape) 70 | 71 | def call(self, x): 72 | m = x[0] 73 | mShape = K.shape(m) 74 | vs = x[1:len(x)] 75 | y = [m] 76 | for v in vs: 77 | vRep = K.tile(v, (mShape[0], 1)) 78 | y.append(vRep) 79 | y = K.concatenate(y, axis=1) 80 | return y 81 | 82 | def compute_output_shape(self, inputs_shape): 83 | T = inputs_shape[0][0] 84 | dim = inputs_shape[0][1] 85 | for i in range(1, len(inputs_shape)): 86 | dim = dim + inputs_shape[i][1] 87 | return (T, dim) 88 | 89 | 90 | class Dense2(Layer): 91 | 92 | def __init__(self, output_dim, use_bias=False, activation=lambda x:x, trainable=True, **kwargs): 93 | self.output_dim = output_dim 94 | self.activation = activation 95 | self.stop = False 96 | self.use_bias = use_bias 97 | self.trainable = trainable 98 | super(Dense2, self).__init__(**kwargs) 99 | 100 | def build(self, input_shape): 101 | self.kernel = self.add_weight(name='kernel', 102 | shape=(input_shape[1], self.output_dim), 103 | initializer='uniform', 104 | trainable=self.trainable) 105 | if self.use_bias: 106 | self.bias = self.add_weight(name='bias', 107 | shape=(self.output_dim, ), 108 | initializer='zeros', 109 | trainable=self.trainable) 110 | super(Dense2, self).build(input_shape) 111 | 112 | def call(self, x): 113 | if self.stop: 114 | kernel = K.stop_gradient(self.kernel) 115 | else: 116 | kernel = self.kernel 117 | if self.use_bias: 118 | if self.stop: 119 | b = K.stop_gradient(self.bias) 120 | else: 121 | b = self.bias 122 | else: 123 | b = 0 124 | return self.activation(K.dot(x, kernel) + b) 125 | 126 | def compute_output_shape(self, input_shape): 127 | return (input_shape[0], self.output_dim) 128 | 129 | 130 | class LayerNormalization(Layer): 131 | 132 | def __init__(self, eps=1e-6, **kwargs): 133 | self.eps = eps 134 | super(LayerNormalization, self).__init__(**kwargs) 135 | 136 | def build(self, input_shape): 137 | #self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:], 138 | # initializer=Ones(), trainable=True) 139 | #self.beta = self.add_weight(name='beta', shape=input_shape[-1:], 140 | # initializer=Zeros(), trainable=True) 141 | super(LayerNormalization, self).build(input_shape) 142 | 143 | def call(self, x): 144 | mean = K.mean(x, axis=-1, keepdims=True) 145 | std = K.std(x, axis=-1, keepdims=True) 146 | #return self.gamma * (x - mean) / (std + self.eps) + self.beta 147 | return (x - mean) / (std + self.eps) 148 | 149 | def compute_output_shape(self, input_shape): 150 | return input_shape 151 | 152 | 153 | class MakeSteps2(Layer): 154 | 155 | def __init__(self, nSteps, dimy, **kwargs): 156 | self.nSteps = nSteps 157 | self.dimy = dimy 158 | super(MakeSteps2, self).__init__(trainable=False, **kwargs) 159 | 160 | def build(self, input_shape): 161 | super(MakeSteps2, self).build(input_shape) 162 | 163 | def call(self, x): 164 | dtype = x.dtype 165 | nSteps = self.nSteps 166 | xShape = K.shape(x) 167 | #xShape = K.print_tensor(xShape, "xShape") 168 | dim_y = self.dimy #K.cast(xShape[1] / nSteps, dtype="int32") 169 | O = K.zeros((1, dim_y), dtype=dtype) 170 | I = K.ones((1, dim_y), dtype=dtype) 171 | y = 0 172 | for i in range(nSteps): 173 | 
x_i = x[0:xShape[0], (i * dim_y):((i + 1) * dim_y)] 174 | x_i = K.repeat_elements(x_i, nSteps, axis=0) 175 | m = [] 176 | for j in range(nSteps): 177 | if j == i: 178 | m.append(I) 179 | else: 180 | m.append(O) 181 | m = K.concatenate(m, axis=0) 182 | m = K.tile(m, (xShape[0], 1)) 183 | y = y + m * x_i 184 | return y 185 | 186 | def compute_output_shape(self, inputs_shape): 187 | nSteps = self.nSteps 188 | dim_y = self.dimy 189 | return (None, dim_y) 190 | 191 | 192 | class ConstantBias(Layer): 193 | 194 | def __init__(self, bias, **kwargs): 195 | self.bias = bias 196 | super(ConstantBias, self).__init__(trainable=False, **kwargs) 197 | 198 | def build(self, input_shape): 199 | super(ConstantBias, self).build(input_shape) 200 | 201 | def call(self, x): 202 | y = x + self.bias 203 | return y 204 | 205 | def compute_output_shape(self, inputs_shape): 206 | return inputs_shape 207 | 208 | 209 | class UseDTW(Layer): 210 | 211 | def __init__(self, **kwargs): 212 | super(UseDTW, self).__init__(trainable=False, **kwargs) 213 | 214 | def build(self, input_shape): 215 | super(UseDTW, self).build(input_shape) 216 | 217 | def call(self, inputs): 218 | #return x[self.dtw_y] 219 | x, dtw_y = inputs 220 | y = K.gather(x, dtw_y) 221 | return y 222 | 223 | def compute_output_shape(self, inputs_shape): 224 | return (inputs_shape[1][0], inputs_shape[0][1]) 225 | 226 | 227 | def compute_D2(a, b, La, Lb): 228 | #print((a, b, La, Lb)) 229 | dtype = a.dtype 230 | #print(dtype) 231 | bT = K.transpose(b) 232 | sumaa = K.sum(a * a, axis=1, keepdims=True) 233 | sumbb = K.sum(b * b, axis=1, keepdims=True) 234 | #foo = K.ones((1, Lb), dtype=dtype) 235 | #D2 = K.dot(sumaa, foo) 236 | D2 = K.tile(sumaa, (1, Lb)) 237 | D2 = D2 - 2 * K.dot(a, bT) 238 | #D2 = D2 + K.dot(K.ones((La, 1), dtype=dtype), K.transpose(sumbb)) 239 | D2 = D2 + K.tile(K.transpose(sumbb), (La, 1)) 240 | #D2 = K.dot(sumaa, K.ones((1, Lb), dtype=dtype)) - 2 * K.dot(a, bT) + K.dot(K.ones((La, 1), dtype=dtype), K.transpose(sumbb)) 241 | D2 = K.abs(D2) 242 | return D2 243 | 244 | 245 | def makeM0(La, Lb, thr, dtype): 246 | #am = K.arange(La) / (La - 1) 247 | am = K.arange(0, La, 1) / (La - 1) 248 | am = K.reshape(am, (La, 1)) 249 | #bm = K.arange(Lb) / (Lb - 1) 250 | bm = K.arange(0, Lb, 1) / (Lb - 1) 251 | bm = K.reshape(bm, (Lb, 1)) 252 | am = K.cast(am, dtype) 253 | bm = K.cast(bm, dtype) 254 | D2 = compute_D2(am, bm, La, Lb) 255 | M0 = 0.5 * 1e+10 * (1.0 + K.sign(D2 - thr * thr)) 256 | return M0 257 | 258 | 259 | def normList(l): 260 | sum = 0.0 261 | for i in l: 262 | sum = sum + i 263 | return [i / sum for i in l] 264 | 265 | 266 | def compute_mask(La, Lb, dtype): 267 | am = K.arange(La) / (La - 1) 268 | am = K.reshape(am, (La, 1)) 269 | bm = K.arange(Lb) / (Lb - 1) 270 | bm = K.reshape(bm, (Lb, 1)) 271 | am = K.cast(am, dtype) 272 | bm = K.cast(bm, dtype) 273 | #M = 1.0 - compute_D2(am, bm, La, Lb) 274 | D2 = compute_D2(am, bm, La, Lb) 275 | M = K.exp(-4 * D2) 276 | if not M.dtype == dtype: 277 | M = K.cast(M, dtype) 278 | #threshold = 0.15 279 | #threshold2 = threshold * threshold 280 | #M = 0.5 + 0.5 * K.sign(threshold2 - D2) 281 | return M 282 | 283 | 284 | def lossAttMSE(y_true, y_pred, thr=0.1): 285 | a = y_pred 286 | b = y_true 287 | 288 | aShape = K.shape(a) 289 | La = aShape[-2] 290 | dima = aShape[-1] 291 | dtype = a.dtype 292 | a = K.reshape(a, (La, dima)) 293 | 294 | bShape = K.shape(b) 295 | Lb = bShape[-2] 296 | dimb = bShape[-1] 297 | b = K.reshape(b, (Lb, dimb)) 298 | 299 | D2_MSE = compute_D2(a, b, La, Lb) 300 | 301 | shifts = [-3, 
-2, -1, 0, +1, +2, +3] 302 | wshifts = normList([0.125, 0.25, 0.5, 1.0, 0.5, 0.25, 0.125]) 303 | a = applyTimeShifts(a, shifts, wshifts) 304 | b = applyTimeShifts(b, shifts, wshifts) 305 | 306 | if not a.dtype == dtype: 307 | a = K.cast(a, dtype) 308 | b = K.cast(b, dtype) 309 | 310 | M0 = makeM0(La, Lb, thr, dtype) 311 | D2 = compute_D2(a, b, La, Lb) 312 | mD2M = -(1.0 * D2 + M0) 313 | M1 = K.softmax(mD2M) 314 | M2 = K.transpose(K.softmax(K.transpose(mD2M))) 315 | M = M1 + M2 316 | 317 | return K.sum(K.sum(D2_MSE * M)) / K.sum(K.sum(M)) 318 | 319 | 320 | def MSE(y_true, y_pred): 321 | a = y_pred 322 | b = y_true 323 | 324 | # dealing with 3d arrays 325 | aShape = K.shape(a) 326 | La = aShape[-2] 327 | dima = aShape[-1] 328 | dtype = a.dtype 329 | a = K.reshape(a, (La, dima)) 330 | bShape = K.shape(b) 331 | Lb = bShape[-2] 332 | dimb = bShape[-1] 333 | b = K.reshape(b, (Lb, dimb)) 334 | 335 | # dealing with different dtypes 336 | if not a.dtype == dtype: 337 | a = K.cast(a, dtype) 338 | b = K.cast(b, dtype) 339 | 340 | delta = a - b 341 | delta2 = delta * delta 342 | return K.mean(K.mean(delta2)) 343 | 344 | 345 | def compute_errorMSEDTW(y, tar, threshold=0.2): 346 | d, path_y, path_tar = dtw(y, tar, threshold) 347 | delta = y[path_y] - tar[path_tar] 348 | sum = numpy.sum(numpy.sum(delta * delta)) 349 | #return sum, len(path_y), y.shape[1] 350 | return sum, delta.shape[0], delta.shape[1] 351 | 352 | 353 | def reshape_2d_fixed_dim(x, dim): 354 | xShape = K.shape(x) 355 | return K.reshape(x, (xShape[-2], dim)) 356 | 357 | 358 | def reshape_3d(x): 359 | xShape = K.shape(x) 360 | return K.reshape(x, (1, xShape[-2], xShape[-1])) 361 | -------------------------------------------------------------------------------- /SLSynthesis/pipeline_demo_05_repository.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | import re 4 | import os 5 | import shutil 6 | import codecs 7 | import math 8 | import copy 9 | import h5py 10 | import random 11 | 12 | import numpy 13 | 14 | from utils import * 15 | 16 | 17 | def saveMatrix(fname, x): 18 | f = open(fname, "w") 19 | for t in range(x.shape[0]): 20 | for i in range(x.shape[1]): 21 | f.write("%e\t" % x[t, i]) 22 | f.write("\n") 23 | f.close() 24 | 25 | 26 | def computeDist(a, b): 27 | La = a.shape[0] 28 | Lb = b.shape[0] 29 | dim = a.shape[1] 30 | sumaa = numpy.sum(a * a, axis=1, keepdims=True) 31 | sumbb = numpy.sum(b * b, axis=1, keepdims=True) 32 | D2 = numpy.dot(sumaa, numpy.ones((1, Lb), dtype=a.dtype)) - 2 * numpy.dot(a, b.T) + numpy.dot(numpy.ones((La, 1), dtype=a.dtype), sumbb.T) 33 | return D2 34 | 35 | 36 | def loadMatrixFloatText(fname, dtype="float64"): 37 | M = [] 38 | f = open(fname) 39 | for line in f: 40 | M.append([float(x) for x in line.strip().split()]) 41 | f.close() 42 | M = numpy.asarray(M, dtype=dtype) 43 | return M 44 | 45 | 46 | if __name__ == "__main__": 47 | K = 1024 48 | 49 | fnameIn = "data/keypoints-filt-deltas-norm.h5" 50 | fnameRep = "temp/repository-%d" % K 51 | 52 | hfIn = h5py.File(fnameIn, "r") 53 | 54 | iters = 20 55 | 56 | rep = numpy.zeros((K, 2 * 49), dtype="float32") 57 | 58 | keys = loadList("data/training.list") 59 | clear = loadList("data/clear.list") 60 | keys = [key for key in keys if key in clear] 61 | 62 | if True: 63 | print("0") 64 | sum0 = numpy.zeros((K, ), dtype="int64") 65 | sum1 = numpy.zeros(rep.shape, dtype="float32") 66 | i = 0 67 | for key in keys: 68 | x = numpy.array(hfIn.get(key)) 69 | for t in range(0, x.shape[0], 7): 70 | x_t = 
x[t] 71 | #i = random.randint(0, K - 1) 72 | sum0[i] = sum0[i] + 1 73 | sum1[i] = sum1[i] + x_t 74 | i = (i + 1) % K 75 | if i == 0: 76 | break 77 | if i == 0: 78 | break 79 | #break 80 | rep = sum1 / (sum0.reshape((K, 1)) + 1e-30) 81 | saveMatrix(fnameRep, rep) 82 | else: 83 | rep = loadMatrixFloatText(fnameRep, dtype="float32") 84 | 85 | for iter in range(iters): 86 | sume = 0 87 | sume0 = 0 88 | print(iter + 1) 89 | sum0 = numpy.zeros((K, ), dtype="int64") + 1 90 | sum1 = numpy.zeros(rep.shape, dtype="float32") + rep 91 | for key in keys: 92 | x = numpy.array(hfIn.get(key)) 93 | d = computeDist(x, rep) 94 | dm = numpy.min(d, axis=1) 95 | dargm = numpy.argmin(d, axis=1) 96 | cands = [(dm[i], dargm[i], i) for i in range(dm.shape[0])] 97 | 98 | for k in range(len(cands)): 99 | m, am, t = cands[k] 100 | sum0[am] = sum0[am] + 1 101 | sum1[am] = sum1[am] + x[t] 102 | sume = sume + m 103 | sume0 = sume0 + 1 104 | 105 | rep = (sum1 / (sum0.reshape((K, 1)) + 1e-30)) 106 | print(sum0) 107 | print(float(sume) / float(sume0)) 108 | 109 | saveMatrix(fnameRep, rep) 110 | 111 | hfIn.close() 112 | -------------------------------------------------------------------------------- /SLSynthesis/saveDeltas.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | import re 4 | import os 5 | import shutil 6 | import codecs 7 | import math 8 | import copy 9 | import h5py 10 | 11 | import numpy 12 | 13 | 14 | dtypeFloat = "float32" 15 | dtypeInt = "int64" 16 | 17 | 18 | def saveMatrix(fname, x): 19 | f = open(fname, "w") 20 | for t in range(x.shape[0]): 21 | for i in range(x.shape[1]): 22 | f.write("%e\t" % x[t, i]) 23 | f.write("\n") 24 | f.close() 25 | 26 | 27 | def computeDeltas(x): 28 | 29 | deltas = ( 30 | (0, 1), 31 | (1, 2), 32 | (2, 3), 33 | (3, 4), 34 | (1, 5), 35 | (5, 6), 36 | (6, 7), 37 | 38 | (4, 29), 39 | 40 | (29, 30), 41 | (30, 31), 42 | (31, 32), 43 | (32, 33), 44 | 45 | (29, 34), 46 | (34, 35), 47 | (35, 36), 48 | (36, 37), 49 | 50 | (29, 38), 51 | (38, 39), 52 | (39, 40), 53 | (40, 41), 54 | 55 | (29, 42), 56 | (42, 43), 57 | (43, 44), 58 | (44, 45), 59 | 60 | (29, 46), 61 | (46, 47), 62 | (47, 48), 63 | (48, 49), 64 | 65 | (7, 8), 66 | 67 | (8, 9), 68 | (9, 10), 69 | (10, 11), 70 | (11, 12), 71 | 72 | (8, 13), 73 | (13, 14), 74 | (14, 15), 75 | (15, 16), 76 | 77 | (8, 17), 78 | (17, 18), 79 | (18, 19), 80 | (19, 20), 81 | 82 | (8, 21), 83 | (21, 22), 84 | (22, 23), 85 | (23, 24), 86 | 87 | (8, 25), 88 | (25, 26), 89 | (26, 27), 90 | (27, 28), 91 | ) 92 | 93 | T = x.shape[0] 94 | n = x.shape[1] / 3 95 | nd = len(deltas) 96 | y = numpy.zeros((T, 2 * nd), dtype=dtypeFloat) 97 | for t in range(T): 98 | for iDelta in range(nd): 99 | i = deltas[iDelta][0] 100 | j = deltas[iDelta][1] 101 | y[t, 2 * iDelta + 0] = x[t, 3 * i + 0] - x[t, 3 * j + 0] 102 | y[t, 2 * iDelta + 1] = x[t, 3 * i + 1] - x[t, 3 * j + 1] 103 | return y 104 | 105 | 106 | def walkDir(dname, filt = r".*"): 107 | result = [] 108 | for root, dnames, fnames in os.walk(dname): 109 | for fname in fnames: 110 | if re.search(filt, fname): 111 | foo = root + "/" + fname 112 | foo = re.sub(r"[/\\]+", "/", foo) 113 | result.append(foo) 114 | return result 115 | 116 | 117 | def loadRec(fname): 118 | rec = [] 119 | f = open(fname) 120 | for line in f: 121 | x = [] 122 | for x_i in line.strip().split(): 123 | x.append(float(x_i)) 124 | rec.append(x) 125 | f.close() 126 | return rec 127 | 128 | 129 | #def saveAllData(): 130 | # data = {} 131 | # for fnameInput in 
walkDir("../../data/keypoints/txt", r"\.txt$"): 132 | # print fnameInput 133 | # fnameClear = re.sub(r".*/", "", re.sub(r"\.[^\.]+$", "", fnameInput)) 134 | # rec = loadRec(fnameInput) 135 | # rec = numpy.asarray(rec, dtype=dtypeFloat) 136 | # deltas = computeDeltas(rec) 137 | # saveMatrix("../../data/keypoints/deltas/" + fnameClear + ".txt", deltas) 138 | 139 | 140 | def saveAllData(fnameIn, fnameOut): 141 | hfIn = h5py.File(fnameIn, "r") 142 | hfOut = h5py.File(fnameOut, "w") 143 | for key in hfIn.keys(): 144 | print(key) 145 | rec = numpy.array(hfIn.get(key)) 146 | deltas = computeDeltas(rec) 147 | hfOut.create_dataset(key, data=deltas, dtype=deltas.dtype) 148 | hfOut.close() 149 | hfIn.close() 150 | 151 | 152 | if __name__ == "__main__": 153 | #saveAllData() 154 | saveAllData("../../data/keypoints/keypoints-clean-skeleton.h5", "../../data/keypoints/keypoints-clean-skeleton-deltas.h5") 155 | -------------------------------------------------------------------------------- /SLSynthesis/saveMu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | import re 4 | import os 5 | import shutil 6 | import codecs 7 | import math 8 | import copy 9 | import h5py 10 | 11 | import numpy 12 | 13 | from utils import * 14 | 15 | 16 | def saveMatrix(fname, x): 17 | f = open(fname, "w") 18 | for t in range(x.shape[0]): 19 | for i in range(x.shape[1]): 20 | f.write("%e\t" % x[t, i]) 21 | f.write("\n") 22 | f.close() 23 | 24 | 25 | if __name__ == "__main__": 26 | list_training = loadList("data/training.list") 27 | hfIn = h5py.File("data/recs.h5", "r") 28 | sum0 = 0 29 | sum1 = 0.0 30 | sum2 = 0.0 31 | for key in hfIn: 32 | print("... reading %s" % key) 33 | x = numpy.array(hfIn.get(key)) 34 | sum0 = sum0 + x.shape[0] 35 | sum1 = sum1 + numpy.sum(x, axis = 0, keepdims = True) 36 | sum2 = sum2 + numpy.sum(x * x, axis = 0, keepdims = True) 37 | hfIn.close() 38 | m = sum1 / sum0 39 | m2 = sum2 / sum0 40 | s = numpy.sqrt(m2 - m * m) 41 | saveMatrix("temp/mu", m) 42 | -------------------------------------------------------------------------------- /SLSynthesis/saveNormSigmaPrintScale.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | import re 4 | import os 5 | import shutil 6 | import codecs 7 | import math 8 | import copy 9 | import h5py 10 | 11 | import numpy 12 | 13 | 14 | dtypeFloat = "float64" 15 | dtypeInt = "int64" 16 | 17 | 18 | def saveMatrix(fname, x): 19 | f = open(fname, "w") 20 | for t in range(x.shape[0]): 21 | for i in range(x.shape[1]): 22 | f.write("%e\t" % x[t, i]) 23 | f.write("\n") 24 | f.close() 25 | 26 | 27 | def walkDir(dname, filt = r".*"): 28 | result = [] 29 | for root, dnames, fnames in os.walk(dname): 30 | for fname in fnames: 31 | if re.search(filt, fname): 32 | foo = root + "/" + fname 33 | foo = re.sub(r"[/\\]+", "/", foo) 34 | result.append(foo) 35 | return result 36 | 37 | 38 | def loadRec(fname): 39 | rec = [] 40 | f = open(fname) 41 | for line in f: 42 | x = [] 43 | for x_i in line.strip().split(): 44 | x.append(float(x_i)) 45 | rec.append(x) 46 | f.close() 47 | return rec 48 | 49 | 50 | def saveAllData(fnameIn, fnameOut): 51 | hfIn = h5py.File(fnameIn, "r") 52 | 53 | sum0 = 0 54 | sum1 = 0.0 55 | sum2 = 0.0 56 | for key in hfIn: 57 | print(key) 58 | x = numpy.array(hfIn.get(key)) 59 | sum0 = sum0 + x.shape[0] 60 | sum1 = sum1 + numpy.sum(x, axis = 0, keepdims = True) 61 | sum2 = sum2 + numpy.sum(x * x, axis = 0, keepdims = True) 62 | m = sum1 / 
sum0 63 | m2 = sum2 / sum0 64 | s = numpy.sqrt(m2 - m * m) 65 | sigma = numpy.mean(s) 66 | 67 | print("sigma = %e" % sigma) 68 | 69 | 70 | if __name__ == "__main__": 71 | ID = "keypoints-clean-skeleton-deltas" 72 | 73 | fnameIn = "../../data/keypoints/%s.h5" % ID 74 | fnameOut = "../../data/keypoints/%s-normSigma.h5" % ID 75 | 76 | saveAllData(fnameIn, fnameOut) 77 | -------------------------------------------------------------------------------- /SLSynthesis/temp/mu: -------------------------------------------------------------------------------- 1 | -4.752119e-01 -8.700225e+00 7.493736e+00 -1.064738e-01 3.763926e+00 -1.103222e+01 -5.453365e+00 6.044421e+00 -7.622200e+00 5.645945e-02 -4.035364e+00 -1.221669e+01 7.105147e+00 3.497971e+00 -5.583394e-01 3.719013e-01 -7.413567e-01 8.820624e-01 -8.554611e-01 1.154620e+00 -5.694094e-01 8.073070e-01 -2.837018e-01 4.931144e-01 -1.401927e+00 2.798605e+00 -9.484005e-01 6.579404e-01 -4.881163e-01 2.494248e-01 -2.827614e-01 1.871853e-01 -1.291132e+00 2.394697e+00 -1.074346e+00 4.961283e-01 -3.185437e-01 4.699163e-02 -7.316625e-02 9.193767e-02 -1.211229e+00 1.859478e+00 -9.764227e-01 4.008285e-01 -2.073916e-01 2.202991e-02 -1.810719e-02 1.025041e-01 -1.183830e+00 1.270156e+00 -7.010338e-01 2.365454e-01 -1.423412e-01 4.828411e-02 3.977561e-03 1.022625e-01 5.710982e-01 2.167279e-01 9.895887e-01 8.559687e-01 1.202048e+00 8.221900e-01 9.439814e-01 2.625810e-01 5.505453e-01 1.087417e-01 2.315435e+00 1.812145e+00 1.284644e+00 1.315839e-01 4.852837e-01 5.462589e-02 2.644291e-01 3.028526e-02 2.125611e+00 1.224558e+00 1.380610e+00 3.203324e-02 3.327233e-01 3.710161e-02 9.485423e-02 9.398672e-02 1.916867e+00 5.908335e-01 1.273579e+00 -1.448397e-02 2.752693e-01 6.643921e-02 7.428439e-02 1.240490e-01 1.729637e+00 -6.425930e-02 9.785548e-01 -7.221705e-02 2.339510e-01 6.714109e-02 9.914390e-02 1.337369e-01 2 | -------------------------------------------------------------------------------- /SLSynthesis/training.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import os 5 | import sys 6 | from datetime import datetime 7 | 8 | import numpy 9 | import tensorflow as tf 10 | import keras 11 | from keras.models import Sequential, Model 12 | from keras.layers import Dense 13 | from keras import backend as K 14 | from keras.layers import Layer, Input 15 | from keras.models import model_from_json 16 | 17 | from utils import * 18 | 19 | 20 | def make_dir_for_file(fname): 21 | path = re.sub(r"[^\\/]+$", "", fname) 22 | try: 23 | os.mkdir(path) 24 | except OSError: 25 | #print("Cannot create the directory %s" % path) 26 | pass; # Perhaps, the path exists. 
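# (Added note, not part of the original code.) On Python 3.2+, an equivalent
# that also creates nested directories would be this sketch:
#
#     def make_dir_for_file(fname):
#         path = os.path.dirname(fname)
#         if path:
#             os.makedirs(path, exist_ok=True)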
27 | 28 | 29 | def save(fname_weights, model): 30 | #make_dir_for_file(fname_json) 31 | make_dir_for_file(fname_weights) 32 | # serialize model to JSON 33 | #model_json = model.to_json() 34 | #with open(fname_json, "w") as json_file: 35 | # json_file.write(model_json) 36 | # serialize weights to HDF5 37 | model.save_weights(fname_weights) 38 | 39 | 40 | def addDim(M): 41 | if len(M.shape) == 1: 42 | return M.reshape((1, M.shape[0])) 43 | return M.reshape((1, M.shape[0], M.shape[1])) 44 | 45 | 46 | def eval_dataset(list_eval, loader, model4output, model4example, funs_eval): 47 | sum0 = None 48 | sum1 = None 49 | example = None 50 | outputs = {} 51 | 52 | for key in list_eval: 53 | data = loader(key) 54 | inputs_values = data["inputs"] 55 | inputs_values = [addDim(x) for x in inputs_values] # 2D -> 3D 56 | ys_values = model4output.predict_on_batch(inputs_values) 57 | if not ((type(ys_values) is list) or (type(ys_values) is tuple)): 58 | ys_values = [ys_values] 59 | 60 | if sum0 is None: 61 | sum0 = [0 for foo in range(len(ys_values))] 62 | sum1 = [0.0 for foo in range(len(ys_values))] 63 | 64 | if example is None: 65 | if not model4example is None: 66 | example = model4example.predict_on_batch(inputs_values) 67 | 68 | for i in range(len(ys_values)): 69 | y_values = ys_values[i] 70 | tar_values = data["targets"][i] 71 | fun_eval = funs_eval[i] 72 | e, m, n = fun_eval(y_values, tar_values) 73 | sum0[i] = sum0[i] + m * n 74 | sum1[i] = sum1[i] + e 75 | if i == 0: 76 | outputs[key] = y_values 77 | errors = [] 78 | for i in range(len(sum0)): 79 | errors.append(sum1[i] / sum0[i]) 80 | return errors, example, outputs 81 | 82 | 83 | def prepare_data(key, loader, model4output, datamod): 84 | data = loader(key) 85 | data = datamod(data) 86 | inputs_values = data["inputs"] 87 | inputs_values = [addDim(x) for x in inputs_values] # 2D -> 3D 88 | tar_values = data["targets"] 89 | tar_values = [addDim(tar) for tar in tar_values] # 2D -> 3D 90 | return inputs_values, tar_values 91 | 92 | 93 | def training( 94 | model4training, 95 | model4output, 96 | model4example, 97 | loss, 98 | loader, 99 | list_training, 100 | list_stop, 101 | optimizer, 102 | funs_eval, 103 | testing_interval, 104 | fname_model_weights=None, 105 | fname_example=None, 106 | list_test=None, 107 | mask4print="%e", 108 | wloss=None, 109 | max_steps=None, 110 | datamod4training=lambda data: data, 111 | loader_test=None 112 | ): 113 | 114 | if loader_test is None: 115 | loader_test = loader 116 | 117 | if not fname_example is None: 118 | make_dir_for_file(fname_example) 119 | 120 | model4training.compile(loss=loss, loss_weights=wloss, optimizer=optimizer) 121 | 122 | random.shuffle(list_training) 123 | 124 | e_stop_min = None 125 | e_test_min = None 126 | i_step = 0 127 | outputs_test = None 128 | while True: 129 | 130 | # testing 131 | if i_step % testing_interval == 0: 132 | print("") 133 | print("testing (%d steps):" % i_step) 134 | print("") 135 | es_stop, example_stop, outputs_stop = eval_dataset(list_stop, loader_test, model4output, model4example, funs_eval) 136 | e_stop = es_stop[0] 137 | if not fname_example is None: 138 | saveMatrixText(fname_example, example_stop) 139 | if e_stop_min is None or e_stop_min >= e_stop: 140 | e_stop_min = e_stop 141 | if not list_test is None: 142 | es_test, example_test, outputs_test = eval_dataset(list_test, loader_test, model4output, model4example, funs_eval) 143 | e_test = es_test[0] 144 | e_test_min = e_test 145 | if not fname_model_weights is None: 146 | try: 147 | save(fname_model_weights, 
model4output) 148 | except OSError: 149 | print("OSError") 150 | pass 151 | print(" actual results:") 152 | print("") 153 | print(" e(stop) = %s" % "\t".join([mask4print % foo for foo in es_stop])) 154 | if not list_test is None: 155 | print(" e(test) = %s" % "\t".join([mask4print % foo for foo in es_test])) 156 | print("") 157 | print(" the best results:") 158 | print("") 159 | print((" e(stop) = " + mask4print) % e_stop_min) 160 | if not list_test is None: 161 | print((" e(test) = " + mask4print) % e_test_min) 162 | print("") 163 | sys.stdout.flush() 164 | 165 | i_key = i_step % len(list_training) 166 | key = list_training[i_key] 167 | inputs_values, tar_values = prepare_data(key, loader, model4output, datamod4training) 168 | 169 | model4training.fit(inputs_values, tar_values, epochs=1, batch_size=1, verbose=0) 170 | 171 | i_step = i_step + 1 172 | 173 | if (not max_steps is None) and (i_step >= max_steps): 174 | break 175 | 176 | print("OK") 177 | 178 | -------------------------------------------------------------------------------- /SLSynthesis/utils.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import os 5 | import codecs 6 | import sys 7 | from datetime import datetime 8 | 9 | import numpy 10 | 11 | 12 | def loadText(fname, cod="utf8"): 13 | f = codecs.open(fname, "r", cod) 14 | text = {} 15 | ws = {} 16 | for line in f: 17 | foo = line.strip().split() 18 | key = foo[0] 19 | sent = foo[1:len(foo)] 20 | for w in sent: 21 | if not w in ws: 22 | ws[w] = len(ws) 23 | sent_int = [ws[w] for w in sent] 24 | text[key] = numpy.asarray(sent_int, dtype="int32") 25 | f.close() 26 | return text, len(ws) 27 | 28 | 29 | def loadInfo(fname, dtype, cod="utf8"): 30 | f = codecs.open(fname, "r", cod) 31 | info = {} 32 | vals = {} 33 | for line in f: 34 | key, val = line.strip().split() 35 | if not val in vals: 36 | vals[val] = len(vals) 37 | info[key] = val 38 | f.close() 39 | for key in info.keys(): 40 | val = info[key] 41 | info[key] = numpy.zeros((1, len(vals), ), dtype=dtype) 42 | info[key][0, vals[val]] = 1 43 | return info, len(vals) 44 | 45 | 46 | def loadDenses(fname, names): 47 | result = [] 48 | hf = h5py.File(fname, "r") 49 | for name in names: 50 | #print(name) 51 | gr = hf.get(name)[name] 52 | w = gr["kernel:0"] 53 | w = numpy.array(w) 54 | result.append(w) 55 | if "bias:0" in gr: 56 | b = gr["bias:0"] 57 | b = numpy.array(b) 58 | result.append(b) 59 | #else: 60 | # b = numpy.zeros((w.shape[1], ), dtype=w.dtype) 61 | hf.close() 62 | return result 63 | 64 | 65 | def loadAllDenses(fname): 66 | result = [] 67 | hf = h5py.File(fname, "r") 68 | names = [] 69 | i = 1 70 | while "dense_%d" % i in hf: 71 | names.append("dense_%d" % i) 72 | i = i + 1 73 | for name in names: 74 | gr = hf.get(name)[name] 75 | w = gr["kernel:0"] 76 | w = numpy.array(w) 77 | result.append(w) 78 | if "bias:0" in gr: 79 | b = gr["bias:0"] 80 | b = numpy.array(b) 81 | result.append(b) 82 | #else: 83 | # b = numpy.zeros((w.shape[1], ), dtype=w.dtype) 84 | hf.close() 85 | return result 86 | 87 | 88 | def loadAllDenses2(fname, maxNum=None): 89 | result = [] 90 | hf = h5py.File(fname, "r") 91 | names = [] 92 | if maxNum is None: 93 | i = 1 94 | while "dense2_%d" % i in hf: 95 | names.append("dense2_%d" % i) 96 | i = i + 1 97 | else: 98 | for i in range(1, maxNum + 1): 99 | if "dense2_%d" % i in hf: 100 | names.append("dense2_%d" % i) 101 | for name in names: 102 | gr = hf.get(name)[name] 103 | w = gr["kernel:0"] 104 | w = numpy.array(w) 
105 |             result.append(w)
106 |             if "bias:0" in gr:
107 |                 b = gr["bias:0"]
108 |                 b = numpy.array(b)
109 |                 result.append(b)
110 |             #else:
111 |             #    b = numpy.zeros((w.shape[1], ), dtype=w.dtype)
112 |     hf.close()
113 |     return result
114 | 
115 | 
116 | def loadList(fname):
117 |     f = open(fname)
118 |     l = [line.strip() for line in f]
119 |     f.close()
120 |     return l
121 | 
122 | 
123 | def saveMatrixText(fname, x):
124 |     f = open(fname, "w")
125 |     for t in range(x.shape[0]):
126 |         for i in range(x.shape[1]):
127 |             f.write("%e\t" % x[t, i])
128 |         f.write("\n")
129 |     f.close()
130 | 
131 | 
132 | def loadMatrixFloatText(fname, dtype="float64"):
133 |     M = []
134 |     f = open(fname)
135 |     for line in f:
136 |         M.append([float(x) for x in line.strip().split()])
137 |     f.close()
138 |     M = numpy.asarray(M, dtype=dtype)
139 |     return M
140 | 
--------------------------------------------------------------------------------
/read.me:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | Cite as:
4 | 
5 | @InProceedings{Zelinka_2020_WACV,
6 |   author = {Zelinka, Jan and Kanis, Jakub},
7 |   title = {Neural Sign Language Synthesis: Words Are Our Glosses},
8 |   booktitle = {The IEEE Winter Conference on Applications of Computer Vision (WACV)},
9 |   month = {March},
10 |   year = {2020}
11 | }
--------------------------------------------------------------------------------
/wacv2020/README:
--------------------------------------------------------------------------------
1 | Neural Sign Language Synthesis: Words Are Our Glosses
2 | 
3 | The scripts require the TensorFlow toolbox. The verified TensorFlow version is 1.15.0.
4 | 
5 | This archive includes a small toy database (demo). The JSON files are packed into a zip file.
6 | 
7 | Our pipeline proceeds as follows:
8 | 1) Packing the relevant keypoints (see the variables idxsPose and idxsHand), stored as JSON files, into a single H5 file. (see pipeline_demo_01_json2h5.py)
9 | 2) Using a filter to interpolate missing keypoints. This filter uses the described backpropagation-based 2D->3D skeleton transformation. (see pipeline_demo_02_filter.py)
10 | 3) Transforming absolute positions into vectors. (see pipeline_demo_03_deltas.py)
11 | 4) Normalizing the standard deviation. (see pipeline_demo_04_norm.py)
12 | 5) Computing an average pose. (see pipeline_demo_05_mu.py)
13 | 6) Training a model. (pipeline_demo_06-train-model_FF_05.py)
14 | 
15 | The described gradient-based pose reconstruction is used in the second step. The algorithm is implemented in the file pose3D.py. The described initialization is implemented by the function initialization in the file pose2Dto3D.py. The algorithms use the skeletal structure defined in the script skeletalModel.py. A sketch of a typical invocation of the demo pipeline follows.
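A typical run of the demo pipeline, shown only as an illustrative sketch (it
assumes the scripts are run from the wacv2020 directory with the demo data
unpacked; adjust paths to your setup). The evaluation script is included in
this archive as a seventh, optional step:

    python pipeline_demo_01_json2h5.py
    python pipeline_demo_02_filter.py
    python pipeline_demo_03_deltas.py
    python pipeline_demo_04_norm.py
    python pipeline_demo_05_mu.py
    python pipeline_demo_06-train-model_FF_05.py
    python pipeline_demo_07-evaluation-model_FF_05.py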
16 | 17 | Cite as: 18 | 19 | @InProceedings{Zelinka_2020_WACV, 20 | author = {Zelinka, Jan and Kanis, Jakub}, 21 | title = {Neural Sign Language Synthesis: Words Are Our Glosses}, 22 | booktitle = {The IEEE Winter Conference on Applications of Computer Vision (WACV)}, 23 | month = {March}, 24 | year = {2020} 25 | } 26 | -------------------------------------------------------------------------------- /wacv2020/accumulators.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import tensorflow 4 | import tensorflow as tf 5 | 6 | import keras.backend as K 7 | from keras.legacy import interfaces 8 | from keras.optimizers import Optimizer 9 | 10 | 11 | def convert_to_accumulate_gradient_optimizer(orig_optimizer, update_params_frequency, accumulate_sum_or_mean=True): 12 | if update_params_frequency < 1: 13 | raise ValueError('update_params_frequency must be >= 1') 14 | print('update_params_frequency: %s' % update_params_frequency) 15 | print('accumulate_sum_or_mean: %s' % accumulate_sum_or_mean) 16 | orig_get_gradients = orig_optimizer.get_gradients 17 | orig_get_updates = orig_optimizer.get_updates 18 | accumulated_iterations = K.variable(0, dtype='int64', name='accumulated_iterations') 19 | orig_optimizer.accumulated_iterations = accumulated_iterations 20 | 21 | def updated_get_gradients(self, loss, params): 22 | return self.accumulate_gradient_accumulators 23 | 24 | def updated_get_updates(self, loss, params): 25 | self.accumulate_gradient_accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 26 | updates_accumulated_iterations = K.update_add(accumulated_iterations, 1) 27 | new_grads = orig_get_gradients(loss, params) 28 | if not accumulate_sum_or_mean: 29 | new_grads = [g / K.cast(update_params_frequency, K.dtype(g)) for g in new_grads] 30 | self.updated_grads = [K.update_add(p, g) for p, g in zip(self.accumulate_gradient_accumulators, new_grads)] 31 | def update_function(): 32 | with tensorflow.control_dependencies(orig_get_updates(loss, params)): 33 | reset_grads = [K.update(p, K.zeros(K.int_shape(p), dtype=K.dtype(p))) for p in self.accumulate_gradient_accumulators] 34 | return tensorflow.group(*(reset_grads + [updates_accumulated_iterations])) 35 | def just_store_function(): 36 | return tensorflow.group(*[updates_accumulated_iterations]) 37 | 38 | update_switch = K.equal((updates_accumulated_iterations) % update_params_frequency, 0) 39 | 40 | with tensorflow.control_dependencies(self.updated_grads): 41 | self.updates = [K.switch(update_switch, update_function, just_store_function)] 42 | return self.updates 43 | 44 | orig_optimizer.get_gradients = updated_get_gradients.__get__(orig_optimizer, type(orig_optimizer)) 45 | orig_optimizer.get_updates = updated_get_updates.__get__(orig_optimizer, type(orig_optimizer)) 46 | 47 | 48 | class AdamAccumulate(Optimizer): 49 | 50 | def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False, accum_iters=1, **kwargs): 51 | if accum_iters < 1: 52 | raise ValueError('accum_iters must be >= 1') 53 | super(AdamAccumulate, self).__init__(**kwargs) 54 | with K.name_scope(self.__class__.__name__): 55 | self.iterations = K.variable(0, dtype='int64', name='iterations') 56 | self.lr_ = K.variable(value=lr, name='lr') 57 | self.beta_1 = K.variable(beta_1, name='beta_1') 58 | self.beta_2 = K.variable(beta_2, name='beta_2') 59 | self.decay = K.variable(decay, name='decay') 60 | if epsilon is None: 61 | epsilon = K.epsilon() 62 | self.epsilon = epsilon 63 | 
self.initial_decay = decay 64 | self.amsgrad = amsgrad 65 | self.accum_iters = K.variable(accum_iters, K.dtype(self.iterations)) 66 | self.accum_iters_float = K.cast(self.accum_iters, K.floatx()) 67 | 68 | @interfaces.legacy_get_updates_support 69 | def get_updates(self, loss, params): 70 | grads = self.get_gradients(loss, params) 71 | self.updates = [K.update_add(self.iterations, 1)] 72 | 73 | lr = self.lr_ 74 | 75 | completed_updates = K.cast(tf.floordiv(self.iterations, self.accum_iters), K.floatx()) 76 | 77 | if self.initial_decay > 0: 78 | lr = lr * (1. / (1. + self.decay * completed_updates)) 79 | 80 | t = completed_updates + 1 81 | 82 | lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))) 83 | 84 | # self.iterations incremented after processing a batch 85 | # batch: 1 2 3 4 5 6 7 8 9 86 | # self.iterations: 0 1 2 3 4 5 6 7 8 87 | # update_switch = 1: x x (if accum_iters=4) 88 | update_switch = K.equal((self.iterations + 1) % self.accum_iters, 0) 89 | update_switch = K.cast(update_switch, K.floatx()) 90 | 91 | ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 92 | vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 93 | gs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 94 | 95 | if self.amsgrad: 96 | vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] 97 | else: 98 | vhats = [K.zeros((1, )) for _ in params] 99 | 100 | self.weights = [self.iterations] + ms + vs + vhats 101 | 102 | for p, g, m, v, vhat, tg in zip(params, grads, ms, vs, vhats, gs): 103 | 104 | sum_grad = tg + g 105 | #reest_grad = sum_grad 106 | avg_grad = sum_grad / self.accum_iters_float 107 | reest_grad = avg_grad 108 | 109 | m_t = (self.beta_1 * m) + (1. - self.beta_1) * reest_grad 110 | v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(reest_grad) 111 | 112 | if self.amsgrad: 113 | vhat_t = K.maximum(vhat, v_t) 114 | p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) 115 | self.updates.append(K.update(vhat, (1 - update_switch) * vhat + update_switch * vhat_t)) 116 | else: 117 | p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) 118 | 119 | self.updates.append(K.update(m, (1 - update_switch) * m + update_switch * m_t)) 120 | self.updates.append(K.update(v, (1 - update_switch) * v + update_switch * v_t)) 121 | self.updates.append(K.update(tg, (1 - update_switch) * sum_grad)) 122 | new_p = p_t 123 | 124 | # Apply constraints. 
125 | if getattr(p, 'constraint', None) is not None: 126 | new_p = p.constraint(new_p) 127 | 128 | self.updates.append(K.update(p, (1 - update_switch) * p + update_switch * new_p)) 129 | return self.updates 130 | 131 | def get_config(self): 132 | config = {'lr': float(K.get_value(self.lr_)), 133 | 'beta_1': float(K.get_value(self.beta_1)), 134 | 'beta_2': float(K.get_value(self.beta_2)), 135 | 'decay': float(K.get_value(self.decay)), 136 | 'epsilon': self.epsilon, 137 | 'amsgrad': self.amsgrad} 138 | base_config = super(AdamAccumulate, self).get_config() 139 | return dict(list(base_config.items()) + list(config.items())) 140 | -------------------------------------------------------------------------------- /wacv2020/data/demo/info/spk: -------------------------------------------------------------------------------- 1 | 20191005v v 2 | 20191006v v 3 | 20191007mz mz 4 | 20191008mz mz 5 | 20191009h h 6 | 20191010mz mz 7 | 20191011h h 8 | 20191012h h 9 | 20191013s s 10 | 20191014v v 11 | 20191015v v 12 | 20191016v v 13 | 20191017h h 14 | 20191018m m 15 | 20191019m m 16 | 20191020m m 17 | 20191021s s 18 | 20191022s s 19 | -------------------------------------------------------------------------------- /wacv2020/data/demo/keypoints/json.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gopeith/SignLanguageProcessing/a312367087b5ebf01a435ef513db3fe80d8438a2/wacv2020/data/demo/keypoints/json.zip -------------------------------------------------------------------------------- /wacv2020/data/demo/keypoints/keypoints-01-raw.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gopeith/SignLanguageProcessing/a312367087b5ebf01a435ef513db3fe80d8438a2/wacv2020/data/demo/keypoints/keypoints-01-raw.h5 -------------------------------------------------------------------------------- /wacv2020/data/demo/lists/all.list: -------------------------------------------------------------------------------- 1 | 20191005v 2 | 20191006v 3 | 20191007mz 4 | 20191008mz 5 | 20191009h 6 | 20191010mz 7 | 20191011h 8 | 20191012h 9 | 20191013s 10 | 20191014v 11 | 20191015v 12 | 20191016v 13 | 20191017h 14 | 20191018m 15 | 20191019m 16 | 20191020m 17 | 20191021s 18 | 20191022s 19 | -------------------------------------------------------------------------------- /wacv2020/data/demo/lists/test.list: -------------------------------------------------------------------------------- 1 | 20191005v 2 | 20191022s 3 | -------------------------------------------------------------------------------- /wacv2020/data/demo/lists/train.list: -------------------------------------------------------------------------------- 1 | 20191006v 2 | 20191007mz 3 | 20191008mz 4 | 20191009h 5 | 20191010mz 6 | 20191011h 7 | 20191012h 8 | 20191013s 9 | 20191014v 10 | 20191015v 11 | 20191016v 12 | 20191017h 13 | 20191018m 14 | 20191019m 15 | 20191020m 16 | 20191021s 17 | -------------------------------------------------------------------------------- /wacv2020/dtw.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import numpy 4 | 5 | 6 | def min(l): 7 | m = None 8 | am = None 9 | for i in range(len(l)): 10 | if m is None or m > l[i]: 11 | m = l[i] 12 | am = i 13 | return m, am 14 | 15 | 16 | def min3(a, b, c): 17 | if a > b: 18 | if b > c: 19 | return c, 2 20 | return b, 1 21 | if a > c: 22 | return c, 2 23 | return a, 0 24 | 25 | 26 | def isOut(pa, pb, 
La, Lb, t=0.1): 27 | qa = pa / float(La) 28 | qb = pb / float(Lb) 29 | return qa - qb > t or qb - qa > t 30 | 31 | 32 | def dtw(a, b, maxDelta=1.0, D=None): 33 | dtype = a.dtype 34 | 35 | inf = 1e+30 36 | 37 | La = a.shape[0] 38 | Lb = b.shape[0] 39 | dim = a.shape[1] 40 | 41 | phi = inf + numpy.zeros((La, Lb), dtype=dtype) 42 | psi = -1 + numpy.zeros((La, Lb), dtype="int64") 43 | 44 | if D is None: 45 | # computing distance matrix 46 | sumaa = numpy.sum(a * a, axis=1, keepdims=True) 47 | sumbb = numpy.sum(b * b, axis=1, keepdims=True) 48 | D2 = numpy.dot(sumaa, numpy.ones((1, Lb), dtype=dtype)) - 2 * numpy.dot(a, b.T) + numpy.dot(numpy.ones((La, 1), dtype=dtype), sumbb.T) 49 | D = D2 50 | 51 | LbLa = float(Lb) / float(La) 52 | 53 | for ia in range(La): 54 | #for ib in range(Lb): 55 | ibMin = int(LbLa * (ia + 1) - maxDelta * Lb - 1) 56 | ibMax = int(LbLa * (ia + 1) + maxDelta * Lb - 1) 57 | if ibMin < 0: 58 | ibMin = 0 59 | if ibMax > Lb - 1: 60 | ibMax = Lb - 1 61 | for ib in range(ibMin, ibMax + 1): 62 | if ia == 0 and ib == 0: 63 | phi[ia, ib] = D[ia, ib] 64 | else: 65 | #delta = ((ia + 1.0) / float(La)) - ((ib + 1.0) / float(Lb)) 66 | #if delta > maxDelta or delta < -maxDelta: 67 | # continue 68 | if ia - 1 >= 0 and ib - 1 >= 0: 69 | if phi[ia, ib] > phi[ia - 1, ib - 1]: 70 | phi[ia, ib] = phi[ia - 1, ib - 1] 71 | psi[ia, ib] = 3 72 | if ia - 1 >= 0: 73 | if phi[ia, ib] > phi[ia - 1, ib]: 74 | phi[ia, ib] = phi[ia - 1, ib] 75 | psi[ia, ib] = 1 76 | if ib - 1 >= 0: 77 | if phi[ia, ib] > phi[ia, ib - 1]: 78 | phi[ia, ib] = phi[ia, ib - 1] 79 | psi[ia, ib] = 2 80 | if phi[ia, ib] < inf: 81 | phi[ia, ib] = phi[ia, ib] + D[ia, ib] 82 | 83 | 84 | point = (La - 1, Lb - 1) 85 | patha = [point[0]] 86 | pathb = [point[1]] 87 | while True: 88 | stepType = psi[point[0], point[1]] 89 | if stepType <= 0: 90 | break 91 | if stepType == 1: 92 | point = (point[0] - 1, point[1]) 93 | if stepType == 2: 94 | point = (point[0], point[1] - 1) 95 | if stepType == 3: 96 | point = (point[0] - 1, point[1] - 1) 97 | patha.insert(0, point[0]) 98 | pathb.insert(0, point[1]) 99 | 100 | return phi[La - 1, Lb - 1], patha, pathb 101 | 102 | 103 | 104 | def fakedtw(a, b, maxDelta="whatever"): 105 | if a.shape[0] > b.shape[0]: 106 | T = a.shape[0] 107 | else: 108 | T = b.shape[0] 109 | patha = numpy.zeros((T, ), dtype="int64") 110 | pathb = numpy.zeros((T, ), dtype="int64") 111 | for t in range(T): 112 | patha[t] = int(((a.shape[0] - 1) * t) / (T - 1)) 113 | pathb[t] = int(((b.shape[0] - 1) * t) / (T - 1)) 114 | return None, patha, pathb 115 | 116 | 117 | -------------------------------------------------------------------------------- /wacv2020/model_FF_05.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import math 5 | 6 | import numpy 7 | 8 | import tensorflow as tf 9 | import keras 10 | from keras.models import Sequential, Model 11 | from keras.layers import Dense, Add, Dropout 12 | from keras import backend as K 13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract 14 | from keras.utils import to_categorical 15 | 16 | from modeling import * 17 | from data import * 18 | from dtw import dtw, fakedtw 19 | from utils import * 20 | from training import * 21 | 22 | 23 | # filenames for weights, etc. 
are derived from this ID 24 | model_ID = "FF_05" 25 | 26 | dtype = "float32" 27 | 28 | n_steps = 7 # number of frames for each word 29 | dim_y = 98 # output size 30 | dim_emb = 64 # embeddings size 31 | dim_rep = 1024 # "repository" size 32 | dim_h = 512 # hidden layers size 33 | 34 | Nh = 5 # number of modules in the translator 35 | 36 | # make the experiment deterministic 37 | random.seed(123) 38 | numpy.random.seed(123) 39 | 40 | if False: 41 | # avoiding the use of multiple CPUs (not always successfully) and memory leaks in tensorflow 42 | conf = K.tf.ConfigProto( 43 | device_count={'CPU': 1}, 44 | intra_op_parallelism_threads=1, 45 | inter_op_parallelism_threads=1, 46 | ) 47 | K.set_session(K.tf.Session(config=conf)) 48 | 49 | # This function returns a dictionary with inputs and targets. 50 | # Keys "inputs" and "targets" are mandatory. 51 | # Values must be lists, not tuples. The lists will be appended to and modified when synchronization is performed. 52 | def loader(key): 53 | video = numpy.array(hfTar.get(key)) 54 | text_int = text[key] 55 | text_float = to_categorical(text_int, num_classes=dim_text) 56 | 57 | return { 58 | "inputs": [ 59 | text_float, 60 | info_spk[key], 61 | ], 62 | "targets": [ 63 | video for y in ys + ys_att 64 | ], 65 | "key": key, 66 | } 67 | 68 | 69 | # A function for tied layers. 70 | def get_dense(Lname, dim, stop, activation=lambda x:x, use_bias=True, init_weights=None): 71 | # create or reuse an already created layer 72 | if not Lname in Ls: 73 | if use_bias: 74 | weights=[ws.pop(0), ws.pop(0)] if len(ws) > 0 else init_weights 75 | else: 76 | weights=[ws.pop(0)] if len(ws) > 0 else init_weights 77 | Ls[Lname] = Dense2(dim, activation=activation, use_bias=use_bias, weights=weights) 78 | L = Ls[Lname] 79 | L.stop = stop 80 | return L 81 | 82 | 83 | # "embedding" layer that encodes input language A 84 | def embA(x, stop=False, mod=""): 85 | x = ConcatenateMatrixAndVectors()([x] + addinf) 86 | L = get_dense("Aemb" + mod, dim_emb, stop, use_bias=False) 87 | x = L(x) 88 | x = LayerNormalization()(x) 89 | return x 90 | 91 | 92 | def translation_from_A_to_B(x, stop=False, mod="", Nh=0): 93 | 94 | for i in range(Nh): 95 | x_old = x 96 | x = Splicing([-1, 0, 1])(x) 97 | x = ConcatenateMatrixAndVectors()([x] + addinf) 98 | L = get_dense(("A_a[%d]" % i) + mod, dim_h, stop, activation=keras.activations.relu, use_bias=True) 99 | x = L(x) 100 | x = Dropout(rate=pDropout)(x) 101 | L = get_dense(("A_b[%d]" % i) + mod, dim_emb, stop, use_bias=True) 102 | x = L(x) 103 | x = Add()([x, x_old]) 104 | x = LayerNormalization()(x) 105 | 106 | return x 107 | 108 | 109 | # "deembedding" layer that decodes the output, i.e. language B 110 | def invEmbB(x, stop=False, mod=""): 111 | x = ConcatenateMatrixAndVectors()([x] + addinf) 112 | L = get_dense("An_a" + mod, n_steps * dim_emb, stop, use_bias=True) 113 | x = L(x) 114 | x = MakeSteps2(nSteps=n_steps, dimy=dim_emb)(x) 115 | x = Splicing([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5])(x) 116 | x = ConcatenateMatrixAndVectors()([x] + addinf) 117 | L = get_dense("An_b" + mod, dim_rep, stop, activation=keras.activations.tanh, use_bias=True) 118 | x = L(x) 119 | L = get_dense("An_c" + mod, dim_y, stop, use_bias=True) 120 | x = L(x) 121 | x = ConstantBias(mu_values)(x) 122 | return x 123 | 124 | 125 | # dictionary for computed DTW synchronization 126 | saved_paths = {} 127 | 128 | # this function synchronizes outputs and targets 129 | # A fresh DTW alignment is computed on only about 70% of the gradient computations;
130 | # the remaining ~30% of the time the function reuses the alignment saved in the dictionary saved_paths 131 | def synch(data): 132 | if data["key"] in saved_paths: 133 | paths = saved_paths[data["key"]] 134 | used_saved_paths = (random.randint(0, 100) <= 30) 135 | else: 136 | paths = [None for y_synch in ys_synch] 137 | used_saved_paths = True 138 | 139 | if not used_saved_paths: 140 | inputs_values = data["inputs"] 141 | inputs_values = [addDim(x) for x in inputs_values] # 2D -> 3D 142 | ys_values = model4synch.predict_on_batch(inputs_values) 143 | if not type(ys_values) is list: 144 | ys_values = [ys_values] 145 | else: 146 | T_y = n_steps * data["inputs"][0].shape[0] 147 | ys_values = [numpy.zeros((T_y, 1), dtype=dtype) for y_synch in ys_synch] 148 | 149 | for idx in range(len(ys_values)): 150 | i_out = idx 151 | i_tar = idx 152 | y_values = ys_values[i_out] 153 | tar_values = data["targets"][i_tar] 154 | 155 | if used_saved_paths: 156 | if paths[idx] is None: 157 | d, path_y, path_tar = fakedtw( 158 | y_values, 159 | tar_values, 160 | 0.1, 161 | ) 162 | else: 163 | d, path_y, path_tar = paths[idx] 164 | else: 165 | d, path_y, path_tar = dtw( 166 | y_values, 167 | tar_values, 168 | 0.1, 169 | ) 170 | 171 | paths[idx] = (d, path_y, path_tar) 172 | 173 | data["inputs"].append(numpy.asarray(path_y, dtype="int64")) 174 | data["targets"][i_tar] = data["targets"][i_tar][path_tar] 175 | 176 | saved_paths[data["key"]] = paths 177 | 178 | return data 179 | 180 | 181 | text, dim_text = loadText("data/demo/text/text") 182 | 183 | hfTar = h5py.File("data/demo/keypoints/keypoints-04-deltas-norm.h5", "r") 184 | 185 | info_spk, dim_spk = loadInfo("data/demo/info/spk", dtype=dtype) 186 | 187 | reg = keras.regularizers.l2(0.001) 188 | 189 | input_text = Input(batch_shape=(None, None, dim_text), dtype=dtype, name="textf") 190 | input_spk = Input(batch_shape=(None, None, dim_spk), dtype=dtype, name="spk") 191 | 192 | inputs = [input_text, input_spk] 193 | 194 | # reshaping 3D inputs into 2D arrays 195 | x_text = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_text))(input_text) 196 | x_spk = Lambda(lambda x: reshape_2d_fixed_dim(x, dim_spk))(input_spk) 197 | addinf = [x_spk] 198 | 199 | 200 | # ws is a list of pre-loaded weights; it is important when a new layer is added, etc. 201 | #fname_weights = "models/FF-05.h5" 202 | fname_weights = None 203 | if fname_weights is None: 204 | # an empty list means random initialization 205 | ws = [] 206 | else: 207 | # load weights from a file 208 | ws = loadAllDenses2(fname_weights) 209 | 210 | # dropout rate 211 | pDropout = 0.1 212 | 213 | # mean of outputs 214 | mu_values = loadMatrixFloatText("temp/demo/mu", dtype=dtype) 215 | 216 | # dictionary of layers. Important for tied layers.
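# (Note, grounded in get_dense() above: layers are looked up in Ls by Lname, so any two
#  calls that pass the same Lname reuse a single Dense2 instance and therefore share one
#  weight matrix -- this dictionary is the whole tying mechanism.)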
217 | Ls = {} 218 | 219 | # the following model translates from language A into language B 220 | 221 | ybes = [] 222 | a = x_text 223 | ae = embA(a) 224 | ybe = ae 225 | ybes.append(ae) 226 | for ih in range(Nh): 227 | ybe = translation_from_A_to_B(ybe, Nh=Nh, mod="%d" % ih) 228 | ybes.append(ybe) 229 | 230 | # outputs 231 | ys = [] 232 | for i in range(len(ybes)): 233 | ybe = ybes[i] 234 | y = invEmbB(ybe, False, mod="%d" % i) 235 | ys.append(y) 236 | ys.reverse() 237 | 238 | # outputs trained by means of an attention mechanism 239 | ys_att = [] 240 | 241 | # outputs which will be synchronized 242 | ys_synch = ys 243 | 244 | inputs_dtw = [Input(batch_shape=(None, None), dtype="int64", name="dtw%d" % i) for i in range(len(ys))] 245 | 246 | # special layer synchronizing outputs (targets are synchronized outside the models) 247 | ys_dtw = [UseDTW()([ys[i], inputs_dtw[i]]) for i in range(len(ys_synch))] 248 | 249 | # back into the 3D shapes 250 | make_it_3d = Lambda(lambda x: reshape_3d(x)) 251 | ys_3d = [make_it_3d(y) for y in ys] 252 | ys_dtw_3d = [make_it_3d(y) for y in ys_dtw] 253 | ys_att_3d = [make_it_3d(y) for y in ys_att] 254 | 255 | # these outputs will be trained 256 | model4training = Model(inputs=inputs+inputs_dtw, outputs=ys_dtw_3d+ys_att_3d) 257 | 258 | # these outputs will be evaluated 259 | model4output = Model(inputs=inputs, outputs=ys) 260 | 261 | # these outputs serve for DTW synchronization 262 | model4synch = Model(inputs=inputs, outputs=ys_synch) 263 | 264 | # this output will be saved for monitoring 265 | model4example = Model(inputs=inputs, outputs=[ys[0]]) 266 | -------------------------------------------------------------------------------- /wacv2020/modeling.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy 4 | 5 | import tensorflow as tf 6 | 7 | import keras 8 | from keras.models import Sequential, Model 9 | from keras.layers import Dense, Concatenate, Layer, Input, concatenate 10 | from keras import backend as K 11 | 12 | from dtw import dtw 13 | 14 | 15 | def applyTimeShift(x, shift): 16 | xShape = K.shape(x) 17 | nMissing = K.minimum(abs(shift), xShape[0]) 18 | if shift == 0: 19 | y = x 20 | if shift > 0: 21 | yMain = x[shift:xShape[0], 0:xShape[1]] 22 | xT = x[(xShape[0] - 1):xShape[0], 0:xShape[1]] 23 | yAppendix = K.tile(xT, (nMissing, 1)) 24 | y = K.concatenate([yMain, yAppendix], axis=0) 25 | if shift < 0: 26 | yMain = x[0:K.maximum((xShape[0] + shift), 0), 0:xShape[1]] 27 | x0 = x[0:1, 0:xShape[1]] 28 | yAppendix = K.tile(x0, (nMissing, 1)) 29 | y = K.concatenate([yAppendix, yMain], axis=0) 30 | return y 31 | 32 | 33 | def applyTimeShifts(x, shifts, weights=None): 34 | y = [] 35 | i_weight = 0 36 | for shift in shifts: 37 | if weights is None: 38 | y.append(applyTimeShift(x, shift)) 39 | else: 40 | y.append(weights[i_weight] * applyTimeShift(x, shift)) 41 | i_weight = i_weight + 1 42 | y = K.concatenate(y, axis=1) 43 | return y 44 | 45 | 46 | class Splicing(Layer): 47 | 48 | def __init__(self, shifts, shiftWeights=None, **kwargs): 49 | self.shifts = shifts 50 | self.shiftWeights = shiftWeights 51 | super(Splicing, self).__init__(trainable=False, **kwargs) 52 | 53 | def build(self, input_shape): 54 | super(Splicing, self).build(input_shape) 55 | 56 | def call(self, x): 57 | return applyTimeShifts(x, self.shifts, self.shiftWeights) 58 | 59 | def compute_output_shape(self, inputs_shape): 60 | return (None, len(self.shifts) * inputs_shape[1]) 61 | 62 | 63 | class ConcatenateMatrixAndVectors(Layer):
64 | 65 | def __init__(self, **kwargs): 66 | super(ConcatenateMatrixAndVectors, self).__init__(trainable=False, **kwargs) 67 | 68 | def build(self, input_shape): 69 | super(ConcatenateMatrixAndVectors, self).build(input_shape) 70 | 71 | def call(self, x): 72 | m = x[0] 73 | mShape = K.shape(m) 74 | vs = x[1:len(x)] 75 | y = [m] 76 | for v in vs: 77 | vRep = K.tile(v, (mShape[0], 1)) 78 | y.append(vRep) 79 | y = K.concatenate(y, axis=1) 80 | return y 81 | 82 | def compute_output_shape(self, inputs_shape): 83 | T = inputs_shape[0][0] 84 | dim = inputs_shape[0][1] 85 | for i in range(1, len(inputs_shape)): 86 | dim = dim + inputs_shape[i][1] 87 | return (T, dim) 88 | 89 | 90 | class Dense2(Layer): 91 | 92 | def __init__(self, output_dim, use_bias=False, activation=lambda x:x, trainable=True, **kwargs): 93 | self.output_dim = output_dim 94 | self.activation = activation 95 | self.stop = False 96 | self.use_bias = use_bias 97 | self.trainable = trainable 98 | super(Dense2, self).__init__(**kwargs) 99 | 100 | def build(self, input_shape): 101 | self.kernel = self.add_weight(name='kernel', 102 | shape=(input_shape[1], self.output_dim), 103 | initializer='uniform', 104 | trainable=self.trainable) 105 | if self.use_bias: 106 | self.bias = self.add_weight(name='bias', 107 | shape=(self.output_dim, ), 108 | initializer='zeros', 109 | trainable=self.trainable) 110 | super(Dense2, self).build(input_shape) 111 | 112 | def call(self, x): 113 | if self.stop: 114 | kernel = K.stop_gradient(self.kernel) 115 | else: 116 | kernel = self.kernel 117 | if self.use_bias: 118 | if self.stop: 119 | b = K.stop_gradient(self.bias) 120 | else: 121 | b = self.bias 122 | else: 123 | b = 0 124 | return self.activation(K.dot(x, kernel) + b) 125 | 126 | def compute_output_shape(self, input_shape): 127 | return (input_shape[0], self.output_dim) 128 | 129 | 130 | class LayerNormalization(Layer): 131 | 132 | def __init__(self, eps=1e-6, **kwargs): 133 | self.eps = eps 134 | super(LayerNormalization, self).__init__(**kwargs) 135 | 136 | def build(self, input_shape): 137 | #self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:], 138 | # initializer=Ones(), trainable=True) 139 | #self.beta = self.add_weight(name='beta', shape=input_shape[-1:], 140 | # initializer=Zeros(), trainable=True) 141 | super(LayerNormalization, self).build(input_shape) 142 | 143 | def call(self, x): 144 | mean = K.mean(x, axis=-1, keepdims=True) 145 | std = K.std(x, axis=-1, keepdims=True) 146 | #return self.gamma * (x - mean) / (std + self.eps) + self.beta 147 | return (x - mean) / (std + self.eps) 148 | 149 | def compute_output_shape(self, input_shape): 150 | return input_shape 151 | 152 | 153 | class MakeSteps2(Layer): 154 | 155 | def __init__(self, nSteps, dimy, **kwargs): 156 | self.nSteps = nSteps 157 | self.dimy = dimy 158 | super(MakeSteps2, self).__init__(trainable=False, **kwargs) 159 | 160 | def build(self, input_shape): 161 | super(MakeSteps2, self).build(input_shape) 162 | 163 | def call(self, x): 164 | dtype = x.dtype 165 | nSteps = self.nSteps 166 | xShape = K.shape(x) 167 | #xShape = K.print_tensor(xShape, "xShape") 168 | dim_y = self.dimy #K.cast(xShape[1] / nSteps, dtype="int32") 169 | O = K.zeros((1, dim_y), dtype=dtype) 170 | I = K.ones((1, dim_y), dtype=dtype) 171 | y = 0 172 | for i in range(nSteps): 173 | x_i = x[0:xShape[0], (i * dim_y):((i + 1) * dim_y)] 174 | x_i = K.repeat_elements(x_i, nSteps, axis=0) 175 | m = [] 176 | for j in range(nSteps): 177 | if j == i: 178 | m.append(I) 179 | else: 180 | m.append(O) 181 | m 
= K.concatenate(m, axis=0) 182 | m = K.tile(m, (xShape[0], 1)) 183 | y = y + m * x_i 184 | return y 185 | 186 | def compute_output_shape(self, inputs_shape): 187 | nSteps = self.nSteps 188 | dim_y = self.dimy 189 | return (None, dim_y) 190 | 191 | 192 | class ConstantBias(Layer): 193 | 194 | def __init__(self, bias, **kwargs): 195 | self.bias = bias 196 | super(ConstantBias, self).__init__(trainable=False, **kwargs) 197 | 198 | def build(self, input_shape): 199 | super(ConstantBias, self).build(input_shape) 200 | 201 | def call(self, x): 202 | y = x + self.bias 203 | return y 204 | 205 | def compute_output_shape(self, inputs_shape): 206 | return inputs_shape 207 | 208 | 209 | class UseDTW(Layer): 210 | 211 | def __init__(self, **kwargs): 212 | super(UseDTW, self).__init__(trainable=False, **kwargs) 213 | 214 | def build(self, input_shape): 215 | super(UseDTW, self).build(input_shape) 216 | 217 | def call(self, inputs): 218 | #return x[self.dtw_y] 219 | x, dtw_y = inputs 220 | y = K.gather(x, dtw_y) 221 | return y 222 | 223 | def compute_output_shape(self, inputs_shape): 224 | return (inputs_shape[1][0], inputs_shape[0][1]) 225 | 226 | 227 | def compute_D2(a, b, La, Lb): 228 | #print((a, b, La, Lb)) 229 | dtype = a.dtype 230 | #print(dtype) 231 | bT = K.transpose(b) 232 | sumaa = K.sum(a * a, axis=1, keepdims=True) 233 | sumbb = K.sum(b * b, axis=1, keepdims=True) 234 | #foo = K.ones((1, Lb), dtype=dtype) 235 | #D2 = K.dot(sumaa, foo) 236 | D2 = K.tile(sumaa, (1, Lb)) 237 | D2 = D2 - 2 * K.dot(a, bT) 238 | #D2 = D2 + K.dot(K.ones((La, 1), dtype=dtype), K.transpose(sumbb)) 239 | D2 = D2 + K.tile(K.transpose(sumbb), (La, 1)) 240 | #D2 = K.dot(sumaa, K.ones((1, Lb), dtype=dtype)) - 2 * K.dot(a, bT) + K.dot(K.ones((La, 1), dtype=dtype), K.transpose(sumbb)) 241 | D2 = K.abs(D2) 242 | return D2 243 | 244 | 245 | def makeM0(La, Lb, thr, dtype): 246 | #am = K.arange(La) / (La - 1) 247 | am = K.arange(0, La, 1) / (La - 1) 248 | am = K.reshape(am, (La, 1)) 249 | #bm = K.arange(Lb) / (Lb - 1) 250 | bm = K.arange(0, Lb, 1) / (Lb - 1) 251 | bm = K.reshape(bm, (Lb, 1)) 252 | am = K.cast(am, dtype) 253 | bm = K.cast(bm, dtype) 254 | D2 = compute_D2(am, bm, La, Lb) 255 | M0 = 0.5 * 1e+10 * (1.0 + K.sign(D2 - thr * thr)) 256 | return M0 257 | 258 | 259 | def normList(l): 260 | sum = 0.0 261 | for i in l: 262 | sum = sum + i 263 | return [i / sum for i in l] 264 | 265 | 266 | def compute_mask(La, Lb, dtype): 267 | am = K.arange(La) / (La - 1) 268 | am = K.reshape(am, (La, 1)) 269 | bm = K.arange(Lb) / (Lb - 1) 270 | bm = K.reshape(bm, (Lb, 1)) 271 | am = K.cast(am, dtype) 272 | bm = K.cast(bm, dtype) 273 | #M = 1.0 - compute_D2(am, bm, La, Lb) 274 | D2 = compute_D2(am, bm, La, Lb) 275 | M = K.exp(-4 * D2) 276 | if not M.dtype == dtype: 277 | M = K.cast(M, dtype) 278 | #threshold = 0.15 279 | #threshold2 = threshold * threshold 280 | #M = 0.5 + 0.5 * K.sign(threshold2 - D2) 281 | return M 282 | 283 | 284 | def lossAttMSE(y_true, y_pred): 285 | a = y_pred 286 | b = y_true 287 | 288 | aShape = K.shape(a) 289 | La = aShape[-2] 290 | dima = aShape[-1] 291 | dtype = a.dtype 292 | a = K.reshape(a, (La, dima)) 293 | 294 | bShape = K.shape(b) 295 | Lb = bShape[-2] 296 | dimb = bShape[-1] 297 | b = K.reshape(b, (Lb, dimb)) 298 | 299 | D2_MSE = compute_D2(a, b, La, Lb) 300 | 301 | shifts = [-3, -2, -1, 0, +1, +2, +3] 302 | wshifts = normList([0.125, 0.25, 0.5, 1.0, 0.5, 0.25, 0.125]) 303 | a = applyTimeShifts(a, shifts, wshifts) 304 | b = applyTimeShifts(b, shifts, wshifts) 305 | 306 | if not a.dtype == dtype: 307 | a 
= K.cast(a, dtype) 308 | b = K.cast(b, dtype) 309 | 310 | M0 = makeM0(La, Lb, 0.1, dtype) 311 | D2 = compute_D2(a, b, La, Lb) 312 | mD2M = -(1.0 * D2 + M0) 313 | M1 = K.softmax(mD2M) 314 | M2 = K.transpose(K.softmax(K.transpose(mD2M))) 315 | M = M1 + M2 316 | 317 | return K.sum(K.sum(D2_MSE * M)) / K.sum(K.sum(M)) 318 | 319 | 320 | def MSE(y_true, y_pred): 321 | a = y_pred 322 | b = y_true 323 | 324 | # dealing with 3d arrays 325 | aShape = K.shape(a) 326 | La = aShape[-2] 327 | dima = aShape[-1] 328 | dtype = a.dtype 329 | a = K.reshape(a, (La, dima)) 330 | bShape = K.shape(b) 331 | Lb = bShape[-2] 332 | dimb = bShape[-1] 333 | b = K.reshape(b, (Lb, dimb)) 334 | 335 | # dealing with different dtypes 336 | if not a.dtype == dtype: 337 | a = K.cast(a, dtype) 338 | b = K.cast(b, dtype) 339 | 340 | delta = a - b 341 | delta2 = delta * delta 342 | return K.mean(K.mean(delta2)) 343 | 344 | 345 | def compute_errorMSEDTW(y, tar, threshold=0.2): 346 | d, path_y, path_tar = dtw(y, tar, threshold) 347 | delta = y[path_y] - tar[path_tar] 348 | sum = numpy.sum(numpy.sum(delta * delta)) 349 | #return sum, len(path_y), y.shape[1] 350 | return sum, delta.shape[0], delta.shape[1] 351 | 352 | 353 | def reshape_2d_fixed_dim(x, dim): 354 | xShape = K.shape(x) 355 | return K.reshape(x, (xShape[-2], dim)) 356 | 357 | 358 | def reshape_3d(x): 359 | xShape = K.shape(x) 360 | return K.reshape(x, (1, xShape[-2], xShape[-1])) 361 | -------------------------------------------------------------------------------- /wacv2020/pipeline_demo_01_json2h5.py: -------------------------------------------------------------------------------- 1 | # Packing relevant (see variable idxsPose and idxsHand) keypoints stored as json files into a single h5 file. 2 | # Format: x, y, likelihood, x, y, likelihood, .... 
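# (With idxsPose covering 8 body points and idxsHand covering 21 points per hand,
#  each frame becomes one row of 3 * (8 + 2 * 21) = 150 values.)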
3 | # x == 0, y == 0 means missing keypoint 4 | 5 | import codecs 6 | import math 7 | import re 8 | import os 9 | import json 10 | import h5py 11 | import subprocess 12 | 13 | import numpy 14 | from walkDir import walkDir 15 | 16 | 17 | def selectPoints(points, keepThis): 18 | points2 = [] 19 | for i in keepThis: 20 | points2.append(points[3 * i + 0]) 21 | points2.append(points[3 * i + 1]) 22 | points2.append(points[3 * i + 2]) 23 | return points2 24 | 25 | 26 | def noNones(l): 27 | l2 = [] 28 | for i in l: 29 | if not i is None: 30 | l2.append(i) 31 | return l2 32 | 33 | 34 | def loadData(dname): 35 | fnames = walkDir(dname = dname, filt = r"\.json$") 36 | fnames.sort() 37 | frames = [] 38 | for fname in fnames: 39 | p = re.search(r"([^\\/]+)_(\d+)_keypoints\.json$", fname) 40 | 41 | with open(fname) as json_data: 42 | data = json.load(json_data) 43 | if len(data["people"]) == 0: 44 | continue 45 | 46 | i = int(p.group(2)) 47 | while len(frames) < i + 1: 48 | frames.append(None) 49 | 50 | theTallest = data["people"][0] 51 | 52 | idxsPose = [0, 1, 2, 3, 4, 5, 6, 7] 53 | idxsHand = range(21) 54 | 55 | if theTallest is None: 56 | points = 3 * (len(idxsPose) + 2 * len(idxsHand)) * [0.0] 57 | else: 58 | pointsP = theTallest["pose_keypoints_2d"] 59 | pointsLH = theTallest["hand_left_keypoints_2d"] 60 | pointsRH = theTallest["hand_right_keypoints_2d"] 61 | pointsP = selectPoints(pointsP, idxsPose) 62 | pointsLH = selectPoints(pointsLH, idxsHand) 63 | pointsRH = selectPoints(pointsRH, idxsHand) 64 | points = pointsP + pointsLH + pointsRH 65 | 66 | if not points[0] == 0.0: 67 | frames[i] = points 68 | 69 | return numpy.asarray(noNones(frames), dtype="float32") 70 | 71 | 72 | if __name__ == "__main__": 73 | 74 | dnameIn = "data/demo/keypoints/json/" 75 | fnameOut = "data/demo/keypoints/keypoints-01-raw.h5" 76 | 77 | 78 | recs = {} 79 | for fname in walkDir(dnameIn, filt=r"\.[jJ][sS][oO][nN]$"): 80 | dname = re.sub(r"(.*)[/\\].*", r"\1", fname) 81 | key = re.sub(r".*[/\\]", "", dname) 82 | recs[key] = dname 83 | 84 | hf = h5py.File(fnameOut, "w") 85 | for key in recs: 86 | print(key) 87 | data = loadData(recs[key]) 88 | hf.create_dataset(key, data=data, dtype="float32") 89 | hf.close() 90 | -------------------------------------------------------------------------------- /wacv2020/pipeline_demo_02_filter.py: -------------------------------------------------------------------------------- 1 | # Using a filter to interpolate missing keypoints. 2 | 3 | 4 | # standard 5 | import h5py 6 | 7 | # 3rd party 8 | import numpy 9 | 10 | # our own 11 | import skeletalModel 12 | import pose2D 13 | import pose2Dto3D 14 | import pose3D 15 | 16 | 17 | def convList2Array(lst): 18 | T, dim = lst[0].shape 19 | a = [] 20 | for t in range(T): 21 | a_t = [] 22 | for i in range(dim): 23 | for j in range(len(lst)): 24 | a_t.append(lst[j][t, i]) 25 | a.append(a_t) 26 | return numpy.asarray(a) 27 | 28 | 29 | def use_filter(x): 30 | structure = skeletalModel.getSkeletalModelStructure() 31 | 32 | inputSequence_2D = x 33 | 34 | # Decomposition of the single matrix into three matrices: x, y, w (=likelihood) 35 | X = inputSequence_2D 36 | Xx = X[0:X.shape[0], 0:(X.shape[1]):3] 37 | Xy = X[0:X.shape[0], 1:(X.shape[1]):3] 38 | Xw = X[0:X.shape[0], 2:(X.shape[1]):3] 39 | 40 | # Normalization of the picture (the x and y axes have the same scale) 41 | Xx, Xy = pose2D.normalization(Xx, Xy) 42 | 43 | # Delete all skeletal models which have a lot of missing parts.
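# (pose2D.prune keeps a frame only if the mean likelihood over the eight watched body
#  keypoints, indices 0-7, is at least 0.3; otherwise the whole frame is zeroed.)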
44 | Xx, Xy, Xw = pose2D.prune(Xx, Xy, Xw, (0, 1, 2, 3, 4, 5, 6, 7), 0.3, dtype) 45 | 46 | # Preliminary filtering: weighted linear interpolation of missing points. 47 | Xx, Xy, Xw = pose2D.interpolation(Xx, Xy, Xw, 0.99, dtype) 48 | 49 | # Initial 3D pose estimation 50 | lines0, rootsx0, rootsy0, rootsz0, anglesx0, anglesy0, anglesz0, Yx0, Yy0, Yz0 = pose2Dto3D.initialization( 51 | Xx, 52 | Xy, 53 | Xw, 54 | structure, 55 | 0.001, # weight for adding noise 56 | randomNubersGenerator, 57 | dtype, 58 | percentil=0.5, 59 | ) 60 | 61 | # Backpropagation-based filtering 62 | Yx, Yy, Yz = pose3D.backpropagationBasedFiltering( 63 | lines0, 64 | rootsx0, 65 | rootsy0, 66 | rootsz0, 67 | anglesx0, 68 | anglesy0, 69 | anglesz0, 70 | Xx, 71 | Xy, 72 | Xw, 73 | structure, 74 | dtype, 75 | nCycles=100, 76 | ) 77 | 78 | return convList2Array([Yx, Yy, Yz]) 79 | 80 | 81 | if __name__ == "__main__": 82 | 83 | 84 | 85 | fnameIn = "data/demo/keypoints/keypoints-01-raw.h5" 86 | fnameOut = "data/demo/keypoints/keypoints-02-filter.h5" 87 | 88 | dtype = "float32" 89 | randomNubersGenerator = numpy.random.RandomState(1234) 90 | 91 | 92 | hfIn = h5py.File(fnameIn, "r") 93 | hfOut = h5py.File(fnameOut, "w") 94 | for key in hfIn: 95 | print("") 96 | print("... processing '%s'" % key) 97 | print("") 98 | x = numpy.array(hfIn.get(key)) 99 | y = use_filter(x) 100 | hfOut.create_dataset(key, data=y, dtype=y.dtype) 101 | hfOut.close() 102 | hfIn.close() 103 | 104 | -------------------------------------------------------------------------------- /wacv2020/pipeline_demo_03_deltas.py: -------------------------------------------------------------------------------- 1 | # Transforming absolute positions into vectors (deltas). 2 | # See variable deltas = ( (start_joint, end_joint), ....) 
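# (Each pair yields two output columns, x_start - x_end and y_start - y_end, so the
#  49 pairs below produce the 98-dimensional pose vectors that model_FF_05 consumes
#  as dim_y = 98.)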
3 | 4 | import re 5 | import os 6 | import shutil 7 | import codecs 8 | import math 9 | import copy 10 | import h5py 11 | 12 | import numpy 13 | 14 | 15 | dtypeFloat = "float32" 16 | dtypeInt = "int64" 17 | 18 | 19 | def saveMatrix(fname, x): 20 | f = open(fname, "w") 21 | for t in range(x.shape[0]): 22 | for i in range(x.shape[1]): 23 | f.write("%e\t" % x[t, i]) 24 | f.write("\n") 25 | f.close() 26 | 27 | 28 | def computeDeltas(x): 29 | 30 | deltas = ( 31 | (0, 1), 32 | (1, 2), 33 | (2, 3), 34 | (3, 4), 35 | (1, 5), 36 | (5, 6), 37 | (6, 7), 38 | 39 | (4, 29), 40 | 41 | (29, 30), 42 | (30, 31), 43 | (31, 32), 44 | (32, 33), 45 | 46 | (29, 34), 47 | (34, 35), 48 | (35, 36), 49 | (36, 37), 50 | 51 | (29, 38), 52 | (38, 39), 53 | (39, 40), 54 | (40, 41), 55 | 56 | (29, 42), 57 | (42, 43), 58 | (43, 44), 59 | (44, 45), 60 | 61 | (29, 46), 62 | (46, 47), 63 | (47, 48), 64 | (48, 49), 65 | 66 | (7, 8), 67 | 68 | (8, 9), 69 | (9, 10), 70 | (10, 11), 71 | (11, 12), 72 | 73 | (8, 13), 74 | (13, 14), 75 | (14, 15), 76 | (15, 16), 77 | 78 | (8, 17), 79 | (17, 18), 80 | (18, 19), 81 | (19, 20), 82 | 83 | (8, 21), 84 | (21, 22), 85 | (22, 23), 86 | (23, 24), 87 | 88 | (8, 25), 89 | (25, 26), 90 | (26, 27), 91 | (27, 28), 92 | ) 93 | 94 | T = x.shape[0] 95 | n = x.shape[1] // 3 96 | nd = len(deltas) 97 | y = numpy.zeros((T, 2 * nd), dtype=dtypeFloat) 98 | for t in range(T): 99 | for iDelta in range(nd): 100 | i = deltas[iDelta][0] 101 | j = deltas[iDelta][1] 102 | y[t, 2 * iDelta + 0] = x[t, 3 * i + 0] - x[t, 3 * j + 0] 103 | y[t, 2 * iDelta + 1] = x[t, 3 * i + 1] - x[t, 3 * j + 1] 104 | return y 105 | 106 | 107 | def walkDir(dname, filt = r".*"): 108 | result = [] 109 | for root, dnames, fnames in os.walk(dname): 110 | for fname in fnames: 111 | if re.search(filt, fname): 112 | foo = root + "/" + fname 113 | foo = re.sub(r"[/\\]+", "/", foo) 114 | result.append(foo) 115 | return result 116 | 117 | 118 | def loadRec(fname): 119 | rec = [] 120 | f = open(fname) 121 | for line in f: 122 | x = [] 123 | for x_i in line.strip().split(): 124 | x.append(float(x_i)) 125 | rec.append(x) 126 | f.close() 127 | return rec 128 | 129 | 130 | def saveAllData(fnameIn, fnameOut): 131 | hfIn = h5py.File(fnameIn, "r") 132 | hfOut = h5py.File(fnameOut, "w") 133 | for key in hfIn.keys(): 134 | print(key) 135 | rec = numpy.array(hfIn.get(key)) 136 | deltas = computeDeltas(rec) 137 | hfOut.create_dataset(key, data=deltas, dtype=deltas.dtype) 138 | hfOut.close() 139 | hfIn.close() 140 | 141 | 142 | if __name__ == "__main__": 143 | saveAllData( 144 | "data/demo/keypoints/keypoints-02-filter.h5", 145 | "data/demo/keypoints/keypoints-03-deltas.h5" 146 | ) 147 | -------------------------------------------------------------------------------- /wacv2020/pipeline_demo_04_norm.py: -------------------------------------------------------------------------------- 1 | # Normalizing the standard deviation of all points (remember sigma if you want to transform it back to pixels) 2 | 3 | import re 4 | import os 5 | import shutil 6 | import codecs 7 | import math 8 | import copy 9 | import h5py 10 | 11 | import numpy 12 | 13 | 14 | dtypeFloat = "float64" 15 | dtypeInt = "int64" 16 | 17 | 18 | def saveMatrix(fname, x): 19 | f = open(fname, "w") 20 | for t in range(x.shape[0]): 21 | for i in range(x.shape[1]): 22 | f.write("%e\t" % x[t, i]) 23 | f.write("\n") 24 | f.close() 25 | 26 | 27 | def walkDir(dname, filt = r".*"): 28 | result = [] 29 | for root, dnames, fnames in os.walk(dname): 30 | for fname in fnames: 31 | if re.search(filt, fname): 32 |
foo = root + "/" + fname 33 | foo = re.sub(r"[/\\]+", "/", foo) 34 | result.append(foo) 35 | return result 36 | 37 | 38 | def loadRec(fname): 39 | rec = [] 40 | f = open(fname) 41 | for line in f: 42 | x = [] 43 | for x_i in line.strip().split(): 44 | x.append(float(x_i)) 45 | rec.append(x) 46 | f.close() 47 | return rec 48 | 49 | 50 | def saveAllData(fnameIn, fnameOut): 51 | hfIn = h5py.File(fnameIn, "r") 52 | 53 | sum0 = 0 54 | sum1 = 0.0 55 | sum2 = 0.0 56 | for key in hfIn: 57 | #print(key) 58 | x = numpy.array(hfIn.get(key)) 59 | sum0 = sum0 + x.shape[0] 60 | sum1 = sum1 + numpy.sum(x, axis = 0, keepdims = True) 61 | sum2 = sum2 + numpy.sum(x * x, axis = 0, keepdims = True) 62 | m = sum1 / sum0 63 | m2 = sum2 / sum0 64 | s = numpy.sqrt(m2 - m * m) 65 | sigma = numpy.mean(s) 66 | 67 | print("sigma = %e" % sigma) 68 | 69 | hfOut = h5py.File(fnameOut, "w") 70 | for key in hfIn.keys(): 71 | #print(key) 72 | rec = numpy.array(hfIn.get(key)) 73 | rec = rec / sigma 74 | hfOut.create_dataset(key, data=rec, dtype=rec.dtype) 75 | hfOut.close() 76 | hfIn.close() 77 | 78 | 79 | if __name__ == "__main__": 80 | saveAllData( 81 | "data/demo/keypoints/keypoints-03-deltas.h5", 82 | "data/demo/keypoints/keypoints-04-deltas-norm.h5", 83 | ) 84 | -------------------------------------------------------------------------------- /wacv2020/pipeline_demo_05_mu.py: -------------------------------------------------------------------------------- 1 | # Computing an average pose 2 | 3 | import re 4 | import os 5 | import shutil 6 | import codecs 7 | import math 8 | import copy 9 | import h5py 10 | 11 | import numpy 12 | 13 | from utils import * 14 | 15 | 16 | def saveMatrix(fname, x): 17 | f = open(fname, "w") 18 | for t in range(x.shape[0]): 19 | for i in range(x.shape[1]): 20 | f.write("%e\t" % x[t, i]) 21 | f.write("\n") 22 | f.close() 23 | 24 | 25 | if __name__ == "__main__": 26 | list_training = loadList("data/demo/lists/train.list") 27 | hfIn = h5py.File("data/demo/keypoints/keypoints-04-deltas-norm.h5", "r") 28 | sum0 = 0 29 | sum1 = 0.0 30 | sum2 = 0.0 31 | for key in hfIn: 32 | print("... 
reading %s" % key) 33 | x = numpy.array(hfIn.get(key)) 34 | sum0 = sum0 + x.shape[0] 35 | sum1 = sum1 + numpy.sum(x, axis = 0, keepdims = True) 36 | sum2 = sum2 + numpy.sum(x * x, axis = 0, keepdims = True) 37 | hfIn.close() 38 | m = sum1 / sum0 39 | m2 = sum2 / sum0 40 | s = numpy.sqrt(m2 - m * m) 41 | saveMatrix("temp/demo/mu", m) 42 | -------------------------------------------------------------------------------- /wacv2020/pipeline_demo_06-train-model_FF_05.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import math 5 | 6 | import numpy 7 | 8 | import tensorflow as tf 9 | import keras 10 | from keras.models import Sequential, Model 11 | from keras.layers import Dense, Add, Dropout 12 | from keras import backend as K 13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract 14 | from keras.utils import to_categorical 15 | 16 | from modeling import * 17 | from data import * 18 | from training import * 19 | import accumulators 20 | from dtw import dtw, fakedtw 21 | from utils import * 22 | 23 | from model_FF_05 import * 24 | 25 | 26 | # file with the best (on stop set) results 27 | fname_model_weights = "models/demo/%s.h5" % model_ID 28 | 29 | # file with an example of the chosen output 30 | fname_example = "temp/demo/%s-example.txt" % model_ID 31 | 32 | list_training = loadList("data/demo/lists/train.list") 33 | list_stop = loadList("data/demo/lists/test.list") 34 | 35 | 36 | n_files_in_batch = len(list_training) 37 | if n_files_in_batch > len(list_training): 38 | n_files_in_batch = len(list_training) # carefull 39 | 40 | # how often is model evaluated using the stop set. 41 | # 1 means after every reestimation 42 | n_batches = 1 43 | 44 | # when stop training 45 | max_steps = 100 * len(list_training) 46 | 47 | optimizer = accumulators.AdamAccumulate(lr=0.001, decay=0.0, accum_iters=n_files_in_batch) 48 | 49 | wMSE = 1.0 # weight for MSE loss 50 | wATT = 1.0 / 98.0 # weight for our attention based loss loss 51 | 52 | training( 53 | model4training=model4training, 54 | model4output=model4output, 55 | model4example=model4example, 56 | datamod4training=synch, 57 | loss=len(ys)*[MSE]+len(ys_att)*[lossAttMSE], 58 | wloss=len(ys)*[wMSE]+len(ys_att)*[wATT], 59 | loader=loader, 60 | list_training=list_training, 61 | list_stop=list_stop, 62 | mask4print="%.4f", 63 | optimizer=optimizer, 64 | testing_interval=n_batches*n_files_in_batch, 65 | fname_example=fname_example, 66 | fname_model_weights=fname_model_weights, 67 | funs_eval=len(ys)*[compute_errorMSEDTW], 68 | max_steps=max_steps, 69 | ) 70 | 71 | hfTar.close() 72 | -------------------------------------------------------------------------------- /wacv2020/pipeline_demo_07-evaluation-model_FF_05.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import math 5 | 6 | import numpy 7 | 8 | import tensorflow as tf 9 | import keras 10 | from keras.models import Sequential, Model 11 | from keras.layers import Dense, Add, Dropout 12 | from keras import backend as K 13 | from keras.layers import Layer, Input, Bidirectional, LSTM, GRU, Embedding, Multiply, BatchNormalization, Concatenate, Add, Lambda, Subtract 14 | from keras.utils import to_categorical 15 | 16 | from modeling import * 17 | from data import * 18 | from training import * 19 | import accumulators 20 | from dtw import dtw, fakedtw 
21 | from utils import * 22 | 23 | from model_FF_05 import * 24 | 25 | # loading the model and evaluating it on a given set 26 | 27 | 28 | list_eval = loadList("data/demo/lists/test.list") 29 | 30 | fname_model_weights = "models/demo/%s.h5" % model_ID 31 | 32 | model4output.load_weights(fname_model_weights) 33 | 34 | errors, example, outputs = eval_dataset( 35 | list_eval, 36 | loader, 37 | model4output, 38 | model4example, 39 | funs_eval=len(ys)*[compute_errorMSEDTW] 40 | ) 41 | 42 | print(errors) 43 | -------------------------------------------------------------------------------- /wacv2020/pipeline_demo_XX_invDeltas.py: -------------------------------------------------------------------------------- 1 | # Converting from vectors (deltas) to absolute positions 2 | 3 | import re 4 | import os 5 | import shutil 6 | import codecs 7 | import math 8 | import copy 9 | import h5py 10 | 11 | import numpy 12 | 13 | 14 | dtypeFloat = "float32" 15 | dtypeInt = "int64" 16 | 17 | 18 | def computeInvDeltas(x): 19 | 20 | deltas = ( 21 | (0, 1), 22 | (1, 2), 23 | (2, 3), 24 | (3, 4), 25 | (1, 5), 26 | (5, 6), 27 | (6, 7), 28 | 29 | (4, 29), 30 | 31 | (29, 30), 32 | (30, 31), 33 | (31, 32), 34 | (32, 33), 35 | 36 | (29, 34), 37 | (34, 35), 38 | (35, 36), 39 | (36, 37), 40 | 41 | (29, 38), 42 | (38, 39), 43 | (39, 40), 44 | (40, 41), 45 | 46 | (29, 42), 47 | (42, 43), 48 | (43, 44), 49 | (44, 45), 50 | 51 | (29, 46), 52 | (46, 47), 53 | (47, 48), 54 | (48, 49), 55 | 56 | (7, 8), 57 | 58 | (8, 9), 59 | (9, 10), 60 | (10, 11), 61 | (11, 12), 62 | 63 | (8, 13), 64 | (13, 14), 65 | (14, 15), 66 | (15, 16), 67 | 68 | (8, 17), 69 | (17, 18), 70 | (18, 19), 71 | (19, 20), 72 | 73 | (8, 21), 74 | (21, 22), 75 | (22, 23), 76 | (23, 24), 77 | 78 | (8, 25), 79 | (25, 26), 80 | (26, 27), 81 | (27, 28), 82 | ) 83 | 84 | ndeltas = len(deltas) 85 | T, dim = x.shape 86 | n = int(dim / 2) 87 | 88 | X3 = numpy.zeros((T, 3 * (n + 1)), "float32") 89 | for t in range(T): 90 | 91 | for idelta in range(ndeltas): 92 | a, b = deltas[idelta] 93 | 94 | X3[t, 3 * b + 0] = X3[t, 3 * a + 0] - x[t, 2 * idelta + 0] 95 | X3[t, 3 * b + 1] = X3[t, 3 * a + 1] - x[t, 2 * idelta + 1] 96 | 97 | return X3 98 | 99 | 100 | def saveAllData(fnameIn, fnameOut): 101 | hfIn = h5py.File(fnameIn, "r") 102 | hfOut = h5py.File(fnameOut, "w") 103 | for key in hfIn.keys(): 104 | print(key) 105 | rec = numpy.array(hfIn.get(key)) 106 | deltas = computeInvDeltas(rec) 107 | hfOut.create_dataset(key, data=deltas, dtype=deltas.dtype) 108 | hfOut.close() 109 | hfIn.close() 110 | 111 | 112 | if __name__ == "__main__": 113 | saveAllData( 114 | "...h5", 115 | "...-invDeltas.h5" 116 | ) 117 | -------------------------------------------------------------------------------- /wacv2020/pose2D.py: -------------------------------------------------------------------------------- 1 | # standard 2 | import math 3 | 4 | # 3rd party 5 | import numpy 6 | 7 | 8 | def normalization(Xx, Xy): 9 | T, n = Xx.shape 10 | sum0 = T * n 11 | sum1Xx = numpy.sum(numpy.sum(Xx)) 12 | sum2Xx = numpy.sum(numpy.sum(Xx * Xx)) 13 | sum1Xy = numpy.sum(numpy.sum(Xy)) 14 | sum2Xy = numpy.sum(numpy.sum(Xy * Xy)) 15 | mux = sum1Xx / sum0 16 | muy = sum1Xy / sum0 17 | sum0 = 2 * sum0 18 | sum1 = sum1Xx + sum1Xy 19 | sum2 = sum2Xx + sum2Xy 20 | mu = sum1 / sum0 21 | sigma2 = (sum2 / sum0) - mu * mu 22 | if sigma2 < 1e-10: 23 | sigma2 = 1e-10 24 | sigma = math.sqrt(sigma2) 25 | return (Xx - mux) / sigma, (Xy - muy) / sigma 26 | 27 | 28 | 29 | def prune(Xx, Xy, Xw, watchThis, threshold, dtype): 30 | T = Xw.shape[0]
31 | N = Xw.shape[1] 32 | Yx = numpy.zeros((T, N), dtype=dtype) 33 | Yy = numpy.zeros((T, N), dtype=dtype) 34 | Yw = numpy.zeros((T, N), dtype=dtype) 35 | for t in range(T): 36 | sum0 = 0 37 | sum1 = 0.0 38 | for i in watchThis: 39 | sum0 = sum0 + 1 40 | sum1 = sum1 + Xw[t, i] 41 | Ew = sum1 / sum0 42 | if Ew >= threshold: 43 | for i in range(N): 44 | Yx[t, i] = Xx[t, i] 45 | Yy[t, i] = Xy[t, i] 46 | Yw[t, i] = Xw[t, i] 47 | return Yx, Yy, Yw 48 | 49 | 50 | def interpolation(Xx, Xy, Xw, threshold, dtype): 51 | T = Xw.shape[0] 52 | N = Xw.shape[1] 53 | Yx = numpy.zeros((T, N), dtype=dtype) 54 | Yy = numpy.zeros((T, N), dtype=dtype) 55 | for t in range(T): 56 | for i in range(N): 57 | a1 = Xx[t, i] 58 | a2 = Xy[t, i] 59 | p = Xw[t, i] 60 | sumpa1 = p * a1 61 | sumpa2 = p * a2 62 | sump = p 63 | delta = 0 64 | while sump < threshold: 65 | change = False 66 | delta = delta + 1 67 | t2 = t + delta 68 | if t2 < T: 69 | a1 = Xx[t2, i] 70 | a2 = Xy[t2, i] 71 | p = Xw[t2, i] 72 | sumpa1 = sumpa1 + p * a1 73 | sumpa2 = sumpa2 + p * a2 74 | sump = sump + p 75 | change = True 76 | t2 = t - delta 77 | if t2 >= 0: 78 | a1 = Xx[t2, i] 79 | a2 = Xy[t2, i] 80 | p = Xw[t2, i] 81 | sumpa1 = sumpa1 + p * a1 82 | sumpa2 = sumpa2 + p * a2 83 | sump = sump + p 84 | change = True 85 | if not change: 86 | break 87 | if sump <= 0.0: 88 | sump = 1e-10 89 | Yx[t, i] = sumpa1 / sump 90 | Yy[t, i] = sumpa2 / sump 91 | return Yx, Yy, Xw 92 | -------------------------------------------------------------------------------- /wacv2020/pose2Dto3D.py: -------------------------------------------------------------------------------- 1 | # standard 2 | import math 3 | 4 | # 3rd party 5 | import numpy 6 | 7 | # our own 8 | import skeletalModel 9 | 10 | 11 | # add uniformly distributed noise 12 | def addNoise(x, rng, epsilon): 13 | e = numpy.asarray(rng.uniform(low=-epsilon, high=epsilon, size=x.shape), dtype="float32") 14 | return x + e 15 | 16 | 17 | def norm(x): 18 | result = 0.0 19 | for i in x: 20 | result = result + i * i 21 | return math.sqrt(result) 22 | 23 | 24 | # A simple percentile computation. 25 | # Warning: it sorts input list. 
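# (For example, perc([3.0, 1.0, 2.0], 0.5) sorts the list in place to [1.0, 2.0, 3.0]
#  and returns the median, 2.0.)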
26 | def perc(lst, p): 27 | lst.sort() 28 | return lst[int(p * (len(lst) - 1))] 29 | 30 | 31 | # some substitution 32 | def computeB(ax, ay, az, tx, ty, L): 33 | hyps = [ 34 | [tx - ax, ty - ay, 0] 35 | ] 36 | foo = L**2 - (tx - ax)**2 - (ty - ay)**2; 37 | if foo >= 0: 38 | hyps.append([tx - ax, ty - ay, -math.sqrt(foo)]) 39 | hyps.append([tx - ax, ty - ay, +math.sqrt(foo)]) 40 | foo1 = ax**2 - 2*ax*tx + ay**2 - 2*ay*ty + tx**2 + ty**2 41 | foo2 = (1/foo1)**(1/2) 42 | foo3 = ay**3/foo1 + L*ay*foo2 - L*ty*foo2 + (ax**2*ay)/foo1 + (ay*tx**2)/foo1 + (ay*ty**2)/foo1 - (2*ay**2*ty)/foo1 - (2*ax*ay*tx)/foo1 43 | foo4 = ay**3/foo1 - L*ay*foo2 + L*ty*foo2 + (ax**2*ay)/foo1 + (ay*tx**2)/foo1 + (ay*ty**2)/foo1 - (2*ay**2*ty)/foo1 - (2*ax*ay*tx)/foo1 44 | xx1 = -(ax*ty - ay*tx - ax*foo3 + tx*foo3)/(ay - ty) 45 | xx2 = -(ax*ty - ay*tx - ax*foo4 + tx*foo4)/(ay - ty) 46 | xy1 = ay**3/foo1 + L*ay*foo2 - L*ty*foo2 + (ax**2*ay)/foo1 + (ay*tx**2)/foo1 + (ay*ty**2)/foo1 - (2*ay**2*ty)/foo1 - (2*ax*ay*tx)/foo1 47 | xy2 = ay**3/foo1 - L*ay*foo2 + L*ty*foo2 + (ax**2*ay)/foo1 + (ay*tx**2)/foo1 + (ay*ty**2)/foo1 - (2*ay**2*ty)/foo1 - (2*ax*ay*tx)/foo1 48 | if 0 * xx1 * xx2 * xy1 * xy2 == 0: 49 | hyps.append([xx1 - ax, xy1 - ay, 0]) 50 | hyps.append([xx2 - ax, xy2 - ay, 0]) 51 | angle = [1.0, 1.0, 0.0] 52 | Lmin = None 53 | for hypangle in hyps: 54 | normHypangle = norm(hypangle) + 1e-10 55 | xi = [ 56 | ax + L * hypangle[0] / normHypangle, 57 | ay + L * hypangle[1] / normHypangle, 58 | az + L * hypangle[2] / normHypangle, 59 | ] 60 | Li = (xi[0] - tx)**2 + (xi[1] - ty)**2; 61 | if Lmin is None or Lmin > Li: 62 | Lmin = Li 63 | angle = hypangle 64 | return angle 65 | 66 | 67 | # The initial 3D eastiamtion. It returns: 68 | # lines - logarithm of bone lengths 69 | # rootsx, rootsy, rootsz - 3D coordinates of head (root in the graph) positions 70 | # anglesx, anglesy, anglesz - absolute angles of limbs (3D coordinates) 71 | # Yx, Yy, Yz - resultant 3D coordinates 72 | def initialization(Xx, Xy, Xw, structure, sigma, randomNubersGenerator, dtype, percentil=0.5): 73 | T = Xx.shape[0] 74 | n = Xx.shape[1] 75 | 76 | nLines, nPoints = skeletalModel.structureStats(structure) 77 | 78 | lines = numpy.zeros((nLines, ), dtype=dtype) 79 | 80 | rootsx = Xx[0:T, 0] 81 | rootsy = Xy[0:T, 0] 82 | rootsz = numpy.zeros((T, ), dtype=dtype) 83 | 84 | rootsx = addNoise(rootsx, randomNubersGenerator, sigma) 85 | rootsy = addNoise(rootsy, randomNubersGenerator, sigma) 86 | rootsz = addNoise(rootsz, randomNubersGenerator, sigma) 87 | 88 | anglesx = numpy.zeros((T, len(structure)), dtype=dtype) 89 | anglesy = numpy.zeros((T, len(structure)), dtype=dtype) 90 | anglesz = numpy.zeros((T, len(structure)), dtype=dtype) 91 | 92 | Yx = numpy.zeros((T, n), dtype=dtype) 93 | Yy = numpy.zeros((T, n), dtype=dtype) 94 | Yz = numpy.zeros((T, n), dtype=dtype) 95 | Yx[0:T, 0] = rootsx 96 | Yy[0:T, 0] = rootsy 97 | Yz[0:T, 0] = rootsz 98 | 99 | Ls = {} 100 | for iBone in range(len(structure)): 101 | a, b, line = structure[iBone] 102 | if not line in Ls: 103 | Ls[line] = [] 104 | for t in range(T): 105 | ax = Xx[t, a] 106 | ay = Xy[t, a] 107 | bx = Xx[t, b] 108 | by = Xy[t, b] 109 | wa = Xw[t, a] 110 | wb = Xw[t, b] 111 | w = min([wa, wb]); 112 | L = norm([ax - bx, ay - by]) 113 | Ls[line].append(L) 114 | for i in range(len(lines)): 115 | lines[i] = math.log(perc(Ls[i], percentil)) 116 | 117 | for iBone in range(len(structure)): 118 | a, b, line = structure[iBone] 119 | L = math.exp(lines[line]); 120 | for t in range(T): 121 | ax = Yx[t, a] 122 | ay = 
Yy[t, a] 123 | az = Yz[t, a] 124 | 125 | tx = Xx[t, b] 126 | ty = Xy[t, b] 127 | 128 | anglex, angley, anglez = computeB(ax, ay, az, tx, ty, L) 129 | if not 0.0 * anglex == 0.0: # no inf or nan 130 | anglex = 0.0 131 | if not 0.0 * angley == 0.0: # no inf or nan 132 | angley = 0.0 133 | if not 0.0 * anglez == 0.0: # no inf or nan 134 | anglez = 0.0 135 | if anglex == 0.0 and angley == 0.0 and anglez == 0.0: 136 | anglex = 1.0 137 | angley = 1.0 138 | anglez = 1.0 139 | if anglez < 0.0: 140 | anglez = -anglez 141 | 142 | anglez = anglez + 0.001 143 | 144 | normAngle = math.sqrt(anglex * anglex + angley * angley + anglez * anglez) + 1e-10 145 | anglesx[t, iBone] = anglex / normAngle 146 | anglesy[t, iBone] = angley / normAngle 147 | anglesz[t, iBone] = anglez / normAngle 148 | 149 | for t in range(T): 150 | Yx[t, b] = Yx[t, a] + L * anglesx[t, iBone] 151 | Yy[t, b] = Yy[t, a] + L * anglesy[t, iBone] 152 | Yz[t, b] = Yz[t, a] + L * anglesz[t, iBone] 153 | 154 | rootsx = rootsx.reshape((rootsx.shape[0], 1)) 155 | rootsy = rootsy.reshape((rootsy.shape[0], 1)) 156 | rootsz = rootsz.reshape((rootsz.shape[0], 1)) 157 | 158 | return lines, rootsx, rootsy, rootsz, anglesx, anglesy, anglesz, Yx, Yy, Yz 159 | -------------------------------------------------------------------------------- /wacv2020/pose3D.py: -------------------------------------------------------------------------------- 1 | # standard 2 | 3 | # 3rd party 4 | import numpy 5 | import tensorflow as tf 6 | 7 | # our own 8 | import skeletalModel 9 | 10 | 11 | def backpropagationBasedFiltering( 12 | lines0_values, # initial (logarithms of) bone lengths 13 | rootsx0_values, # head position 14 | rootsy0_values, 15 | rootsz0_values, 16 | anglesx0_values, # angles of limbs 17 | anglesy0_values, 18 | anglesz0_values, 19 | tarx_values, # target 20 | tary_values, 21 | w_values, # weights of estimated points (likelihood of estimation) 22 | structure, 23 | dtype, 24 | learningRate=0.1, 25 | nCycles=1000, 26 | regulatorRates=[0.001, 0.1], 27 | ): 28 | 29 | T = rootsx0_values.shape[0] 30 | nBones, nPoints = skeletalModel.structureStats(structure) 31 | nLimbs = len(structure) 32 | 33 | # vector of (logarithms of) bone lengths 34 | # shape: (nLines,) 35 | lines = tf.Variable(lines0_values, dtype=dtype) 36 | # x coordinates of head 37 | # shape: (T, 1) 38 | rootsx = tf.Variable(rootsx0_values, dtype=dtype) 39 | # y coordinates of head 40 | rootsy = tf.Variable(rootsy0_values, dtype=dtype) 41 | # z coordinates of head 42 | rootsz = tf.Variable(rootsz0_values, dtype=dtype) 43 | # x coordinate of angles 44 | # shape: (T, nLimbs) 45 | anglesx = tf.Variable(anglesx0_values, dtype=dtype) 46 | # y coordinate of angles 47 | anglesy = tf.Variable(anglesy0_values, dtype=dtype) 48 | # z coordinate of angles 49 | anglesz = tf.Variable(anglesz0_values, dtype=dtype) 50 | 51 | # target 52 | # shape: (T, nPoints) 53 | tarx = tf.placeholder(dtype=dtype) 54 | tary = tf.placeholder(dtype=dtype) 55 | # likelihood from the previous pose estimator 56 | # shape: (T, nPoints) 57 | w = tf.placeholder(dtype=dtype) 58 | 59 | # resultant coordinates. It's a list for now.
It will be concatenated into a matrix later 60 | # shape: (T, nPoints) 61 | x = [None for i in range(nPoints)] 62 | y = [None for i in range(nPoints)] 63 | z = [None for i in range(nPoints)] 64 | 65 | # head first 66 | x[0] = rootsx 67 | y[0] = rootsy 68 | z[0] = rootsz 69 | 70 | # now other limbs 71 | i = 0 72 | # for numerical stability of angles normalization 73 | epsilon = 1e-10 74 | for a, b, l in structure: 75 | # limb length 76 | L = tf.exp(lines[l]) 77 | # angle 78 | Ax = anglesx[0:T, i:(i + 1)] 79 | Ay = anglesy[0:T, i:(i + 1)] 80 | Az = anglesz[0:T, i:(i + 1)] 81 | # angle norm 82 | normA = tf.sqrt(tf.square(Ax) + tf.square(Ay) + tf.square(Az)) + epsilon 83 | # new joint position 84 | x[b] = x[a] + L * Ax / normA 85 | y[b] = y[a] + L * Ay / normA 86 | z[b] = z[a] + L * Az / normA 87 | i = i + 1 88 | 89 | # making a matrix from the list 90 | x = tf.concat(x, axis=1) 91 | y = tf.concat(y, axis=1) 92 | z = tf.concat(z, axis=1) 93 | 94 | # weighted MSE 95 | loss = tf.reduce_sum(w * tf.square(x - tarx) + w * tf.square(y - tary)) / (T * nPoints) 96 | 97 | # regularizers 98 | # reg1 is the sum of bone lengths 99 | reg1 = tf.reduce_sum(tf.exp(lines)) 100 | # reg2 is the square of the trajectory length 101 | dx = x[0:(T - 1), 0:nPoints] - x[1:T, 0:nPoints] 102 | dy = y[0:(T - 1), 0:nPoints] - y[1:T, 0:nPoints] 103 | dz = z[0:(T - 1), 0:nPoints] - z[1:T, 0:nPoints] 104 | reg2 = tf.reduce_sum(tf.square(dx) + tf.square(dy) + tf.square(dz)) / ((T - 1) * nPoints) 105 | 106 | optimizeThis = loss + regulatorRates[0] * reg1 + regulatorRates[1] * reg2 107 | 108 | # the backpropagation 109 | optimizer = tf.train.GradientDescentOptimizer(learningRate) 110 | train = optimizer.minimize(optimizeThis) 111 | init = tf.variables_initializer(tf.global_variables()) 112 | sess = tf.Session() 113 | sess.run(init) 114 | for iCycle in range(nCycles): 115 | sess.run(train, {tarx: tarx_values, tary: tary_values, w: w_values}) 116 | print("iCycle = %3d, loss = %e" % (iCycle, sess.run([loss], {tarx: tarx_values, tary: tary_values, w: w_values})[0])) 117 | 118 | # returning final coordinates 119 | return sess.run([x, y, z], {}) 120 | 121 | 122 | 123 | if __name__ == "__main__": 124 | # debug - don't run it 125 | 126 | # 127 | # (0) 128 | # | 129 | # | 130 | # 0 131 | # | 132 | # | 133 | # (2)--1--(1)--1--(3) 134 | # 135 | structure = ( 136 | (0, 1, 0), 137 | (1, 2, 1), 138 | (1, 3, 1), 139 | ) 140 | 141 | T = 3 142 | nBones, nPoints = skeletalModel.structureStats(structure) 143 | nLimbs = len(structure) 144 | 145 | dtype = "float32" 146 | 147 | lines0_values = numpy.zeros((nBones, ), dtype=dtype) 148 | rootsx0_values = numpy.ones((T, 1), dtype=dtype) 149 | rootsy0_values = numpy.ones((T, 1), dtype=dtype) 150 | rootsz0_values = numpy.ones((T, 1), dtype=dtype) 151 | anglesx0_values = numpy.ones((T, nLimbs), dtype=dtype) 152 | anglesy0_values = numpy.ones((T, nLimbs), dtype=dtype) 153 | anglesz0_values = numpy.ones((T, nLimbs), dtype=dtype) 154 | 155 | w_values = numpy.ones((T, nPoints), dtype=dtype) 156 | 157 | tarx_values = numpy.ones((T, nPoints), dtype=dtype) 158 | tary_values = numpy.ones((T, nPoints), dtype=dtype) 159 | 160 | x_values, y_values, z_values = backpropagationBasedFiltering( 161 | lines0_values, 162 | rootsx0_values, 163 | rootsy0_values, 164 | rootsz0_values, 165 | anglesx0_values, 166 | anglesy0_values, 167 | anglesz0_values, 168 | tarx_values, 169 | tary_values, 170 | w_values, 171 | structure, 172 | dtype, 173 | ) 174 | 175 |
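# (For the toy structure above, skeletalModel.structureStats returns nBones = 2 distinct
#  bone indices and nPoints = 4 joints, so the optimized variables are 2 log-lengths,
#  a (3, 1) root track per axis, and (3, 3) angle matrices.)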
-------------------------------------------------------------------------------- /wacv2020/skeletalModel.py: -------------------------------------------------------------------------------- 1 | # This function gives our structure of skeletal model 2 | 3 | 4 | def getSkeletalModelStructure(): 5 | # Definition of skeleton model structure: 6 | # The structure is an n-tuple of: 7 | # 8 | # (index of a start point, index of an end point, index of a bone) 9 | # 10 | # E.g., this simple skeletal model 11 | # 12 | # (0) 13 | # | 14 | # | 15 | # 0 16 | # | 17 | # | 18 | # (2)--1--(1)--1--(3) 19 | # | | 20 | # | | 21 | # 2 2 22 | # | | 23 | # | | 24 | # (4) (5) 25 | # 26 | # has this structure: 27 | # 28 | # ( 29 | # (0, 1, 0), 30 | # (1, 2, 1), 31 | # (1, 3, 1), 32 | # (2, 4, 2), 33 | # (3, 5, 2), 34 | # ) 35 | # 36 | # Warning 1: The structure has to be a tree. 37 | # 38 | # Warning 2: The order isn't random. The order is from a root to lists. 39 | # 40 | 41 | return ( 42 | # head 43 | (0, 1, 0), 44 | 45 | # left shoulder 46 | (1, 2, 1), 47 | 48 | # left arm 49 | (2, 3, 2), 50 | (3, 4, 3), 51 | 52 | # right shoulder 53 | (1, 5, 1), 54 | 55 | # right arm 56 | (5, 6, 2), 57 | (6, 7, 3), 58 | 59 | # left hand - wrist 60 | (7, 8, 4), 61 | 62 | # left hand - palm 63 | (8, 9, 5), 64 | (8, 13, 9), 65 | (8, 17, 13), 66 | (8, 21, 17), 67 | (8, 25, 21), 68 | 69 | # left hand - 1st finger 70 | (9, 10, 6), 71 | (10, 11, 7), 72 | (11, 12, 8), 73 | 74 | # left hand - 2nd finger 75 | (13, 14, 10), 76 | (14, 15, 11), 77 | (15, 16, 12), 78 | 79 | # left hand - 3rd finger 80 | (17, 18, 14), 81 | (18, 19, 15), 82 | (19, 20, 16), 83 | 84 | # left hand - 4th finger 85 | (21, 22, 18), 86 | (22, 23, 19), 87 | (23, 24, 20), 88 | 89 | # left hand - 5th finger 90 | (25, 26, 22), 91 | (26, 27, 23), 92 | (27, 28, 24), 93 | 94 | # right hand - wrist 95 | (4, 29, 4), 96 | 97 | # right hand - palm 98 | (29, 30, 5), 99 | (29, 34, 9), 100 | (29, 38, 13), 101 | (29, 42, 17), 102 | (29, 46, 21), 103 | 104 | # right hand - 1st finger 105 | (30, 31, 6), 106 | (31, 32, 7), 107 | (32, 33, 8), 108 | 109 | # right hand - 2nd finger 110 | (34, 35, 10), 111 | (35, 36, 11), 112 | (36, 37, 12), 113 | 114 | # right hand - 3rd finger 115 | (38, 39, 14), 116 | (39, 40, 15), 117 | (40, 41, 16), 118 | 119 | # right hand - 4th finger 120 | (42, 43, 18), 121 | (43, 44, 19), 122 | (44, 45, 20), 123 | 124 | # right hand - 5th finger 125 | (46, 47, 22), 126 | (47, 48, 23), 127 | (48, 49, 24), 128 | ) 129 | 130 | 131 | # Computing number of joints and limbs 132 | def structureStats(structure): 133 | ps = {} 134 | ls = {} 135 | for a, b, l in structure: 136 | ps[a] = "gotcha" 137 | ps[b] = "gotcha" 138 | ls[l] = "gotcha" 139 | return len(ls), len(ps) 140 | -------------------------------------------------------------------------------- /wacv2020/training.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import re 3 | import random 4 | import os 5 | import sys 6 | from datetime import datetime 7 | 8 | import numpy 9 | import tensorflow as tf 10 | import keras 11 | from keras.models import Sequential, Model 12 | from keras.layers import Dense 13 | from keras import backend as K 14 | from keras.layers import Layer, Input 15 | from keras.models import model_from_json 16 | 17 | from utils import * 18 | 19 | 20 | def make_dir_for_file(fname): 21 | path = re.sub(r"[^\\/]+$", "", fname) 22 | try: 23 | os.mkdir(path) 24 | except OSError: 25 | #print("Cannot create the directory %s" % path) 26 | pass; # 
--------------------------------------------------------------------------------
/wacv2020/training.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import re
3 | import random
4 | import os
5 | import sys
6 | from datetime import datetime
7 | 
8 | import numpy
9 | import tensorflow as tf
10 | import keras
11 | from keras.models import Sequential, Model
12 | from keras.layers import Dense
13 | from keras import backend as K
14 | from keras.layers import Layer, Input
15 | from keras.models import model_from_json
16 | 
17 | from utils import *
18 | 
19 | 
20 | def make_dir_for_file(fname):
21 |     path = re.sub(r"[^\\/]+$", "", fname)
22 |     try:
23 |         os.mkdir(path)
24 |     except OSError:
25 |         #print("Cannot create the directory %s" % path)
26 |         pass  # the path probably already exists
27 | 
28 | 
29 | def save(fname_weights, model):
30 |     #make_dir_for_file(fname_json)
31 |     make_dir_for_file(fname_weights)
32 |     # serialize model to JSON
33 |     #model_json = model.to_json()
34 |     #with open(fname_json, "w") as json_file:
35 |     #    json_file.write(model_json)
36 |     # serialize weights to HDF5
37 |     model.save_weights(fname_weights)
38 | 
39 | 
40 | def addDim(M):
41 |     if len(M.shape) == 1:
42 |         return M.reshape((1, M.shape[0]))
43 |     return M.reshape((1, M.shape[0], M.shape[1]))
44 | 
45 | 
46 | def eval_dataset(list_eval, loader, model4output, model4example, funs_eval):
47 |     sum0 = None
48 |     sum1 = None
49 |     example = None
50 |     outputs = {}
51 | 
52 |     for key in list_eval:
53 |         data = loader(key)
54 |         inputs_values = data["inputs"]
55 |         inputs_values = [addDim(x) for x in inputs_values]  # 2D -> 3D
56 |         ys_values = model4output.predict_on_batch(inputs_values)
57 |         if not isinstance(ys_values, (list, tuple)):
58 |             ys_values = [ys_values]
59 | 
60 |         if sum0 is None:
61 |             sum0 = [0 for foo in range(len(ys_values))]
62 |             sum1 = [0.0 for foo in range(len(ys_values))]
63 | 
64 |         if example is None:
65 |             if model4example is not None:
66 |                 example = model4example.predict_on_batch(inputs_values)
67 | 
68 |         for i in range(len(ys_values)):
69 |             y_values = ys_values[i]
70 |             tar_values = data["targets"][i]
71 |             fun_eval = funs_eval[i]
72 |             e, m, n = fun_eval(y_values, tar_values)
73 |             sum0[i] = sum0[i] + m * n
74 |             sum1[i] = sum1[i] + e
75 |             if i == 0:
76 |                 outputs[key] = y_values
77 |     errors = []
78 |     for i in range(len(sum0)):
79 |         errors.append(sum1[i] / sum0[i])
80 |     return errors, example, outputs
81 | 
82 | 
83 | def prepare_data(key, loader, model4output, datamod):
84 |     data = loader(key)
85 |     data = datamod(data)
86 |     inputs_values = data["inputs"]
87 |     inputs_values = [addDim(x) for x in inputs_values]  # 2D -> 3D
88 |     tar_values = data["targets"]
89 |     tar_values = [addDim(tar) for tar in tar_values]  # 2D -> 3D
90 |     return inputs_values, tar_values
91 | 
92 | 
93 | def training(
94 |     model4training,
95 |     model4output,
96 |     model4example,
97 |     loss,
98 |     loader,
99 |     list_training,
100 |     list_stop,
101 |     optimizer,
102 |     funs_eval,
103 |     testing_interval,
104 |     fname_model_weights=None,
105 |     fname_example=None,
106 |     list_test=None,
107 |     mask4print="%e",
108 |     wloss=None,
109 |     max_steps=None,
110 |     datamod4training=lambda data: data,
111 |     loader_test=None
112 | ):
113 | 
114 |     if loader_test is None:
115 |         loader_test = loader
116 | 
117 |     if fname_example is not None:
118 |         make_dir_for_file(fname_example)
119 | 
120 |     model4training.compile(loss=loss, loss_weights=wloss, optimizer=optimizer)
121 | 
122 |     random.shuffle(list_training)
123 | 
124 |     e_stop_min = None
125 |     e_test_min = None
126 |     i_step = 0
127 |     outputs_test = None
128 |     while True:
129 | 
130 |         # testing
131 |         if i_step % testing_interval == 0:
132 |             print("")
133 |             print("testing (%d steps):" % i_step)
134 |             print("")
135 |             es_stop, example_stop, outputs_stop = eval_dataset(list_stop, loader_test, model4output, model4example, funs_eval)
136 |             e_stop = es_stop[0]
137 |             if fname_example is not None:
138 |                 saveMatrixText(fname_example, example_stop)
139 |             if e_stop_min is None or e_stop_min >= e_stop:
140 |                 e_stop_min = e_stop
141 |                 if list_test is not None:
142 |                     es_test, example_test, outputs_test = eval_dataset(list_test, loader_test, model4output, model4example, funs_eval)
143 |                     e_test = es_test[0]
144 |                     e_test_min = e_test
145 |                 if fname_model_weights is not None:
146 |                     try:
147 |                         save(fname_model_weights, model4output)
148 |                     except OSError:
149 |                         print("OSError: cannot save the weights")
150 |                         pass
151 |             print(" actual results:")
152 |             print("")
153 |             print(" e(stop) = %s" % "\t".join([mask4print % foo for foo in es_stop]))
154 |             if list_test is not None:
155 |                 print(" e(test) = %s" % "\t".join([mask4print % foo for foo in es_test]))
156 |             print("")
157 |             print(" the best results:")
158 |             print("")
159 |             print((" e(stop) = " + mask4print) % e_stop_min)
160 |             if list_test is not None:
161 |                 print((" e(test) = " + mask4print) % e_test_min)
162 |             print("")
163 |             sys.stdout.flush()
164 | 
165 |         i_key = i_step % len(list_training)
166 |         key = list_training[i_key]
167 |         inputs_values, tar_values = prepare_data(key, loader, model4output, datamod4training)
168 | 
169 |         model4training.fit(inputs_values, tar_values, epochs=1, batch_size=1, verbose=0)
170 | 
171 |         i_step = i_step + 1
172 | 
173 |         if (max_steps is not None) and (i_step >= max_steps):
174 |             break
175 | 
176 |     print("OK")
177 | 
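The contracts of loader and funs_eval are implicit in the code above, so a sketch may help (every name below is an editor's assumption, not part of the repository): loader(key) must return a dict with lists of 2D arrays under "inputs" and "targets", and each evaluation function must return a triple (total_error, n_rows, n_cols) whose ratio total_error / (n_rows * n_cols) is the per-element error that drives early stopping.

def mse_eval(y, tar):
    # total squared error plus the element count it was accumulated over;
    # y carries the extra batch axis added by addDim, tar does not
    e = float(numpy.sum((y[0] - tar) ** 2))
    m, n = tar.shape
    return e, m, n

# hypothetical invocation, with my_loader following the contract above:
# training(
#     model4training=model, model4output=model, model4example=None,
#     loss="mse", loader=my_loader,
#     list_training=loadList("data/demo/lists/train.list"),
#     list_stop=loadList("data/demo/lists/test.list"),
#     optimizer="adam", funs_eval=[mse_eval], testing_interval=1000,
#     fname_model_weights="temp/weights.h5", max_steps=100000,
# )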
--------------------------------------------------------------------------------
/wacv2020/utils.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import re
3 | import random
4 | import os
5 | import codecs
6 | import sys
7 | from datetime import datetime
8 | 
9 | import numpy
10 | 
11 | 
12 | def loadText(fname, cod="utf8"):
13 |     f = codecs.open(fname, "r", cod)
14 |     text = {}
15 |     ws = {}
16 |     for line in f:
17 |         foo = line.strip().split()
18 |         key = foo[0]
19 |         sent = foo[1:]
20 |         for w in sent:
21 |             if w not in ws:
22 |                 ws[w] = len(ws)
23 |         sent_int = [ws[w] for w in sent]
24 |         text[key] = numpy.asarray(sent_int, dtype="int32")
25 |     f.close()
26 |     return text, len(ws)
27 | 
28 | 
29 | def loadInfo(fname, dtype, cod="utf8"):
30 |     f = codecs.open(fname, "r", cod)
31 |     info = {}
32 |     vals = {}
33 |     for line in f:
34 |         key, val = line.strip().split()
35 |         if val not in vals:
36 |             vals[val] = len(vals)
37 |         info[key] = val
38 |     f.close()
39 |     for key in info.keys():
40 |         val = info[key]
41 |         info[key] = numpy.zeros((1, len(vals), ), dtype=dtype)
42 |         info[key][0, vals[val]] = 1
43 |     return info, len(vals)
44 | 
45 | 
46 | def loadDenses(fname, names):
47 |     result = []
48 |     hf = h5py.File(fname, "r")
49 |     for name in names:
50 |         #print(name)
51 |         gr = hf.get(name)[name]
52 |         w = gr["kernel:0"]
53 |         w = numpy.array(w)
54 |         result.append(w)
55 |         if "bias:0" in gr:
56 |             b = gr["bias:0"]
57 |             b = numpy.array(b)
58 |             result.append(b)
59 |         #else:
60 |         #    b = numpy.zeros((w.shape[1], ), dtype=w.dtype)
61 |     hf.close()
62 |     return result
63 | 
64 | 
65 | def loadAllDenses(fname):
66 |     result = []
67 |     hf = h5py.File(fname, "r")
68 |     names = []
69 |     i = 1
70 |     while "dense_%d" % i in hf:
71 |         names.append("dense_%d" % i)
72 |         i = i + 1
73 |     for name in names:
74 |         gr = hf.get(name)[name]
75 |         w = gr["kernel:0"]
76 |         w = numpy.array(w)
77 |         result.append(w)
78 |         if "bias:0" in gr:
79 |             b = gr["bias:0"]
80 |             b = numpy.array(b)
81 |             result.append(b)
82 |         #else:
83 |         #    b = numpy.zeros((w.shape[1], ), dtype=w.dtype)
84 |     hf.close()
85 |     return result
86 | 
87 | 
88 | def loadAllDenses2(fname, maxNum=None):
89 |     result = []
90 |     hf = h5py.File(fname, "r")
91 |     names = []
92 |     if maxNum is None:
93 |         i = 1
94 |         while "dense2_%d" % i in hf:
95 |             names.append("dense2_%d" % i)
96 |             i = i + 1
97 |     else:
98 |         for i in range(1, maxNum + 1):
99 |             if "dense2_%d" % i in hf:
100 |                 names.append("dense2_%d" % i)
101 |     for name in names:
102 |         gr = hf.get(name)[name]
103 |         w = gr["kernel:0"]
104 |         w = numpy.array(w)
105 |         result.append(w)
106 |         if "bias:0" in gr:
107 |             b = gr["bias:0"]
108 |             b = numpy.array(b)
109 |             result.append(b)
110 |         #else:
111 |         #    b = numpy.zeros((w.shape[1], ), dtype=w.dtype)
112 |     hf.close()
113 |     return result
114 | 
115 | 
116 | def loadList(fname):
117 |     f = open(fname)
118 |     lst = [line.strip() for line in f]
119 |     f.close()
120 |     return lst
121 | 
122 | 
123 | def saveMatrixText(fname, x):
124 |     f = open(fname, "w")
125 |     for t in range(x.shape[0]):
126 |         for i in range(x.shape[1]):
127 |             f.write("%e\t" % x[t, i])
128 |         f.write("\n")
129 |     f.close()
130 | 
131 | 
132 | def loadMatrixFloatText(fname, dtype="float64"):
133 |     M = []
134 |     f = open(fname)
135 |     for line in f:
136 |         M.append([float(x) for x in line.strip().split()])
137 |     f.close()
138 |     M = numpy.asarray(M, dtype=dtype)
139 |     return M
140 | 
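The two loaders at the top of utils.py expect whitespace-separated files whose first column is a recording key. A short sketch of their use on the demo data (paths taken from this repository's data/demo tree, relative to the wacv2020 directory; an editor's illustration):

text, nWords = loadText("data/demo/text/text")  # {key: int32 word-id sequence}, vocabulary size
info, nSpeakers = loadInfo("data/demo/info/spk", "float32")  # {key: one-hot speaker row vector}, number of speakers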
--------------------------------------------------------------------------------
/wacv2020/visualization/invDeltas.m:
--------------------------------------------------------------------------------
1 | function [X3] = invDeltas(x_deltas)
2 | 
3 | deltas = [
4 |     0,  1
5 |     1,  2
6 |     2,  3
7 |     3,  4
8 |     1,  5
9 |     5,  6
10 |     6,  7
11 |     4, 29
12 |    29, 30
13 |    30, 31
14 |    31, 32
15 |    32, 33
16 |    29, 34
17 |    34, 35
18 |    35, 36
19 |    36, 37
20 |    29, 38
21 |    38, 39
22 |    39, 40
23 |    40, 41
24 |    29, 42
25 |    42, 43
26 |    43, 44
27 |    44, 45
28 |    29, 46
29 |    46, 47
30 |    47, 48
31 |    48, 49
32 |     7,  8
33 |     8,  9
34 |     9, 10
35 |    10, 11
36 |    11, 12
37 |     8, 13
38 |    13, 14
39 |    14, 15
40 |    15, 16
41 |     8, 17
42 |    17, 18
43 |    18, 19
44 |    19, 20
45 |     8, 21
46 |    21, 22
47 |    22, 23
48 |    23, 24
49 |     8, 25
50 |    25, 26
51 |    26, 27
52 |    27, 28
53 |    ];
54 | 
55 | ndeltas = size(deltas, 1);
56 | 
57 | 
58 | [T, dim] = size(x_deltas);
59 | 
60 | n = dim / 2;
61 | 
62 | X3 = [];
63 | for t = 1:T
64 | 
65 |     X3(t, 1) = 0;
66 |     X3(t, 2) = 0;
67 |     X3(t, 3) = 0;
68 | 
69 |     for idelta = 1:ndeltas
70 |         a = deltas(idelta, 1);
71 |         b = deltas(idelta, 2);
72 | 
73 |         X3(t, 3 * b + 1) = X3(t, 3 * a + 1) - x_deltas(t, 2 * (idelta - 1) + 1);
74 |         X3(t, 3 * b + 2) = X3(t, 3 * a + 2) - x_deltas(t, 2 * (idelta - 1) + 2);
75 | 
76 |     end
77 | 
78 | end
--------------------------------------------------------------------------------
/wacv2020/visualization/runMe.m:
--------------------------------------------------------------------------------
1 | %x = h5read('../data/demo/keypoints/keypoints-01-raw.h5','/20191005v')';
2 | x = h5read('../data/demo/keypoints/keypoints-02-filter.h5','/20191005v')';
3 | 
4 | skeletonDisp(x)
--------------------------------------------------------------------------------
/wacv2020/visualization/skeletonDisp.m:
--------------------------------------------------------------------------------
1 | function [] = skeletonDisp(X3)
2 | 
3 | lines4disp = [
4 |     0  1
5 |     1  2
6 |     2  3
7 |     3  4
8 |     1  5
9 |     5  6
10 |     6  7
11 | 
12 |     7  8
13 | 
14 |     8  9
15 |     9 10
16 |    10 11
17 |    11 12
18 | 
19 |     8 13
20 |    13 14
21 |    14 15
22 |    15 16
23 | 
24 |     8 17
25 |    17 18
26 |    18 19
27 |    19 20
28 | 
29 |     8 21
30 |    21 22
31 |    22 23
32 |    23 24
33 | 
34 |     8 25
35 |    25 26
36 |    26 27
37 |    27 28
38 | 
39 |     4 29
40 |    29 30
41 |    30 31
42 |    31 32
43 |    32 33
44 |    29 34
45 |    34 35
46 |    35 36
47 |    36 37
48 |    29 38
49 |    38 39
50 |    39 40
51 |    40 41
52 |    29 42
53 |    42 43
54 |    43 44
55 |    44 45
56 |    29 46
57 |    46 47
58 |    47 48
59 |    48 49
60 |    ];
61 | 
62 | 
63 | X = X3;
64 | 
65 | [T, n] = size(X);
66 | n3 = n / 3;
67 | 
68 | 
69 | xs = X(:, 1:3:n);
70 | ys = X(:, 2:3:n);
71 | zs = X(:, 3:3:n);
72 | 
73 | xmin = min(min(xs));
74 | xmax = max(max(xs));
75 | ymin = min(min(ys));
76 | ymax = max(max(ys));
77 | zmin = min(min(zs));
78 | zmax = max(max(zs));
79 | 
80 | 
81 | d3 = 0;
82 | 
83 | for t = 1:T
84 |     hold off
85 | 
86 |     %Ls = [];
87 | 
88 |     if d3
89 |         plot3(xmin, zmin, -ymin, 'x')
90 |         hold on
91 |         plot3(xmax, zmax, -ymax, 'x')
92 |         hold on
93 |     else
94 |         plot(xmin, -ymin, 'x')
95 |         hold on
96 |         plot(xmax, -ymax, 'x')
97 |         hold on
98 |     end
99 | 
100 | 
101 |     for i = 1:size(lines4disp, 1)
102 |         a = lines4disp(i, 1);
103 |         b = lines4disp(i, 2);
104 | 
105 |         %L = sqrt((X(t, 3 * a + 1) - X(t, 3 * b + 1)).^2 + (X(t, 3 * a + 2) - X(t, 3 * b + 2)).^2 + (X(t, 3 * a + 3) - X(t, 3 * b + 3)).^2);
106 |         %Ls = [Ls L];
107 | 
108 |         if d3
109 |             plot3([X(t, 3 * a + 1), X(t, 3 * b + 1)], [X(t, 3 * a + 3), X(t, 3 * b + 3)], -[X(t, 3 * a + 2), X(t, 3 * b + 2)], '*-')
110 |         else
111 |             plot([X(t, 3 * a + 1), X(t, 3 * b + 1)], -[X(t, 3 * a + 2), X(t, 3 * b + 2)], '.-')
112 |         end
113 |         hold on
114 | 
115 |     end
116 | 
117 |     hold off
118 |     pause(0.001)
119 | 
120 |     %disp(Ls)
121 | 
122 | end
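For users without MATLAB/Octave, the 2D branch of skeletonDisp has a straightforward matplotlib equivalent. This is an editor's sketch, not part of the repository; bones4disp stands for the same joint-index pairs as lines4disp above, and the 1-based MATLAB column index 3*a+1 becomes the 0-based 3*a here.

import matplotlib.pyplot as plt

def skeleton_disp_2d(X, bones4disp, pause=0.001):
    # X: (T, 3*nJoints) array, columns ordered x, y, z per joint;
    # y is negated for display, as in the MATLAB code
    for t in range(X.shape[0]):
        plt.cla()
        for a, b in bones4disp:
            plt.plot([X[t, 3 * a], X[t, 3 * b]],
                     [-X[t, 3 * a + 1], -X[t, 3 * b + 1]], ".-")
        plt.pause(pause)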
--------------------------------------------------------------------------------
/wacv2020/walkDir.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | 
4 | def walkDir(dname, filt=r".*"):
5 |     result = []
6 |     for root, dnames, fnames in os.walk(dname):
7 |         for fname in fnames:
8 |             if re.search(filt, fname):
9 |                 foo = root + "/" + fname
10 |                 foo = re.sub(r"[/\\]+", "/", foo)
11 |                 result.append(foo)
12 |     return result
13 | 
--------------------------------------------------------------------------------
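walkDir recursively collects slash-normalized paths of files whose names match a regular expression. An illustrative call (assuming the demo keypoint JSONs from json.zip have been unpacked under the given directory):

from walkDir import walkDir

json_files = walkDir("data/demo/keypoints", filt=r"\.json$")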