├── A0_configuration.py ├── A1_videoAttributes.py ├── A2_getPicData.py ├── A3_zipPicData.py ├── A4_structureDataStructure.py ├── B0_singleVarAna.py ├── B1_integratedVarAna.py ├── B2_Frame2VecDPLBuilding.py ├── C0_clusterModelBuilding.py ├── C1_getClusDiv.py ├── C2_cutMovie.py ├── D0_Integration.py ├── D1_generalizingClipSeries.py ├── E0_GeneralizingFrame.py ├── README.md ├── tryDPModel.py ├── tryFFmpeg.py └── tryFrame2Vec.py /A0_configuration.py: -------------------------------------------------------------------------------- 1 | #coding:utf8 2 | ''' 3 | Created on 2018年11月23日 4 | 5 | @author: Administrator 6 | ''' 7 | 8 | import os 9 | import shutil 10 | 11 | def main(): 12 | ''' 13 | configurate the programme, mainly build and clear data 14 | ''' 15 | dirList=["figures","imageDatas","log","models","movie","structuredData"] 16 | 17 | print("making up dirs ...") 18 | for dirItem in dirList: 19 | if dirItem not in list(os.listdir(".")): 20 | os.mkdir(dirItem) 21 | 22 | print("clearing dirs ...") 23 | clearList=["imageDatas","log"] 24 | for dirItem in clearList: 25 | if len(list(os.walk(dirItem)))>0: 26 | shutil.rmtree(dirItem) 27 | os.mkdir(dirItem) 28 | 29 | if __name__ == '__main__': 30 | main() -------------------------------------------------------------------------------- /A1_videoAttributes.py: -------------------------------------------------------------------------------- 1 | #coding:utf8 2 | ''' 3 | Created on 2018年116 4 | 5 | @author: Administrator 6 | ''' 7 | import cv2 8 | 9 | def main(videoPath='movie/antMan2.mp4'): 10 | ''' 11 | description of the movie 12 | ''' 13 | vc = cv2.VideoCapture(videoPath) 14 | print("total numer of frames:",vc.get(7)) 15 | print("frame rate:",vc.get(5)) 16 | print("total time:",vc.get(7)/vc.get(5)/3600,"h") 17 | print("width",vc.get(3),"px") 18 | print("height:",vc.get(4),"px") 19 | 20 | if __name__ == '__main__': 21 | vc = cv2.VideoCapture('movie/antMan2.mp4') 22 | print("total numer of frames:",vc.get(7)) 23 | print("frame rate:",vc.get(5)) 24 | print("total time:",vc.get(7)/vc.get(5)/3600,"h") 25 | print("width",vc.get(3),"px") 26 | print("height:",vc.get(4),"px") -------------------------------------------------------------------------------- /A2_getPicData.py: -------------------------------------------------------------------------------- 1 | #coding:utf8 2 | ''' 3 | Created on 2018年11月6日 4 | reference: 5 | https://www.jb51.net/article/135972.htm 6 | @author: Administrator 7 | ''' 8 | import cv2 9 | import os 10 | import shutil 11 | from PIL import Image 12 | def main(videoPath,abstractMode="part",timeF=10,checkF=200,imgNum=-1): 13 | ''' 14 | get data in image frame from the video 15 | ---------------------------------------- 16 | videoPath: the path of the source video 17 | imgPath: the path of the images to be saved 18 | timeF: distinct between images 19 | checkF: check whether the programme is running every frames 20 | imgNum: 21 | if it is -1, then travel the whole video 22 | else get images and then stop 23 | 24 | ''' 25 | imgPath="imageDatas" 26 | 27 | print("loading video ...") 28 | vc = cv2.VideoCapture(videoPath) 29 | 30 | print("checking whether is open ...") 31 | if vc.isOpened(): 32 | rval , frame = vc.read() 33 | else: 34 | rval = False 35 | i=0 36 | c=1 37 | 38 | print("developing images ...") 39 | if abstractMode=="part": 40 | print("distinct between images is",timeF) 41 | while rval: 42 | if i<0: 43 | continue 44 | rval, frame = vc.read() 45 | if(c%timeF == 0): 46 | cv2.imwrite(imgPath+'/'+str(i) + '.jpg',frame) 47 | i=i+1 48 | if i%checkF==0: 49 | print("the",i,"th photo") 50 | c = c + 1 51 | cv2.waitKey(1) 52 | if imgNum!=-1: 53 | if i==imgNum: 54 | break 55 | vc.release() 56 | elif abstractMode=="total": 57 | print("distinct between images is",int(vc.get(7)/imgNum)) 58 | totalFrameNum=vc.get(7) 59 | timeF=int(totalFrameNum/imgNum) 60 | while rval: 61 | if i<0: 62 | continue 63 | rval, frame = vc.read() 64 | if(c%timeF == 0): 65 | cv2.imwrite(imgPath+'/'+str(i) + '.jpg',frame) 66 | i=i+1 67 | if i%checkF==0: 68 | print("the",i,"th photo") 69 | c = c + 1 70 | cv2.waitKey(1) 71 | if imgNum!=-1: 72 | if i==imgNum: 73 | break 74 | vc.release() 75 | else: 76 | raise NameError 77 | print("the divide mode's name is wrong") 78 | print("finished") 79 | 80 | if __name__ == '__main__': 81 | 82 | main("movie/antMan2.mp4",abstractMode="part",timeF=77,checkF=200,imgNum=10) 83 | # 84 | # picNum=500 85 | # 86 | # print("clearing imageDatas ...") 87 | # if len(list(os.walk("imageDatas")))>0: 88 | # shutil.rmtree("imageDatas") 89 | # os.mkdir("imageDatas") 90 | # 91 | # timeF = 10 92 | # print("distinct between images is",timeF) 93 | # 94 | # print("loading video ...") 95 | # vc = cv2.VideoCapture('movie/antMan.mp4') 96 | # 97 | # print("checking whether is open ...") 98 | # if vc.isOpened(): #判断是否正常打开 99 | # rval , frame = vc.read() 100 | # else: 101 | # rval = False 102 | # 103 | # 104 | # i=0 105 | # c=1 106 | # while rval: 107 | # if i<0: 108 | # continue 109 | # rval, frame = vc.read() 110 | # if(c%timeF == 0): 111 | # cv2.imwrite('imageDatas/'+str(i) + '.jpg',frame) 112 | # i=i+1 113 | # if i%100==0: 114 | # print("the",i,"th photo") 115 | # c = c + 1 116 | # cv2.waitKey(1) 117 | # if picNum!=-1: 118 | # if i==picNum: 119 | # break 120 | # vc.release() 121 | # print("finished") -------------------------------------------------------------------------------- /A3_zipPicData.py: -------------------------------------------------------------------------------- 1 | #coding:utf8 2 | ''' 3 | Created on 2018年11月7日 4 | 5 | @author: Administrator 6 | ''' 7 | 8 | from PIL import Image 9 | import os 10 | import tqdm 11 | 12 | def main(resizedWidth=36,resizedHeight=36,cutSubtitle=True): 13 | ''' 14 | zip the image data 15 | ---------------------------------------- 16 | imgPath: the path of the images to be saved 17 | resizedWidth: the width that the images are going to be zipped 18 | resizedHeight: the height that the images are going to be ziped 19 | cutSubtitle:whether to cut subtitle (1/8 down part of the images) 20 | ''' 21 | imgPath="imageDatas" 22 | i=0 23 | for _, _, files in os.walk(imgPath): 24 | for f in tqdm.tqdm(files): 25 | if i<0: 26 | continue 27 | fp = os.path.join(imgPath,f) 28 | img = Image.open(fp) 29 | 30 | if cutSubtitle==True: 31 | w, h = img.size 32 | img=img.crop((0,0,w,7/8*h)) 33 | else: 34 | print("not cut subtitles") 35 | pass 36 | 37 | img.resize((resizedWidth, resizedHeight)).save(fp, "JPEG") 38 | img.close 39 | i=i+1 40 | 41 | if __name__ == '__main__': 42 | i=0 43 | resizedWidth=36 44 | resizedHeight=36 45 | for _, _, files in os.walk("imageDatas"): 46 | for f in tqdm.tqdm(files): 47 | if i<0: 48 | continue 49 | fp = os.path.join("imageDatas",f) 50 | img = Image.open(fp) 51 | 52 | print("cutting subtitles ...") 53 | w, h = img.size 54 | img=img.crop((0,0,w,7/8*h)) 55 | 56 | print("resizing ...") 57 | img.resize((resizedWidth, resizedHeight)).save(fp, "JPEG") 58 | img.close 59 | i=i+1 60 | -------------------------------------------------------------------------------- /A4_structureDataStructure.py: -------------------------------------------------------------------------------- 1 | #coding:utf8 2 | ''' 3 | Created on 2018年11月8日 4 | 5 | @author: Administrator 6 | ''' 7 | 8 | import os 9 | import cv2 10 | import numpy as np 11 | import pickle as pkl 12 | 13 | 14 | windowSize=14 15 | step=1 16 | 17 | def paddingSeq(row,maxLen): 18 | neededShape=row[0].shape 19 | while len(row)threshold) for varItem in varArr.tolist()])\ 46 | *np.max(varArr) 47 | 48 | print("get cut point in the frame of fraction of the movie ...") 49 | cutPointList=[i/zeroOneVarArr.shape[0] for i in range(zeroOneVarArr.shape[0]-1) if isCutPoint(zeroOneVarArr[i:i+2])==True] 50 | cutPointList=[0]+cutPointList+[1] 51 | 52 | 53 | clipLenMean=np.mean([cutPointList[lenI+1]-cutPointList[lenI]\ 54 | for lenI in range(len(cutPointList)-1)]) 55 | clipLenStd=np.sqrt(np.var([cutPointList[lenI+1]-cutPointList[lenI]\ 56 | for lenI in range(len(cutPointList)-1)])) 57 | 58 | print("the mean length is",clipLenMean) 59 | print("the var of length is",clipLenStd) 60 | 61 | tempCutPointList=cutPointList.copy() 62 | for lenI in range(len(cutPointList)-1): 63 | if cutPointList[lenI+1]-cutPointList[lenI] 3 | =========Brochure============
4 | 1.After downloading the zip into your workspace, unpack it and establish a folder named "movie";
5 | 2.Put the movie you want to cut into the folder;
6 | 3.Run D0_Integration (most of the parameters are in it, and you can change it), and you will find your clips in "movie";
7 | 4.After adjusting the clip list you want to compile together in D1_generalizatingClipSeries, you can compile the clips together. But the result may be confused to some aspects, since it just gets the clip_in_full_movie_length/full_movie_length\*compiled_movie_ength around the peaks. 8 | -------------------------------------------------------------------------------- /tryDPModel.py: -------------------------------------------------------------------------------- 1 | #coding:utf8 2 | ''' 3 | Created on 2018年11月7日 4 | 5 | @author: Administrator 6 | ''' 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | import pickle as pkl 11 | 12 | from keras.datasets import mnist 13 | from keras.models import Sequential,Model 14 | from keras.layers.core import Dense, Activation, Dropout 15 | from keras.utils import np_utils 16 | from keras.datasets import mnist 17 | from keras.models import Sequential 18 | from keras.layers import Lambda,Input,Dense,Reshape, Activation, Convolution2D, MaxPooling2D, Flatten, Bidirectional 19 | from keras.layers import Convolution2D, MaxPooling2D 20 | from keras.utils import np_utils 21 | from keras.layers.recurrent import LSTM 22 | from keras import backend as K 23 | from keras.callbacks import TensorBoard 24 | 25 | from Bio.Cluster import kcluster 26 | from sklearn import metrics 27 | import tqdm 28 | 29 | class MyModel: 30 | 31 | def covLoss(self,y_true, y_pred): 32 | ytVar=K.var(y_true) 33 | ypVar=K.var(y_pred) 34 | cov=(ytVar+ypVar)/2 35 | return cov 36 | 37 | 38 | def __init__(self,**kwargs): 39 | ''' 40 | you can design any kinds of model in this class 41 | ***note:add *** 42 | ================================================== 43 | for : 44 | 45 | batch_size(default: 50):batch size 46 | nb_classes(default: 10):the number of classes 47 | nb_epoch(default: 12):the number of epoch 48 | nb_filters(default: 32):the number of filters 49 | img_rows(default: 28 ):the height of matrixes 50 | img_cols(default: 28):the width of matrixes 51 | pool_size(default: (2,2)):the size of pools 52 | kernel_size(default: (3,3)): the size of kernels 53 | ================================================== 54 | for <**model name**> 55 | parameter 56 | ''' 57 | self.paraDict=kwargs 58 | 59 | def buildCNNSeriesModel(self,\ 60 | nb_filters = 32,\ 61 | img_num=16,\ 62 | img_rows=256,\ 63 | img_cols =256,\ 64 | img_passes=3,\ 65 | pool_size = 5,\ 66 | kernel_size = (8,8), 67 | vecLen=100): 68 | ''' 69 | batch_size(default: 50):batch size 70 | nb_filters(default: 32):the number of filters 71 | img_rows(default: 28 ):the height of matrixes 72 | img_cols(default: 28):the width of matrixes 73 | img_passes(defaulst: 3):the passages of tensors 74 | pool_size(default: (2,2)):the size of pools 75 | kernel_size(default: (3,3)): the size of kernels 76 | ''' 77 | 78 | try: 79 | self.nb_filters = self.paraDict['nb_filters'] 80 | except: 81 | self.nb_filters =nb_filters 82 | 83 | try: 84 | self.img_rows, self.img_cols = self.paraDict['img_rows'],self.paraDict['img_cols'] 85 | except: 86 | self.img_rows, self.img_cols =img_rows, img_cols 87 | 88 | try: 89 | self.pool_size = self.paraDict['pool_size'] 90 | except: 91 | self.pool_size = pool_size 92 | 93 | try: 94 | self.kernel_size = self.paraDict['kernel_size'] 95 | except: 96 | self.kernel_size=kernel_size 97 | 98 | try: 99 | self.img_passes=self.paraDict['img_passes'] 100 | except: 101 | self.img_passes=img_passes 102 | 103 | try: 104 | self.img_num=self.paraDict['img_num'] 105 | except: 106 | self.img_num=img_num 107 | 108 | try: 109 | self.vecLen=self.paraDict['vecLen'] 110 | except: 111 | self.vecLen=vecLen 112 | self.input_shape = (self.img_num,self.img_rows, self.img_cols,self.img_passes) 113 | 114 | denseList=[] 115 | inputList=[Input(shape=self.input_shape[1:]) for i in range(self.img_num)] 116 | for i in range(self.img_num): 117 | conv2DL1=Convolution2D(self.nb_filters,\ 118 | input_shape=self.input_shape,\ 119 | kernel_size=(self.kernel_size[0],self.kernel_size[1]),\ 120 | border_mode='valid',\ 121 | activation="relu",\ 122 | data_format="channels_last")(inputList[i]) 123 | print(conv2DL1) 124 | maxPoolL1=MaxPooling2D(pool_size=pool_size,\ 125 | padding="same",\ 126 | data_format="channels_last")(conv2DL1) 127 | print(maxPoolL1) 128 | conv2DL2=Convolution2D(self.nb_filters,\ 129 | kernel_size=(self.kernel_size[0],self.kernel_size[1]),\ 130 | border_mode='valid',\ 131 | activation="relu",\ 132 | data_format="channels_last")(maxPoolL1) 133 | print(conv2DL2) 134 | maxPoolL2=MaxPooling2D(pool_size=pool_size,\ 135 | padding="same",\ 136 | data_format="channels_last")(conv2DL2) 137 | print(maxPoolL2) 138 | denseL1=Dense(units=16,activation="relu")(maxPoolL2) 139 | print(denseL1) 140 | flattenL=Flatten()(denseL1) 141 | print(flattenL) 142 | denseL2=Dense(units=64,activation="relu")(flattenL) 143 | print(denseL2) 144 | denseList.append(Reshape((64,1))(denseL2)) 145 | concateL=Lambda(K.concatenate)(denseList) 146 | print(concateL) 147 | BLSTML=Bidirectional(LSTM(units=64,activation="relu"))(concateL) 148 | print(BLSTML) 149 | denseL3=Dense(units=self.vecLen,activation="relu",name="vector_dense")(BLSTML) 150 | denseL4=Dense(units=self.img_rows*self.img_cols*self.img_passes,activation="tanh")(denseL3) 151 | reshapeL=Reshape((self.img_rows,self.img_cols,self.img_passes),name="vector_name")(denseL4) 152 | print(reshapeL) 153 | 154 | model=Model(inputs=inputList,outputs=reshapeL) 155 | model.compile(optimizer="rmsprop",\ 156 | loss="mse") 157 | 158 | return model 159 | 160 | def buildCNNClassifierModel(self,\ 161 | batch_size = 50,\ 162 | nb_classes = 10,\ 163 | nb_epoch = 12,\ 164 | nb_filters = 32,\ 165 | img_rows=28,\ 166 | img_cols =28,\ 167 | pool_size = (2,2),\ 168 | kernel_size = (3,3)): 169 | ''' 170 | batch_size(default: 50):batch size 171 | nb_classes(default: 10):the number of classes 172 | nb_epoch(default: 12):the number of epoch 173 | nb_filters(default: 32):the number of filters 174 | img_rows(default: 28 ):the height of matrixes 175 | img_cols(default: 28):the width of matrixes 176 | pool_size(default: (2,2)):the size of pools 177 | kernel_size(default: (3,3)): the size of kernels 178 | ''' 179 | try: 180 | self.batch_size = self.paraDict['batch_size'] 181 | except: 182 | self.batch_size=batch_size 183 | 184 | try: 185 | self.nb_classes = self.paraDict['nb_classes'] 186 | except: 187 | self.nb_classes=nb_classes 188 | 189 | try: 190 | self.nb_epoch = self.paraDict['nb_epoch'] 191 | except: 192 | self.nb_epoch=nb_epoch 193 | 194 | try: 195 | self.nb_filters = self.paraDict['nb_filters'] 196 | except: 197 | self.nb_filters =nb_filters 198 | 199 | try: 200 | self.img_rows, self.img_cols = self.paraDict['img_rows'],self.paraDict['img_cols'] 201 | except: 202 | self.img_rows, self.img_cols =img_rows, img_cols 203 | 204 | try: 205 | self.pool_size = self.paraDict['pool_size'] 206 | except: 207 | self.pool_size = pool_size 208 | 209 | try: 210 | self.kernel_size = self.paraDict['kernel_size'] 211 | except: 212 | self.kernel_size=kernel_size 213 | 214 | self.input_shape = (self.img_rows, self.img_cols,1) 215 | 216 | model = Sequential() 217 | 218 | model.add(Convolution2D(self.nb_filters, self.kernel_size[0] ,self.kernel_size[1], 219 | border_mode='valid', 220 | input_shape=self.input_shape)) 221 | model.add(Activation('relu')) 222 | 223 | # 卷积层，激活函数是ReLu 224 | model.add(Convolution2D(self.nb_filters, self.kernel_size[0], self.kernel_size[1])) 225 | model.add(Activation('relu')) 226 | 227 | # 池化层，选用Maxpooling，给定pool_size，dropout比例为0.25 228 | model.add(MaxPooling2D(pool_size=self.pool_size)) 229 | model.add(Dropout(0.25)) 230 | 231 | # Flatten层，把多维输入进行一维化，常用在卷积层到全连接层的过渡 232 | model.add(Flatten()) 233 | 234 | # 包含128个神经元的全连接层，激活函数为ReLu，dropout比例为0.5 235 | model.add(Dense(128)) 236 | model.add(Activation('relu')) 237 | model.add(Dropout(0.5)) 238 | 239 | # 包含10个神经元的输出层，激活函数为Softmax 240 | model.add(Dense(self.nb_classes)) 241 | model.add(Activation('softmax')) 242 | 243 | model.compile(loss='categorical_crossentropy', 244 | optimizer='adadelta', 245 | metrics=['accuracy']) 246 | 247 | model.summary() 248 | 249 | self.DPmodel=model 250 | 251 | return model 252 | 253 | if __name__ == '__main__': 254 | print("loading data ...") 255 | with open("structuredData/seriesPkl.pkl","rb") as structuredDataFile: 256 | myData=pkl.load(structuredDataFile) 257 | x_train=[row[0] for row in myData] 258 | x_train=[[row[i] for row in x_train] for i in range(len(x_train[0]))] 259 | # print(len(x_train)) 260 | y_train=[row[1] for row in myData] 261 | # print(y_train) 262 | print("building model ...") 263 | cnnGenModel=MyModel(img_rows=36,\ 264 | img_cols=36,\ 265 | kernel_size = (3,3)).buildCNNSeriesModel() 266 | cnnGenModel.fit(x_train,np.array(y_train),epochs=100,\ 267 | callbacks=[TensorBoard(log_dir="./log")]) 268 | intermediate_layer_model = Model(inputs=cnnGenModel.input, 269 | outputs=cnnGenModel.get_layer("dense_27").output) 270 | reStruMovieList=[intermediate_layer_model.predict([[col[i]] for col in x_train])[0] for i in range(len(x_train[0]))] 271 | 272 | ssList=[] 273 | clusterListList=[] 274 | clusterRange=range(2,19,2) 275 | for i in tqdm.tqdm(clusterRange): 276 | reStruMovieArr=np.array(reStruMovieList) 277 | # clusterModel=KMeans(n=i) 278 | # clusterList=clusterModel.fit_predict(reStruMovieArr).tolist() 279 | clusterList=kcluster(reStruMovieArr,nclusters=i,dist="u")[0].tolist() 280 | clusterListList.append(clusterList) 281 | clusterMat=-np.dot(reStruMovieArr,reStruMovieArr.T)/\ 282 | np.dot(np.sqrt(np.sum(reStruMovieArr*reStruMovieArr,axis=1)),\ 283 | np.sqrt(np.sum(reStruMovieArr*reStruMovieArr,axis=1))) 284 | ss=metrics.silhouette_score(clusterMat, clusterList, metric="precomputed") 285 | ssList.append(ss) 286 | minIndex=ssList.index(min(ssList)) 287 | minClu=list(clusterRange)[minIndex] 288 | 289 | plt.plot(np.array(list(clusterRange)),np.array(ssList)) 290 | plt.show() 291 | 292 | print(clusterListList[1]) 293 | # clusterModel=KMeans(n_clusters=minClu) 294 | # clusterList=clusterModel.fit_predict(reStruMovieArr).tolist() 295 | # print(np.array(clusterList)) 296 | -------------------------------------------------------------------------------- /tryFFmpeg.py: -------------------------------------------------------------------------------- 1 | #coding:utf8 2 | ''' 3 | Created on 2018年11月7日 4 | 5 | @author: Administrator 6 | ''' 7 | 8 | if __name__ == '__main__': 9 | pass -------------------------------------------------------------------------------- /tryFrame2Vec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Timaos123/AutoDivideMovie/e12b2439db8b55dd802aec32f4113c2c4c4d4b40/tryFrame2Vec.py --------------------------------------------------------------------------------