├── .gitignore ├── .travis.yml ├── .zenodo.json ├── Blueprints └── final_cosan_helmet_light.stl ├── LICENSE ├── README.md ├── __init__.py ├── facesync ├── __init__.py ├── facesync.py ├── tests │ ├── resources │ │ ├── cosan_synctune.wav │ │ ├── sample1.MP4 │ │ ├── sample1.txt │ │ └── sample1.wav │ └── test_facesync.py ├── utils.py └── version.py ├── requirements.txt ├── screenshots ├── AudioAligner.png ├── VideoViewer.png └── plotface.png └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Python Related # 2 | ################### 3 | *.pyc 4 | *.log 5 | facesync/*.pyc 6 | /facesync/*.pyc 7 | */*.pyc 8 | 9 | # iPython Notebook Caches # 10 | ########################### 11 | .ipynb_checkpoints 12 | Notebooks/ 13 | Notebooks/.ipynb_checkpoints 14 | 15 | # OS generated files # 16 | ###################### 17 | .DS_Store 18 | .DS_Store? 19 | ._* 20 | .Spotlight-V100 21 | .Trashes 22 | thumbs.db 23 | Thumbs.db 24 | 25 | # Tests & Coverage 26 | ###################### 27 | .coverage 28 | htmlcov/ 29 | 30 | # Build files 31 | ############# 32 | .cache/ 33 | build/ 34 | dist/ 35 | facesync.egg-info/ 36 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: 2 | - linux 3 | 4 | language: python 5 | 6 | sudo: true 7 | 8 | python: 9 | - "2.7" 10 | 11 | before_install: 12 | - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi 13 | - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install ffmpeg; fi 14 | - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libav; fi 15 | - sudo apt-get install libav-tools 16 | 17 | install: 18 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 19 | - bash miniconda.sh -b -p $HOME/miniconda 20 | - export PATH="$HOME/miniconda/bin:$PATH" 21 | - hash -r 22 | - conda config --set always_yes yes --set changeps1 no 23 | - conda update -q conda 24 | - conda info -a 25 | - conda create -q -n testenv python=$TRAVIS_PYTHON_VERSION pip numpy scipy pytest 26 | - source activate testenv 27 | - pip install python-coveralls 28 | - pip install -r requirements.txt 29 | - python setup.py install 30 | 31 | script: coverage run --source facesync -m py.test 32 | 33 | after_success: 34 | - coveralls 35 | -------------------------------------------------------------------------------- /.zenodo.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "
Release for F1000
", 3 | "license": "other-open", 4 | "title": "cosanlab/facesync: 0.9", 5 | "version": "0.9", 6 | "upload_type": "software", 7 | "publication_date": "2019-04-12", 8 | "creators": [ 9 | { 10 | "affiliation": "Dartmouth College", 11 | "name": "Jin Hyun Cheong" 12 | }, 13 | { 14 | "name": "Sawyer Brooks" 15 | }, 16 | { 17 | "name": "Luke J. Chang" 18 | } 19 | ], 20 | "access_right": "open", 21 | "related_identifiers": [ 22 | { 23 | "scheme": "url", 24 | "identifier": "https://github.com/cosanlab/facesync/tree/0.9", 25 | "relation": "isSupplementTo" 26 | }, 27 | { 28 | "scheme": "doi", 29 | "identifier": "10.5281/zenodo.2638334", 30 | "relation": "isVersionOf" 31 | } 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /Blueprints/final_cosan_helmet_light.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/Blueprints/final_cosan_helmet_light.stl -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Jin Hyun Cheong 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![DOI](https://zenodo.org/badge/107047285.svg)](https://zenodo.org/badge/latestdoi/107047285) 2 | [![Build Status](https://travis-ci.org/jcheong0428/facesync.svg?branch=master)](https://travis-ci.org/jcheong0428/facesync) 3 | [![Coverage Status](https://coveralls.io/repos/github/jcheong0428/facesync/badge.svg?branch=master)](https://coveralls.io/github/jcheong0428/facesync?branch=master) 4 | [![Downloads](https://pepy.tech/badge/facesync)](https://pepy.tech/project/facesync) 5 | 6 | # FaceSync: Open source framework for recording facial expressions with head-mounted cameras 7 | 8 | The FaceSync toolbox provides 3D blueprints for building the head-mounted camera setup described in our [paper](https://psyarxiv.com/p5293/). The toolbox also provides functions to automatically synchronize videos based on audio, manually align audio, plot facial landmark movements, and inspect synchronized videos to graph data. 
9 | 10 | 11 | ## Installation 12 | 13 | To install (for osx or linux) open Terminal and type 14 | 15 | `pip install facesync` 16 | 17 | or 18 | 19 | `git clone https://github.com/jcheong0428/facesync.git` 20 | then in the repository folder type 21 | `python setup.py install` 22 | 23 | 24 | ## Dependencies 25 | For full functionality, FACESYNC requires [ffmpeg](https://ffmpeg.org/) and the [libav](https://libav.org/) library. 26 | 27 | Linux 28 | `sudo apt-get install libav-tools` 29 | 30 | OS X 31 | `brew install ffmpeg` 32 | `brew install libav` 33 | 34 | also requires following packages: 35 | - numpy 36 | - scipy 37 | You may also install these via `pip install -r requirements.txt` 38 | 39 | ## Recommended Processing Steps 40 | 1. Extract Audio from Target Video 41 | 2. Find offset with Extracted Audio 42 | 3. Trim Video using Offset. 43 | *If you need to resize your video, do so before trimming. 44 | Otherwise timing can be off. 45 | 46 | ``` 47 | from facesync.facesync import facesync 48 | # change file name to include the full 49 | video_files = ['path/to/sample1.MP4'] 50 | target_audio = 'path/to/cosan_synctune.wav' 51 | # Intialize facesync class 52 | fs = facesync(video_files=video_files,target_audio=target_audio) 53 | # Extracts audio from sample1.MP4 54 | fs.extract_audio() 55 | # Find offset by correlation 56 | fs.find_offset_corr(search_start=14,search_end=16) 57 | print(fs.offsets) 58 | # Find offset by fast fourier transform 59 | fs.find_offset_fft() 60 | print(fs.offsets) 61 | ``` 62 | 63 | # FaceSync provides handy utilities for working with facial expression data. 64 | 65 | ## Manually align the audios with AudioAligner. 66 | ``` 67 | %matplotlib notebook 68 | from facesync.utils import AudioAligner 69 | file_original = 'path/to/audio.wav' 70 | file_sample = 'path/to/sample.wav' 71 | AudioAligner(original=file_original, sample=file_sample) 72 | ``` 73 | 74 | 75 | 76 | ## Plot facial landmarks and how they change as a result of Action Unit changes. 77 | ``` 78 | %matplotlib notebook 79 | from facesync.utils import ChangeAU, plotface 80 | changed_face = ChangeAU(aulist=['AU6','AU12','AU17'], au_weight = 1.0) 81 | ax = plotface(changed_face) 82 | ``` 83 | 84 | 85 | 86 | ## Use the VideoViewer widget to play both video and data at the same time (only available on Python). 87 | ``` 88 | import facesync.utils as utils 89 | %matplotlib notebook 90 | utils.VideoViewer(path_to_video='path/to/video.mp4', data_df = fexDataFrame) 91 | ``` 92 | 93 | 94 | # Citation 95 | Please cite the following paper if you use our head-mounted camera setup or software. 96 | #### Cheong, J. H., Brooks, S., & Chang, L. J. (2017, November 1). FaceSync: Open source framework for recording facial expressions with head-mounted cameras. 
Retrieved from psyarxiv.com/p5293 97 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ["facesync"] -------------------------------------------------------------------------------- /facesync/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 'facesync','utils', 2 | '__version__'] 3 | 4 | from .version import __version__ 5 | -------------------------------------------------------------------------------- /facesync/facesync.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | ''' 4 | FaceSync Class 5 | ========================================== 6 | Class to sync videos by audio matching. 7 | 8 | ''' 9 | __all__ = ['facesync'] 10 | __author__ = ["Jin Hyun Cheong"] 11 | __license__ = "MIT" 12 | 13 | import os 14 | import numpy as np 15 | import subprocess 16 | import scipy.io.wavfile as wav 17 | 18 | def _get_vid_resolution(vidFile): 19 | """ Gets video resolution for a given file using ffprobe. 20 | """ 21 | cmd = [ 22 | 'ffprobe','-v','error','-of','flat=s=_','-select_streams','v:0','-show_entries','stream=height,width', vidFile 23 | ] 24 | proc = subprocess.Popen(cmd,stdout=subprocess.PIPE) 25 | out = proc.communicate()[0] 26 | out = out.split('\n')[:2] 27 | return tuple([int(elem.split('=')[-1]) for elem in out]) 28 | 29 | def write_offset_to_file(afile, offset, header='offset'): 30 | ''' 31 | Helper function to write offset output to file. 32 | ''' 33 | (path2fname, fname) = os.path.split(afile) 34 | fname = os.path.join(path2fname,fname.split(".")[0] + '.txt') 35 | f = open(fname, 'a+') 36 | f.write(header+'\n') 37 | f.write(str(offset)+'\n') 38 | f.close() 39 | 40 | def processInput(rate0,data0,afile,fps,length,search_start,search_end,verbose): 41 | ''' 42 | Helper function for multiprocessing 43 | ''' 44 | if verbose: 45 | print(afile) 46 | rate1,data1 = wav.read(afile) 47 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate 48 | searchtime = search_end-search_start # seconds to search alignment 49 | if np.ndim(data0)>1: 50 | data0 = data0[:,0] 51 | if np.ndim(data1)>1: 52 | data1 = data1[:,0] 53 | to_compare = data0[0:rate0*length] 54 | try: 55 | assert(data1.shape[0] - (searchtime+length)*rate0 >= 0) 56 | except: 57 | print("Original length need to be shorter or reduce searchtime to allow buffer at end.") 58 | rs = [] 59 | ts = [] 60 | # for i in np.linspace(0,searchtime,fps*searchtime): 61 | inputs = list(np.linspace(search_start,search_end,fps*searchtime)) 62 | 63 | ts = inputs 64 | rs.append(rs) 65 | # offset_r = ts[np.argmax(rs)] + search_start 66 | offset_r = ts[np.argmax(rs)] 67 | self.offsets.append(offset_r) 68 | write_offset_to_file(afile, offset_r,header='corr_multi') 69 | return rs,offset_r 70 | 71 | def calc_rs(i, to_compare, sample): 72 | try: 73 | assert(to_compare.shape[0]==sample.shape[0]) 74 | r=np.corrcoef(to_compare,sample)[0][1] 75 | except: 76 | print("Shape mismatch at %s" %str(i)) 77 | return r, i 78 | 79 | class facesync(object): 80 | """ 81 | facesync is a class to represents multiple videos 82 | so that one can align them based on audio. 
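    A minimal usage sketch, mirroring the README example (file paths are placeholders):

        fs = facesync(video_files=['path/to/sample1.MP4'], target_audio='path/to/cosan_synctune.wav')
        fs.extract_audio()                                   # writes path/to/sample1.wav
        fs.find_offset_corr(search_start=14, search_end=16)  # estimated offsets stored in fs.offsets
        fs.trim_vids()                                        # trim each video by its offset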
83 | 84 | Args: 85 | data: list of video files 86 | Y: Pandas DataFrame of training labels 87 | X: Pandas DataFrame Design Matrix for running univariate models 88 | mask: binary nifiti file to mask brain data 89 | output_file: Name to write out to nifti file 90 | **kwargs: Additional keyword arguments to pass to the prediction algorithm 91 | 92 | """ 93 | def __init__(self, video_files=None, audio_files=None, target_audio = None, offsets=None,**kwargs): 94 | ''' 95 | Args: 96 | video_files: list of video filenames to process 97 | audio_files: list of video filenames to process 98 | target_audio: audio to which videos will be aligned 99 | offsets: list of offsets to trim the video_files 100 | ''' 101 | # Initialize attributes 102 | self.video_files = video_files 103 | self.audio_files = audio_files 104 | self.target_audio = target_audio 105 | self.offsets = offsets 106 | 107 | if self.video_files is not None: 108 | assert(isinstance(self.video_files,list)),'Place path to files in a list' 109 | if self.audio_files is not None: 110 | assert(isinstance(self.audio_files,list)),'Place path to files in a list' 111 | if (self.video_files is not None) & (self.offsets is not None): 112 | assert(len(self.video_files)==len(self.offsets)),'Number of videos and number of offsets should match' 113 | 114 | def extract_audio(self,rate=44100,call=True,verbose=True): 115 | ''' 116 | This method extracts audio from video files in self.video_files and saves audio files in self.audio_files 117 | 118 | Input 119 | ------------ 120 | rate: rate of audio stream frequency to be extracted, default 44100 121 | call: boolean, whether to wait for each process to finish or open multiple threads 122 | verbose: if True, prints the currently processing audio filename 123 | ''' 124 | assert(len(self.video_files)!=0),'No video files to process' 125 | self.audio_files = [] 126 | for i, vidfile in enumerate(self.video_files): 127 | if verbose: 128 | print(vidfile) 129 | (path2fname, vname) = os.path.split(vidfile) 130 | aname = vname.split(".")[0] + ".wav" 131 | infile = os.path.join(path2fname,vname) 132 | outfile = os.path.join(path2fname,aname) 133 | self.audio_files.append(outfile) 134 | # cmd = ' '.join(["avconv", "-i", infile, "-y", "-vn", "-ac", "1","-ar",str(rate),"-f", "wav", outfile]) 135 | command = "ffmpeg -y -i " + infile + " -ab 128k -ac 2 -ar " +str(rate) +" -vn " + outfile 136 | if call: 137 | subprocess.call(command, shell=True) 138 | else: 139 | subprocess.Popen(command, shell=True) 140 | 141 | def find_offset_cross(self,length = 10,search_start=0,verbose=True): 142 | ''' 143 | Find offset using Fourier Transform cross correlation. 144 | 145 | Input 146 | ------------ 147 | length: seconds to use for the cross correlation matching, default is 10 seconds 148 | verbose: if True, prints the currently processing audio filename 149 | 150 | Output 151 | ------------ 152 | allrs : list of cross correlation results using fftconvolve. to retrieve the offset time need to zero index and subtract argmax. 
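        Example
        ------------
        A minimal sketch (assumes self.audio_files has already been populated, e.g. via extract_audio):
            allrs = fs.find_offset_cross(length=10, search_start=0)
            print(fs.offsets)  # estimated offset in seconds for each audio file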
153 | ''' 154 | import numpy as np 155 | from scipy.signal import fftconvolve 156 | assert(self.target_audio is not None), 'Target audio not specified' 157 | assert(self.audio_files is not None), 'Audio files not specified' 158 | self.offsets = [] 159 | rate0,data0 = wav.read(self.target_audio) 160 | allrs = [] 161 | for i, afile in enumerate(self.audio_files): 162 | if verbose: 163 | print(afile) 164 | rate1,data1 = wav.read(afile) 165 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate 166 | # Take first audio channel 167 | if np.ndim(data0)>1: 168 | data0 = data0[:,0] 169 | if np.ndim(data1)>1: 170 | data1 = data1[:,0] 171 | x = data0[:rate0*length] # target audio 172 | y = data1[int(search_start*rate0):int(search_start*rate0)+rate0*length] # change sample audio location 173 | # Pad target audio with zeros if not same length. 174 | if len(x) < len(y): 175 | xnew = np.zeros_like(y) 176 | xnew[:len(x)] = x 177 | x = xnew 178 | assert(len(x)==len(y)), "Length of two samples must be the same" 179 | crosscorr = fftconvolve(x,y[::-1],'full') 180 | zero_index = int(len(crosscorr) / 2 ) -1 181 | offset_x = search_start+(zero_index - np.argmax(crosscorr))/float(rate0) 182 | # assert(len(crosscorr)==len(x)) 183 | self.offsets.append(offset_x) 184 | write_offset_to_file(afile, offset_x,header='xcorr_len'+str(length)) 185 | allrs.append(crosscorr) 186 | return allrs 187 | 188 | def find_offset_corr(self,length=5,search_start=0,search_end=20,fps=44100,verbose=True): 189 | ''' 190 | Find offset based on correlation of two audio. 191 | 192 | Input 193 | ------------ 194 | self.target_audio : Original audio to which other files will be aligned to 195 | self.audio_files : List of audio files that needs to be trimmed 196 | length : length of original sample to compare 197 | search_start, search_end: start and end times to search for alignment in seconds 198 | fps: level of temporal precision, default 44100 199 | verbose: if True, prints the currently processing audio filename 200 | 201 | Output 202 | ------------ 203 | rs: correlation values 204 | ''' 205 | assert(self.target_audio is not None), 'Target audio not specified' 206 | assert(self.audio_files is not None), 'Audio files not specified' 207 | self.offsets = [] 208 | allrs = [] 209 | rate0,data0 = wav.read(self.target_audio) 210 | for i, afile in enumerate(self.audio_files): 211 | if verbose: 212 | print(afile) 213 | rate1,data1 = wav.read(afile) 214 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate 215 | searchtime = search_end-search_start # seconds to search alignment 216 | if np.ndim(data0)>1: 217 | data0 = data0[:,0] 218 | if np.ndim(data1)>1: 219 | data1 = data1[:,0] 220 | to_compare = data0[0:rate0*length] 221 | try: 222 | assert(data1.shape[0] - (searchtime+length)*rate0 >= 0) 223 | except: 224 | print("Original length need to be shorter or reduce searchtime to allow buffer at end.") 225 | rs = [] 226 | ts = [] 227 | # for i in np.linspace(0,searchtime,fps*searchtime): 228 | for i in np.linspace(search_start,search_end,fps*searchtime): 229 | sample = data1[int(rate0*i):int(rate0*(i+length))][0:to_compare.shape[0]] 230 | try: 231 | assert(to_compare.shape[0]==sample.shape[0]) 232 | except: 233 | print("Shape mismatch at %s" %str(i)) 234 | try: 235 | rs.append(np.corrcoef(to_compare,sample)[0][1]) 236 | ts.append(i) 237 | except: 238 | pass 239 | allrs.append(rs) 240 | # offset_r = ts[np.argmax(rs)] + search_start 
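            # Pick the candidate time with the highest correlation. ts already holds absolute
            # times spanning search_start..search_end, so no extra shift is added here.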
241 | offset_r = ts[np.argmax(rs)] 242 | self.offsets.append(offset_r) 243 | write_offset_to_file(afile, offset_r,header='corr_fps'+str(fps)+'_len'+str(length)+'_start'+str(search_start)+'_end'+str(search_end)) 244 | return allrs 245 | 246 | def find_offset_corr_sparse(self,length=5,search_start=0,search_end=20,fps=44100,sparse_ratio=.5,verbose=True): 247 | ''' 248 | Finds offset by correlation with sparse sampling. 249 | 250 | Input 251 | ------------ 252 | self.target_audio : Original audio to which other files will be aligned to 253 | self.audio_files : List of audio files that needs to be trimmed 254 | length : length of original sample to compare 255 | search_start, search_end: start and end times to search for alignment in seconds 256 | fps: level of temporal precision, default 44100 257 | sparse_ratio = Determines the sparse sampling of the target audio to match (default is .5) 258 | verbose: if True, prints the currently processing audio filename 259 | 260 | Output 261 | ------------ 262 | offset_r : time to trim based on correlation 263 | offset_d : time to trim based on distance 264 | rs: correlation values 265 | ds: difference values 266 | ''' 267 | assert(self.target_audio is not None), 'Target audio not specified' 268 | assert(self.audio_files is not None), 'Audio files not specified' 269 | self.offsets = [] 270 | allrs = [] 271 | rate0,data0 = wav.read(self.target_audio) 272 | for i, afile in enumerate(self.audio_files): 273 | if verbose: 274 | print(afile) 275 | rate1,data1 = wav.read(afile) 276 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate 277 | searchtime = search_end-search_start # seconds to search alignment 278 | if np.ndim(data0)>1: 279 | data0 = data0[:,0] 280 | if np.ndim(data1)>1: 281 | data1 = data1[:,0] 282 | # to_compare = data0[0:rate0*length] 283 | sampleix = list(range(0,int(rate0*length)-1)) 284 | np.random.shuffle(sampleix) 285 | sampleix = np.sort(sampleix[0:int(rate0*length*sparse_ratio)]) 286 | to_compare = data0[sampleix] 287 | 288 | try: 289 | assert(data1.shape[0] - (searchtime+length)*rate0 >= 0) 290 | except: 291 | print("Original length need to be shorter or reduce searchtime to allow buffer at end.") 292 | rs = [] 293 | ts = [] 294 | # for i in np.linspace(0,searchtime,fps*searchtime): 295 | for i in np.linspace(search_start,search_end,fps*searchtime): 296 | # sample = data1[int(rate0*i):int(rate0*(i+length))][0:to_compare.shape[0]] 297 | sample = data1[int(rate0*i):int(rate0*(i+length))][sampleix] 298 | try: 299 | assert(to_compare.shape[0]==sample.shape[0]) 300 | except: 301 | print("Shape mismatch at %s" %str(i)) 302 | try: 303 | rs.append(np.corrcoef(to_compare,sample)[0][1]) 304 | ts.append(i) 305 | except: 306 | pass 307 | allrs.append(rs) 308 | # offset_r = ts[np.argmax(rs)] + search_start 309 | offset_r = ts[np.argmax(rs)] 310 | self.offsets.append(offset_r) 311 | write_offset_to_file(afile, offset_r, header='corr_sparse_fps'+str(fps)+'_len'+str(length)+'_start'+str(search_start)+'_end'+str(search_end)) 312 | return allrs 313 | 314 | def find_offset_corr_multi(self,length=5,search_start=0,search_end=20,fps=44100,verbose=True): 315 | ''' 316 | Find offset based on correlation with multiprocessing. 317 | Requires joblib package. 
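        A minimal sketch (same arguments as find_offset_corr; assumes audio has already been extracted):
            allrs = fs.find_offset_corr_multi(length=5, search_start=0, search_end=20)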
318 | 319 | Input 320 | ------------ 321 | self.target_audio : Original audio to which other files will be aligned to 322 | self.audio_files : List of audio files that needs to be trimmed 323 | length : length of original sample to compare 324 | search_start, search_end: start and end times to search for alignment in seconds 325 | fps: level of temporal precision, default 44100 326 | verbose: if True, prints the currently processing audio filename 327 | 328 | Output 329 | ------------ 330 | self.offsets: max offsets 331 | rs: correlation values 332 | ''' 333 | from joblib import Parallel, delayed 334 | import multiprocessing 335 | num_cores = multiprocessing.cpu_count()-1 # don't use all cores 336 | 337 | assert(self.target_audio is not None), 'Target audio not specified' 338 | assert(self.audio_files is not None), 'Audio files not specified' 339 | self.offsets = [] 340 | allrs = [] 341 | rate0,data0 = wav.read(self.target_audio) 342 | for i, afile in enumerate(self.audio_files): 343 | if verbose: 344 | print(afile) 345 | rate1,data1 = wav.read(afile) 346 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate 347 | searchtime = search_end-search_start # seconds to search alignment 348 | if np.ndim(data0)>1: 349 | data0 = data0[:,0] 350 | if np.ndim(data1)>1: 351 | data1 = data1[:,0] 352 | to_compare = data0[0:rate0*length] 353 | try: 354 | assert(data1.shape[0] - (searchtime+length)*rate0 >= 0) 355 | except: 356 | print("Original length need to be shorter or reduce searchtime to allow buffer at end.") 357 | rs = [] 358 | ts = [] 359 | out = Parallel(n_jobs=num_cores,backend='threading')(delayed(calc_rs)(i,to_compare,data1[int(rate0*i):int(rate0*(i+length))][0:to_compare.shape[0]]) for i in np.linspace(search_start,search_end,fps*searchtime)) 360 | rs,ts= zip(*out) 361 | allrs.append(rs) 362 | offset_r = ts[np.argmax(rs)] 363 | self.offsets.append(offset_r) 364 | write_offset_to_file(afile, offset_r,header='corr_fps'+str(fps)+'_len'+str(length)+'_start'+str(search_start)+'_end'+str(search_end)) 365 | return allrs 366 | 367 | def find_offset_dist(self,length=5,search_start=0,search_end=20,fps=44100,verbose=True): 368 | ''' 369 | Find offset based on squared distance of audio wave. 
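        A minimal sketch (argument values mirror the test suite; assumes audio has already been extracted):
            allds = fs.find_offset_dist(search_start=15, search_end=16, fps=441)
            print(fs.offsets)  # offset with the smallest squared distance, per audio file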
370 | 371 | Input 372 | ------------ 373 | self.target_audio : Original audio to which other files will be aligned to 374 | self.audio_files : List of audio files that needs to be trimmed 375 | length : length of original sample to compare 376 | search_start, search_end: start and end times to search for alignment in seconds 377 | fps: level of temporal precision, default 44100 378 | verbose: if True, prints the currently processing audio filename 379 | 380 | Output 381 | ------------ 382 | offset_d : time to trim based on distance 383 | rs: correlation values 384 | ds: difference values 385 | ''' 386 | assert(self.target_audio is not None), 'Target audio not specified' 387 | assert(self.audio_files is not None), 'Audio files not specified' 388 | self.offsets = [] 389 | allds = [] 390 | rate0,data0 = wav.read(self.target_audio) 391 | for i, afile in enumerate(self.audio_files): 392 | if verbose: 393 | print(afile) 394 | rate1,data1 = wav.read(afile) 395 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate 396 | searchtime = search_end-search_start # seconds to search alignment 397 | if np.ndim(data0)>1: 398 | data0 = data0[:,0] 399 | if np.ndim(data1)>1: 400 | data1 = data1[:,0] 401 | to_compare = data0[0:rate0*length] 402 | try: 403 | assert(data1.shape[0] - (searchtime+length)*rate0 >= 0) 404 | except: 405 | print("Original length need to be shorter or reduce searchtime to allow buffer at end.") 406 | ds = [] 407 | ts = [] 408 | # for i in np.linspace(0,searchtime,fps*searchtime): 409 | for i in np.linspace(search_start,search_end,fps*searchtime): 410 | sample = data1[int(rate0*i):int(rate0*(i+length))][0:to_compare.shape[0]] 411 | try: 412 | assert(to_compare.shape[0]==sample.shape[0]) 413 | except: 414 | print("Shape mismatch at %s" %str(i)) 415 | try: 416 | ds.append(sum((to_compare-sample)**2)) 417 | ts.append(i) 418 | except: 419 | pass 420 | allds.append(ds) 421 | # offset_d = ts[np.argmin(ds)] + search_start 422 | offset_d = ts[np.argmin(ds)] 423 | self.offsets.append(offset_d) 424 | write_offset_to_file(afile, offset_d,header='dist_fps'+str(fps)+'_len'+str(length)+'_start'+str(search_start)+'_end'+str(search_end)) 425 | return allds 426 | 427 | def resize_vids(self, resolution = 64, suffix = None,call = True, force=False): 428 | ''' 429 | Resize videos. 430 | 431 | Inputs 432 | ------------ 433 | resolution: height of the video 434 | suffix: what to name the resized video. 
If not specified, will append video names with resolution 435 | call: boolean, whether to wait for each process to finish or open multiple threads, 436 | True: call, False: multithread, default is call 437 | force: whether to force creating new files some video files are already at the desired resolution; defaults to False 438 | ''' 439 | if suffix == None: 440 | suffix = str(resolution) 441 | 442 | out = [] 443 | for vidfile in self.video_files: 444 | (path2fname, vname) = os.path.split(vidfile) 445 | print("Resizing video: %s" % (vname)) 446 | current_resolution = _get_vid_resolution(vidfile) 447 | if current_resolution[1] == resolution and not force: 448 | print("Native resolution already ok, skipping: %s" % (vname)) 449 | final_vidname = os.path.join(path2fname,vname) 450 | out.append(final_vidname) 451 | continue 452 | else: 453 | final_vidname = os.path.join(path2fname,vname.split('.')[0]+'_'+suffix+'.'+vname.split('.')[-1]) 454 | out.append(final_vidname) 455 | command = 'ffmpeg -y -i ' + vidfile + ' -vf scale=-1:'+str(resolution)+' '+final_vidname 456 | if not os.path.exists(final_vidname): 457 | if call: 458 | subprocess.call(command, shell=True) 459 | else: 460 | subprocess.Popen(command, shell=True) 461 | return out 462 | 463 | def concat_vids(self, final_vidname = None, resolution_fix=False, checkres=True): 464 | ''' 465 | Concatenate list of videos to one video. 466 | 467 | Inputs 468 | ------------ 469 | final_vidname = Filepath/filname of the concatenated video. If not specified will use the first video name appended with _all 470 | ''' 471 | assert(len(self.video_files)!=0),'No video files to process' 472 | if (final_vidname != None): 473 | self.final_vidname = final_vidname 474 | if (len(self.video_files)!=0) and (final_vidname == None): 475 | (path2fname, vname) = os.path.split(self.video_files[0]) 476 | self.final_vidname = os.path.join(path2fname,vname.split('.')[0]+'_all.'+vname.split('.')[-1]) 477 | assert(type(self.final_vidname)==str),'final_vidname must be a string with full path' 478 | 479 | #Check that files are all of the same resolution 480 | if checkres: 481 | resolutions = [_get_vid_resolution(elem) for elem in self.video_files] 482 | if len(set(resolutions)) > 1: 483 | if resolution_fix: 484 | min_resolution = min([elem[1] for elem in resolutions]) 485 | print("Videos mismatch in resolution, resizing to: %s..." % (min_resolution)) 486 | new_vids= self.resize_vids(resolution=min_resolution) 487 | self.video_files = new_vids 488 | resolutions = [_get_vid_resolution(elem) for elem in self.video_files] 489 | assert(len(set(resolutions))<=1),"Videos still mismatched. Something went wrong with automatic resizing? Try resizing manually." 490 | print("Resizing complete. 
Continuing.") 491 | else: 492 | raise TypeError("Video files have different resolutions!") 493 | 494 | # Create intermediate video files 495 | tempfiles = str(); 496 | for i, vidfile in enumerate(self.video_files): 497 | (path2fname, vname) = os.path.split(vidfile) 498 | print("Joining video: %s" % (vname)) 499 | if len(tempfiles)!=0: 500 | tempfiles = tempfiles+"|" 501 | intermediatefile = os.path.join(path2fname,"intermediate"+str(i)+'.ts') 502 | if not os.path.exists(intermediatefile): 503 | command = "ffmpeg -i "+ vidfile +" -c copy -bsf:v h264_mp4toannexb -f mpegts " + intermediatefile 504 | subprocess.call(command, shell=True) 505 | tempfiles = tempfiles + intermediatefile 506 | 507 | # Concatenate videos 508 | command = 'ffmpeg -y -i "concat:' + tempfiles + '" -c copy -bsf:a aac_adtstoasc '+ self.final_vidname 509 | subprocess.call(command, shell=True) 510 | #remove intermediates 511 | for i, vidfile in enumerate(self.video_files): 512 | (path2fname, vname) = os.path.split(vidfile) 513 | intermediatefile = os.path.join(path2fname,"intermediate"+str(i)+'.ts') 514 | command = "rm -f " + intermediatefile 515 | subprocess.call(command, shell=True) 516 | 517 | def trim_vids(self,offsets = None, suffix = None,call=True): 518 | ''' 519 | Trims video based on offset 520 | 521 | Inputs 522 | ------------ 523 | offsets: list of offsets to trim the self.video_files with 524 | length of offsets should match length of self.video_files 525 | suffix: string to add to end of the trimmed video, default: 'trimmed' 526 | call: boolean, whether to wait for each process to finish or open multiple threads, 527 | True: call, False: multithread, default is call 528 | ''' 529 | if suffix == None: 530 | suffix = 'trimmed' 531 | if offsets is not None: 532 | self.offsets= offsets 533 | assert(len(self.video_files)==len(self.offsets)),'Number of videos and number of offsets should match' 534 | for i,vidfile in enumerate(self.video_files): 535 | seconds = str(self.offsets[i]) 536 | (path2fname, vname) = os.path.split(vidfile) 537 | print("Trimming video: %s" % (vname)) 538 | final_vidname = os.path.join(path2fname,vname.split('.')[0]+'_'+suffix+'.'+vname.split('.')[-1]) 539 | # command = 'ffmpeg -y -ss ' + str(seconds) + ' -i ' + vidfile + ' -c copy ' + final_vidname 540 | # command = 'ffmpeg -y -ss ' + seconds.split('.')[0] + ' -i ' + vidfile + ' -ss 00:00:00.' 
+ seconds.split('.')[1] + ' -c copy ' + final_vidname 541 | command = 'ffmpeg -y -i ' + vidfile + ' -ss ' + str(seconds) + ' -crf 23 ' + final_vidname 542 | # command = 'ffmpeg -y -i ' + vidfile + ' -ss ' + str(seconds) + ' -vcodec libx264 -crf 23 -acodec copy ' + final_vidname 543 | if call: 544 | subprocess.call(command, shell=True) 545 | else: 546 | subprocess.Popen(command, shell=True) 547 | -------------------------------------------------------------------------------- /facesync/tests/resources/cosan_synctune.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/facesync/tests/resources/cosan_synctune.wav -------------------------------------------------------------------------------- /facesync/tests/resources/sample1.MP4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/facesync/tests/resources/sample1.MP4 -------------------------------------------------------------------------------- /facesync/tests/resources/sample1.txt: -------------------------------------------------------------------------------- 1 | xcorr_len3 2 | 15.1612471655 3 | corr_fps441_len5_start15_end16 4 | 15.1772727273 5 | corr_sparse_fps441_len1.8_start15_end16 6 | 15.1772727273 7 | dist_fps441_len5_start15_end16 8 | 15.0590909091 9 | xcorr_len3 10 | 15.161224489795918 11 | corr_fps441_len5_start15_end16 12 | 15.177272727272728 13 | corr_sparse_fps441_len1.8_start15_end16 14 | 15.177272727272728 15 | dist_fps441_len5_start15_end16 16 | 15.059090909090909 17 | xcorr_len3 18 | 15.161224489795918 19 | corr_fps441_len5_start15_end16 20 | 15.177272727272728 21 | corr_sparse_fps441_len1.8_start15_end16 22 | 15.177272727272728 23 | dist_fps441_len5_start15_end16 24 | 15.059090909090909 25 | -------------------------------------------------------------------------------- /facesync/tests/resources/sample1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/facesync/tests/resources/sample1.wav -------------------------------------------------------------------------------- /facesync/tests/test_facesync.py: -------------------------------------------------------------------------------- 1 | from facesync.facesync import facesync 2 | import os, glob 3 | import numpy as np 4 | 5 | def test_facesyc(tmpdir): 6 | fs = facesync() 7 | assert(fs.audio_files==None) 8 | assert(fs.video_files==None) 9 | assert(fs.offsets==None) 10 | assert(fs.target_audio==None) 11 | cwd = os.getcwd() 12 | video_files = [os.path.join(os.path.dirname(__file__), 'resources','sample1.MP4')] 13 | target_audio = os.path.join(os.path.dirname(__file__), 'resources','cosan_synctune.wav') 14 | fs = facesync(video_files=video_files,target_audio=target_audio) 15 | fs.extract_audio() 16 | print(glob.glob(os.path.join(os.path.dirname(__file__), 'resources','*.MP4'))) 17 | print(fs.audio_files) 18 | assert(fs.audio_files == [os.path.join(os.path.dirname(__file__), 'resources','sample1.wav')]) 19 | 20 | print('testing fft cross correlation') 21 | fs.find_offset_cross(length=3,search_start=15) 22 | assert(np.round(fs.offsets[0])==np.round(15.1612471655)) 23 | 24 | assert(isinstance(fs.offsets,list)) 25 | 26 | print('testing correlation method') 27 | 
fs.find_offset_corr(search_start=15,search_end=16,fps=441) 28 | assert(np.round(fs.offsets[0])==np.round(15.1612603317)) 29 | 30 | print('testing sparse correlation method') 31 | fs.find_offset_corr_sparse(length = 1.8,search_start=15,search_end=16,sparse_ratio=.5,fps=441) 32 | assert(np.round(fs.offsets[0])==np.round(15.1612603317)) 33 | 34 | print('testing distance method') 35 | fs.find_offset_dist(search_start=15,search_end=16,fps=441) 36 | 37 | print('testing trimming method') 38 | # fs.trim_vids(call = False) 39 | print('testing resizing method with Popen') 40 | # fs.resize_vids(resolution = 32,suffix = 'test',call = False) 41 | 42 | # add tests for video concat -------------------------------------------------------------------------------- /facesync/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | ''' 4 | FaceSync Utils Class 5 | ========================================== 6 | VideoViewer: Watch video and plot data simultaneously. 7 | AudioAligner: Align two audios manually 8 | neutralface: points that show a face 9 | ChangeAU: change AUs and return new face 10 | ''' 11 | __all__ = ['VideoViewer','AudioAligner','neutralface','audict','plotface','ChangeAU','read_facet'] 12 | __author__ = ["Jin Hyun Cheong"] 13 | __license__ = "MIT" 14 | 15 | import os 16 | import numpy as np 17 | import matplotlib.pyplot as plt 18 | 19 | 20 | 21 | def read_facet(facetfile,fullfacet=False,demean = False,demedian=False,zscore=False,fillna=False,sampling_hz=None, target_hz=None): 22 | ''' 23 | This function reads in an iMotions-FACET exported facial expression file. Uses downsample function from nltools. 24 | Arguments: 25 | fullfacet(def: False): If True, Action Units also provided in addition to default emotion predictions. 26 | demean(def: False): Demean data 27 | demedian(def: False): Demedian data 28 | zscore(def: False): Zscore data 29 | fillna(def: False): fill null values with ffill 30 | sampling_hz & target_hz: To downsample, specify the sampling hz and target hz. 31 | Returns: 32 | d: dataframe of processed facial expressions 33 | 34 | ''' 35 | import pandas as pd 36 | 37 | def downsample(data,sampling_freq=None, target=None, target_type='samples', method='mean'): 38 | ''' Downsample pandas to a new target frequency or number of samples 39 | using averaging. 
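        For example (hypothetical numbers): with sampling_freq=30 and target=15 ('hz'),
        every 2 consecutive samples are averaged, halving the sampling rate:
            d_15hz = downsample(d, sampling_freq=30, target=15, target_type='hz')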
40 | Args: 41 | data: Pandas DataFrame or Series 42 | sampling_freq: Sampling frequency of data 43 | target: downsampling target 44 | target_type: type of target can be [samples,seconds,hz] 45 | method: (str) type of downsample method ['mean','median'], 46 | default: mean 47 | Returns: 48 | downsampled pandas object 49 | ''' 50 | 51 | if not isinstance(data,(pd.DataFrame,pd.Series)): 52 | raise ValueError('Data must by a pandas DataFrame or Series instance.') 53 | if not (method=='median') | (method=='mean'): 54 | raise ValueError("Metric must be either 'mean' or 'median' ") 55 | 56 | if target_type is 'samples': 57 | n_samples = target 58 | elif target_type is 'seconds': 59 | n_samples = target*sampling_freq 60 | elif target_type is 'hz': 61 | n_samples = sampling_freq/target 62 | else: 63 | raise ValueError('Make sure target_type is "samples", "seconds", ' 64 | ' or "hz".') 65 | 66 | idx = np.sort(np.repeat(np.arange(1,data.shape[0]/n_samples,1),n_samples)) 67 | # if data.shape[0] % n_samples: 68 | if data.shape[0] > len(idx): 69 | idx = np.concatenate([idx, np.repeat(idx[-1]+1,data.shape[0]-len(idx))]) 70 | if method=='mean': 71 | return data.groupby(idx).mean().reset_index(drop=True) 72 | elif method=='median': 73 | return data.groupby(idx).median().reset_index(drop=True) 74 | 75 | d = pd.read_table(facetfile, skiprows=4, sep='\t', 76 | usecols = ['FrameTime','Joy Evidence','Anger Evidence','Surprise Evidence','Fear Evidence','Contempt Evidence', 77 | 'Disgust Evidence','Sadness Evidence','Confusion Evidence','Frustration Evidence', 78 | 'Neutral Evidence','Positive Evidence','Negative Evidence','AU1 Evidence','AU2 Evidence', 79 | 'AU4 Evidence','AU5 Evidence','AU6 Evidence','AU7 Evidence','AU9 Evidence','AU10 Evidence', 80 | 'AU12 Evidence','AU14 Evidence','AU15 Evidence','AU17 Evidence','AU18 Evidence','AU20 Evidence', 81 | 'AU23 Evidence','AU24 Evidence','AU25 Evidence','AU26 Evidence','AU28 Evidence','AU43 Evidence','NoOfFaces', 82 | 'Yaw Degrees', 'Pitch Degrees', 'Roll Degrees']) 83 | # Choose index either FrameTime or FrameNo 84 | d = d.set_index(d['FrameTime'].values/1000.0) 85 | if type(fullfacet) == bool: 86 | if fullfacet==True: 87 | facets = ['Joy Evidence','Anger Evidence','Surprise Evidence','Fear Evidence','Contempt Evidence', 88 | 'Disgust Evidence','Sadness Evidence','Confusion Evidence','Frustration Evidence', 89 | 'Neutral Evidence','Positive Evidence','Negative Evidence','AU1 Evidence','AU2 Evidence', 90 | 'AU4 Evidence','AU5 Evidence','AU6 Evidence','AU7 Evidence','AU9 Evidence','AU10 Evidence', 91 | 'AU12 Evidence','AU14 Evidence','AU15 Evidence','AU17 Evidence','AU18 Evidence','AU20 Evidence', 92 | 'AU23 Evidence','AU24 Evidence','AU25 Evidence','AU26 Evidence','AU28 Evidence','AU43 Evidence','NoOfFaces', 93 | 'Yaw Degrees', 'Pitch Degrees', 'Roll Degrees'] 94 | elif fullfacet == False: 95 | if type(fullfacet) == bool: 96 | facets = ['Joy Evidence','Anger Evidence','Surprise Evidence','Fear Evidence','Contempt Evidence', 97 | 'Disgust Evidence','Sadness Evidence','Confusion Evidence','Frustration Evidence', 98 | 'Neutral Evidence','Positive Evidence','Negative Evidence','NoOfFaces'] 99 | else: 100 | facets = fullfacet 101 | d = d[facets] # change datatype to float16 for less memory use 102 | if zscore: 103 | d = (d.ix[:,:] - d.ix[:,:].mean()) / d.ix[:,:].std(ddof=0) 104 | if fillna: 105 | d = d.fillna(method='ffill') 106 | if demedian: 107 | d = d-d.median() 108 | if demean: 109 | d = d-d.mean() 110 | if sampling_hz and target_hz: 111 | d = 
downsample(d,sampling_freq=sampling_hz,target=target_hz,target_type='hz') 112 | return d 113 | 114 | 115 | def rec_to_time(vals,fps): 116 | times = np.array(vals)/60./fps 117 | times = [str(int(np.floor(t))).zfill(2)+':'+str(int((t-np.floor(t))*60)).zfill(2) for t in times] 118 | return times 119 | 120 | def VideoViewer(path_to_video, data_df,xlabel='', ylabel='',title='',figsize=(6.5,3),legend=False,xlim=None,ylim=None,plot_rows=False): 121 | """ 122 | This function plays a video and plots the data underneath the video and moves a cursor as the video plays. 123 | Plays videos using Jupyter_Video_Widget by https://github.com/Who8MyLunch/Jupyter_Video_Widget 124 | Currently working on: Python 3 125 | For plot update to work properly plotting needs to be set to: %matplotlib notebook 126 | 127 | Args: 128 | path_to_video : file path or url to a video. tested with mov and mp4 formats. 129 | data_df : pandas dataframe with columns to be plotted in 30hz. (plotting too many column can slowdown update) 130 | ylabel(str): add ylabel 131 | legend(bool): toggle whether to plot legend 132 | xlim(list): pass xlimits [min,max] 133 | ylim(list): pass ylimits [min,max] 134 | plot_rows(bool): Draws individual plots for each column of data_df. (Default: True) 135 | """ 136 | from jpy_video import Video 137 | from IPython.display import display, HTML 138 | display(HTML(data=""" 139 | 144 | """)) 145 | 146 | f = os.path.abspath(path_to_video) 147 | wid = Video(f) 148 | wid.layout.width='640px' 149 | wid.display() 150 | lnwidth = 3 151 | 152 | fps = wid.timebase**-1 # time base is play rate hard coded at 30fps 153 | print(fps) 154 | if plot_rows: 155 | fig,axs = plt.subplots(data_df.shape[1],1,figsize=figsize) # hardcode figure size for now.. 156 | else: 157 | fig,axs = plt.subplots(1,1,figsize=figsize) 158 | t=wid.current_time 159 | if plot_rows and data_df.shape[1]>1: 160 | for ixs, ax in enumerate(axs): 161 | ax.axvline(fps*t,color='k',linestyle='--',linewidth=lnwidth) # cursor is always first of ax 162 | # plot each column 163 | data_df.iloc[:,ixs].plot(ax=ax,legend=legend,xlim=xlim,ylim=ylim) 164 | ax.set_xticks = np.arange(0,data_df.shape[0],5) 165 | ax.set(ylabel =data_df.columns[ixs], xlabel=xlabel, xticklabels = rec_to_time(ax.get_xticks(),fps)) 166 | else: 167 | axs.axvline(fps*t,color='k',linestyle='--',linewidth=lnwidth) # cursor is always first of ax 168 | # plot each column 169 | data_df.plot(ax=axs,legend=legend,xlim=xlim,ylim=ylim) 170 | axs.set_xticks = np.arange(0,data_df.shape[0],5) 171 | axs.set(ylabel = data_df.columns[0],xlabel=xlabel, title=title, xticklabels = rec_to_time(axs.get_xticks(),fps)) 172 | if legend: 173 | plt.legend(loc=1) 174 | plt.tight_layout() 175 | 176 | def plot_dat(axs,t,fps=fps): 177 | if plot_rows and data_df.shape[1]>1: 178 | for ax in axs: 179 | if ax.lines: 180 | ax.lines[0].set_xdata([np.round(fps*t),np.round(fps*t)]) 181 | else: 182 | if axs.lines: 183 | axs.lines[0].set_xdata([np.round(fps*t),np.round(fps*t)]) 184 | fig.canvas.draw() 185 | 186 | def on_value_change(change,ax=axs,fps=fps): 187 | if change['name']=='_event': 188 | plot_dat(axs=axs, t=change['new']['currentTime'],fps=fps) 189 | 190 | # call on_value_change that will call plotting function plot_dat whenever there is cursor update 191 | wid.observe(on_value_change) 192 | 193 | 194 | def AudioAligner(original, sample, search_start=0.0,search_end=15.0, xmax = 60,manual=False,reduce_orig_volume=1): 195 | """ 196 | This function pull up an interactive console to find the offsets between two audios. 
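    A minimal sketch (paths are placeholders; run in a notebook with %matplotlib notebook):
        AudioAligner(original='path/to/original.wav', sample='path/to/sample.wav',
                     search_start=0.0, search_end=15.0)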
197 | 198 | Args: 199 | original: path to original audio file (e.g. '../audios/original.wav') 200 | sample: path to the sample audio file (e.g. '../audios/sample.wav') 201 | search_start(float): start range for slider to search for offset 202 | search_end(float): end range for slider to search for offset 203 | xmax(int): Range of audio to plot from beginning 204 | manual(bool): set to True to turn off auto-refresh 205 | reduce_orig_volume(int or float): Original wav sounds are often larger so divide the volume by this number. 206 | """ 207 | import scipy.io.wavfile as wav 208 | from IPython.display import Audio 209 | from IPython.display import display 210 | from ipywidgets import widgets 211 | 212 | orig_r,orig = wav.read(original) 213 | # volume is often louder on original so you can reduce it 214 | orig = orig/reduce_orig_volume 215 | # take one channel of target audio. probably not optimal 216 | if np.ndim(orig) >1: 217 | orig = orig[:,0] 218 | # grab one channel of sample audio 219 | tomatch_r,tomatch = wav.read(sample) 220 | if np.ndim(tomatch) >1: 221 | tomatch = tomatch[:,0] 222 | 223 | fs = 44100 224 | 225 | def audwidg(offset,play_start): 226 | allshift = play_start 227 | samplesize = 30 228 | tomatchcopy = tomatch[int((allshift+offset)*tomatch_r):int((allshift+offset)*tomatch_r)+fs*samplesize] 229 | shape = tomatchcopy.shape[0] 230 | origcopy = orig[int((allshift)*tomatch_r):int((allshift)*tomatch_r)+fs*samplesize] 231 | # when target audio is shorter, pad difference with zeros 232 | if origcopy.shape[0] < tomatchcopy.shape[0]: 233 | diff = tomatchcopy.shape[0] - origcopy.shape[0] 234 | origcopy = np.pad(origcopy, pad_width = (0,diff),mode='constant') 235 | toplay = origcopy + tomatchcopy 236 | display(Audio(data=toplay,rate=fs)) 237 | 238 | def Plot_Audios(offset,x_min,x_max): 239 | # print('Precise offset : ' + str(offset)) 240 | fig,ax = plt.subplots(figsize=(20,3)) 241 | ax.plot(orig[int(fs*x_min):int(fs*x_max)],linewidth=.5,alpha=.8,color='r') 242 | ax.plot(tomatch[int(fs*x_min)+int(fs*offset) : int(fs*x_max)+int(fs*offset)],linewidth=.5,alpha=.8) 243 | ax.set_xticks([(tick-x_min)*fs for tick in range(int(x_min),int(x_max+1))]) 244 | ax.set_xticklabels([tick for tick in range(int(x_min),int(x_max)+1)]) 245 | ax.set_xlim([(x_min-x_min)*fs, (x_max-x_min)*fs] ) 246 | ax.set_ylabel('Audio') 247 | ax.set_xlabel('Target Audio Time') 248 | audwidg(offset,x_min) 249 | plt.show() 250 | 251 | widgets.interact(Plot_Audios, 252 | offset=widgets.FloatSlider(value = 0.5*(search_start+search_end), readout_format='.3f', min = float(search_start), max = float(search_end), step = 0.001, 253 | description='Adjusted offset: ',layout=widgets.Layout(width='90%')), 254 | x_min=widgets.FloatSlider(description='Min X on audio plot', value=0.0,min=0.0,max=xmax,step=0.1, layout=widgets.Layout(width='50%')), 255 | x_max=widgets.FloatSlider(description='Max X on audio plot', value=xmax,min=0.0,max=xmax,step=0.1, layout=widgets.Layout(width='50%')), 256 | __manual=manual 257 | ) 258 | 259 | neutralface = {-34: (212, 335), 260 | -33: (222, 342), -32: (237, 342), -30: (203, 335), -29: (222, 335), 261 | -28: (237, 328), -26: (227, 288), -25: (238, 292), -19: (201, 219), 262 | -18: (184, 220), -17: (169, 214), -16: (184, 204), -15: (201, 203), 263 | -14: (217, 215), -13: (225, 181), -12: (203, 172), -11: (180, 170), 264 | -10: (157, 174), -9: (142, 180), -8: (122, 222), -7: (126, 255), 265 | -6: (133, 286), -5: (139, 318), -4: (148, 349), -3: (165, 375), 266 | -2: (190, 397), -1: (219, 414), 267 | 0: (252, 
419), 268 | 1: (285, 414), 2: (315, 398), 3: (341, 377), 4: (359, 351), 269 | 5: (368, 319), 6: (371, 287), 7: (376, 254), 8: (378, 221), 270 | 9: (354, 180), 10: (339, 173), 11: (316, 167), 12: (293, 171), 271 | 13: (270, 180), 14: (281, 215), 15: (296, 203), 16: (314, 202), 272 | 17: (328, 212), 18: (315, 219), 19: (297, 219), 20: (248, 207), 273 | 21: (248, 227), 22: (248, 247), 23: (248, 268), 24: (248, 294), 274 | 25: (260, 291), 26: (271, 287), 27: (248, 333), 28: (262, 328), 275 | 29: (279, 335), 30: (296, 335), 31: (250, 340), 32: (264, 342), 276 | 33: (280, 342), 34: (288, 335)} 277 | 278 | audict = {'AU1' : {-11:(2,0),11:(-2,0),-12:(5,-8),12:(-5,-8),-13:(0,-20),13:(0,-20) }, 279 | # Brow Lowerer 280 | 'AU4': {-10:(4,5),10:(-4,5),-11:(4,15),11:(-4,15),-12:(5,20),12:(-5,20),-13:(0,15),13:(0,15) }, 281 | # Upper Lid Raiser 282 | 'AU5': {-9:(2,-9),9:(2,-9), -10:(2,-10),10:(-2,-10),-11:(2,-15),11:(-2,-15), 283 | -12:(5,-12),12:(-5,-12),-13:(0,-10),13:(0,-10), 284 | -16:(0,-10),-15:(0,-10),16:(0,-10),15:(0,-10), 285 | -19:(0,10),-18:(0,10),19:(0,10),18:(0,10)}, 286 | # cheek raiser 287 | 'AU6': {-8:(20,0),8:(-20,0), -7:(10,-5),7:(-10,-5), -6:(2,-8), 6:(-2,-8), 288 | -9:(5,5),9:(-5,5), 289 | 17:(-5,5),18:(-3,-3),19:(-3,-3), 290 | -17:(5,5),-18:(3,-3),-19:(3,-3)}, 291 | # nose wrinkler 292 | 'AU9': {-15:(2,4),15:(-2,4),-14:(2,3),14:(-2,3), 293 | 20:(0,5), 21:(0,-5), 22:(0,-7), 23:(0,-10), 294 | -26:(5,-15),-25:(0,-15),24:(0,-15),25:(0,-15),26:(-5,-15), 295 | -10:(2,0),10:(-2,0),-11:(2,8),11:(-2,8), 296 | -12:(5,12),12:(-5,12),-13:(0,10),13:(0,10) 297 | }, 298 | # Upper Lip Raiser 299 | 'AU10': {-34:(0,5),-33:(0,-2),-30:(0,3),-29:(0,-10),-28:(0,-5), 300 | -26:(-5,-8),-25:(0,-3),24:(0,-3),25:(0,-3),26:(5,-8), 301 | 27:(0,-10),28:(0,-5),29:(0,-10),30:(0,3),33:(0,-2),34:(0,5)}, 302 | # Lip corner Puller 303 | 'AU12': { -30: (-10,-15), -34: (-5,-5), 30:(10,-15), 34:(5,-5), -29:(0,0), 29:(0,0) }, 304 | #AU14 Dimpler 305 | 'AU14': {-33:(0,-5),-32:(0,-5),-30:(-5,-5),-28:(0,5),28:(0,5),30:(5,-5),31:(0,-5),32:(0,-5),33:(0,-5)}, 306 | # Chin raiser 307 | 'AU17': { -2:(5,0),-1:(5,-5),0:(0,-20),-1:(-5,-5),2:(-5,0)}, 308 | # Lip Puckerer 309 | 'AU18': {-30:(5,0), 30:(-5,0), -34:(5,0), 34:(-5,0), 310 | -33:(5,0),33:(-5,0), -29:(5,0),29:(-5,0),30:(-5,0), 311 | -28:(0,0),28:(0,0),27:(0,-8),31:(0,10),-32:(0,7),32:(0,7)} , 312 | # Lips Part 313 | 'AU25': {-28:(0,-3),28:(0,-3),27:(0,-5),31:(0,7),-32:(0,7),32:(0,7)}, 314 | # Lip Suck 315 | 'AU28': {-33:(0,-5),-32:(0,-5),-28:(0,5),24:(0,-3),28:(0,-5),31:(0,-5),32:(0,-5),33:(0,-5)} 316 | } 317 | 318 | def plotface(face, scatter=True,line=False,annot=False,ax=None): 319 | """ 320 | This function will take a dictionary of dots by (x,y) coordinates like the neutralface. 
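    A minimal sketch, following the README example (AU names come from audict in this module):
        face = ChangeAU(aulist=['AU6','AU12','AU17'], au_weight=1.0)
        ax = plotface(face, scatter=True, line=True)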
321 | 322 | """ 323 | lineface = range(-8,9) 324 | linenose = list(range(20,24)) 325 | linenose.extend([26,25,24,-25,-26,23]) 326 | linelbrow = range(-13,-8) 327 | linerbrow = range(9,14) 328 | lineleye = list(range(-19,-13)) 329 | lineleye.append(-19) 330 | linereye = list(range(14,20)) 331 | linereye.append(14) 332 | linemouth = list(range(27,31)) 333 | linemouth.extend([34,33,32,31,-32,-33,-34,-30,-29,-28,27]) 334 | lines = [lineface,linenose,linelbrow,linerbrow,lineleye,linereye,linemouth] 335 | if not ax: 336 | f, ax = plt.subplots(1,1,figsize=(7,7)) 337 | for key in face.keys(): 338 | (x,y) = face[key] 339 | if scatter: 340 | ax.scatter(x,y,s=8,c='k') 341 | if annot: 342 | 343 | ax.annotate(key,(np.sign(key)*20+x,y)) 344 | if line: 345 | for l in lines: 346 | ax.plot([face[key][0] for key in l],[face[key][1] for key in l],color='k' ) 347 | ax.set_xlim([0,500]) 348 | ax.set_ylim([0,500]) 349 | ax.invert_yaxis() 350 | return ax 351 | 352 | def ChangeAU(aulist, au_weight = 1.0, audict = audict, face = neutralface): 353 | ''' 354 | This function will return a new face with the acti on units of aulist moved based on au_weight. 355 | 356 | Args: 357 | aulist: list of AUs that are activated currently supported include 358 | ['AU1','AU4','AU5','AU6','AU9', 'AU10', 'AU12','AU14','AU17','AU18','AU25','AU28'] 359 | au_weights = float between 0 and 1.0 to activate all action unit or a 360 | dictionary to modular change of action units. 361 | audict = Dictionary of AU movements 362 | face = neutral face dictionary. 363 | ''' 364 | au_weights = {} 365 | # if dict, apply weight to each au 366 | if type(au_weight)==dict: 367 | au_weights = au_weight 368 | # if a float apply to all 369 | elif type(au_weight)==float: 370 | for au in audict.keys(): 371 | au_weights[au] = au_weight 372 | newface = face.copy() 373 | for au in aulist: 374 | for landmark in audict[au].keys(): 375 | newface[landmark] = (face[landmark][0] + au_weights[au] * audict[au][landmark][0], 376 | face[landmark][1] + au_weights[au] * audict[au][landmark][1]) 377 | return newface 378 | -------------------------------------------------------------------------------- /facesync/version.py: -------------------------------------------------------------------------------- 1 | """Specifies current version of facesync to be used by setup.py and __init__.py 2 | """ 3 | 4 | __version__ = '0.0.9' -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.9 2 | scipy -------------------------------------------------------------------------------- /screenshots/AudioAligner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/screenshots/AudioAligner.png -------------------------------------------------------------------------------- /screenshots/VideoViewer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/screenshots/VideoViewer.png -------------------------------------------------------------------------------- /screenshots/plotface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/screenshots/plotface.png 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # from nltools.version import __version__ 2 | from setuptools import setup, find_packages 3 | 4 | __version__ = '0.0.9' 5 | 6 | # try: 7 | # from setuptools.core import setup 8 | # except ImportError: 9 | # from distutils.core import setup 10 | extra_setuptools_args = dict( 11 | tests_require=['pytest'] 12 | ) 13 | 14 | setup( 15 | name='facesync', 16 | version=__version__, 17 | author='Jin Hyun Cheong', 18 | author_email='jcheong.gr@dartmouth.edu', 19 | url='https://github.com/jcheong0428/facesync', 20 | download_url = 'https://github.com/jcheong0428/facesync/tarball/0.9', 21 | install_requires=['numpy', 'scipy'], 22 | packages=find_packages(exclude=['facesync/tests']), 23 | package_data={'facesync': ['resources/*']}, 24 | license='LICENSE.txt', 25 | description='A Python package to sync videos based on audio', 26 | long_description='facesync is a python package that allows users to synchronize multiple videos based on audio.', 27 | keywords = ['psychology', 'preprocessing', 'video','audio','facecam','syncing'], 28 | classifiers = [ 29 | "Programming Language :: Python", 30 | "Operating System :: OS Independent", 31 | "Intended Audience :: Science/Research", 32 | "License :: OSI Approved :: MIT License" 33 | ], 34 | **extra_setuptools_args 35 | ) 36 | 37 | --------------------------------------------------------------------------------