├── .gitignore
├── .travis.yml
├── .zenodo.json
├── Blueprints
│ └── final_cosan_helmet_light.stl
├── LICENSE
├── README.md
├── __init__.py
├── facesync
│ ├── __init__.py
│ ├── facesync.py
│ ├── tests
│ │ ├── resources
│ │ │ ├── cosan_synctune.wav
│ │ │ ├── sample1.MP4
│ │ │ ├── sample1.txt
│ │ │ └── sample1.wav
│ │ └── test_facesync.py
│ ├── utils.py
│ └── version.py
├── requirements.txt
├── screenshots
│ ├── AudioAligner.png
│ ├── VideoViewer.png
│ └── plotface.png
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python Related #
2 | ###################
3 | *.pyc
4 | *.log
5 | facesync/*.pyc
6 | /facesync/*.pyc
7 | */*.pyc
8 |
9 | # iPython Notebook Caches #
10 | ###########################
11 | .ipynb_checkpoints
12 | Notebooks/
13 | Notebooks/.ipynb_checkpoints
14 |
15 | # OS generated files #
16 | ######################
17 | .DS_Store
18 | .DS_Store?
19 | ._*
20 | .Spotlight-V100
21 | .Trashes
22 | thumbs.db
23 | Thumbs.db
24 |
25 | # Tests & Coverage
26 | ######################
27 | .coverage
28 | htmlcov/
29 |
30 | # Build files
31 | #############
32 | .cache/
33 | build/
34 | dist/
35 | facesync.egg-info/
36 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | os:
2 | - linux
3 |
4 | language: python
5 |
6 | sudo: true
7 |
8 | python:
9 | - "2.7"
10 |
11 | before_install:
12 | - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update ; fi
13 | - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install ffmpeg; fi
14 | - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install libav; fi
15 | - sudo apt-get install libav-tools
16 |
17 | install:
18 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
19 | - bash miniconda.sh -b -p $HOME/miniconda
20 | - export PATH="$HOME/miniconda/bin:$PATH"
21 | - hash -r
22 | - conda config --set always_yes yes --set changeps1 no
23 | - conda update -q conda
24 | - conda info -a
25 | - conda create -q -n testenv python=$TRAVIS_PYTHON_VERSION pip numpy scipy pytest
26 | - source activate testenv
27 | - pip install python-coveralls
28 | - pip install -r requirements.txt
29 | - python setup.py install
30 |
31 | script: coverage run --source facesync -m py.test
32 |
33 | after_success:
34 | - coveralls
35 |
--------------------------------------------------------------------------------
/.zenodo.json:
--------------------------------------------------------------------------------
1 | {
2 | "description": "Release for F1000",
3 | "license": "other-open",
4 | "title": "cosanlab/facesync: 0.9",
5 | "version": "0.9",
6 | "upload_type": "software",
7 | "publication_date": "2019-04-12",
8 | "creators": [
9 | {
10 | "affiliation": "Dartmouth College",
11 | "name": "Jin Hyun Cheong"
12 | },
13 | {
14 | "name": "Sawyer Brooks"
15 | },
16 | {
17 | "name": "Luke J. Chang"
18 | }
19 | ],
20 | "access_right": "open",
21 | "related_identifiers": [
22 | {
23 | "scheme": "url",
24 | "identifier": "https://github.com/cosanlab/facesync/tree/0.9",
25 | "relation": "isSupplementTo"
26 | },
27 | {
28 | "scheme": "doi",
29 | "identifier": "10.5281/zenodo.2638334",
30 | "relation": "isVersionOf"
31 | }
32 | ]
33 | }
34 |
--------------------------------------------------------------------------------
/Blueprints/final_cosan_helmet_light.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/Blueprints/final_cosan_helmet_light.stl
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2017 Jin Hyun Cheong
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [DOI](https://zenodo.org/badge/latestdoi/107047285)
2 | [Build Status](https://travis-ci.org/jcheong0428/facesync)
3 | [Coverage Status](https://coveralls.io/github/jcheong0428/facesync?branch=master)
4 | [Downloads](https://pepy.tech/project/facesync)
5 |
6 | # FaceSync: Open source framework for recording facial expressions with head-mounted cameras
7 |
8 | The FaceSync toolbox provides 3D blueprints for building the head-mounted camera setup described in our [paper](https://psyarxiv.com/p5293/). The toolbox also provides functions to automatically synchronize videos based on audio, manually align audio, plot facial landmark movements, and inspect synchronized videos alongside plotted data.
9 |
10 |
11 | ## Installation
12 |
13 | To install (on macOS or Linux), open Terminal and type
14 |
15 | `pip install facesync`
16 |
17 | or
18 |
19 | `git clone https://github.com/jcheong0428/facesync.git`
20 | then, in the repository folder, type
21 | `python setup.py install`
22 |
23 |
24 | ## Dependencies
25 | For full functionality, FaceSync requires [ffmpeg](https://ffmpeg.org/) and the [libav](https://libav.org/) library.
26 |
27 | Linux
28 | `sudo apt-get install libav-tools`
29 |
30 | OS X
31 | `brew install ffmpeg`
32 | `brew install libav`
33 |
34 | FaceSync also requires the following Python packages:
35 | - numpy
36 | - scipy
37 | You may also install these via `pip install -r requirements.txt`
38 |
39 | ## Recommended Processing Steps
40 | 1. Extract audio from the target video.
41 | 2. Find the offset using the extracted audio.
42 | 3. Trim the video using the offset (see the trimming sketch after the example below).
43 | *Note: if you need to resize your video, do so before trimming;
44 | otherwise the timing can be off.
45 |
46 | ```
47 | from facesync.facesync import facesync
48 | # Change the file names to include the full path
49 | video_files = ['path/to/sample1.MP4']
50 | target_audio = 'path/to/cosan_synctune.wav'
51 | # Initialize the facesync class
52 | fs = facesync(video_files=video_files,target_audio=target_audio)
53 | # Extracts audio from sample1.MP4
54 | fs.extract_audio()
55 | # Find offset by correlation
56 | fs.find_offset_corr(search_start=14,search_end=16)
57 | print(fs.offsets)
58 | # Find offset by FFT cross-correlation
59 | fs.find_offset_cross(search_start=14)
60 | print(fs.offsets)
61 | ```
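
Once `fs.offsets` is populated by the offset-finding calls above, step 3 is trimming each video by its offset. A minimal sketch using the `trim_vids` method (by default it writes new files with a `_trimmed` suffix next to the originals; the explicit offset value below is illustrative):

```
# Trim each video in fs.video_files by the offsets found above
fs.trim_vids()
# Or pass offsets explicitly (one value per video, in seconds)
fs.trim_vids(offsets=[15.18])
```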
62 |
63 | # FaceSync provides handy utilities for working with facial expression data.
64 |
65 | ## Manually align the audios with AudioAligner.
66 | ```
67 | %matplotlib notebook
68 | from facesync.utils import AudioAligner
69 | file_original = 'path/to/audio.wav'
70 | file_sample = 'path/to/sample.wav'
71 | AudioAligner(original=file_original, sample=file_sample)
72 | ```
73 |
74 | ![AudioAligner](screenshots/AudioAligner.png)
75 |
76 | ## Plot facial landmarks and how they change as a result of Action Unit changes.
77 | ```
78 | %matplotlib notebook
79 | from facesync.utils import ChangeAU, plotface
80 | changed_face = ChangeAU(aulist=['AU6','AU12','AU17'], au_weight = 1.0)
81 | ax = plotface(changed_face)
82 | ```
83 |
84 | ![plotface](screenshots/plotface.png)
85 |
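`au_weight` can also be a dictionary, so individual action units can be moved by different amounts (a small sketch; the specific weights are illustrative):
```
%matplotlib notebook
from facesync.utils import ChangeAU, plotface
# Weight each action unit separately instead of applying a single float to all of them
subtle_smile = ChangeAU(aulist=['AU6','AU12'], au_weight={'AU6': 0.5, 'AU12': 1.0})
ax = plotface(subtle_smile, line=True)
```
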
86 | ## Use the VideoViewer widget to play both video and data at the same time (only available in Python).
87 | ```
88 | import facesync.utils as utils
89 | %matplotlib notebook
90 | utils.VideoViewer(path_to_video='path/to/video.mp4', data_df = fexDataFrame)
91 | ```
92 | ![VideoViewer](screenshots/VideoViewer.png)
93 |
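The `data_df` argument can be any pandas DataFrame sampled at the video frame rate (30 Hz). For iMotions-FACET exports, one way to build it is with `facesync.utils.read_facet` (a sketch; the file path and 60 Hz source sampling rate below are hypothetical):
```
import facesync.utils as utils
from facesync.utils import read_facet
# Read a FACET export, forward-fill missing values, and downsample to 30 Hz to match the video
fexDataFrame = read_facet('path/to/subject1_facet.txt', fillna=True, sampling_hz=60, target_hz=30)
utils.VideoViewer(path_to_video='path/to/video.mp4', data_df=fexDataFrame)
```
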
94 | # Citation
95 | Please cite the following paper if you use our head-mounted camera setup or software.
96 | #### Cheong, J. H., Brooks, S., & Chang, L. J. (2017, November 1). FaceSync: Open source framework for recording facial expressions with head-mounted cameras. Retrieved from psyarxiv.com/p5293
97 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ["facesync"]
--------------------------------------------------------------------------------
/facesync/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [ 'facesync','utils',
2 | '__version__']
3 |
4 | from .version import __version__
5 |
--------------------------------------------------------------------------------
/facesync/facesync.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | '''
4 | FaceSync Class
5 | ==========================================
6 | Class to sync videos by audio matching.
7 |
8 | '''
9 | __all__ = ['facesync']
10 | __author__ = ["Jin Hyun Cheong"]
11 | __license__ = "MIT"
12 |
13 | import os
14 | import numpy as np
15 | import subprocess
16 | import scipy.io.wavfile as wav
17 |
18 | def _get_vid_resolution(vidFile):
19 | """ Gets video resolution for a given file using ffprobe.
20 | """
21 | cmd = [
22 | 'ffprobe','-v','error','-of','flat=s=_','-select_streams','v:0','-show_entries','stream=height,width', vidFile
23 | ]
24 | proc = subprocess.Popen(cmd,stdout=subprocess.PIPE)
25 |     out = proc.communicate()[0].decode('utf-8')  # decode bytes so this also works on Python 3
26 | out = out.split('\n')[:2]
27 | return tuple([int(elem.split('=')[-1]) for elem in out])
28 |
29 | def write_offset_to_file(afile, offset, header='offset'):
30 | '''
31 | Helper function to write offset output to file.
32 | '''
33 | (path2fname, fname) = os.path.split(afile)
34 | fname = os.path.join(path2fname,fname.split(".")[0] + '.txt')
35 | f = open(fname, 'a+')
36 | f.write(header+'\n')
37 | f.write(str(offset)+'\n')
38 | f.close()
39 |
40 | def processInput(rate0,data0,afile,fps,length,search_start,search_end,verbose):
41 | '''
42 | Helper function for multiprocessing
43 | '''
44 | if verbose:
45 | print(afile)
46 | rate1,data1 = wav.read(afile)
47 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate
48 | searchtime = search_end-search_start # seconds to search alignment
49 | if np.ndim(data0)>1:
50 | data0 = data0[:,0]
51 | if np.ndim(data1)>1:
52 | data1 = data1[:,0]
53 | to_compare = data0[0:rate0*length]
54 | try:
55 | assert(data1.shape[0] - (searchtime+length)*rate0 >= 0)
56 | except:
57 |         print("Sample audio is too short: reduce length or searchtime to leave a buffer at the end.")
58 |     rs = []
59 |     ts = list(np.linspace(search_start,search_end,fps*searchtime))
60 |     # correlate the target snippet against the sample at each candidate start time
61 |     for t in ts:
62 |         sample = data1[int(rate0*t):int(rate0*(t+length))][0:to_compare.shape[0]]
63 |         r, _ = calc_rs(t, to_compare, sample)
64 |         rs.append(r)
65 |     # offset_r = ts[np.argmax(rs)] + search_start
66 |     offset_r = ts[np.argmax(rs)]
67 |     # the offset is written to file and returned rather than stored on an instance
68 | write_offset_to_file(afile, offset_r,header='corr_multi')
69 | return rs,offset_r
70 |
71 | def calc_rs(i, to_compare, sample):
72 |     r = -np.inf  # fallback so a mismatched window can never win the argmax
73 |     if to_compare.shape[0] == sample.shape[0]:
74 |         r = np.corrcoef(to_compare,sample)[0][1]
75 |     else:
76 |         print("Shape mismatch at %s" %str(i))
77 |     return r, i
78 |
79 | class facesync(object):
80 |     """
81 |     facesync is a class that represents multiple videos
82 |     so that they can be aligned based on their audio tracks.
83 | 
84 |     Args:
85 |         video_files: list of video filenames to process
86 |         audio_files: list of audio filenames extracted from, or matching, the videos
87 |         target_audio: audio file to which the videos will be aligned
88 |         offsets: list of offsets (in seconds) used to trim the video_files
89 |         **kwargs: additional keyword arguments
90 | 
91 |     Offsets can be supplied directly or estimated with the find_offset_* methods.
92 |     """
93 | def __init__(self, video_files=None, audio_files=None, target_audio = None, offsets=None,**kwargs):
94 | '''
95 | Args:
96 | video_files: list of video filenames to process
97 |             audio_files: list of audio filenames to process
98 | target_audio: audio to which videos will be aligned
99 | offsets: list of offsets to trim the video_files
100 | '''
101 | # Initialize attributes
102 | self.video_files = video_files
103 | self.audio_files = audio_files
104 | self.target_audio = target_audio
105 | self.offsets = offsets
106 |
107 | if self.video_files is not None:
108 | assert(isinstance(self.video_files,list)),'Place path to files in a list'
109 | if self.audio_files is not None:
110 | assert(isinstance(self.audio_files,list)),'Place path to files in a list'
111 | if (self.video_files is not None) & (self.offsets is not None):
112 | assert(len(self.video_files)==len(self.offsets)),'Number of videos and number of offsets should match'
113 |
114 | def extract_audio(self,rate=44100,call=True,verbose=True):
115 | '''
116 | This method extracts audio from video files in self.video_files and saves audio files in self.audio_files
117 |
118 | Input
119 | ------------
120 | rate: rate of audio stream frequency to be extracted, default 44100
121 | call: boolean, whether to wait for each process to finish or open multiple threads
122 | verbose: if True, prints the currently processing audio filename
123 | '''
124 | assert(len(self.video_files)!=0),'No video files to process'
125 | self.audio_files = []
126 | for i, vidfile in enumerate(self.video_files):
127 | if verbose:
128 | print(vidfile)
129 | (path2fname, vname) = os.path.split(vidfile)
130 | aname = vname.split(".")[0] + ".wav"
131 | infile = os.path.join(path2fname,vname)
132 | outfile = os.path.join(path2fname,aname)
133 | self.audio_files.append(outfile)
134 | # cmd = ' '.join(["avconv", "-i", infile, "-y", "-vn", "-ac", "1","-ar",str(rate),"-f", "wav", outfile])
135 | command = "ffmpeg -y -i " + infile + " -ab 128k -ac 2 -ar " +str(rate) +" -vn " + outfile
136 | if call:
137 | subprocess.call(command, shell=True)
138 | else:
139 | subprocess.Popen(command, shell=True)
140 |
141 | def find_offset_cross(self,length = 10,search_start=0,verbose=True):
142 | '''
143 | Find offset using Fourier Transform cross correlation.
144 |
145 | Input
146 | ------------
147 | length: seconds to use for the cross correlation matching, default is 10 seconds
148 | verbose: if True, prints the currently processing audio filename
149 |
150 | Output
151 | ------------
152 |         allrs : list of cross-correlation results from fftconvolve; the offset is recovered by subtracting the argmax index from the zero-lag index and dividing by the sampling rate.
153 | '''
154 | import numpy as np
155 | from scipy.signal import fftconvolve
156 | assert(self.target_audio is not None), 'Target audio not specified'
157 | assert(self.audio_files is not None), 'Audio files not specified'
158 | self.offsets = []
159 | rate0,data0 = wav.read(self.target_audio)
160 | allrs = []
161 | for i, afile in enumerate(self.audio_files):
162 | if verbose:
163 | print(afile)
164 | rate1,data1 = wav.read(afile)
165 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate
166 | # Take first audio channel
167 | if np.ndim(data0)>1:
168 | data0 = data0[:,0]
169 | if np.ndim(data1)>1:
170 | data1 = data1[:,0]
171 | x = data0[:rate0*length] # target audio
172 | y = data1[int(search_start*rate0):int(search_start*rate0)+rate0*length] # change sample audio location
173 | # Pad target audio with zeros if not same length.
174 | if len(x) < len(y):
175 | xnew = np.zeros_like(y)
176 | xnew[:len(x)] = x
177 | x = xnew
178 | assert(len(x)==len(y)), "Length of two samples must be the same"
179 | crosscorr = fftconvolve(x,y[::-1],'full')
180 | zero_index = int(len(crosscorr) / 2 ) -1
181 | offset_x = search_start+(zero_index - np.argmax(crosscorr))/float(rate0)
182 | # assert(len(crosscorr)==len(x))
183 | self.offsets.append(offset_x)
184 | write_offset_to_file(afile, offset_x,header='xcorr_len'+str(length))
185 | allrs.append(crosscorr)
186 | return allrs
187 |
188 | def find_offset_corr(self,length=5,search_start=0,search_end=20,fps=44100,verbose=True):
189 | '''
190 | Find offset based on correlation of two audio.
191 |
192 | Input
193 | ------------
194 |         self.target_audio : Original audio to which the other files will be aligned
195 |         self.audio_files : List of audio files that need to be trimmed
196 | length : length of original sample to compare
197 | search_start, search_end: start and end times to search for alignment in seconds
198 | fps: level of temporal precision, default 44100
199 | verbose: if True, prints the currently processing audio filename
200 |
201 | Output
202 | ------------
203 | rs: correlation values
204 | '''
205 | assert(self.target_audio is not None), 'Target audio not specified'
206 | assert(self.audio_files is not None), 'Audio files not specified'
207 | self.offsets = []
208 | allrs = []
209 | rate0,data0 = wav.read(self.target_audio)
210 | for i, afile in enumerate(self.audio_files):
211 | if verbose:
212 | print(afile)
213 | rate1,data1 = wav.read(afile)
214 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate
215 | searchtime = search_end-search_start # seconds to search alignment
216 | if np.ndim(data0)>1:
217 | data0 = data0[:,0]
218 | if np.ndim(data1)>1:
219 | data1 = data1[:,0]
220 | to_compare = data0[0:rate0*length]
221 | try:
222 | assert(data1.shape[0] - (searchtime+length)*rate0 >= 0)
223 | except:
224 |                 print("Sample audio is too short: reduce length or searchtime to leave a buffer at the end.")
225 | rs = []
226 | ts = []
227 | # for i in np.linspace(0,searchtime,fps*searchtime):
228 | for i in np.linspace(search_start,search_end,fps*searchtime):
229 | sample = data1[int(rate0*i):int(rate0*(i+length))][0:to_compare.shape[0]]
230 | try:
231 | assert(to_compare.shape[0]==sample.shape[0])
232 | except:
233 | print("Shape mismatch at %s" %str(i))
234 | try:
235 | rs.append(np.corrcoef(to_compare,sample)[0][1])
236 | ts.append(i)
237 | except:
238 | pass
239 | allrs.append(rs)
240 | # offset_r = ts[np.argmax(rs)] + search_start
241 | offset_r = ts[np.argmax(rs)]
242 | self.offsets.append(offset_r)
243 | write_offset_to_file(afile, offset_r,header='corr_fps'+str(fps)+'_len'+str(length)+'_start'+str(search_start)+'_end'+str(search_end))
244 | return allrs
245 |
246 | def find_offset_corr_sparse(self,length=5,search_start=0,search_end=20,fps=44100,sparse_ratio=.5,verbose=True):
247 | '''
248 | Finds offset by correlation with sparse sampling.
249 |
250 | Input
251 | ------------
252 |         self.target_audio : Original audio to which the other files will be aligned
253 |         self.audio_files : List of audio files that need to be trimmed
254 | length : length of original sample to compare
255 | search_start, search_end: start and end times to search for alignment in seconds
256 | fps: level of temporal precision, default 44100
257 | sparse_ratio = Determines the sparse sampling of the target audio to match (default is .5)
258 | verbose: if True, prints the currently processing audio filename
259 |
260 | Output
261 | ------------
262 | offset_r : time to trim based on correlation
263 | offset_d : time to trim based on distance
264 | rs: correlation values
265 | ds: difference values
266 | '''
267 | assert(self.target_audio is not None), 'Target audio not specified'
268 | assert(self.audio_files is not None), 'Audio files not specified'
269 | self.offsets = []
270 | allrs = []
271 | rate0,data0 = wav.read(self.target_audio)
272 | for i, afile in enumerate(self.audio_files):
273 | if verbose:
274 | print(afile)
275 | rate1,data1 = wav.read(afile)
276 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate
277 | searchtime = search_end-search_start # seconds to search alignment
278 | if np.ndim(data0)>1:
279 | data0 = data0[:,0]
280 | if np.ndim(data1)>1:
281 | data1 = data1[:,0]
282 | # to_compare = data0[0:rate0*length]
283 | sampleix = list(range(0,int(rate0*length)-1))
284 | np.random.shuffle(sampleix)
285 | sampleix = np.sort(sampleix[0:int(rate0*length*sparse_ratio)])
286 | to_compare = data0[sampleix]
287 |
288 | try:
289 | assert(data1.shape[0] - (searchtime+length)*rate0 >= 0)
290 | except:
291 |                 print("Sample audio is too short: reduce length or searchtime to leave a buffer at the end.")
292 | rs = []
293 | ts = []
294 | # for i in np.linspace(0,searchtime,fps*searchtime):
295 | for i in np.linspace(search_start,search_end,fps*searchtime):
296 | # sample = data1[int(rate0*i):int(rate0*(i+length))][0:to_compare.shape[0]]
297 | sample = data1[int(rate0*i):int(rate0*(i+length))][sampleix]
298 | try:
299 | assert(to_compare.shape[0]==sample.shape[0])
300 | except:
301 | print("Shape mismatch at %s" %str(i))
302 | try:
303 | rs.append(np.corrcoef(to_compare,sample)[0][1])
304 | ts.append(i)
305 | except:
306 | pass
307 | allrs.append(rs)
308 | # offset_r = ts[np.argmax(rs)] + search_start
309 | offset_r = ts[np.argmax(rs)]
310 | self.offsets.append(offset_r)
311 | write_offset_to_file(afile, offset_r, header='corr_sparse_fps'+str(fps)+'_len'+str(length)+'_start'+str(search_start)+'_end'+str(search_end))
312 | return allrs
313 |
314 | def find_offset_corr_multi(self,length=5,search_start=0,search_end=20,fps=44100,verbose=True):
315 | '''
316 | Find offset based on correlation with multiprocessing.
317 | Requires joblib package.
318 |
319 | Input
320 | ------------
321 |         self.target_audio : Original audio to which the other files will be aligned
322 |         self.audio_files : List of audio files that need to be trimmed
323 | length : length of original sample to compare
324 | search_start, search_end: start and end times to search for alignment in seconds
325 | fps: level of temporal precision, default 44100
326 | verbose: if True, prints the currently processing audio filename
327 |
328 | Output
329 | ------------
330 | self.offsets: max offsets
331 | rs: correlation values
332 | '''
333 | from joblib import Parallel, delayed
334 | import multiprocessing
335 | num_cores = multiprocessing.cpu_count()-1 # don't use all cores
336 |
337 | assert(self.target_audio is not None), 'Target audio not specified'
338 | assert(self.audio_files is not None), 'Audio files not specified'
339 | self.offsets = []
340 | allrs = []
341 | rate0,data0 = wav.read(self.target_audio)
342 | for i, afile in enumerate(self.audio_files):
343 | if verbose:
344 | print(afile)
345 | rate1,data1 = wav.read(afile)
346 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate
347 | searchtime = search_end-search_start # seconds to search alignment
348 | if np.ndim(data0)>1:
349 | data0 = data0[:,0]
350 | if np.ndim(data1)>1:
351 | data1 = data1[:,0]
352 | to_compare = data0[0:rate0*length]
353 | try:
354 | assert(data1.shape[0] - (searchtime+length)*rate0 >= 0)
355 | except:
356 |                 print("Sample audio is too short: reduce length or searchtime to leave a buffer at the end.")
357 | rs = []
358 | ts = []
359 | out = Parallel(n_jobs=num_cores,backend='threading')(delayed(calc_rs)(i,to_compare,data1[int(rate0*i):int(rate0*(i+length))][0:to_compare.shape[0]]) for i in np.linspace(search_start,search_end,fps*searchtime))
360 | rs,ts= zip(*out)
361 | allrs.append(rs)
362 | offset_r = ts[np.argmax(rs)]
363 | self.offsets.append(offset_r)
364 | write_offset_to_file(afile, offset_r,header='corr_fps'+str(fps)+'_len'+str(length)+'_start'+str(search_start)+'_end'+str(search_end))
365 | return allrs
366 |
367 | def find_offset_dist(self,length=5,search_start=0,search_end=20,fps=44100,verbose=True):
368 | '''
369 | Find offset based on squared distance of audio wave.
370 |
371 | Input
372 | ------------
373 |         self.target_audio : Original audio to which the other files will be aligned
374 |         self.audio_files : List of audio files that need to be trimmed
375 | length : length of original sample to compare
376 | search_start, search_end: start and end times to search for alignment in seconds
377 | fps: level of temporal precision, default 44100
378 | verbose: if True, prints the currently processing audio filename
379 |
380 | Output
381 | ------------
382 | offset_d : time to trim based on distance
383 | rs: correlation values
384 | ds: difference values
385 | '''
386 | assert(self.target_audio is not None), 'Target audio not specified'
387 | assert(self.audio_files is not None), 'Audio files not specified'
388 | self.offsets = []
389 | allds = []
390 | rate0,data0 = wav.read(self.target_audio)
391 | for i, afile in enumerate(self.audio_files):
392 | if verbose:
393 | print(afile)
394 | rate1,data1 = wav.read(afile)
395 | assert(rate0==rate1), "Audio sampling rate is not the same for target and sample" # Check if they have same rate
396 | searchtime = search_end-search_start # seconds to search alignment
397 | if np.ndim(data0)>1:
398 | data0 = data0[:,0]
399 | if np.ndim(data1)>1:
400 | data1 = data1[:,0]
401 | to_compare = data0[0:rate0*length]
402 | try:
403 | assert(data1.shape[0] - (searchtime+length)*rate0 >= 0)
404 | except:
405 |                 print("Sample audio is too short: reduce length or searchtime to leave a buffer at the end.")
406 | ds = []
407 | ts = []
408 | # for i in np.linspace(0,searchtime,fps*searchtime):
409 | for i in np.linspace(search_start,search_end,fps*searchtime):
410 | sample = data1[int(rate0*i):int(rate0*(i+length))][0:to_compare.shape[0]]
411 | try:
412 | assert(to_compare.shape[0]==sample.shape[0])
413 | except:
414 | print("Shape mismatch at %s" %str(i))
415 | try:
416 | ds.append(sum((to_compare-sample)**2))
417 | ts.append(i)
418 | except:
419 | pass
420 | allds.append(ds)
421 | # offset_d = ts[np.argmin(ds)] + search_start
422 | offset_d = ts[np.argmin(ds)]
423 | self.offsets.append(offset_d)
424 | write_offset_to_file(afile, offset_d,header='dist_fps'+str(fps)+'_len'+str(length)+'_start'+str(search_start)+'_end'+str(search_end))
425 | return allds
426 |
427 | def resize_vids(self, resolution = 64, suffix = None,call = True, force=False):
428 | '''
429 | Resize videos.
430 |
431 | Inputs
432 | ------------
433 | resolution: height of the video
434 | suffix: what to name the resized video. If not specified, will append video names with resolution
435 | call: boolean, whether to wait for each process to finish or open multiple threads,
436 | True: call, False: multithread, default is call
437 |             force: whether to force creating new files even if some video files are already at the desired resolution; defaults to False
438 | '''
439 | if suffix == None:
440 | suffix = str(resolution)
441 |
442 | out = []
443 | for vidfile in self.video_files:
444 | (path2fname, vname) = os.path.split(vidfile)
445 | print("Resizing video: %s" % (vname))
446 | current_resolution = _get_vid_resolution(vidfile)
447 | if current_resolution[1] == resolution and not force:
448 | print("Native resolution already ok, skipping: %s" % (vname))
449 | final_vidname = os.path.join(path2fname,vname)
450 | out.append(final_vidname)
451 | continue
452 | else:
453 | final_vidname = os.path.join(path2fname,vname.split('.')[0]+'_'+suffix+'.'+vname.split('.')[-1])
454 | out.append(final_vidname)
455 | command = 'ffmpeg -y -i ' + vidfile + ' -vf scale=-1:'+str(resolution)+' '+final_vidname
456 | if not os.path.exists(final_vidname):
457 | if call:
458 | subprocess.call(command, shell=True)
459 | else:
460 | subprocess.Popen(command, shell=True)
461 | return out
462 |
463 | def concat_vids(self, final_vidname = None, resolution_fix=False, checkres=True):
464 | '''
465 | Concatenate list of videos to one video.
466 |
467 | Inputs
468 | ------------
469 |         final_vidname: file path and name of the concatenated video. If not specified, the first video's name appended with '_all' will be used.
470 | '''
471 | assert(len(self.video_files)!=0),'No video files to process'
472 | if (final_vidname != None):
473 | self.final_vidname = final_vidname
474 | if (len(self.video_files)!=0) and (final_vidname == None):
475 | (path2fname, vname) = os.path.split(self.video_files[0])
476 | self.final_vidname = os.path.join(path2fname,vname.split('.')[0]+'_all.'+vname.split('.')[-1])
477 | assert(type(self.final_vidname)==str),'final_vidname must be a string with full path'
478 |
479 | #Check that files are all of the same resolution
480 | if checkres:
481 | resolutions = [_get_vid_resolution(elem) for elem in self.video_files]
482 | if len(set(resolutions)) > 1:
483 | if resolution_fix:
484 | min_resolution = min([elem[1] for elem in resolutions])
485 | print("Videos mismatch in resolution, resizing to: %s..." % (min_resolution))
486 | new_vids= self.resize_vids(resolution=min_resolution)
487 | self.video_files = new_vids
488 | resolutions = [_get_vid_resolution(elem) for elem in self.video_files]
489 | assert(len(set(resolutions))<=1),"Videos still mismatched. Something went wrong with automatic resizing? Try resizing manually."
490 | print("Resizing complete. Continuing.")
491 | else:
492 | raise TypeError("Video files have different resolutions!")
493 |
494 | # Create intermediate video files
495 | tempfiles = str();
496 | for i, vidfile in enumerate(self.video_files):
497 | (path2fname, vname) = os.path.split(vidfile)
498 | print("Joining video: %s" % (vname))
499 | if len(tempfiles)!=0:
500 | tempfiles = tempfiles+"|"
501 | intermediatefile = os.path.join(path2fname,"intermediate"+str(i)+'.ts')
502 | if not os.path.exists(intermediatefile):
503 | command = "ffmpeg -i "+ vidfile +" -c copy -bsf:v h264_mp4toannexb -f mpegts " + intermediatefile
504 | subprocess.call(command, shell=True)
505 | tempfiles = tempfiles + intermediatefile
506 |
507 | # Concatenate videos
508 | command = 'ffmpeg -y -i "concat:' + tempfiles + '" -c copy -bsf:a aac_adtstoasc '+ self.final_vidname
509 | subprocess.call(command, shell=True)
510 | #remove intermediates
511 | for i, vidfile in enumerate(self.video_files):
512 | (path2fname, vname) = os.path.split(vidfile)
513 | intermediatefile = os.path.join(path2fname,"intermediate"+str(i)+'.ts')
514 | command = "rm -f " + intermediatefile
515 | subprocess.call(command, shell=True)
516 |
517 | def trim_vids(self,offsets = None, suffix = None,call=True):
518 | '''
519 | Trims video based on offset
520 |
521 | Inputs
522 | ------------
523 | offsets: list of offsets to trim the self.video_files with
524 | length of offsets should match length of self.video_files
525 | suffix: string to add to end of the trimmed video, default: 'trimmed'
526 | call: boolean, whether to wait for each process to finish or open multiple threads,
527 | True: call, False: multithread, default is call
528 | '''
529 | if suffix == None:
530 | suffix = 'trimmed'
531 | if offsets is not None:
532 | self.offsets= offsets
533 | assert(len(self.video_files)==len(self.offsets)),'Number of videos and number of offsets should match'
534 | for i,vidfile in enumerate(self.video_files):
535 | seconds = str(self.offsets[i])
536 | (path2fname, vname) = os.path.split(vidfile)
537 | print("Trimming video: %s" % (vname))
538 | final_vidname = os.path.join(path2fname,vname.split('.')[0]+'_'+suffix+'.'+vname.split('.')[-1])
539 | # command = 'ffmpeg -y -ss ' + str(seconds) + ' -i ' + vidfile + ' -c copy ' + final_vidname
540 | # command = 'ffmpeg -y -ss ' + seconds.split('.')[0] + ' -i ' + vidfile + ' -ss 00:00:00.' + seconds.split('.')[1] + ' -c copy ' + final_vidname
541 | command = 'ffmpeg -y -i ' + vidfile + ' -ss ' + str(seconds) + ' -crf 23 ' + final_vidname
542 | # command = 'ffmpeg -y -i ' + vidfile + ' -ss ' + str(seconds) + ' -vcodec libx264 -crf 23 -acodec copy ' + final_vidname
543 | if call:
544 | subprocess.call(command, shell=True)
545 | else:
546 | subprocess.Popen(command, shell=True)
547 |
--------------------------------------------------------------------------------
/facesync/tests/resources/cosan_synctune.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/facesync/tests/resources/cosan_synctune.wav
--------------------------------------------------------------------------------
/facesync/tests/resources/sample1.MP4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/facesync/tests/resources/sample1.MP4
--------------------------------------------------------------------------------
/facesync/tests/resources/sample1.txt:
--------------------------------------------------------------------------------
1 | xcorr_len3
2 | 15.1612471655
3 | corr_fps441_len5_start15_end16
4 | 15.1772727273
5 | corr_sparse_fps441_len1.8_start15_end16
6 | 15.1772727273
7 | dist_fps441_len5_start15_end16
8 | 15.0590909091
9 | xcorr_len3
10 | 15.161224489795918
11 | corr_fps441_len5_start15_end16
12 | 15.177272727272728
13 | corr_sparse_fps441_len1.8_start15_end16
14 | 15.177272727272728
15 | dist_fps441_len5_start15_end16
16 | 15.059090909090909
17 | xcorr_len3
18 | 15.161224489795918
19 | corr_fps441_len5_start15_end16
20 | 15.177272727272728
21 | corr_sparse_fps441_len1.8_start15_end16
22 | 15.177272727272728
23 | dist_fps441_len5_start15_end16
24 | 15.059090909090909
25 |
--------------------------------------------------------------------------------
/facesync/tests/resources/sample1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/facesync/tests/resources/sample1.wav
--------------------------------------------------------------------------------
/facesync/tests/test_facesync.py:
--------------------------------------------------------------------------------
1 | from facesync.facesync import facesync
2 | import os, glob
3 | import numpy as np
4 |
5 | def test_facesync(tmpdir):
6 | fs = facesync()
7 | assert(fs.audio_files==None)
8 | assert(fs.video_files==None)
9 | assert(fs.offsets==None)
10 | assert(fs.target_audio==None)
11 | cwd = os.getcwd()
12 | video_files = [os.path.join(os.path.dirname(__file__), 'resources','sample1.MP4')]
13 | target_audio = os.path.join(os.path.dirname(__file__), 'resources','cosan_synctune.wav')
14 | fs = facesync(video_files=video_files,target_audio=target_audio)
15 | fs.extract_audio()
16 | print(glob.glob(os.path.join(os.path.dirname(__file__), 'resources','*.MP4')))
17 | print(fs.audio_files)
18 | assert(fs.audio_files == [os.path.join(os.path.dirname(__file__), 'resources','sample1.wav')])
19 |
20 | print('testing fft cross correlation')
21 | fs.find_offset_cross(length=3,search_start=15)
22 | assert(np.round(fs.offsets[0])==np.round(15.1612471655))
23 |
24 | assert(isinstance(fs.offsets,list))
25 |
26 | print('testing correlation method')
27 | fs.find_offset_corr(search_start=15,search_end=16,fps=441)
28 | assert(np.round(fs.offsets[0])==np.round(15.1612603317))
29 |
30 | print('testing sparse correlation method')
31 | fs.find_offset_corr_sparse(length = 1.8,search_start=15,search_end=16,sparse_ratio=.5,fps=441)
32 | assert(np.round(fs.offsets[0])==np.round(15.1612603317))
33 |
34 | print('testing distance method')
35 | fs.find_offset_dist(search_start=15,search_end=16,fps=441)
36 |
37 | print('testing trimming method')
38 | # fs.trim_vids(call = False)
39 | print('testing resizing method with Popen')
40 | # fs.resize_vids(resolution = 32,suffix = 'test',call = False)
41 |
42 | # add tests for video concat
--------------------------------------------------------------------------------
/facesync/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | '''
4 | FaceSync Utils Class
5 | ==========================================
6 | VideoViewer: Watch video and plot data simultaneously.
7 | AudioAligner: Align two audios manually
8 | neutralface: points that show a face
9 | ChangeAU: change AUs and return new face
10 | '''
11 | __all__ = ['VideoViewer','AudioAligner','neutralface','audict','plotface','ChangeAU','read_facet']
12 | __author__ = ["Jin Hyun Cheong"]
13 | __license__ = "MIT"
14 |
15 | import os
16 | import numpy as np
17 | import matplotlib.pyplot as plt
18 |
19 |
20 |
21 | def read_facet(facetfile,fullfacet=False,demean = False,demedian=False,zscore=False,fillna=False,sampling_hz=None, target_hz=None):
22 | '''
23 | This function reads in an iMotions-FACET exported facial expression file. Uses downsample function from nltools.
24 | Arguments:
25 | fullfacet(def: False): If True, Action Units also provided in addition to default emotion predictions.
26 | demean(def: False): Demean data
27 | demedian(def: False): Demedian data
28 | zscore(def: False): Zscore data
29 | fillna(def: False): fill null values with ffill
30 | sampling_hz & target_hz: To downsample, specify the sampling hz and target hz.
31 | Returns:
32 | d: dataframe of processed facial expressions
33 |
34 | '''
35 | import pandas as pd
36 |
37 | def downsample(data,sampling_freq=None, target=None, target_type='samples', method='mean'):
38 | ''' Downsample pandas to a new target frequency or number of samples
39 | using averaging.
40 | Args:
41 | data: Pandas DataFrame or Series
42 | sampling_freq: Sampling frequency of data
43 | target: downsampling target
44 | target_type: type of target can be [samples,seconds,hz]
45 | method: (str) type of downsample method ['mean','median'],
46 | default: mean
47 | Returns:
48 | downsampled pandas object
49 | '''
50 |
51 | if not isinstance(data,(pd.DataFrame,pd.Series)):
52 | raise ValueError('Data must by a pandas DataFrame or Series instance.')
53 |         if method not in ('mean', 'median'):
54 |             raise ValueError("method must be either 'mean' or 'median'")
55 |
56 |         if target_type == 'samples':
57 |             n_samples = target
58 |         elif target_type == 'seconds':
59 |             n_samples = target*sampling_freq
60 |         elif target_type == 'hz':
61 | n_samples = sampling_freq/target
62 | else:
63 | raise ValueError('Make sure target_type is "samples", "seconds", '
64 | ' or "hz".')
65 |
66 | idx = np.sort(np.repeat(np.arange(1,data.shape[0]/n_samples,1),n_samples))
67 | # if data.shape[0] % n_samples:
68 | if data.shape[0] > len(idx):
69 | idx = np.concatenate([idx, np.repeat(idx[-1]+1,data.shape[0]-len(idx))])
70 | if method=='mean':
71 | return data.groupby(idx).mean().reset_index(drop=True)
72 | elif method=='median':
73 | return data.groupby(idx).median().reset_index(drop=True)
74 |
75 | d = pd.read_table(facetfile, skiprows=4, sep='\t',
76 | usecols = ['FrameTime','Joy Evidence','Anger Evidence','Surprise Evidence','Fear Evidence','Contempt Evidence',
77 | 'Disgust Evidence','Sadness Evidence','Confusion Evidence','Frustration Evidence',
78 | 'Neutral Evidence','Positive Evidence','Negative Evidence','AU1 Evidence','AU2 Evidence',
79 | 'AU4 Evidence','AU5 Evidence','AU6 Evidence','AU7 Evidence','AU9 Evidence','AU10 Evidence',
80 | 'AU12 Evidence','AU14 Evidence','AU15 Evidence','AU17 Evidence','AU18 Evidence','AU20 Evidence',
81 | 'AU23 Evidence','AU24 Evidence','AU25 Evidence','AU26 Evidence','AU28 Evidence','AU43 Evidence','NoOfFaces',
82 | 'Yaw Degrees', 'Pitch Degrees', 'Roll Degrees'])
83 | # Choose index either FrameTime or FrameNo
84 | d = d.set_index(d['FrameTime'].values/1000.0)
85 |     if fullfacet is True:
86 |         # full set: emotion predictions, action units, and head pose
87 |         facets = ['Joy Evidence','Anger Evidence','Surprise Evidence','Fear Evidence','Contempt Evidence',
88 |                   'Disgust Evidence','Sadness Evidence','Confusion Evidence','Frustration Evidence',
89 |                   'Neutral Evidence','Positive Evidence','Negative Evidence','AU1 Evidence','AU2 Evidence',
90 |                   'AU4 Evidence','AU5 Evidence','AU6 Evidence','AU7 Evidence','AU9 Evidence','AU10 Evidence',
91 |                   'AU12 Evidence','AU14 Evidence','AU15 Evidence','AU17 Evidence','AU18 Evidence','AU20 Evidence',
92 |                   'AU23 Evidence','AU24 Evidence','AU25 Evidence','AU26 Evidence','AU28 Evidence','AU43 Evidence','NoOfFaces',
93 |                   'Yaw Degrees', 'Pitch Degrees', 'Roll Degrees']
94 |     elif fullfacet is False:
95 |         # default: emotion predictions only
96 |         facets = ['Joy Evidence','Anger Evidence','Surprise Evidence','Fear Evidence','Contempt Evidence',
97 |                   'Disgust Evidence','Sadness Evidence','Confusion Evidence','Frustration Evidence',
98 |                   'Neutral Evidence','Positive Evidence','Negative Evidence','NoOfFaces']
99 |     else:
100 |         facets = fullfacet  # allow passing a custom list of columns
101 |     d = d[facets]  # keep only the requested columns
102 | if zscore:
103 |         d = (d - d.mean()) / d.std(ddof=0)  # .ix is deprecated; operate on the DataFrame directly
104 | if fillna:
105 | d = d.fillna(method='ffill')
106 | if demedian:
107 | d = d-d.median()
108 | if demean:
109 | d = d-d.mean()
110 | if sampling_hz and target_hz:
111 | d = downsample(d,sampling_freq=sampling_hz,target=target_hz,target_type='hz')
112 | return d
113 |
114 |
115 | def rec_to_time(vals,fps):
116 | times = np.array(vals)/60./fps
117 | times = [str(int(np.floor(t))).zfill(2)+':'+str(int((t-np.floor(t))*60)).zfill(2) for t in times]
118 | return times
119 |
120 | def VideoViewer(path_to_video, data_df,xlabel='', ylabel='',title='',figsize=(6.5,3),legend=False,xlim=None,ylim=None,plot_rows=False):
121 | """
122 | This function plays a video and plots the data underneath the video and moves a cursor as the video plays.
123 | Plays videos using Jupyter_Video_Widget by https://github.com/Who8MyLunch/Jupyter_Video_Widget
124 | Currently working on: Python 3
125 | For plot update to work properly plotting needs to be set to: %matplotlib notebook
126 |
127 | Args:
128 | path_to_video : file path or url to a video. tested with mov and mp4 formats.
129 |         data_df : pandas dataframe with columns to be plotted at 30 Hz. (Plotting too many columns can slow down updates.)
130 | ylabel(str): add ylabel
131 | legend(bool): toggle whether to plot legend
132 | xlim(list): pass xlimits [min,max]
133 | ylim(list): pass ylimits [min,max]
134 |         plot_rows(bool): Draws individual plots for each column of data_df. (Default: False)
135 | """
136 | from jpy_video import Video
137 | from IPython.display import display, HTML
138 | display(HTML(data="""
139 |
144 | """))
145 |
146 | f = os.path.abspath(path_to_video)
147 | wid = Video(f)
148 | wid.layout.width='640px'
149 | wid.display()
150 | lnwidth = 3
151 |
152 | fps = wid.timebase**-1 # time base is play rate hard coded at 30fps
153 | print(fps)
154 | if plot_rows:
155 | fig,axs = plt.subplots(data_df.shape[1],1,figsize=figsize) # hardcode figure size for now..
156 | else:
157 | fig,axs = plt.subplots(1,1,figsize=figsize)
158 | t=wid.current_time
159 | if plot_rows and data_df.shape[1]>1:
160 | for ixs, ax in enumerate(axs):
161 | ax.axvline(fps*t,color='k',linestyle='--',linewidth=lnwidth) # cursor is always first of ax
162 | # plot each column
163 | data_df.iloc[:,ixs].plot(ax=ax,legend=legend,xlim=xlim,ylim=ylim)
164 | ax.set_xticks = np.arange(0,data_df.shape[0],5)
165 | ax.set(ylabel =data_df.columns[ixs], xlabel=xlabel, xticklabels = rec_to_time(ax.get_xticks(),fps))
166 | else:
167 | axs.axvline(fps*t,color='k',linestyle='--',linewidth=lnwidth) # cursor is always first of ax
168 | # plot each column
169 | data_df.plot(ax=axs,legend=legend,xlim=xlim,ylim=ylim)
170 | axs.set_xticks = np.arange(0,data_df.shape[0],5)
171 | axs.set(ylabel = data_df.columns[0],xlabel=xlabel, title=title, xticklabels = rec_to_time(axs.get_xticks(),fps))
172 | if legend:
173 | plt.legend(loc=1)
174 | plt.tight_layout()
175 |
176 | def plot_dat(axs,t,fps=fps):
177 | if plot_rows and data_df.shape[1]>1:
178 | for ax in axs:
179 | if ax.lines:
180 | ax.lines[0].set_xdata([np.round(fps*t),np.round(fps*t)])
181 | else:
182 | if axs.lines:
183 | axs.lines[0].set_xdata([np.round(fps*t),np.round(fps*t)])
184 | fig.canvas.draw()
185 |
186 | def on_value_change(change,ax=axs,fps=fps):
187 | if change['name']=='_event':
188 | plot_dat(axs=axs, t=change['new']['currentTime'],fps=fps)
189 |
190 | # call on_value_change that will call plotting function plot_dat whenever there is cursor update
191 | wid.observe(on_value_change)
192 |
193 |
194 | def AudioAligner(original, sample, search_start=0.0,search_end=15.0, xmax = 60,manual=False,reduce_orig_volume=1):
195 | """
196 |     This function pulls up an interactive widget to find the offset between two audio files.
197 |
198 | Args:
199 | original: path to original audio file (e.g. '../audios/original.wav')
200 | sample: path to the sample audio file (e.g. '../audios/sample.wav')
201 | search_start(float): start range for slider to search for offset
202 | search_end(float): end range for slider to search for offset
203 | xmax(int): Range of audio to plot from beginning
204 | manual(bool): set to True to turn off auto-refresh
205 |         reduce_orig_volume(int or float): the original wav is often louder, so its volume is divided by this number.
206 | """
207 | import scipy.io.wavfile as wav
208 | from IPython.display import Audio
209 | from IPython.display import display
210 | from ipywidgets import widgets
211 |
212 | orig_r,orig = wav.read(original)
213 | # volume is often louder on original so you can reduce it
214 | orig = orig/reduce_orig_volume
215 | # take one channel of target audio. probably not optimal
216 | if np.ndim(orig) >1:
217 | orig = orig[:,0]
218 | # grab one channel of sample audio
219 | tomatch_r,tomatch = wav.read(sample)
220 | if np.ndim(tomatch) >1:
221 | tomatch = tomatch[:,0]
222 |
223 | fs = 44100
224 |
225 | def audwidg(offset,play_start):
226 | allshift = play_start
227 | samplesize = 30
228 | tomatchcopy = tomatch[int((allshift+offset)*tomatch_r):int((allshift+offset)*tomatch_r)+fs*samplesize]
229 | shape = tomatchcopy.shape[0]
230 | origcopy = orig[int((allshift)*tomatch_r):int((allshift)*tomatch_r)+fs*samplesize]
231 | # when target audio is shorter, pad difference with zeros
232 | if origcopy.shape[0] < tomatchcopy.shape[0]:
233 | diff = tomatchcopy.shape[0] - origcopy.shape[0]
234 | origcopy = np.pad(origcopy, pad_width = (0,diff),mode='constant')
235 | toplay = origcopy + tomatchcopy
236 | display(Audio(data=toplay,rate=fs))
237 |
238 | def Plot_Audios(offset,x_min,x_max):
239 | # print('Precise offset : ' + str(offset))
240 | fig,ax = plt.subplots(figsize=(20,3))
241 | ax.plot(orig[int(fs*x_min):int(fs*x_max)],linewidth=.5,alpha=.8,color='r')
242 | ax.plot(tomatch[int(fs*x_min)+int(fs*offset) : int(fs*x_max)+int(fs*offset)],linewidth=.5,alpha=.8)
243 | ax.set_xticks([(tick-x_min)*fs for tick in range(int(x_min),int(x_max+1))])
244 | ax.set_xticklabels([tick for tick in range(int(x_min),int(x_max)+1)])
245 | ax.set_xlim([(x_min-x_min)*fs, (x_max-x_min)*fs] )
246 | ax.set_ylabel('Audio')
247 | ax.set_xlabel('Target Audio Time')
248 | audwidg(offset,x_min)
249 | plt.show()
250 |
251 | widgets.interact(Plot_Audios,
252 | offset=widgets.FloatSlider(value = 0.5*(search_start+search_end), readout_format='.3f', min = float(search_start), max = float(search_end), step = 0.001,
253 | description='Adjusted offset: ',layout=widgets.Layout(width='90%')),
254 | x_min=widgets.FloatSlider(description='Min X on audio plot', value=0.0,min=0.0,max=xmax,step=0.1, layout=widgets.Layout(width='50%')),
255 | x_max=widgets.FloatSlider(description='Max X on audio plot', value=xmax,min=0.0,max=xmax,step=0.1, layout=widgets.Layout(width='50%')),
256 | __manual=manual
257 | )
258 |
259 | neutralface = {-34: (212, 335),
260 | -33: (222, 342), -32: (237, 342), -30: (203, 335), -29: (222, 335),
261 | -28: (237, 328), -26: (227, 288), -25: (238, 292), -19: (201, 219),
262 | -18: (184, 220), -17: (169, 214), -16: (184, 204), -15: (201, 203),
263 | -14: (217, 215), -13: (225, 181), -12: (203, 172), -11: (180, 170),
264 | -10: (157, 174), -9: (142, 180), -8: (122, 222), -7: (126, 255),
265 | -6: (133, 286), -5: (139, 318), -4: (148, 349), -3: (165, 375),
266 | -2: (190, 397), -1: (219, 414),
267 | 0: (252, 419),
268 | 1: (285, 414), 2: (315, 398), 3: (341, 377), 4: (359, 351),
269 | 5: (368, 319), 6: (371, 287), 7: (376, 254), 8: (378, 221),
270 | 9: (354, 180), 10: (339, 173), 11: (316, 167), 12: (293, 171),
271 | 13: (270, 180), 14: (281, 215), 15: (296, 203), 16: (314, 202),
272 | 17: (328, 212), 18: (315, 219), 19: (297, 219), 20: (248, 207),
273 | 21: (248, 227), 22: (248, 247), 23: (248, 268), 24: (248, 294),
274 | 25: (260, 291), 26: (271, 287), 27: (248, 333), 28: (262, 328),
275 | 29: (279, 335), 30: (296, 335), 31: (250, 340), 32: (264, 342),
276 | 33: (280, 342), 34: (288, 335)}
277 |
278 | audict = {'AU1' : {-11:(2,0),11:(-2,0),-12:(5,-8),12:(-5,-8),-13:(0,-20),13:(0,-20) },
279 | # Brow Lowerer
280 | 'AU4': {-10:(4,5),10:(-4,5),-11:(4,15),11:(-4,15),-12:(5,20),12:(-5,20),-13:(0,15),13:(0,15) },
281 | # Upper Lid Raiser
282 | 'AU5': {-9:(2,-9),9:(2,-9), -10:(2,-10),10:(-2,-10),-11:(2,-15),11:(-2,-15),
283 | -12:(5,-12),12:(-5,-12),-13:(0,-10),13:(0,-10),
284 | -16:(0,-10),-15:(0,-10),16:(0,-10),15:(0,-10),
285 | -19:(0,10),-18:(0,10),19:(0,10),18:(0,10)},
286 | # cheek raiser
287 | 'AU6': {-8:(20,0),8:(-20,0), -7:(10,-5),7:(-10,-5), -6:(2,-8), 6:(-2,-8),
288 | -9:(5,5),9:(-5,5),
289 | 17:(-5,5),18:(-3,-3),19:(-3,-3),
290 | -17:(5,5),-18:(3,-3),-19:(3,-3)},
291 | # nose wrinkler
292 | 'AU9': {-15:(2,4),15:(-2,4),-14:(2,3),14:(-2,3),
293 | 20:(0,5), 21:(0,-5), 22:(0,-7), 23:(0,-10),
294 | -26:(5,-15),-25:(0,-15),24:(0,-15),25:(0,-15),26:(-5,-15),
295 | -10:(2,0),10:(-2,0),-11:(2,8),11:(-2,8),
296 | -12:(5,12),12:(-5,12),-13:(0,10),13:(0,10)
297 | },
298 | # Upper Lip Raiser
299 | 'AU10': {-34:(0,5),-33:(0,-2),-30:(0,3),-29:(0,-10),-28:(0,-5),
300 | -26:(-5,-8),-25:(0,-3),24:(0,-3),25:(0,-3),26:(5,-8),
301 | 27:(0,-10),28:(0,-5),29:(0,-10),30:(0,3),33:(0,-2),34:(0,5)},
302 | # Lip corner Puller
303 | 'AU12': { -30: (-10,-15), -34: (-5,-5), 30:(10,-15), 34:(5,-5), -29:(0,0), 29:(0,0) },
304 | #AU14 Dimpler
305 | 'AU14': {-33:(0,-5),-32:(0,-5),-30:(-5,-5),-28:(0,5),28:(0,5),30:(5,-5),31:(0,-5),32:(0,-5),33:(0,-5)},
306 | # Chin raiser
307 | 'AU17': { -2:(5,0),-1:(5,-5),0:(0,-20),-1:(-5,-5),2:(-5,0)},
308 | # Lip Puckerer
309 | 'AU18': {-30:(5,0), 30:(-5,0), -34:(5,0), 34:(-5,0),
310 | -33:(5,0),33:(-5,0), -29:(5,0),29:(-5,0),30:(-5,0),
311 | -28:(0,0),28:(0,0),27:(0,-8),31:(0,10),-32:(0,7),32:(0,7)} ,
312 | # Lips Part
313 | 'AU25': {-28:(0,-3),28:(0,-3),27:(0,-5),31:(0,7),-32:(0,7),32:(0,7)},
314 | # Lip Suck
315 | 'AU28': {-33:(0,-5),-32:(0,-5),-28:(0,5),24:(0,-3),28:(0,-5),31:(0,-5),32:(0,-5),33:(0,-5)}
316 | }
317 |
318 | def plotface(face, scatter=True,line=False,annot=False,ax=None):
319 | """
320 |     This function plots a dictionary of facial landmark points given as (x,y) coordinates, like neutralface.
321 |
322 | """
323 | lineface = range(-8,9)
324 | linenose = list(range(20,24))
325 | linenose.extend([26,25,24,-25,-26,23])
326 | linelbrow = range(-13,-8)
327 | linerbrow = range(9,14)
328 | lineleye = list(range(-19,-13))
329 | lineleye.append(-19)
330 | linereye = list(range(14,20))
331 | linereye.append(14)
332 | linemouth = list(range(27,31))
333 | linemouth.extend([34,33,32,31,-32,-33,-34,-30,-29,-28,27])
334 | lines = [lineface,linenose,linelbrow,linerbrow,lineleye,linereye,linemouth]
335 | if not ax:
336 | f, ax = plt.subplots(1,1,figsize=(7,7))
337 | for key in face.keys():
338 | (x,y) = face[key]
339 | if scatter:
340 | ax.scatter(x,y,s=8,c='k')
341 | if annot:
342 |
343 | ax.annotate(key,(np.sign(key)*20+x,y))
344 | if line:
345 | for l in lines:
346 | ax.plot([face[key][0] for key in l],[face[key][1] for key in l],color='k' )
347 | ax.set_xlim([0,500])
348 | ax.set_ylim([0,500])
349 | ax.invert_yaxis()
350 | return ax
351 |
352 | def ChangeAU(aulist, au_weight = 1.0, audict = audict, face = neutralface):
353 | '''
354 |     This function returns a new face with the action units in aulist moved according to au_weight.
355 | 
356 |     Args:
357 |         aulist: list of AUs to activate; currently supported:
358 |             ['AU1','AU4','AU5','AU6','AU9','AU10','AU12','AU14','AU17','AU18','AU25','AU28']
359 |         au_weight: float between 0 and 1.0 applied to all action units, or a
360 |             dictionary mapping individual AUs to weights.
361 |         audict: dictionary of AU movements
362 |         face: neutral face dictionary
363 | '''
364 | au_weights = {}
365 | # if dict, apply weight to each au
366 | if type(au_weight)==dict:
367 | au_weights = au_weight
368 | # if a float apply to all
369 | elif type(au_weight)==float:
370 | for au in audict.keys():
371 | au_weights[au] = au_weight
372 | newface = face.copy()
373 | for au in aulist:
374 | for landmark in audict[au].keys():
375 | newface[landmark] = (face[landmark][0] + au_weights[au] * audict[au][landmark][0],
376 | face[landmark][1] + au_weights[au] * audict[au][landmark][1])
377 | return newface
378 |
--------------------------------------------------------------------------------
/facesync/version.py:
--------------------------------------------------------------------------------
1 | """Specifies current version of facesync to be used by setup.py and __init__.py
2 | """
3 |
4 | __version__ = '0.0.9'
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.9
2 | scipy
--------------------------------------------------------------------------------
/screenshots/AudioAligner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/screenshots/AudioAligner.png
--------------------------------------------------------------------------------
/screenshots/VideoViewer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/screenshots/VideoViewer.png
--------------------------------------------------------------------------------
/screenshots/plotface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cosanlab/facesync/bd5922de5729e4e76a6eaae84b45d965660f1545/screenshots/plotface.png
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # from nltools.version import __version__
2 | from setuptools import setup, find_packages
3 |
4 | __version__ = '0.0.9'
5 |
6 | # try:
7 | # from setuptools.core import setup
8 | # except ImportError:
9 | # from distutils.core import setup
10 | extra_setuptools_args = dict(
11 | tests_require=['pytest']
12 | )
13 |
14 | setup(
15 | name='facesync',
16 | version=__version__,
17 | author='Jin Hyun Cheong',
18 | author_email='jcheong.gr@dartmouth.edu',
19 | url='https://github.com/jcheong0428/facesync',
20 | download_url = 'https://github.com/jcheong0428/facesync/tarball/0.9',
21 | install_requires=['numpy', 'scipy'],
22 | packages=find_packages(exclude=['facesync/tests']),
23 | package_data={'facesync': ['resources/*']},
24 | license='LICENSE.txt',
25 | description='A Python package to sync videos based on audio',
26 | long_description='facesync is a python package that allows users to synchronize multiple videos based on audio.',
27 | keywords = ['psychology', 'preprocessing', 'video','audio','facecam','syncing'],
28 | classifiers = [
29 | "Programming Language :: Python",
30 | "Operating System :: OS Independent",
31 | "Intended Audience :: Science/Research",
32 | "License :: OSI Approved :: MIT License"
33 | ],
34 | **extra_setuptools_args
35 | )
36 |
37 |
--------------------------------------------------------------------------------