├── examples ├── video_00.gif ├── video_00.jpg └── video_sum.mp4 ├── docker ├── build.sh ├── base.Dockerfile └── README.md ├── Makefile.config ├── include └── hecate │ ├── knapsack.hpp │ ├── time.hpp │ ├── sort.hpp │ ├── shot_range.hpp │ ├── gapstat.hpp │ ├── ffmpeg_helper.hpp │ ├── file_helper.hpp │ ├── video_parser.hpp │ ├── cc_parser.hpp │ ├── hist_opencv.hpp │ ├── gflseg.hpp │ ├── hecate.hpp │ └── image_metrics.hpp ├── src ├── tools │ └── hecate.cpp └── hecate │ ├── hecate_main.cpp │ ├── hecate_thumbnail.cpp │ ├── gflseg.cpp │ ├── hecate_highlight.cpp │ └── video_parser.cpp ├── Makefile ├── README.md └── LICENSE /examples/video_00.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yahoo/hecate/HEAD/examples/video_00.gif -------------------------------------------------------------------------------- /examples/video_00.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yahoo/hecate/HEAD/examples/video_00.jpg -------------------------------------------------------------------------------- /examples/video_sum.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yahoo/hecate/HEAD/examples/video_sum.mp4 -------------------------------------------------------------------------------- /docker/build.sh: -------------------------------------------------------------------------------- 1 | # Builds images 2 | docker build -f base.Dockerfile -t hecate-core . 3 | docker run --name hecate -it hecate-core -------------------------------------------------------------------------------- /Makefile.config: -------------------------------------------------------------------------------- 1 | # Directories containing ffmpeg and opencv 2 | INCLUDE_DIRS := /usr/local/include 3 | LIBRARY_DIRS := /usr/local/lib 4 | 5 | # Comment if you're using OpenCV 2.4 6 | OPENCV_VERSION := 3 7 | 8 | BUILD_DIR := build 9 | DISTRIBUTE_DIR := distribute 10 | 11 | # Uncomment for debugging. 12 | # DEBUG := 1 13 | 14 | # enable pretty build (comment to see full commands) 15 | Q ?= @ 16 | -------------------------------------------------------------------------------- /include/hecate/knapsack.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * 0/1 Knapsack solver 3 | * 4 | * Copyright 2016 Yahoo Inc. 5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | */ 10 | 11 | #ifndef HECATE_KNAPSACK_HPP 12 | #define HECATE_KNAPSACK_HPP 13 | 14 | #include 15 | 16 | namespace hecate { 17 | 18 | template 19 | inline T Tmax(T a, T b) {return (a>b) ? a : b; }; 20 | 21 | template 22 | inline void solve_01knapsack( const vector& value, const vector& weight, 23 | const int budget, vector& solution) 24 | { 25 | int n = (int)value.size(); 26 | solution.resize(n, false); 27 | 28 | // recursion 29 | vector > V( n+1, vector(budget+1,0) ); 30 | for( int i=1; i<=n; i++ ) 31 | for( int w=1; w<=budget; w++ ) 32 | V[i][w] = ( weight[i-1]>w ) 33 | ? 
V[i-1][w] : Tmax( V[i-1][w], V[i-1][w-weight[i-1]] + value[i-1] ); 34 | 35 | // backtrack 36 | int w = budget; 37 | for( int i=n; i>0; i-- ) { 38 | if( V[i][w]!=V[i-1][w] && V[i][w]==V[i-1][w-weight[i-1]]+value[i-1] ) { 39 | solution[i-1] = true; 40 | w -= weight[i-1]; 41 | } 42 | } 43 | } 44 | 45 | } 46 | #endif 47 | 48 | 49 | -------------------------------------------------------------------------------- /include/hecate/time.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Time util 3 | * 4 | * Copyright 2016 Yahoo Inc. 5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | */ 10 | 11 | #ifndef HECATE_TIME_HPP 12 | #define HECATE_TIME_HPP 13 | 14 | #include 15 | 16 | #include // display date and time 17 | #include // floor 18 | #include 19 | #include 20 | 21 | namespace hecate { 22 | 23 | typedef std::chrono::high_resolution_clock Clock; 24 | typedef std::chrono::milliseconds milliseconds; 25 | 26 | inline double elapsed_time_ms( Clock::time_point from) 27 | { 28 | Clock::time_point now = Clock::now(); 29 | milliseconds ms = std::chrono::duration_cast( now - from ); 30 | return (double)ms.count(); 31 | } 32 | 33 | inline double print_elapsed_time( Clock::time_point from, const char* prefix) 34 | { 35 | int msec = (int) elapsed_time_ms(from); 36 | int sec = msec/1000; 37 | printf("%s: Elapsed time %02d:%02d:%04d\n", prefix, sec/60, sec%60, msec%1000); 38 | 39 | return msec / 1000.0; 40 | } 41 | 42 | inline std::string second2string(double sec, const std::string& format) { 43 | char buf[128]; 44 | int hh, mm, ss, mss; 45 | hh = (int) floor( sec/3600 ); 46 | mm = (int) floor( sec/60 ) % 60; 47 | ss = (int) sec % 60; 48 | mss = (int) 10000*(sec - floor(sec)); 49 | if( format == "hh:mm:ss.mss" ) { 50 | sprintf( buf, "%02d:%02d:%02d.%04d", hh,mm,ss,mss ); 51 | } 52 | else if( format == "mm:ss.mss" ) { 53 | sprintf( buf, "%02d:%02d.%04d", 60*hh+mm,ss,mss ); 54 | } 55 | std::string ret = buf; 56 | return ret; 57 | } 58 | } 59 | #endif 60 | 61 | -------------------------------------------------------------------------------- /src/tools/hecate.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * HECATE Yahoo Video Processing Library - Binary 3 | * 4 | * Copyright 2016 Yahoo Inc. 5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 
7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | */ 10 | 11 | #include "hecate/hecate.hpp" 12 | 13 | using namespace std; 14 | using namespace cv; 15 | 16 | int main( int argc, char** argv ) 17 | { 18 | hecate_copyright(); 19 | if( argc<3 ) 20 | hecate_usage(); 21 | 22 | // Read input params 23 | hecate_params opt; 24 | hecate_parse_params( argc, argv, opt ); 25 | 26 | // Run VIDSUM 27 | vector v_thumb_idx; 28 | vector v_gif_range; 29 | vector v_mov_range; 30 | run_hecate( opt, v_thumb_idx, v_gif_range, v_mov_range ); 31 | 32 | // Print debugging info 33 | if( opt.debug ) { 34 | if( opt.jpg ) { 35 | printf("hecate: thumbnail indices: [ "); 36 | for(size_t i=0; i>~/.bashrc 33 | 34 | # install hecate 35 | RUN git clone https://github.com/yahoo/hecate.git && \ 36 | cd hecate && \ 37 | make all && \ 38 | make distribute 39 | 40 | RUN echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib">>~/.bashrc 41 | 42 | # references 43 | # # ref: https://cerebrumedge.com/blog/entry/compiling-opencv-with-cuda-and-ffmpeg-on-ubuntu-16.04#:~:text=FFMpeg%20and%20OpenCV,OPENCV_SOURCE_CODE%2F3rdparty%2Fffmpeg%2F. 44 | # https://stackoverflow.com/questions/46884682/error-in-building-opencv-with-ffmpeg 45 | # https://stackoverflow.com/questions/12335848/opencv-program-compile-error-libopencv-core-so-2-4-cannot-open-shared-object-f -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # HECATE - DOCKER INSTALLATION 2 | 3 | 4 | ## Installation 5 | 6 | First, you need to install Docker on your local computer; see the following [tutorial](https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository). Note that you have to be logged in as a superuser to run Docker properly; otherwise you may run into a variety of permission issues whose error messages are not always easy to interpret. 7 | 8 | Check your installation using 9 | ``` 10 | docker -v 11 | ``` 12 | 13 | Once you have installed Docker correctly, follow the instructions below: 14 | ``` 15 | $ git clone https://github.com/yahoo/hecate.git 16 | $ cd hecate/docker 17 | $ ./build.sh 18 | ``` 19 | 20 | Now the hecate CLI should open up for you; if it does not, you may need to run: 21 | ``` 22 | docker exec -it hecate bash 23 | ``` 24 | 25 | Once you've successfully compiled hecate, it will generate a binary executable under `distribute/bin/`. Run the following command to check if everything works properly: 26 | ``` 27 | $ cd hecate 28 | $ ./distribute/bin/hecate 29 | 30 | -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 31 | HECATE Copyright 2016 Yahoo Inc. 32 | Licensed under the terms of the Apache 2.0 License.
33 | Developed by : Yale Song (yalesong@yahoo-inc.com) 34 | Built on : 11:46:03 Aug 11 2016 35 | -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 36 | USAGE: hecate -i infile [options] 37 | 38 | -i --in_video (string) Input video file 39 | -o --out_dir (string) Output directory (./output) 40 | -s --step (int) Frame subsampling step size (1) 41 | -n --njpg (int) Number of thumbnails to be generated (5) 42 | -q --ngif (int) Number of GIFs to be generated (5) 43 | -r --lmov (int) Length of video summary to be generated (in seconds) (15) 44 | -u --jpg_width_px (int) Pixel width of thumbnail images (360) 45 | -v --gif_width_px (int) Pixel width of animated GIFs (360) 46 | -w --mov_width_px (int) Pixel width of summary video (360) 47 | --generate_jpg Generate thumbnail images 48 | --generate_gif Generate animated GIFs 49 | --generate_mov Generate a summary video 50 | --generate_gifsum Generate animated GIFs summary 51 | --generate_gifall Generate all possible animated GIFs 52 | --print_shot_info Print shot boundary detection results 53 | --print_keyfrm_info Print keyframe indices 54 | ``` 55 | 56 | Congratulations! You have successfully installed hecate! 57 | 58 | 59 | ## Docker Developer 60 | 61 | Sayantan Mandal: [github](https://github.com/smandal047) 62 | 63 | ## -------------------------------------------------------------------------------- /include/hecate/sort.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * MATLAB-like sort 3 | * 4 | * Copyright 2016 Yahoo Inc. 5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | */ 10 | 11 | #ifndef HECATE_SORT_HPP 12 | #define HECATE_SORT_HPP 13 | 14 | #include 15 | #include 16 | 17 | namespace hecate { 18 | 19 | // Act like matlab's [Y,I] = SORT(X) 20 | // Input: 21 | // unsorted unsorted vector 22 | // Output: 23 | // sorted sorted vector, allowed to be same as unsorted 24 | // index_map an index map such that sorted[i] = unsorted[index_map[i]] 25 | template 26 | void sort( 27 | std::vector &unsorted, 28 | std::vector &sorted, 29 | std::vector &index_map); 30 | 31 | // Act like matlab's Y = X[I] 32 | // where I contains a vector of indices so that after, 33 | // Y[j] = X[I[j]] for index j 34 | // this implies that Y.size() == I.size() 35 | // X and Y are allowed to be the same reference 36 | template< class T > 37 | void reorder( 38 | std::vector & unordered, 39 | std::vector const & index_map, 40 | std::vector & ordered); 41 | 42 | //////////////////////////////////////////////////////////////////////////////// 43 | // Implementation 44 | //////////////////////////////////////////////////////////////////////////////// 45 | 46 | 47 | // Comparison struct used by sort 48 | template struct index_cmp 49 | { 50 | index_cmp(const T arr) : arr(arr) {} 51 | bool operator()(const size_t a, const size_t b) const 52 | { 53 | return arr[a] < arr[b]; 54 | } 55 | const T arr; 56 | }; 57 | 58 | template 59 | inline void sort( 60 | std::vector & unsorted, 61 | std::vector & sorted, 62 | std::vector & index_map) 63 | { 64 | // Original unsorted index map 65 | index_map.resize(unsorted.size()); 66 | for(size_t i=0;i& >(unsorted)); 75 | 76 | sorted.resize(unsorted.size()); 77 | reorder(unsorted,index_map,sorted); 78 | } 79 | 80 | // This implementation is O(n), but also uses O(n) extra memory 81 | template< class T > 82 | inline void reorder( 83 | std::vector & unordered, 84 | std::vector 
const & index_map, 85 | std::vector & ordered) 86 | { 87 | // copy for the reorder according to index_map, because unsorted may also be 88 | // sorted 89 | std::vector copy = unordered; 90 | ordered.resize(index_map.size()); 91 | for(unsigned int i = 0; i 15 | #include 16 | 17 | #include 18 | 19 | namespace hecate { 20 | 21 | /* 22 | */ 23 | class Range { 24 | public: 25 | int start; 26 | int end; 27 | vector v_idx; // store indices (keyframe index) 28 | 29 | public: 30 | Range(int s, int e): start(s), end(e) {}; 31 | 32 | inline int length() const { return end-start+1; }; 33 | 34 | inline void print() const { 35 | printf("range(%d:%d) (%d) [", start, end, length() ); 36 | for(size_t i=0; i v_range; 50 | 51 | public: 52 | ShotRange(int s, int e): Range(s,e) {}; 53 | 54 | inline void print() const { 55 | printf("shot_range(%d:%d) (%d) [", start, end, length() ); 56 | for(size_t i=0; i v_tag; 90 | 91 | public: 92 | RangeTag(int s, int e): Range(s,e) {}; 93 | RangeTag(Range r): Range(r.start,r.end) { 94 | v_idx=r.v_idx; 95 | }; 96 | 97 | inline void print() const { 98 | printf("range(%d:%d) (%d) [", start, end, length() ); 99 | for(size_t i=0; i v_scores; 112 | for(size_t i=0; i v_srtval; 115 | vector v_srtidx; 116 | hecate::sort( v_scores, v_srtval, v_srtidx ); 117 | vector v_tag_new; 118 | for(size_t i=0; i v_range_tag; 129 | 130 | public: 131 | ShotRangeTag(int s, int e): Range(s,e) {}; 132 | ShotRangeTag(ShotRange& sr): Range(sr.start,sr.end) { 133 | v_idx = sr.v_idx; 134 | for(size_t i=0; i 16 | #include 17 | 18 | #include // for standard I/O 19 | #include 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #if CV_MAJOR_VERSION > 2 28 | #include 29 | #include 30 | #endif 31 | 32 | using namespace std; 33 | using namespace cv; 34 | 35 | namespace hecate { 36 | 37 | // Compute mean-squared error (mse) 38 | inline double calc_mse( const Mat& km_data, const Mat& km_lbl, const Mat& km_ctr ) 39 | { 40 | double compactness = 0; 41 | 42 | int nsamples = km_lbl.rows; 43 | int nclusters = km_ctr.rows; 44 | 45 | for( int k=0; k(i)==k ) { 50 | nk++; 51 | err += pow(cv::norm(km_data.row(i)-km_ctr.row(k)),2.0); 52 | } 53 | } 54 | compactness += 0.5 * err / nk; 55 | } 56 | 57 | return compactness; 58 | } 59 | 60 | inline void find_bounds( const Mat& km_data, Mat& bounds ) 61 | { 62 | int ndim = km_data.cols; 63 | bounds = Mat(2,ndim,CV_64F); 64 | 65 | double minval, maxval; 66 | for( int d=0; d(0,d) = minval; 69 | bounds.at(1,d) = maxval; 70 | } 71 | } 72 | 73 | inline void randu_bound( Mat& X, const Mat& bounds, int nsamples, int ndims ) 74 | { 75 | X = Mat(nsamples, ndims, CV_32F); 76 | randu( X, Scalar::all(0.0), Scalar::all(1.0) ); 77 | for(int c=0; c(0,c); 79 | double ub = bounds.at(1,c); 80 | X.col(c) = (ub-lb)*X.col(c) + lb; 81 | } 82 | } 83 | 84 | //fix error when there is only 1 data point: http://docs.opencv.org/2.4/modules/core/doc/clustering.html 85 | inline void perform_kmeans(const Mat& km_data, Mat& km_lbl, Mat& km_ctr, int ncluster, 86 | int km_attempts=1, int km_max_cnt=1000, double km_eps=0.0001) 87 | { 88 | if(km_data.rows==1){ 89 | km_lbl = Mat::zeros(1,1, km_lbl.type()); 90 | cv::reduce( km_data, km_ctr, 0, CV_REDUCE_AVG ); 91 | } 92 | else{ 93 | int km_k = min(ncluster, km_data.rows); 94 | TermCriteria km_opt = TermCriteria(CV_TERMCRIT_ITER|CV_TERMCRIT_EPS, km_max_cnt, km_eps); 95 | kmeans( km_data, km_k, km_lbl, km_opt, km_attempts, KMEANS_PP_CENTERS, km_ctr ); 96 | } 97 | } 98 | 99 | inline int perform_kmeans_gs( const Mat& km_data, Mat& km_lbl, 
Mat& km_ctr, 100 | vector K, int B=10, int N=500 ) 101 | { 102 | int ndims = km_data.cols; 103 | 104 | // Generate null reference dataset 105 | vector v_Xb; 106 | Mat bounds; find_bounds( km_data, bounds ); 107 | for(int b=0; b gap(K.size(), 0.0); // gap statistics = E[logWk] - logWk 115 | vector ElogWk(K.size(), 0.0); // compactness of the null reference distribution 116 | vector logWk(K.size(), 0.0); // compactness of the given data 117 | vector Sk(K.size(), 0.0); // standard deviations 118 | 119 | for( size_t i=0; i logWkb(B, 0.0); 127 | for(int b=0; b= gap(k+1) - S[k+1] 147 | size_t kstar=0; 148 | for(size_t i=0; i= gap[i+1] - Sk[i+1] ) { 150 | kstar = i; 151 | break; 152 | } 153 | } 154 | printf("\tgapstat: Optimal k=%d [%d:%d]\n", K[kstar], K[0], K[K.size()-1]); 155 | 156 | // return results 157 | perform_kmeans( km_data, km_lbl, km_ctr, K[kstar] ); 158 | return K[kstar]; 159 | 160 | } 161 | } 162 | 163 | #endif 164 | 165 | -------------------------------------------------------------------------------- /include/hecate/ffmpeg_helper.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Various helper functions using ffmpeg 3 | * 4 | * Copyright 2016 Yahoo Inc. 5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | */ 10 | 11 | 12 | #ifndef HECATE_FFMPEG_HPP 13 | #define HECATE_FFMPEG_HPP 14 | 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "hecate/file_helper.hpp" 24 | 25 | #define BUF_S 256 26 | #define BUF_M 512 27 | #define BUF_L 1024 28 | 29 | using namespace std; 30 | 31 | namespace hecate { 32 | const string _ffmpeg = hecate::which("ffmpeg"); 33 | const string _ffmarg = "-loglevel quiet -y"; 34 | const int _lfade = 8; // fade-in/out duration (unit: frame) 35 | 36 | // Crop video segment & resize 37 | static inline void ffmpeg_video_crop(string in_file, 38 | string out_file, 39 | string start_time, 40 | string duration, 41 | int out_width_px) 42 | { 43 | in_file = escape_space(in_file); 44 | 45 | char filter[BUF_S]; 46 | sprintf( filter, "'scale=%d:trunc(ow/a/2)*2'", out_width_px); 47 | 48 | char cmd[BUF_L]; 49 | sprintf( cmd, "%s -ss %s -t %s -i %s -strict -2 -vf %s %s %s", 50 | _ffmpeg.c_str(), start_time.c_str(), duration.c_str(), 51 | in_file.c_str(), filter, out_file.c_str(), _ffmarg.c_str()); 52 | 53 | system( cmd ); 54 | }; 55 | 56 | static inline void ffmpeg_video_concat(string in_filelist, 57 | string out_file) 58 | { 59 | char cmd[BUF_L]; 60 | sprintf( cmd, "%s -f concat -i %s -c copy %s %s", 61 | _ffmpeg.c_str(), in_filelist.c_str(), out_file.c_str(), 62 | _ffmarg.c_str() ); 63 | system( cmd ); 64 | }; 65 | 66 | // Audio fade in/out 67 | static inline void ffmpeg_audio_fade(string in_file, 68 | string out_file, 69 | double video_duration_sec, 70 | double video_fps) 71 | { 72 | in_file = escape_space(in_file); 73 | 74 | const double afade_sec = (double)2*_lfade/video_fps; 75 | const double afade_msec = (int)10000*(afade_sec-floor(afade_sec)); 76 | 77 | double afos = video_duration_sec - afade_sec; // audio fade-out start 78 | int afos_ss = (int) afos; // NOTE: we don't compute modulo this time 79 | int afos_mss = (int) 10000*(afos - floor(afos)); 80 | 81 | char filter[BUF_S]; 82 | sprintf( filter, "'afade=t=in:ss=0:d=0.%04d," 83 | "afade=t=out:st=%d.%04d:d=0.%04d'", 84 | (int)afade_msec, afos_ss, afos_mss, (int)afade_msec); 85 | 86 | char 
cmd[BUF_L]; 87 | sprintf( cmd, "%s -i %s -af %s %s %s", _ffmpeg.c_str(), 88 | in_file.c_str(), filter, out_file.c_str(), _ffmarg.c_str() ); 89 | system( cmd ); 90 | 91 | }; 92 | 93 | // Video fade in/out 94 | static inline void ffmpeg_video_fade(string in_file, 95 | string out_file, 96 | int video_duration, 97 | bool out_only=false) 98 | { 99 | in_file = escape_space(in_file); 100 | 101 | char filter[BUF_S]; 102 | if( out_only ) { 103 | sprintf( filter, "'fade=out:%d:%d'", 104 | video_duration-_lfade, _lfade); 105 | } 106 | else { 107 | sprintf( filter, "'fade=in:0:%d,fade=out:%d:%d'", 108 | _lfade, video_duration-_lfade, _lfade); 109 | } 110 | 111 | char cmd[BUF_L]; 112 | sprintf( cmd, "%s -i %s -vf %s %s %s", _ffmpeg.c_str(), 113 | in_file.c_str(), filter, out_file.c_str(), _ffmarg.c_str()); 114 | system( cmd ); 115 | }; 116 | 117 | 118 | // Based on http://blog.pkh.me/p/21-high-quality-gif-with-ffmpeg.html 119 | static inline void ffmpeg_video2gif(string in_file, 120 | string out_file, 121 | string start_time, 122 | string duration, 123 | int out_fps, 124 | int out_width_px) 125 | { 126 | in_file = escape_space(in_file); 127 | 128 | string out_dir = hecate::get_dir( std::string(out_file) ); 129 | 130 | char filter[BUF_S]; 131 | sprintf( filter, "fps=%d,scale=%d:-1:flags=lanczos", out_fps, out_width_px ); 132 | 133 | char palette[BUF_M]; 134 | sprintf( palette, "%s/palatte.png", out_dir.c_str() ); 135 | 136 | char time_setup[BUF_S] = ""; 137 | if( !start_time.empty() && !duration.empty() ) { 138 | sprintf( time_setup, "-ss %s -t %s", 139 | start_time.c_str(), duration.c_str() ); 140 | } 141 | 142 | char cmd[BUF_L]; 143 | 144 | // Create a palatte 145 | sprintf( cmd, "%s %s -i %s -vf '%s,palettegen=stats_mode=diff' %s %s", 146 | _ffmpeg.c_str(), time_setup, in_file.c_str(), filter, 147 | _ffmarg.c_str(), palette ); 148 | system( cmd ); 149 | 150 | // Convert segment to gif 151 | sprintf( cmd, "%s %s -i %s -i %s -lavfi '%s [x]; [x][1:v] paletteuse' %s %s", 152 | _ffmpeg.c_str(), time_setup, in_file.c_str(), palette, filter, 153 | _ffmarg.c_str(), out_file.c_str()); 154 | system( cmd ); 155 | 156 | // Delete palette 157 | sprintf( cmd, "rm %s", palette ); 158 | system( cmd ); 159 | }; 160 | 161 | 162 | 163 | } 164 | #endif 165 | 166 | -------------------------------------------------------------------------------- /include/hecate/file_helper.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Various helper functions for filesystem 3 | * 4 | * Copyright 2016 Yahoo Inc. 5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 
7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | */ 10 | 11 | #ifndef HECATE_FILE_HPP 12 | #define HECATE_FILE_HPP 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace hecate { 27 | 28 | // GET FILENAME WITHOUT EXTENSION AND TRAILING DIRECTORIES 29 | struct FileParts 30 | { 31 | std::string path; 32 | std::string name; 33 | std::string ext; 34 | }; 35 | 36 | static inline FileParts fileparts(std::string filename) 37 | { 38 | int idx0 = filename.rfind("/"); 39 | int idx1 = filename.rfind("."); 40 | 41 | if( idx1 == (int) std::string::npos ) 42 | idx1 = filename.length(); 43 | 44 | FileParts fp; 45 | fp.path = filename.substr(0,idx0+1); 46 | fp.name = filename.substr(idx0+1,idx1-idx0-1); 47 | fp.ext = filename.substr(idx1); 48 | 49 | return fp; 50 | }; 51 | 52 | static inline std::string get_dir( std::string filepath ) { 53 | hecate::FileParts fp = hecate::fileparts( filepath ); 54 | std::string dir = fp.path; 55 | return dir; 56 | }; 57 | 58 | static inline std::string escape_space( std::string s ) { 59 | std::string out; 60 | for( size_t i=0; i(std::isspace)))); 79 | return s; 80 | }; 81 | // trim from end 82 | static inline std::string &rtrim(std::string &s) { 83 | s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); 84 | return s; 85 | }; 86 | // trim from both ends 87 | static inline std::string &trim(std::string &s) { 88 | return ltrim(rtrim(s)); 89 | }; 90 | 91 | 92 | static inline char *trim(char *str) 93 | { 94 | size_t len = 0; 95 | char *frontp = str; 96 | char *endp = NULL; 97 | 98 | if( str == NULL ) { return NULL; } 99 | if( str[0] == '\0' ) { return str; } 100 | 101 | len = strlen(str); 102 | endp = str + len; 103 | 104 | /* Move the front and back pointers to address the first non-whitespace 105 | * characters from each end. 106 | */ 107 | while( isspace(*frontp) ) { ++frontp; } 108 | if( endp != frontp ) 109 | { 110 | while( isspace(*(--endp)) && endp != frontp ) {} 111 | } 112 | 113 | if( str + len - 1 != endp ) 114 | *(endp + 1) = '\0'; 115 | else if( frontp != str && endp == frontp ) 116 | *str = '\0'; 117 | 118 | /* Shift the string so that it starts at str so that if it's dynamically 119 | * allocated, we can still free it on the returned pointer. Note the reuse 120 | * of endp to mean the front of the string buffer now. 
121 | */ 122 | endp = str; 123 | if( frontp != str ) 124 | { 125 | while( *frontp ) { *endp++ = *frontp++; } 126 | *endp = '\0'; 127 | } 128 | return str; 129 | }; 130 | 131 | 132 | // CHECK IF FILE EXISTS 133 | static inline bool file_exists (const std::string& name) { 134 | if (FILE *file = fopen(name.c_str(), "r")) { 135 | fclose(file); 136 | return true; 137 | } else { 138 | return false; 139 | } 140 | }; 141 | 142 | // READ TEXT FILE INTO A VECTOR 143 | static inline void read_textlist( const std::string& in, 144 | std::vector& out ) 145 | { 146 | std::string str; 147 | std::ifstream file( in ); 148 | while( std::getline( file, str ) ) { 149 | str = hecate::trim(str); 150 | if( str.length()==0 ) continue; 151 | if( str.at(0)=='#' ) continue; 152 | out.push_back( str ); 153 | } 154 | }; 155 | 156 | // READ TEXT FILE CONTAINING A LIST OF FILEPATHS 157 | static inline void read_filelist( const std::string& in, 158 | std::vector& out ) 159 | { 160 | std::string str; 161 | std::ifstream file( in ); 162 | while( std::getline( file, str ) ) { 163 | str = hecate::trim(str); 164 | if( str.length()==0 ) continue; 165 | if( str.at(0)=='#' ) continue; 166 | 167 | if( hecate::file_exists( str ) ) 168 | out.push_back( str ); 169 | else 170 | fprintf( stderr, "File doesn't exist: %s\n", str.c_str() ); 171 | 172 | } 173 | }; 174 | 175 | static inline void split_string( char* in, 176 | std::vector& out, 177 | const char* delimiter) 178 | { 179 | out.clear(); 180 | char *token = in; 181 | while( (token=strsep(&in,delimiter)) != NULL ) 182 | out.push_back( hecate::trim(token) ); 183 | }; 184 | 185 | static inline std::string exec(const char* cmd) { 186 | FILE* pipe = popen(cmd, "r"); 187 | if (!pipe) return "ERROR"; 188 | char buffer[1024]; 189 | std::string result = ""; 190 | while (!feof(pipe)) { 191 | if (fgets(buffer, 1024, pipe) != NULL) 192 | result += buffer; 193 | } 194 | pclose(pipe); 195 | return result; 196 | }; 197 | 198 | static inline std::string which(const char* bin) { 199 | char buf[1024]; 200 | sprintf( buf, "echo `which %s`", bin ); 201 | std::string result = hecate::exec(buf); 202 | result.erase( result.find_last_not_of(" \n\r\t")+1); 203 | return result; 204 | } 205 | } 206 | #endif 207 | 208 | -------------------------------------------------------------------------------- /src/hecate/hecate_main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * HECTATE Video Processing Library - Main 3 | * 4 | * Copyright 2016 Yahoo Inc. 5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 
7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | */ 10 | 11 | #include "hecate/hecate.hpp" 12 | 13 | using namespace std; 14 | using namespace cv; 15 | 16 | void run_hecate( hecate_params& opt, vector& v_thumb_idx, 17 | vector& v_gif_range, 18 | vector& v_mov_range) 19 | { 20 | if( !hecate::file_exists(opt.in_video) ) { 21 | fprintf(stderr, "File not exist: %s\n", opt.in_video.c_str()); 22 | return; 23 | } 24 | 25 | v_thumb_idx.clear(); 26 | v_gif_range.clear(); 27 | v_mov_range.clear(); 28 | 29 | hecate::Clock::time_point t0; 30 | hecate::VideoParser parser; 31 | 32 | vector v_shot_range; 33 | Mat histo_features; 34 | Mat diff_features; 35 | 36 | 37 | //////////////////////////////////////////////////////////////////////////// 38 | // 39 | // Parse video 40 | // 41 | //////////////////////////////////////////////////////////////////////////// 42 | 43 | if( opt.debug ) { 44 | printf("run_hecate: Video segmentation and keyframe extraction\n"); 45 | t0 = hecate::Clock::now(); 46 | } 47 | 48 | hecate::parser_params parser_opt; 49 | parser_opt.step_sz = opt.step_sz; 50 | parser_opt.gfl = opt.gfl; 51 | parser_opt.fltr_begin_sec = ( opt.fltr_begin_sec<0 ) 52 | ? max(0.5, 0.05 * parser.meta.duration) : opt.fltr_begin_sec; 53 | parser_opt.fltr_end_sec = ( opt.fltr_end_sec<0 ) 54 | ? max(0.5, 0.10 * parser.meta.duration) : opt.fltr_end_sec; 55 | parser_opt.max_duration = opt.max_duration; 56 | parser_opt.ignore_rest = (opt.max_duration>0); // ignore parts after max_nfrms 57 | parser_opt.debug = opt.debug; 58 | 59 | // PARSE 60 | v_shot_range = parser.parse_video( opt.in_video, parser_opt ); 61 | if( v_shot_range.empty() ) { 62 | fprintf(stderr, "run_hecate: Failed to parse the video\n"); 63 | return; 64 | } 65 | 66 | histo_features = parser.get_frame_features(); 67 | diff_features = parser.get_frame_diff_features(); 68 | opt.step_sz = parser.get_effective_step_size(); 69 | 70 | // If video is shorter than desired summary length 71 | if( opt.mov && opt.lmov >= parser.meta.duration ) { 72 | fprintf( stderr, "run_hecate: Video duration is %.2f seconds, " 73 | "shorter than the requested summary of length %.2f seconds.\n" 74 | "\tVideo summarization is disabled.", 75 | parser.meta.duration, (double)opt.lmov); 76 | opt.mov = false; 77 | } 78 | 79 | // Check desired resolution of output 80 | if( opt.jpg_width_px<0 || opt.jpg_width_px > parser.meta.width ) { 81 | //fprintf( stderr, "run_hecate: Forcing jpg_width_px to %d\n",parser.meta.width); 82 | opt.jpg_width_px = parser.meta.width; 83 | } 84 | if( opt.gif_width_px<0 || opt.gif_width_px > parser.meta.width ) { 85 | //fprintf( stderr, "run_hecate: Forcing gif_width_px to %d\n",parser.meta.width); 86 | opt.gif_width_px = parser.meta.width; 87 | } 88 | if( opt.mov_width_px<0 || opt.mov_width_px > parser.meta.width ) { 89 | //fprintf( stderr, "run_hecate: Forcing mov_width_px to %d\n",parser.meta.width); 90 | opt.mov_width_px = parser.meta.width; 91 | } 92 | 93 | if( opt.debug ) { 94 | hecate::print_elapsed_time( t0, "run_hecate" ); 95 | hecate::print_video_metadata( opt.in_video, parser.meta ); 96 | } 97 | 98 | 99 | 100 | //////////////////////////////////////////////////////////////////////////// 101 | // 102 | // Analyze video 103 | // 104 | //////////////////////////////////////////////////////////////////////////// 105 | 106 | // Print shot info 107 | if( opt.info_shot ) { 108 | printf("shots: "); 109 | for(size_t i=0; i keyfrms; 120 | for(size_t i=0; i& v_shot_range, 28 | const Mat& X, const Mat& diff, 29 | vector& v_thumb_idx) 30 | { 31 
| v_thumb_idx.clear(); 32 | 33 | const int minK = 5; // min #clusters 34 | const int maxK = 30; // max #clusters 35 | const int nfrm = meta.nframes; 36 | 37 | vector v_frm_valid(nfrm,false); 38 | for(size_t i=0; i::max(); 49 | for( int i=0; i(i); 51 | if( val v_shot_len; 62 | vector v_keyfrm_idx; 63 | for(size_t i=0; imax_subshot_len ) { 69 | max_subshot_len = shotlen; 70 | max_subshot_id = j; 71 | } 72 | } 73 | v_shot_len.push_back(max_subshot_len); 74 | v_keyfrm_idx.push_back(v_shot_range[i].v_range[max_subshot_id].v_idx[0]); 75 | } 76 | 77 | // Include keyframes sorted by shot length 78 | vector v_srt_idx; // contains sorted indices 79 | vector v_srt_val; // contains sorted values 80 | hecate::sort( v_shot_len, v_srt_val, v_srt_idx ); 81 | for(size_t i=0; i v_valid_frm_idx; 86 | vector v_valid_frm_shotlen; 87 | for(size_t i=0; i Kset; 107 | //for(int i=km_k; i<=min(nfrm_valid,(int)(2*km_k)); i++) 108 | // Kset.push_back( i ); 109 | //km_k = hecate::perform_kmeans_gs( km_data, km_lbl, km_ctr, Kset, 3, 500 ); 110 | 111 | // measure cluster size 112 | vector clust_sz(km_k,0); 113 | for(int i=0; i(i) ] += v_valid_frm_shotlen[i]; 115 | 116 | // sort wrt cluster size in an ascending order 117 | vector v_srt_idx; // contains cluster id 118 | vector v_srt_val; // contains cluster size 119 | hecate::sort( clust_sz, v_srt_val, v_srt_idx ); 120 | 121 | // obtain thumbnails -- the most still frame per cluster 122 | for(int i=0; i::max(); 125 | for(int j=0; j(j) == v_srt_idx[km_k-i-1] ) { 127 | double mean_diff_j = diff.at(v_valid_frm_idx[j]); 128 | if( mean_diff_j& v_thumb_idx ) 151 | { 152 | char strbuf[256]; 153 | int njpg_cnt = 0; 154 | int frm_idx = 0; 155 | 156 | string filename = hecate::get_filename( std::string(opt.in_video) ); 157 | 158 | VideoCapture vr( opt.in_video ); 159 | double rsz_ratio = (double)(2+opt.jpg_width_px)/vr.get(CV_CAP_PROP_FRAME_WIDTH); 160 | while( njpg_cnt < (int)v_thumb_idx.size() ) 161 | { 162 | Mat frm; vr>>frm; 163 | if( frm.empty() ) break; 164 | 165 | // Check if the current frame is in the selected thumbnail list 166 | int rank = -1; 167 | for( size_t i=0; i=0 && rank= opt.njpg) { 185 | break; 186 | } 187 | 188 | frm_idx++; 189 | } 190 | vr.release(); 191 | } 192 | 193 | -------------------------------------------------------------------------------- /include/hecate/video_parser.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Video Parser 3 | * 4 | * Copyright 2016 Yahoo Inc. 5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 
7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | */ 10 | 11 | #ifndef HECATE_VIDEO_PARSER_HPP 12 | #define HECATE_VIDEO_PARSER_HPP 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include // mkdir 19 | #include // access() function 20 | 21 | #include // display date and time 22 | #include // for standard I/O 23 | #include // for file I/O 24 | #include // for strings 25 | #include // for controlling float print precision 26 | #include // string to number conversion 27 | #include 28 | #include 29 | #include 30 | 31 | // OpenMP library 32 | #if defined(_OPENMP) 33 | #include 34 | #endif 35 | 36 | // OpenCV library 37 | #include 38 | #include 39 | #include 40 | #include 41 | 42 | #if CV_MAJOR_VERSION > 3 43 | #include 44 | #endif 45 | 46 | #include "hecate/sort.hpp" 47 | #include "hecate/gflseg.hpp" 48 | #include "hecate/gapstat.hpp" 49 | #include "hecate/shot_range.hpp" 50 | #include "hecate/hist_opencv.hpp" 51 | #include "hecate/image_metrics.hpp" 52 | 53 | using namespace std; 54 | using namespace cv; 55 | 56 | namespace hecate { 57 | 58 | // video metadata struct 59 | struct video_metadata { 60 | int nframes; 61 | int width; 62 | int height; 63 | double duration; // in seconds 64 | double fps; 65 | }; 66 | 67 | struct parser_params { 68 | int step_sz; 69 | double fltr_begin_sec; 70 | double fltr_end_sec; 71 | double max_duration; // in seconds 72 | bool gfl; // use group-fused lasso to refine boundaries 73 | bool fltr_lq; // filter low-quality frames 74 | bool fltr_rdt; // filter redundant frames 75 | bool debug; 76 | bool ignore_rest; // if video is too long, ignore the rest (true) or 77 | // adjust step_sz (false). for vidtag this should be 78 | // false, while for hecate this should be true 79 | 80 | parser_params(): 81 | step_sz(1), 82 | fltr_begin_sec(0), 83 | fltr_end_sec(0), 84 | max_duration(-1), 85 | gfl(false), 86 | fltr_lq(true), 87 | fltr_rdt(true), 88 | debug(false), 89 | ignore_rest(false) 90 | {}; 91 | }; 92 | 93 | inline void print_video_metadata( const string filename, 94 | const hecate::video_metadata m ) { 95 | printf("%s\n seconds=%.2f, nframes=%d, fps=%.2f, resolution=%dx%d\n", 96 | filename.c_str(), m.duration, m.nframes, m.fps, m.width, m.height); 97 | }; 98 | 99 | class VideoParser { 100 | public: 101 | VideoParser(); 102 | 103 | /* Perform video parsing */ 104 | // max_duration is there to handle too long videos 105 | // (100K frms ~= 1 hr long with 30 fps) 106 | // filter_first and filter_last is there to filter out 107 | // a few frames from the beginning and at the end 108 | // to manually handle logos, ending credits, etc. 
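  //
  // Illustrative usage sketch (added for clarity; not part of the original
  // header. The real call site is in src/hecate/hecate_main.cpp.)
  //
  //   hecate::VideoParser parser;
  //   hecate::parser_params opt;   // defaults: step_sz=1, gfl=false, max_duration=-1
  //   opt.step_sz = 2;             // e.g., analyze every other frame
  //   std::vector<hecate::ShotRange> shots = parser.parse_video("video.mp4", opt);
  //   // parser.meta now holds nframes, width, height, fps, and duration
  //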
109 | vector parse_video(const string& in_video, 110 | hecate::parser_params opt); 111 | 112 | /* Get the number of valid frames */ 113 | int get_nfrm_valid(); 114 | 115 | /* Display video with shot boundary information */ 116 | void play_video_filtered(const string& in_video, 117 | int step_sz=1, 118 | int max_frm_len=360); 119 | 120 | /* Get a vector of booleans representing valid frames */ 121 | inline void get_frm_valid(vector& vec) {vec=_v_frm_valid;} 122 | 123 | /* Get effective step size */ 124 | inline int get_effective_step_size() {return _step_sz;} 125 | 126 | /* Get extracted features */ 127 | inline const Mat get_frame_features() {return _X_feat;} 128 | inline const Mat get_frame_diff_features() {return _X_diff;} 129 | 130 | 131 | private: 132 | /* Read video into the memory */ 133 | int read_video(const string& in_video, 134 | int step_sz=1, 135 | double max_duration=30*60, 136 | bool ignore_rest=false, 137 | int max_frm_len=160); 138 | 139 | /* Filter first end last few frames */ 140 | void filter_heuristic(double fltr_begin_sec=0, 141 | double fltr_end_sec=0); 142 | 143 | /* Filter out low-quality frames */ 144 | void filter_low_quality(double thrsh_bright=0.075, 145 | double thrsh_sharp=0.08, 146 | double thrsh_uniform=0.80 ); 147 | 148 | /* Filter out frames during transition */ 149 | void filter_transition(double thrsh_diff=0.50, 150 | double thrsh_ecr=0.10 ); 151 | 152 | /* Filter out redundant frames */ 153 | void filter_redundant_and_obtain_subshots(); 154 | 155 | /* Update shot ranges, filter out shots if too short */ 156 | void update_shot_ranges(int min_shot_len=5); 157 | 158 | /* Perform post processing. Break up shots if too long */ 159 | void post_process(double min_shot_sec=2.0, bool gfl=false); 160 | 161 | /* Perform SBD using heuristics: add next big diff if 162 | the new shot length is longer than min_shot_len */ 163 | void sbd_heuristic(vector v_diff, vector& jump, 164 | int njumps, int min_shot_len ); 165 | 166 | // Helper functions 167 | 168 | /* Extract pyramid of histogram features */ 169 | void extract_histo_features(int pyr_level=2, 170 | bool omit_filtered=true, 171 | int nbin_color_hist=128, 172 | int nbin_edge_ori_hist=8, 173 | int nbin_edge_mag_hist=8); 174 | 175 | void mark_invalid( vector& vec, int idx, int wnd_sz=0 ); 176 | void mark_invalid( vector& vec, vector& vec2, 177 | int idx, const string msg, int wnd_sz=0 ); 178 | 179 | void release_memory(); 180 | 181 | 182 | public: 183 | hecate::video_metadata meta; 184 | vector _v_frm_valid; // filtered frames 185 | vector _v_frm_log; // filtered frames msgs (debug) 186 | 187 | private: 188 | bool _debug; 189 | bool _display; 190 | int _step_sz; 191 | int _nfrm_total; // number of frames BEFORE sampling 192 | int _nfrm_given; // number of frames AFTER sampling 193 | int _video_width; 194 | int _video_height; 195 | double _video_fps; 196 | double _video_sec; 197 | 198 | Mat _X_feat; // frame-wise feature representation 199 | Mat _X_diff; // n-by-1 frame-by-frame difference first-order derivative 200 | Mat _X_ecr; // n-by-1 ecr first-order derivative 201 | 202 | vector _v_frm_rgb; // rgb frames 203 | vector _v_frm_gray; // gray-scale frames 204 | 205 | vector _v_shot_ranges; 206 | }; 207 | } 208 | 209 | 210 | #endif 211 | -------------------------------------------------------------------------------- /include/hecate/cc_parser.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Video caption parser (VTT and TTML) 3 | * 4 | * Copyright 2016 Yahoo Inc. 
5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | */ 10 | 11 | #ifndef HECATE_CC_PARSER_HPP 12 | #define HECATE_CC_PARSER_HPP 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include "hecate/time.hpp" 23 | 24 | using namespace std; 25 | 26 | namespace hecate { 27 | 28 | struct tcc { 29 | double start; // milli-seconds 30 | double end; // milli-seconds 31 | string text; 32 | }; 33 | 34 | inline double time_str2num( const string str_time ) 35 | { 36 | int hh,mm,ss,ms; 37 | 38 | hh = stoi( str_time.substr(0,2) ); 39 | mm = stoi( str_time.substr(3,2) ); 40 | ss = stoi( str_time.substr(6,2) ); 41 | ms = stoi( str_time.substr(9,3) ); 42 | 43 | return 60*60*hh + 60*mm + ss + (double)ms/1000.0; 44 | } 45 | 46 | 47 | inline string truncate_caption( const string src, const int maxlen ) 48 | { 49 | string dst = src; 50 | while( (int)dst.length()>maxlen ) 51 | { 52 | size_t pos = dst.find_last_of(" "); 53 | dst = dst.substr(0,pos); 54 | } 55 | return dst; 56 | } 57 | 58 | inline string clean_caption( const string src ) 59 | { 60 | string dst = src; 61 | size_t pos, pos2; 62 | 63 | // Remove special characters 64 | char chars[] = ".,;:@#$%^&*\"><_+=~`/"; 65 | for(unsigned i=0; i(), ' '))); 70 | dst.erase(find_if(dst.rbegin(), dst.rend(), bind1st(not_equal_to(), ' ')).base(), dst.end()); 71 | 72 | // Remove leadning words 73 | vector lwords = {"And ","But ","Or ","and ","Then ","then "}; 74 | for(size_t i=0; i> [%s] -> [%s]\n", lwords[i].c_str(), dst.c_str(), dst.substr(lwords[i].length(),string::npos).c_str()); 79 | dst = dst.substr(lwords[i].length(),string::npos); 80 | } 81 | } 82 | 83 | // Remove trailing words 84 | dst = dst + " "; 85 | vector words = {" and "," or "," but "," however "," to "," of "," so ", 86 | " because "," who "," when "," where "," what "," how ", " why ", 87 | " while "," which "," with "," the "}; 88 | for(size_t i=0; i> [%s] -> [%s]\n", words[i].c_str(), dst.c_str(), dst.substr(0,pos).c_str()); 96 | dst = dst.substr(0,pos) + " "; 97 | } 98 | } 99 | 100 | // Remove annotation inside a bracket, e.g., [UNKNOWN], [LAUGH], [MUSIC], etc. 101 | pos = dst.find("["); 102 | while( pos!=string::npos ) 103 | { 104 | pos2 = dst.find("]"); 105 | dst = dst.substr(0,pos) + dst.substr(pos2+1,string::npos); 106 | pos = dst.find("["); 107 | } 108 | 109 | // Remove double ?? 
110 | pos = dst.find("??"); 111 | while( pos!=string::npos ) 112 | { 113 | dst = dst.substr(0,pos) + dst.substr(pos+2,string::npos); 114 | pos = dst.find("??"); 115 | } 116 | 117 | // Remove double space 118 | pos = dst.find(" "); 119 | while( pos!=string::npos ) 120 | { 121 | dst = dst.substr(0,pos) + dst.substr(pos+1,string::npos); 122 | pos = dst.find(" "); 123 | } 124 | 125 | // Remove leading & trailing spaces 126 | dst.erase(dst.begin(), find_if(dst.begin(), dst.end(), bind1st(not_equal_to(), ' '))); 127 | dst.erase(find_if(dst.rbegin(), dst.rend(), bind1st(not_equal_to(), ' ')).base(), dst.end()); 128 | 129 | 130 | return dst; 131 | } 132 | 133 | 134 | 135 | inline void print_closed_caption( vector& cc ) 136 | { 137 | for(size_t i=0; i %.2f: [%s]\n",cc[i].start,cc[i].end,cc[i].text.c_str()); 139 | } 140 | 141 | 142 | 143 | inline void parse_vtt( const string filename, vector& vtt ) 144 | { 145 | string line; 146 | 147 | ifstream fid; 148 | fid.open( filename ); 149 | getline( fid, line ); 150 | while( fid ) 151 | { 152 | size_t pos = line.find(" --> "); 153 | if( pos!=string::npos ) 154 | { 155 | tcc cc; 156 | 157 | // time 158 | cc.start = time_str2num( line.substr( 0, pos ) ); 159 | cc.end = time_str2num( line.substr( pos+5, string::npos) ); 160 | 161 | // text 162 | getline( fid, line ); 163 | line = clean_caption( line ); 164 | 165 | pos = line.find("???"); 166 | if( pos==string::npos && line.length()>3 ) { 167 | cc.text = line; 168 | vtt.push_back( cc ); 169 | } 170 | } 171 | getline( fid, line ); 172 | } 173 | } 174 | 175 | inline void parse_ttml( const string filename, vector& ttml ) 176 | { 177 | size_t pos1, pos2, pos3; 178 | string line, item; 179 | 180 | string p_start = "

en-US"); 189 | if( pos1==string::npos ) 190 | return; 191 | 192 | // Parse TTML 193 | while( fid ) 194 | { 195 | pos1 = line.find(p_start); 196 | while( pos1!=string::npos ) 197 | { 198 | pos2 = line.find(p_end); 199 | item = line.substr(pos1, pos2-pos1); 200 | line = line.substr( pos2+3, string::npos ); 201 | 202 | pos1 = item.find("begin="); 203 | pos2 = item.find("end="); 204 | pos3 = item.find("\">"); 205 | 206 | tcc cc; 207 | cc.start = time_str2num( item.substr(pos1+7,12) ); 208 | cc.end = time_str2num( item.substr(pos2+5,12) ); 209 | cc.text = clean_caption( item.substr(pos3+2,string::npos) ); 210 | ttml.push_back( cc ); 211 | 212 | pos1 = line.find(p_start); 213 | } 214 | getline( fid, line ); 215 | } 216 | 217 | //print_closed_caption( ttml ); 218 | } 219 | 220 | // 221 | inline string encode_vtt( int index, float start_sec, float end_sec, 222 | vector& v_msg ) 223 | { 224 | char buf[512]; 225 | string start_sec_str = hecate::second2string( start_sec, "hh:mm:ss.mss" ); 226 | string end_sec_str = hecate::second2string( end_sec, "hh:mm:ss.mss" ); 227 | sprintf( buf, "%d\n%s --> %s\n", index, start_sec_str.c_str(), 228 | end_sec_str.c_str()); 229 | 230 | for(size_t i=0; i $@.$(WARNS_EXT) \ 219 | || (cat $@.$(WARNS_EXT); exit 1) 220 | @ cat $@.$(WARNS_EXT) 221 | 222 | # Target for extension-less tool binaries 223 | $(TOOL_BUILD_DIR)/%: $(TOOL_BUILD_DIR)/% | $(TOOL_BUILD_DIR) 224 | @ $(RM) $@ 225 | @ ln -s $(abspath $<) $@ 226 | 227 | $(TOOL_BINS): % : %.o | $(DYNAMIC_NAME) 228 | @ echo CXX/LD -o $@ 229 | $(Q)$(CXX) $< -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(PROJECT)\ 230 | -Wl,-rpath,$(ORIGIN)/../lib 231 | 232 | clean: 233 | @- $(RM) -rf $(ALL_BUILD_DIRS) 234 | @- $(RM) -rf $(OTHER_BUILD_DIR) 235 | @- $(RM) -rf $(BUILD_DIR_LINK) 236 | @- $(RM) -rf $(DISTRIBUTE_DIR) 237 | 238 | $(DIST_ALIASES): $(DISTRIBUTE_DIR) 239 | 240 | $(DISTRIBUTE_DIR): all | $(DISTRIBUTE_SUBDIRS) 241 | # add include 242 | cp -r include $(DISTRIBUTE_DIR)/ 243 | # add tool binaries 244 | cp $(TOOL_BINS) $(DISTRIBUTE_DIR)/bin 245 | # add libraries 246 | cp $(STATIC_NAME) $(DISTRIBUTE_DIR)/lib 247 | install -m 644 $(DYNAMIC_NAME) $(DISTRIBUTE_DIR)/lib 248 | 249 | -include $(DEPS) 250 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HECATE 2 | Hecate [hek-uh-tee] is a video processing library that auto-magically generates thumbnails, animated GIFs, and video summaries from videos. This library is developed and maintained by Yahoo Research, New York. 3 | 4 | The source code is Copyright 2016 Yahoo Inc. and is licensed under the terms of the Apache 2.0 License. See the [LICENSE](https://github.com/yahoo/hecate/blob/master/LICENSE) in the project root file for terms. 5 | 6 | The technology behind this library is based on our research work. If you find this library useful in your work, we ask you to cite our research paper: 7 | ``` 8 | "To Click or Not To Click: Automatic Selection of Beautiful Thumbnails from Videos." 9 | Yale Song, Miriam Redi, Jordi Vallmitjana, Alejandro Jaimes, 10 | Proceedings of the 25th ACM International on Conference on Information and Knowledge Management, CIKM 2016 11 | ``` 12 | 13 | ## Installation 14 | Hecate has one dependency: [OpenCV library](https://github.com/opencv/opencv) with an [FFMPEG](https://github.com/FFmpeg/FFmpeg) support. You will need to install the library properly before trying out Hecate! 
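If you are not sure whether your OpenCV build includes FFMPEG support, one quick sanity check is to inspect OpenCV's build information before compiling Hecate. This is only a minimal sketch: it assumes `pkg-config` metadata and the OpenCV Python bindings happen to be installed on your machine, which may not match your setup.
```
$ pkg-config --modversion opencv   # the module is named "opencv4" on OpenCV 4.x installs
$ python -c "import cv2; print(cv2.getBuildInformation())" | grep -i ffmpeg
```
If the second command prints a line like `FFMPEG: YES`, the video I/O backend that Hecate relies on is available.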
15 | 16 | Once you have installed the dependent library correctly, follow the instructions below: 17 | ``` 18 | $ git clone https://github.com/yahoo/hecate.git 19 | $ cd hecate 20 | $ vim Makefile.config 21 | - Set INCLUDE_DIRS and LIBRARY_DIRS to where your 22 | opencv library is installed. Usually under /usr/local. 23 | - If your OpenCV version is 2.4.x, comment out the line 24 | OPENCV_VERSION := 3 25 | - Save and exit 26 | $ make all 27 | $ make distribute 28 | ``` 29 | 30 | Once you've successfully compiled hecate, it will generate a binary executable under `distribute/bin/`. Run the following command to check if everything works properly: 31 | ``` 32 | $ ./distribute/bin/hecate 33 | 34 | -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 35 | HECATE Copyright 2016 Yahoo Inc. 36 | Licensed under the terms of the Apache 2.0 License. 37 | Developed by : Yale Song (yalesong@yahoo-inc.com) 38 | Built on : 11:46:03 Aug 11 2016 39 | -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= 40 | USAGE: hecate -i infile [options] 41 | 42 | -i --in_video (string) Input video file 43 | -o --out_dir (string) Output directory (./output) 44 | -s --step (int) Frame subsampling step size (1) 45 | -n --njpg (int) Number of thumbnails to be generated (5) 46 | -q --ngif (int) Number of GIFs to be generated (5) 47 | -r --lmov (int) Length of video summary to be generated (in seconds) (15) 48 | -u --jpg_width_px (int) Pixel width of thumbnail images (360) 49 | -v --gif_width_px (int) Pixel width of animated GIFs (360) 50 | -w --mov_width_px (int) Pixel width of summary video (360) 51 | --generate_jpg Generate thumbnail images 52 | --generate_gif Generate animated GIFs 53 | --generate_mov Generate a summary video 54 | --generate_gifsum Generate animated GIFs summary 55 | --generate_gifall Generate all possible animated GIFs 56 | --print_shot_info Print shot boundary detection results 57 | --print_keyfrm_info Print keyframe indices 58 | ``` 59 | 60 | Congratulations! You have successfully installed hecate! 61 | 62 | 63 | ## Get started 64 | In order to get started, we will need a video file to play with. In this example, we will use the video ["The Spirit of '43" by Walt Disney](https://archive.org/details/TheSpiritOf43_56) from [The Internet Archive](https://archive.org). 65 | 66 | Let's download the video and save it as `examples/video.mp4`: 67 | ``` 68 | $ wget https://archive.org/download/TheSpiritOf43_56/The_Spirit_of__43_512kb.mp4 \ 69 | --output-document examples/video.mp4 --no-check-certificate 70 | ``` 71 | 72 | Hecate provides three main functionalities through a binary executable `hecate`: Thumbnail extraction, GIF generation, and video summarization. The library also provides various other functionalities, such as shot boundary detection and keyframe extraction. 73 | 74 | We will explain each case below. 75 | 76 | ### Shot boundary detection and keyframe extraction 77 | Shot boundary detection and keyframe extraction are often the first steps towards various video processing methods. With Hecate, obtaining shot and keyframe information is easier than ever!
Simply run the following command to get the result: 78 | ``` 79 | $ ./distribute/bin/hecate -i examples/video.mp4 --print_shot_info --print_keyfrm_info 80 | ``` 81 | 82 | Below are the results we obtained on our dev machine (OS X 10.10 with OpenCV v3.1): 83 | ``` 84 | shots: [0:81],[84:93],[96:102],[108:270],[272:418],...,[9966:10131],[10135:10164] 85 | keyframes: [52,85,98,128,165,208,242,259,265,273,...,10127,10141] 86 | ``` 87 | The units are frame indices (zero-based). You will notice that shot ranges are non-continuous; there are "gaps" between shots, e.g., two frames are missing between the first two shots [0:81] and [84:93]. This is normal and intentional: Hecate discards low-quality frames that aren't ideal for producing nice-looking thumbnails, animated GIFs, and video summaries. We refer to our CIKM 2016 paper for the rationale behind invalidating low-quality frames. 88 | 89 | ### Thumbnail generation 90 | Hecate uses computer vision to determine frames that are most "suitable" as video thumbnails. By suitable, we mean a frame that is the most relevant to the video content and that is the most beautiful in terms of computational aesthetics; technical details are explained in our CIKM 2016 paper. 91 | 92 | You can generate thumbnail images using Hecate. Run the following command to generate one thumbnail image from the video. 93 | ``` 94 | $ ./distribute/bin/hecate -i examples/video.mp4 --generate_jpg --njpg 1 95 | ``` 96 | You will see the generated thumbnail image under the output directory (set as `output` by default; you can change this using the option `--out_dir YOUR_DIRECTORY`). On our dev machine we get this thumbnail image: 97 | 98 | ![alt text](https://github.com/yahoo/hecate/blob/master/examples/video_00.jpg "Hecate Thumbnail Image") 99 | 100 | In the above example, we generated only one thumbnail image. Are you not satisfied with the thumbnail image? Hecate can generate any number of thumbnail images! Let's generate three thumbnail images. 101 | ``` 102 | $ ./distribute/bin/hecate -i examples/video.mp4 --generate_jpg --njpg 3 103 | ``` 104 | 105 | The output files are named after the input video, with a zero-based rank suffix (e.g., `video_00.jpg`). The files are ranked by their quality (rank 0 means it's the best one). 106 | 107 | ### Animated GIF generation 108 | Do you want to create animated GIFs from a video without the hassle of using manual tools? Hecate can automatically create them for you! Run the following command to create one animated GIF from the video. 109 | ``` 110 | $ ./distribute/bin/hecate -i examples/video.mp4 --generate_gif --ngif 1 111 | ``` 112 | On our dev machine, we get this animated GIF: 113 | 114 | ![alt text](https://github.com/yahoo/hecate/blob/master/examples/video_00.gif "Hecate Animated GIF") 115 | 116 | You can, of course, create more than just one GIF by setting the parameter `--ngif N` with an appropriate number N. When there are multiple GIFs, you can also generate a "summary GIF" by concatenating them, using this command: 117 | ``` 118 | $ ./distribute/bin/hecate -i examples/video.mp4 --generate_gif --ngif 3 --generate_gifsum 119 | ``` 120 | 121 | If you'd rather obtain all available GIFs from the video, use the following command: 122 | ``` 123 | $ ./distribute/bin/hecate -i examples/video.mp4 --generate_gifall 124 | ``` 125 | 126 | ### Video summary generation 127 | Last but not least, Hecate can summarize a video! Run the following command to create a video summary of length 15 seconds.
128 | ``` 129 | $ ./distribute/bin/hecate -i examples/video.mp4 --generate_mov --lmov 15 130 | ``` 131 | We included the video summary generated on our dev machine here: 132 | [https://github.com/yahoo/hecate/blob/master/examples/video_sum.mp4](https://github.com/yahoo/hecate/blob/master/examples/video_sum.mp4) 133 | 134 | 135 | ## Developer 136 | 137 | Yale Song: [github](https://github.com/yalesong), [website](http://people.csail.mit.edu/yalesong) 138 | -------------------------------------------------------------------------------- /include/hecate/hist_opencv.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Compute histogram of [x] using OpenCV 3 | * 4 | * Copyright 2016 Yahoo Inc. 5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | */ 10 | 11 | #ifndef HECATE_HIST_OPENCV_HPP 12 | #define HECATE_HIST_OPENCV_HPP 13 | 14 | #include 15 | #include 16 | 17 | #if CV_MAJOR_VERSION > 2 18 | #include 19 | #endif 20 | 21 | using namespace cv; 22 | using namespace std; 23 | 24 | namespace hecate { 25 | 26 | 27 | inline void print( const Mat x, const string filename ) 28 | { 29 | ofstream myfile; 30 | myfile.open( filename ); 31 | myfile << x; 32 | myfile.close(); 33 | } 34 | 35 | inline void calc_gray_hist( const Mat& img, Mat& hist, int nbins=128 ) 36 | { 37 | // Gray histogram 38 | float _range_val_gry[] = {0,256}; 39 | const float* _range_gry = {_range_val_gry}; 40 | 41 | calcHist( &img, 1, 0, Mat(), hist, 1, &nbins, &_range_gry, true, false); 42 | normalize( hist, hist ); 43 | } 44 | 45 | inline void calc_color_hist( const Mat& img, Mat& hist, int nbins=128 ) 46 | { 47 | // Color histogram 48 | float _range_val_clr[] = {0,256}; 49 | const float* _range_clr = {_range_val_clr}; 50 | 51 | // CV_BGR2HSV, CV_BGR2HLS, CV_BGR2YCrCb 52 | Mat img_cvt; 53 | cvtColor( img, img_cvt, CV_BGR2HSV ); 54 | 55 | vector planes; 56 | split( img_cvt, planes ); 57 | 58 | if( !hist.data ) 59 | hist = Mat( 3*nbins, 1, CV_32F, Scalar(0,0,0)[0] ); 60 | 61 | Mat hist0( hist, Rect(0,0*nbins,1,nbins) ); 62 | Mat hist1( hist, Rect(0,1*nbins,1,nbins) ); 63 | Mat hist2( hist, Rect(0,2*nbins,1,nbins) ); 64 | 65 | calcHist( &planes[0], 1, 0, Mat(), hist0, 1, &nbins, &_range_clr, true, false); 66 | calcHist( &planes[1], 1, 0, Mat(), hist1, 1, &nbins, &_range_clr, true, false); 67 | calcHist( &planes[2], 1, 0, Mat(), hist2, 1, &nbins, &_range_clr, true, false); 68 | 69 | normalize( hist0, hist0 ); 70 | normalize( hist1, hist1 ); 71 | normalize( hist2, hist2 ); 72 | } 73 | 74 | 75 | inline void orientation( const Mat& Gx, const Mat& Gy, Mat& ori ) 76 | { 77 | double alpha = 180.0 / 3.14159265358979323846264338327950288419716939937510; 78 | 79 | ori = Mat( Gx.rows, Gx.cols, CV_32F, Scalar(0,0,0)[0] ); 80 | for(int r=0; r(r,c), Gx.at(r,c)) * alpha; 83 | deg = (deg>=0) ? deg : deg + 360.0; 84 | deg = (deg<180.0) ? 
deg : deg - 180.0; 85 | ori.at(r,c) = deg; 86 | } 87 | } 88 | } 89 | 90 | 91 | inline void calc_edge_hist( const Mat& Gx, const Mat& Gy, Mat& hist, int nbins_ori=16, int nbins_mag=16 ) 92 | { 93 | // Edge histogram 94 | float _range_val_ori[] = {0,180}; 95 | float _range_val_mag[] = {0,256}; 96 | const float* _range_ori = {_range_val_ori}; 97 | const float* _range_mag = {_range_val_mag}; 98 | 99 | Mat ori, mag; 100 | orientation( Gx, Gy, ori ); 101 | cv::magnitude( Gx, Gy, mag ); 102 | 103 | if( !hist.data ) 104 | hist = Mat( nbins_ori+nbins_mag, 1, CV_32F, Scalar(0,0,0)[0] ); 105 | 106 | Mat hist_ori( hist, Rect(0,0,1,nbins_ori) ); 107 | Mat hist_mag( hist, Rect(0,nbins_ori,1,nbins_mag) ); 108 | 109 | calcHist( &ori, 1, 0, Mat(), hist_ori, 1, &nbins_ori, &_range_ori, true, false ); 110 | calcHist( &mag, 1, 0, Mat(), hist_mag, 1, &nbins_mag, &_range_mag, true, false ); 111 | 112 | normalize( hist_ori, hist_ori ); 113 | normalize( hist_mag, hist_mag ); 114 | } 115 | 116 | inline void calc_edge_hist( const Mat& img_gray, Mat& hist, int nbins_ori=16, int nbins_mag=16 ) 117 | { 118 | Mat Gx, Gy; 119 | Scharr( img_gray, Gx, CV_32F, 1, 0 ); // ddepth, dx, dy 120 | Scharr( img_gray, Gy, CV_32F, 0, 1 ); 121 | 122 | calc_edge_hist( Gx, Gy, hist, nbins_ori, nbins_mag ); 123 | } 124 | 125 | 126 | inline void calc_pyr_gray_hist( const Mat& img, Mat& hist, int nbins=128, int level=2) 127 | { 128 | int w = img.cols; 129 | int h = img.rows; 130 | 131 | int npatches = 0; 132 | for(int i=0; i 21 | #include 22 | 23 | #if CV_MAJOR_VERSION > 2 24 | #include 25 | #endif 26 | 27 | #include "hecate/sort.hpp" 28 | 29 | using namespace std; 30 | using namespace cv; 31 | 32 | namespace hecate { 33 | 34 | class Segmenter { 35 | public: 36 | /* 37 | Automatic multiple change-point detection 38 | 39 | This function is a wrapper for multi-dimensional signal segmentation into at 40 | least k change-points by the LARS (function gflars), followed by change-point 41 | selection with a dynamic programming optimization (function dpseg). 42 | 43 | INPUT 44 | X : n-by-p matrix to be segmented (n: # obs, p: # dim). 45 | k : the number of change points to find in the first segmentation step 46 | theta : stopping criteria, see dpseg. (default=0.1) 47 | forcek : k is set as the hard maximum number of change points 48 | 49 | OUTPUT 50 | jumps : vector of change-points 51 | val : matrix of the value on each interval 52 | updown : the up/down statistics on each interval 53 | */ 54 | void gflseg(const Mat& X, vector& jumps, int k, double theta=0.1); 55 | 56 | 57 | 58 | /* 59 | Segmentation of a multi-dimensional signal with dynamic programming 60 | http://pbil.univ-lyon1.fr/members/fpicard/franckpicard_fichiers/pdf/aCGH-stat.pdf 61 | Improvement is made by penalizing with the optimal segment length, 62 | lambda=sqrt(lstar-l) where lstar = n/k (typically chosen as 2-second long) 63 | 64 | INPUT 65 | X : n-by-p matrix to be segmented (n: # obs, p: # dim). 66 | cp : candidate change points (default: [0:n-2]) 67 | kmax : maximum number of change-points to test (default: length(cp)) 68 | theta : stopping criteria. Typically chonse to be in the interval (0 0.5]. 69 | The smaller the threshold, the higher the tendency to keep more 70 | breakpoints. The criteria is based on the method found in 71 | 'Picard et al (2005)' "A statistical approach for array CGH data 72 | analysis" (BMC Bioinformatics). (default=0.5) 73 | 74 | OUTPUT 75 | jump : j-by-1 vector of change-point positions for the i-th lambda value 76 | (j depends on lambda). 
i varies between 1 and kmax 77 | rse : (kmax+1)-by-1 vector of residual squared error 78 | kbest : the number of selected change-points 79 | */ 80 | void dpseg(const Mat& X, const vector& cp, vector >& jumps, 81 | Mat& rse, int& kbest, double theta, int kmax=-1 ); 82 | 83 | 84 | private: 85 | /* 86 | Segmentation of a multi-dimensional signal with the group fused LARS. 87 | 88 | INPUT 89 | X : n-by-p matrix to be segmented (n: # obs, p: # dim). 90 | k : the number of change points to find. 91 | epsilon : values smaller than epsilon are considered null. 92 | weights : (n-1)*1 vector of weights for the weighted graph fused LASSO. 93 | 94 | OUTPUT 95 | lambda : estimated lambda values for each change-point 96 | jump : successive change-point positions (1 x k) 97 | meansignal : mean signal per column (1 x p vector) 98 | */ 99 | void gflars(const Mat& X, vector& jumps, vector& lambda, 100 | const int k, const double epsilon ); 101 | 102 | 103 | /* 104 | Fast computation of Y' * X 105 | 106 | Compute R = Y'*X, where Y is the n-by-(n-1) design matrix for the weighted 107 | group fused lasso, with weights defined by the vector w, and X is a n-by-p 108 | data matrix. The computation is done in O(np). 109 | 110 | INPUT 111 | X : n-by-p matrix 112 | w : (n-1)-by-1 vector of weights 113 | 114 | OUTPUT 115 | R : (n-1)-by-p matrix equal to W' * X 116 | */ 117 | void leftmultiplybyXt(const Mat& X, const Mat& w, Mat& R); 118 | 119 | 120 | /* 121 | Fast computation of inv(W'*W)*X 122 | 123 | Compute R = inv(W(:,ind)'*W(:,ind))*X, where W is the n-by-(n-1) design matrix 124 | 125 | INPUT 126 | X : a-by-p matrix 127 | ind : a-by-1 vector of indices between 1 and n-1, sorted in an ascending order 128 | w : (n-1)-by-1 vector of weights 129 | n : the size of X is n-by-(n-1) 130 | 131 | OUTPUT 132 | R : a-by-p matrix equal to inv(W'*W)*X 133 | */ 134 | void leftmultiplybyinvXAtXA(const Mat& X, const vector& ind, 135 | const Mat& w, const int n, Mat& R); 136 | 137 | 138 | /* 139 | Fast computation of W' * W * X when X is sparse 140 | 141 | Compute R = W'*W*X, where X is a wor-sparse (n-1)-by-p matrix and W is the 142 | n-by-(n-1) design matrix 143 | 144 | INPUT 145 | X : a-by-p matrix whose rows are the non-zero rows of the original X 146 | (same order as ind) 147 | ind : a-by-1 vector of indices of the non-zero rows of X (each in [1,n-1]) 148 | w : (n-1)-by-1 vector of weights 149 | n : size of the problem 150 | 151 | OUTPUT 152 | R : (n-1)-by-p matrix equal to W'*W*X 153 | */ 154 | void multiplyXtXbysparse(const Mat& X, const vector& ind, 155 | const Mat& w, const int n, Mat& R); 156 | 157 | 158 | /* UTILITY FUNCTIONS */ 159 | 160 | // dst = src(I) 161 | inline void get_subvector(const Mat& src, Mat& dst, const vector& I) 162 | { 163 | dst = Mat( I.size(), 1, src.type() ); 164 | for( size_t i=0; i(i) = src.at(I[i]); 166 | }; 167 | 168 | // dst = src(I,:) 169 | inline void get_submatrix_row(const Mat& src, Mat& dst, const vector& I) 170 | { 171 | dst = Mat( I.size(), src.cols, src.type() ); 172 | for( size_t i=0; i(c) = cv::sum(src.col(c).mul(src.col(c)))[0]; 194 | } 195 | else { 196 | dst = Mat( src.rows, 1, src.type() ); 197 | for(int r=0; r(r) = cv::sum(src.row(r).mul(src.row(r)))[0]; 199 | } 200 | } 201 | 202 | inline void check_nan(vector& v) { 203 | for( size_t i=0; i(r,c)); 216 | else 217 | printf("%d ", X.at(r,c)); 218 | } 219 | printf("\n"); 220 | } 221 | printf("];\n\n"); 222 | } 223 | 224 | inline void print(vector v, string name) 225 | { 226 | cout << name << "_c=["; 227 | for(size_t i=0; i v, string 
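The comments above document Segmenter::gflseg() as the public entry point for change-point detection. A hedged caller sketch follows: the synthetic signal and the k/theta values are illustrative only, the jumps vector is assumed to hold int indices (as the .cpp implementation suggests), the feature matrix is CV_64F (as the caller in video_parser.cpp converts it), and linking against src/hecate/gflseg.cpp is assumed.

```
// Hedged usage sketch for hecate::Segmenter::gflseg() as documented above.
#include <cstdio>
#include <vector>
#include <opencv2/core.hpp>
#include "hecate/gflseg.hpp"

int main() {
  const int n = 300, p = 4;
  cv::Mat X(n, p, CV_64F);
  cv::randn(X, 0.0, 1.0);
  X.rowRange(100, 200) += 3.0;   // mean shift: change points near 100 and 200

  hecate::Segmenter seg;
  std::vector<int> jumps;
  seg.gflseg(X, jumps, /*k=*/10, /*theta=*/0.1);

  for (int j : jumps) std::printf("change point near row %d\n", j);
  return 0;
}
```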
name) 233 | { 234 | cout << name << "_c=["; 235 | for(size_t i=0; i 15 | #include 16 | #include 17 | #include 18 | #include // mkdir 19 | #include // access() function 20 | 21 | #include // display date and time 22 | #include // for standard I/O 23 | #include // for file I/O 24 | #include // for strings 25 | #include // for controlling float print precision 26 | #include // string to number conversion 27 | #include 28 | #include 29 | #include 30 | 31 | // OpenMP library 32 | #if defined(_OPENMP) 33 | #include 34 | #endif 35 | 36 | // Hecate headers 37 | #include "hecate/sort.hpp" 38 | #include "hecate/time.hpp" 39 | #include "hecate/gapstat.hpp" 40 | #include "hecate/knapsack.hpp" 41 | #include "hecate/cc_parser.hpp" 42 | #include "hecate/file_helper.hpp" 43 | #include "hecate/video_parser.hpp" 44 | #include "hecate/ffmpeg_helper.hpp" 45 | 46 | // OpenCV library 47 | #include 48 | #include 49 | 50 | using namespace std; 51 | 52 | // program options 53 | struct hecate_params { 54 | string in_video; 55 | string out_dir; 56 | string caption; 57 | int step_sz; // frame subsampling step size 58 | int njpg; // number of thumbnail images 59 | int ngif; // number of GIFs 60 | int lmov; // length of video summary (in seconds) 61 | int gif_fps; // gif play speed 62 | int jpg_width_px; // thumbnail image width 63 | int gif_width_px; // animated GIF width 64 | int mov_width_px; // summary video width 65 | int max_duration; // maximum length of video to process (in seconds) 66 | double fltr_begin_sec;// always filter out x-second frames at the beginning 67 | double fltr_end_sec; // always filter out x-second frames at the end 68 | double invalid_wnd; // window for dropping neighbor frames of low-quality ones 69 | bool jpg; // generate thumbnail jpg 70 | bool gif; // generate summary gif 71 | bool mov; // generate summary movie 72 | bool gifsum; // if enabled, combine individual GIFs into one summary 73 | bool gifall; // generate all possible gifs (for debugging purpose) 74 | bool info_shot; // print shot boundary info 75 | bool info_keyfrm; // print key frame indices 76 | bool prefer_dynamic; // if enabled, prefer dynamic scene in highlight 77 | bool gfl; // run group-fused lasso as part of shot segmentation 78 | bool fade; // disable fade-in/out during shot transition 79 | bool debug; 80 | bool display; 81 | 82 | hecate_params(): 83 | out_dir("./output"), 84 | caption(""), 85 | step_sz(1), 86 | njpg(5), 87 | ngif(5), 88 | lmov(15), 89 | gif_fps(8), 90 | jpg_width_px(360), 91 | gif_width_px(360), 92 | mov_width_px(360), 93 | max_duration(-1), 94 | fltr_begin_sec(-1.0), 95 | fltr_end_sec(-1.0), 96 | invalid_wnd(0.15), 97 | jpg(false), 98 | gif(false), 99 | mov(false), 100 | gifsum(false), 101 | gifall(false), 102 | info_shot(false), 103 | info_keyfrm(false), 104 | prefer_dynamic(true), 105 | gfl(false), 106 | fade(false), 107 | debug(false), 108 | display(false) 109 | {}; 110 | }; 111 | 112 | inline void hecate_parse_params(int argc, char** argv, hecate_params& opt) 113 | { 114 | static struct option long_options[] = { 115 | {"in_video", required_argument, 0, 'i'}, 116 | {"out_dir", required_argument, 0, 'o'}, 117 | {"step", required_argument, 0, 's'}, 118 | {"njpg", required_argument, 0, 'n'}, 119 | {"ngif", required_argument, 0, 'q'}, 120 | {"lmov", required_argument, 0, 'l'}, 121 | {"gif_fps", required_argument, 0, 'f'}, 122 | {"jpg_width_px", required_argument, 0, 'u'}, 123 | {"gif_width_px", required_argument, 0, 'v'}, 124 | {"mov_width_px", required_argument, 0, 'w'}, 125 | {"max_duration", 
required_argument, 0, 'd'}, 126 | {"fltr_begin_sec", required_argument, 0, 'a'}, 127 | {"fltr_end_sec", required_argument, 0, 'b'}, 128 | {"invalid_wnd", required_argument, 0, 'k'}, 129 | {"generate_jpg", no_argument, 0, 'J'}, 130 | {"generate_gif", no_argument, 0, 'G'}, 131 | {"generate_mov", no_argument, 0, 'M'}, 132 | {"optimize_gif", no_argument, 0, 'O'}, 133 | {"generate_gifsum", no_argument, 0, 'S'}, 134 | {"generate_gifall", no_argument, 0, 'A'}, 135 | {"print_shot_info", no_argument, 0, 'T'}, 136 | {"print_keyfrm_info", no_argument, 0, 'K'}, 137 | {"prefer_dynamic", no_argument, 0, 'V'}, 138 | {"gfl", no_argument, 0, 'B'}, 139 | {"fade", no_argument, 0, 'F'}, 140 | {"debug", no_argument, 0, 'D'}, 141 | {"display", no_argument, 0, 'C'}, 142 | {0,0,0,0} 143 | }; 144 | 145 | while( true ) { 146 | int opt_idx=0; 147 | int c = getopt_long( argc, argv, 148 | "0:1:2:3:4:5:6:7:8:9:" 149 | "a:b:c:d:e:f:g:h:i:j:k:l:m:n:o:p:q:r:s:t:u:v:w:x:y:z:" 150 | "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z", 151 | long_options, &opt_idx); 152 | if( c==-1 ) break; 153 | switch( c ) { 154 | case 'i': opt.in_video = optarg; break; 155 | case 'o': opt.out_dir = optarg; break; 156 | case 's': opt.step_sz = atoi(optarg); break; 157 | case 'n': opt.njpg = atoi(optarg); break; 158 | case 'q': opt.ngif = atoi(optarg); break; 159 | case 'l': opt.lmov = atoi(optarg); break; 160 | case 'f': opt.gif_fps = atoi(optarg); break; 161 | case 'u': opt.jpg_width_px = atoi(optarg); break; 162 | case 'v': opt.gif_width_px = atoi(optarg); break; 163 | case 'w': opt.mov_width_px = atoi(optarg); break; 164 | case 'd': opt.max_duration = atof(optarg); break; 165 | case 'a': opt.fltr_begin_sec = atof(optarg); break; 166 | case 'b': opt.fltr_end_sec = atof(optarg); break; 167 | case 'k': opt.invalid_wnd = atof(optarg); break; 168 | case 'J': opt.jpg = true; break; 169 | case 'G': opt.gif = true; break; 170 | case 'M': opt.mov = true; break; 171 | case 'S': opt.gifsum = opt.gif = true; break; 172 | case 'A': opt.gifall = opt.gif = true; break; 173 | case 'T': opt.info_shot = true; break; 174 | case 'K': opt.info_keyfrm = true; break; 175 | case 'V': opt.prefer_dynamic = true; break; 176 | case 'B': opt.gfl = true; break; 177 | case 'F': opt.fade = true; break; 178 | case 'D': opt.debug = true; break; 179 | case 'C': opt.display = true; break; 180 | } 181 | } 182 | 183 | // Create output dir (silently fails if dir already exists) 184 | mkdir( opt.out_dir.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH ); 185 | } 186 | 187 | 188 | 189 | inline void hecate_copyright() 190 | { 191 | printf("\n"); 192 | printf("-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"); 193 | printf(" HECATE Copyright 2016 Yahoo Inc.\n"); 194 | printf(" Licensed under the terms of the Apache 2.0 License.\n"); 195 | printf(" Developed by : Yale Song (yalesong@yahoo-inc.com)\n"); 196 | printf(" Built on : %s %s\n", __TIME__, __DATE__ ); 197 | printf("-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"); 198 | } 199 | 200 | inline void hecate_usage() 201 | { 202 | hecate_params opt; 203 | printf("USAGE: hecate -i infile [options]\n"); 204 | printf("\n"); 205 | printf(" -i --in_video (string) Input video file\n"); 206 | printf(" -o --out_dir (string) Output directory (%s)\n", opt.out_dir.c_str()); 207 | printf(" -s --step (int) Frame subsampling step size (%d)\n", opt.step_sz); 208 | printf(" -n --njpg (int) Number of thumbnails to be generated (%d)\n", opt.njpg); 209 | printf(" -q --ngif (int) Number of GIFs to be generated (%d)\n", 
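hecate_parse_params() above uses the standard getopt_long idiom: a long-option table mapped onto short-option characters, consumed in a switch loop. A compact standalone sketch of the same idiom; the options shown ("--input", "--verbose") are placeholders, not the hecate option set.

```
// Minimal getopt_long sketch mirroring the structure of hecate_parse_params().
#include <cstdio>
#include <cstdlib>
#include <string>
#include <getopt.h>

int main(int argc, char** argv) {
  std::string input;
  bool verbose = false;

  static struct option long_options[] = {
    {"input",   required_argument, 0, 'i'},
    {"verbose", no_argument,       0, 'v'},
    {0, 0, 0, 0}
  };

  int c, opt_idx = 0;
  while ((c = getopt_long(argc, argv, "i:v", long_options, &opt_idx)) != -1) {
    switch (c) {
      case 'i': input = optarg;  break;
      case 'v': verbose = true;  break;
      default:  std::exit(EXIT_FAILURE);
    }
  }
  std::printf("input=%s verbose=%d\n", input.c_str(), (int)verbose);
  return 0;
}
```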
opt.ngif); 210 | printf(" -r --lmov (int) Length of video summary to be generated (in seconds) (%d)\n", opt.lmov); 211 | printf(" -u --jpg_width_px (int) Pixel width of thumbnail images (%d)\n", opt.jpg_width_px); 212 | printf(" -v --gif_width_px (int) Pixel width of animated GIFs (%d)\n", opt.gif_width_px); 213 | printf(" -w --mov_width_px (int) Pixel width of summary video (%d)\n", opt.mov_width_px); 214 | printf(" --generate_jpg Generate thumbnail images\n"); 215 | printf(" --generate_gif Generate animated GIFs\n"); 216 | printf(" --generate_mov Generate a summary video\n"); 217 | printf(" --generate_gifsum Generate animated GIFs summary\n"); 218 | printf(" --generate_gifall Generate all possible animated GIFs\n"); 219 | printf(" --print_shot_info Print valid shot ranges\n"); 220 | printf(" --print_keyfrm_info Print keyframe indices\n"); 221 | 222 | exit(-1); 223 | } 224 | 225 | 226 | 227 | /******************************************************************************* 228 | 229 | HECTATE INTERFACE 230 | 231 | INPUT: 232 | hecate_params opt : input option paramaters 233 | 234 | OUTPUT: 235 | vector v_thumb_idx : thumbnail frame indices 236 | vector v_gif_range : highlight shot ranges for GIF creation 237 | vector v_mov_range : highlight shot ranges for video summarization 238 | 239 | *******************************************************************************/ 240 | void run_hecate( hecate_params& opt, 241 | vector& v_thumb_idx, 242 | vector& v_gif_range, 243 | vector& v_mov_range ); 244 | 245 | inline void run_hecate( hecate_params& opt, vector& v_thumb_idx) { 246 | vector v_gif_range, v_mov_range; 247 | run_hecate( opt, v_thumb_idx, v_gif_range, v_mov_range ); 248 | } 249 | 250 | inline void run_hecate( hecate_params& opt, vector& v_range) { 251 | vector v_thumb_idx; 252 | vector v_xxx_range; 253 | 254 | if( opt.gif ) 255 | run_hecate( opt, v_thumb_idx, v_range, v_xxx_range ); 256 | else if( opt.mov ) 257 | run_hecate( opt, v_thumb_idx, v_xxx_range, v_range ); 258 | } 259 | 260 | 261 | /******************************************************************************* 262 | 263 | THUMBNAIL IMAGE GENERATION MODULE 264 | 265 | INPUT: 266 | hecate_params opt : input option paramaters 267 | video_metadata meta : video metadata 268 | vector v_shot_range 269 | : shot boundaries 270 | const Mat& X : input features 271 | const Mat& diff : n-by-1 vector of frame-by-frame difference scores 272 | 273 | OUTPUT: 274 | vector v_thumb_idx : vector of frame index numbers for thumbnails 275 | 276 | *******************************************************************************/ 277 | 278 | void detect_thumbnail_frames( hecate_params& opt, 279 | hecate::video_metadata& meta, 280 | const vector& v_shot_range, 281 | const Mat& X, 282 | const Mat& diff, 283 | vector& v_thumb_idx); 284 | 285 | void generate_thumbnails( hecate_params& opt, vector& v_thumb_idx ); 286 | 287 | 288 | 289 | 290 | /******************************************************************************* 291 | 292 | VIDEO SUMMARIZATION MODULE 293 | 294 | INPUT: 295 | hecate_params opt : input option paramaters 296 | video_metadata meta : video metadata 297 | vector v_shot_range 298 | : shot boundaries 299 | const Mat& X : input features 300 | const Mat& diff : n-by-1 vector of frame-by-frame difference scores 301 | 302 | OUTPUT: 303 | vector v_highlight_range 304 | : vector of highlight shot ranges 305 | 306 | *******************************************************************************/ 307 | 308 | void detect_highlight_shots( 
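The interface block above declares run_hecate() and its convenience overloads. A hedged caller sketch follows; the element types (int thumbnail indices, hecate::Range shot ranges) are inferred from the comments above, the header path and option values are illustrative, and linking against the hecate sources (hecate_main.cpp etc.) is assumed.

```
// Hedged caller sketch for the run_hecate() interface declared above.
#include <cstdio>
#include <vector>
#include "hecate/hecate.hpp"

int main() {
  hecate_params opt;
  opt.in_video = "examples/video.mp4";  // placeholder path
  opt.out_dir  = "./output";
  opt.mov      = true;                  // request a summary video
  opt.lmov     = 15;                    // 15-second summary

  std::vector<int> thumb_idx;
  std::vector<hecate::Range> gif_range, mov_range;
  run_hecate(opt, thumb_idx, gif_range, mov_range);

  std::printf("selected %zu thumbnail frames, %zu summary shots\n",
              thumb_idx.size(), mov_range.size());
  return 0;
}
```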
hecate_params& opt, 309 | hecate::video_metadata& meta, 310 | const vector& v_shot_range, 311 | const Mat& X, 312 | const Mat& diff, 313 | vector& v_highlight_range ); 314 | 315 | void generate_highlight_clips( hecate_params& opt, 316 | vector& v_highlight_range ); 317 | 318 | 319 | //////////////////////////////////////////////////////////////////////////////// 320 | // 321 | // VARIOUS HELPER FUNCTIONS 322 | // 323 | //////////////////////////////////////////////////////////////////////////////// 324 | 325 | 326 | inline void mark_invalid( vector& vec, int idx, int wnd_sz=0 ) 327 | { 328 | int vec_len = (int)vec.size(); 329 | for(int i=max(0,idx-wnd_sz); i<=min(vec_len-1,idx+wnd_sz); i++) 330 | vec[i] = false; 331 | } 332 | 333 | inline void expand_invalid_frms( vector& valid, int k ) 334 | { 335 | vector valid_new = valid; 336 | 337 | int nfrm = (int) valid.size(); 338 | for( int pos=1; pos 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | using namespace cv; 26 | using namespace std; 27 | 28 | namespace hecate { 29 | 30 | 31 | #define VALIDATE(x) ( std::isnan(x) ) ? 0 : x 32 | 33 | /*------------------------------------------------------------------------ 34 | Contrast is measured as the standard deviation of the pixel intensities. 35 | RMS contrast does not depend on the angular frequency content or the 36 | spatial distribution of contrast in the image 37 | https://en.wikipedia.org/wiki/Contrast_(vision)#RMS_contrast 38 | ------------------------------------------------------------------------*/ 39 | inline double calc_rms_contrast( const Mat& gray_img ) 40 | /*-----------------------------------------------------------------------*/ 41 | 42 | { 43 | Mat I; 44 | gray_img.convertTo( I, CV_32FC1 ); 45 | Scalar mu = cv::mean( I ); 46 | I = I - mu[0]; 47 | return VALIDATE(cv::norm(I)/sqrt(I.rows*I.cols)); 48 | } 49 | 50 | /*------------------------------------------------------------------------ 51 | Sharpness is measured as the sum of magnitude in frequency domain 52 | ------------------------------------------------------------------------*/ 53 | inline double calc_sharpness( const Mat& gray_img ) 54 | /*-----------------------------------------------------------------------*/ 55 | { 56 | Mat img; 57 | gray_img.convertTo( img, CV_32FC1 ); 58 | img *= 1./255; 59 | 60 | Mat dx, dy; 61 | Sobel( img, dx, img.type(), 1, 0, 3 ); 62 | Sobel( img, dy, img.type(), 0, 1, 3 ); 63 | magnitude( dx, dy, dx ); 64 | 65 | int npixels = gray_img.rows * gray_img.cols; 66 | return VALIDATE(cv::sum(dx)[0] / npixels); 67 | } 68 | 69 | 70 | /*------------------------------------------------------------------------ 71 | Brightness is measured as the relative luminance in colorimetric spaces 72 | https://en.wikipedia.org/wiki/Relative_luminance 73 | ------------------------------------------------------------------------*/ 74 | inline double calc_brightness( const Mat& img ) 75 | /*-----------------------------------------------------------------------*/ 76 | { 77 | vector bgr; 78 | split( img, bgr ); 79 | for(size_t j=0; j(j); 107 | } 108 | return VALIDATE(val); 109 | } 110 | 111 | /*------------------------------------------------------------------------ 112 | Symmetry is measured as the difference between edge orientation & magnitude 113 | histograms of the left and right halves. 
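calc_rms_contrast() above defines contrast as the standard deviation of pixel intensities, computed as ||I - mean(I)|| / sqrt(#pixels). A standalone check of that definition on synthetic images (a flat image should score near zero, a noisy one much higher):

```
// Standalone check of the RMS-contrast definition used in image_metrics.hpp.
#include <cstdio>
#include <cmath>
#include <opencv2/core.hpp>

double rms_contrast(const cv::Mat& gray) {
  cv::Mat I;
  gray.convertTo(I, CV_32FC1);
  cv::Scalar mu = cv::mean(I);
  I -= mu[0];
  return cv::norm(I) / std::sqrt((double)I.rows * I.cols);
}

int main() {
  cv::Mat flat(64, 64, CV_8UC1, cv::Scalar(128));   // zero contrast
  cv::Mat noisy(64, 64, CV_8UC1);
  cv::randu(noisy, 0, 256);                         // high contrast

  std::printf("flat: %.4f  noisy: %.4f\n", rms_contrast(flat), rms_contrast(noisy));
  return 0;
}
```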
114 | ------------------------------------------------------------------------*/ 115 | inline double calc_asymmetry( const Mat& gray_img ) 116 | /*-----------------------------------------------------------------------*/ 117 | { 118 | Mat img_l( gray_img, Rect(0,0,gray_img.cols/2,gray_img.rows) ); 119 | Mat img_r( gray_img, Rect(gray_img.cols/2,0,gray_img.cols/2,gray_img.rows) ); 120 | 121 | Mat hist_l, hist_r; 122 | hecate::calc_edge_hist( img_l, hist_l ); 123 | hecate::calc_edge_hist( img_r, hist_r ); 124 | 125 | return VALIDATE((double)cv::norm( hist_l-hist_r )); 126 | } 127 | 128 | /*------------------------------------------------------------------------ 129 | Third Saliency is measured as the residual saliency on the resized image, 130 | then collects the average saliency for each sub-window. It's an indicator 131 | of the informativeness of each window, but also of the general distribution 132 | of the content in the image. 133 | ------------------------------------------------------------------------*/ 134 | inline vector calc_third_saliency( const Mat& gray_img ) 135 | { 136 | const int w = 3; // divide 128-by-128 image into 3-by-3 patches 137 | 138 | // 0-1 normalize & resize to 128-by-128 139 | Mat img; 140 | gray_img.convertTo( img, CV_32FC1 ); img *= 1./255; 141 | cv::resize( img, img, cv::Size(128,128), 0, 0, cv::INTER_AREA ); 142 | 143 | //real is img image, im is zero 144 | Mat planes[] = {Mat_(img), Mat::zeros(img.size(),CV_32FC1)}; 145 | Mat planes2[] = {Mat::zeros(img.size(),CV_32FC1), Mat::zeros(img.size(),CV_32FC1)}; 146 | Mat planesr[] = {Mat::zeros(img.size(),CV_32FC1), Mat::zeros(img.size(),CV_32FC1)}; 147 | 148 | Mat complexI, magI, phasem; 149 | merge( planes, 2, complexI ); // Merges for FT 150 | dft( complexI, complexI ); //Forward DFT 151 | split( complexI, planes ); // planes[0] = Re(DFT(I), planes[1] = Im(DFT(I)) 152 | magnitude( planes[0], planes[1], magI ); //DFT magnitude (amplitude) 153 | phase( planes[0], planes[1], phasem ); //DFT phase 154 | 155 | //log spectral amplitude 156 | cv::log( magI, magI ); 157 | 158 | Mat smooth, kernel; 159 | smooth = Mat::zeros( img.size(), CV_32FC1 ); 160 | kernel = Mat::zeros( Size(10,10), CV_32FC1 ); 161 | kernel += Scalar::all( 0.01 ); //kernel for average filtering ( 0.01=1/(10*10) ) 162 | 163 | //smoothed spectrum with average filtering 164 | cv::filter2D( magI, smooth, CV_32FC1, kernel, Point(-1,-1), 0, BORDER_REPLICATE ); 165 | 166 | Mat diff; 167 | cv::subtract( magI, smooth, diff ); //spectral residual (log domain) 168 | cv::exp( diff, diff ); //spectral residual (real domain) 169 | 170 | //recover real and im part of the DFT after the residual 171 | polarToCart( diff, phasem, planes2[0], planes2[1] ); 172 | 173 | //invert the DFT (we are back in the pixel domain! 
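calc_third_saliency() above is built on spectral-residual saliency (Hou & Zhang, CVPR 2007): the residual between the log amplitude spectrum and its local average is mapped back to the pixel domain as a saliency map, which the project then pools over a 3x3 grid. The sketch below is a minimal version of just the saliency-map step; the resize target, the 3x3 averaging kernel, and the small epsilon are illustrative choices, not the project's exact values.

```
// Minimal spectral-residual saliency sketch (same idea as calc_third_saliency,
// without the 3x3 pooling).
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

cv::Mat spectral_residual_saliency(const cv::Mat& gray) {
  cv::Mat img;
  gray.convertTo(img, CV_32FC1, 1.0 / 255.0);
  cv::resize(img, img, cv::Size(128, 128), 0, 0, cv::INTER_AREA);

  // Forward DFT, split into magnitude and phase
  cv::Mat planes[] = {img, cv::Mat::zeros(img.size(), CV_32FC1)};
  cv::Mat complexI, mag, phase;
  cv::merge(planes, 2, complexI);
  cv::dft(complexI, complexI);
  cv::split(complexI, planes);
  cv::magnitude(planes[0], planes[1], mag);
  cv::phase(planes[0], planes[1], phase);

  // Spectral residual = log amplitude minus its local average
  cv::Mat logMag, smooth, residual;
  cv::log(mag + 1e-6f, logMag);            // epsilon avoids log(0)
  cv::blur(logMag, smooth, cv::Size(3, 3));
  cv::exp(logMag - smooth, residual);

  // Back to the pixel domain
  cv::Mat planes2[2], result;
  cv::polarToCart(residual, phase, planes2[0], planes2[1]);
  cv::merge(planes2, 2, complexI);
  cv::dft(complexI, result, cv::DFT_INVERSE | cv::DFT_SCALE);
  cv::split(result, planes2);
  cv::magnitude(planes2[0], planes2[1], result);

  cv::normalize(result, result, 0, 1, cv::NORM_MINMAX);
  return result;   // per-pixel saliency in [0,1]
}
```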
We have just created a Saliency Component) 174 | Mat result; 175 | merge( planes2, 2, complexI ); 176 | dft( complexI, result, DFT_INVERSE+DFT_SCALE ); 177 | split( result, planesr ); // planes[0] = Re(DFT(I), planes[1] = Im(DFT(I)) 178 | magnitude( planesr[0], planesr[1], result ); // sqrt(Re(DFT(I))^2 + Im(DFT(I))^2) 179 | 180 | normalize( result, result, 0, 2, cv::NORM_MINMAX ); 181 | 182 | //compute the window dimensions 183 | float cx = result.cols/w; 184 | float cy = result.rows/w; 185 | vector features; 186 | 187 | //local mean for each window 188 | for( int a=0; a(i)!=0 ) 214 | val -= ghist.at(i) * log2(ghist.at(i)); 215 | } 216 | return VALIDATE(val); 217 | } 218 | 219 | 220 | /*------------------------------------------------------------------------ 221 | Contrast balance is measured as the l2 distance between an original 222 | image and contrast-equalized image. 223 | ------------------------------------------------------------------------*/ 224 | inline double calc_contrast_balance( const Mat& img ) 225 | /*-----------------------------------------------------------------------*/ 226 | { 227 | double res = 0; 228 | 229 | vector hsv_planes; 230 | split( img, hsv_planes ); 231 | 232 | //for each channel 233 | for (int pl=0; pl < 3; ++pl) 234 | { 235 | //equalize the histogram 236 | Mat img_hist_equalized; 237 | equalizeHist( hsv_planes[pl], img_hist_equalized ); 238 | 239 | //compute the diff between original and equalized histograms 240 | res += cv::norm( hsv_planes[pl], img_hist_equalized ) / 241 | (double)(img.rows*img.cols); 242 | } 243 | 244 | //the larger the difference, the larger the contrast error. so we negate 245 | return VALIDATE(-res); 246 | } 247 | 248 | 249 | /*------------------------------------------------------------------------ 250 | Exposure balance is measured as an absolute value of the skewness 251 | of the luminance histogram. 
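The entropy metric above accumulates -p * log2(p) over the non-zero bins of a normalized gray-level histogram. A tiny standalone version of that computation on toy probability vectors (a uniform 128-bin distribution gives 7 bits, a single spike gives 0):

```
// Sketch of the Shannon-entropy computation used in the metric above.
// The toy distributions are proper probability vectors for illustration.
#include <cstdio>
#include <cmath>
#include <vector>

double shannon_entropy(const std::vector<double>& p) {
  double h = 0.0;
  for (double pi : p)
    if (pi > 0.0) h -= pi * std::log2(pi);
  return h;
}

int main() {
  std::vector<double> uniform(128, 1.0 / 128.0);   // maximally "busy" histogram
  std::vector<double> peaked(128, 0.0);
  peaked[0] = 1.0;                                 // single gray level

  std::printf("uniform: %.3f bits, peaked: %.3f bits\n",
              shannon_entropy(uniform), shannon_entropy(peaked));
  return 0;
}
```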
252 | ------------------------------------------------------------------------*/ 253 | inline double calc_exposure_balance( const Mat& img ) 254 | /*-----------------------------------------------------------------------*/ 255 | { 256 | vector hsv_planes; 257 | split( img, hsv_planes ); 258 | 259 | /// Establish the number of bins 260 | const int histSize = 256; 261 | 262 | /// Set the ranges (for B,G,R) 263 | float range[] = { 0, 256 }; 264 | const float *histRange = { range }; 265 | 266 | cv::Mat hist; 267 | double res = 0; 268 | for (int pl=0; pl < 3; ++pl) 269 | { 270 | /// Compute the histograms: 271 | cv::calcHist( &hsv_planes[pl], 1, 0, cv::Mat(), hist, 1, &histSize, &histRange, 272 | /*uniform: */true, /*accumulate: */false ); 273 | cv::Scalar sump = cv::sum( hist ); 274 | 275 | double mean = (double) sump[0] / hist.rows; 276 | 277 | cv::Mat prod; 278 | cv::multiply( (hist-mean), (hist-mean), prod); 279 | cv::Scalar sums = cv::sum( prod ); 280 | 281 | cv::multiply( prod, (hist-mean), prod); 282 | cv::Scalar sumskew = cv::sum( prod ); 283 | 284 | double result[2]; 285 | result[0] = sqrt( (double) sums[0] / hist.rows ); 286 | result[1] = (double) sumskew[0] / (hist.rows * result[0]*result[0]*result[0]); 287 | 288 | //absolute value of the skewness gives an idea of how much 289 | //the histogram of colors is incorrect 290 | res += std::abs( result[1] ); 291 | } 292 | 293 | // the larger the skew, the larger the exposure error 294 | return VALIDATE(-res / 3); 295 | } 296 | 297 | /*------------------------------------------------------------------------ 298 | JPEG quality is measured using the no-reference quality estimation of 299 | Z. Wang, H. R. Sheikh, and A. C. Bovik. No-reference perceptual quality 300 | assessment of jpeg compressed images. In ICIP, 2002. 
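calc_exposure_balance() above scores each channel by the absolute skewness of its histogram, skew = E[(x - mu)^3] / sigma^3, using the same population-moment form as the code (divide by n, not n-1). A standalone version of that statistic on synthetic data:

```
// Standalone sketch of the skewness statistic used in calc_exposure_balance().
#include <cstdio>
#include <cmath>
#include <vector>

double skewness(const std::vector<double>& x) {
  const double n = (double)x.size();
  double mu = 0.0;
  for (double v : x) mu += v;
  mu /= n;

  double m2 = 0.0, m3 = 0.0;
  for (double v : x) {
    const double d = v - mu;
    m2 += d * d;
    m3 += d * d * d;
  }
  const double sigma = std::sqrt(m2 / n);
  return (m3 / n) / (sigma * sigma * sigma);
}

int main() {
  std::vector<double> right_heavy = {0, 0, 0, 1, 1, 2, 2, 3, 10, 20};
  std::printf("skewness = %.3f\n", skewness(right_heavy));  // positive: long right tail
  return 0;
}
```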
301 | ------------------------------------------------------------------------*/ 302 | inline double calc_jpeg_quality( const Mat& gray_img ) 303 | /*-----------------------------------------------------------------------*/ 304 | { 305 | // Convert to grayscale image with 64bit doubles 306 | cv::Mat img; 307 | gray_img.convertTo( img, CV_32FC1 ); 308 | 309 | const int m = img.rows; 310 | const int n = img.cols; 311 | 312 | if (m<16 || n<16) 313 | { 314 | return -2.0; 315 | } 316 | 317 | // feature extraction: horizontal features 318 | Mat dh, dh1, dh2; 319 | dh1 = img( Rect(1, 0, n-1, m) ); 320 | dh2 = img( Rect(0, 0, n-1, m) ); 321 | dh = dh1 - dh2; 322 | 323 | double sum = 0; 324 | int count = 0; 325 | double sumz = 0; 326 | for (int i=0; i < m; ++i) { 327 | for (int j=0; j < n-2; ++j) { 328 | if ((j+1)%8==0 && j>0 && (j+1)<8*floor(n/8)) 329 | { 330 | sum += std::abs( dh.at(i,j) ); 331 | count++; 332 | } 333 | double signval = copysign( 1.0, dh.at(i,j) ) * 334 | copysign( 1.0, dh.at(i,j+1) ); 335 | if (signval < 0) 336 | sumz += 1; 337 | } 338 | } 339 | 340 | double bh = sum / count; 341 | double ah = (8.0 * mean( cv::abs(dh) )[0] - bh) / 7; 342 | double zh = sumz / (m * (n-2)); 343 | 344 | // feature extraction: vertical features 345 | Mat dv1, dv2, dv; 346 | dv1 = img( Rect(0, 1, n, m-1) ); 347 | dv2 = img( Rect(0, 0, n, m-1) ); 348 | dv = dv1 - dv2; 349 | 350 | sum = 0; 351 | count = 0; 352 | sumz = 0; 353 | for (int i=0; i < m-2; ++i) { 354 | for (int j=0; j < n; ++j) { 355 | if ((i+1)%8==0 && i>0 && (i+1)<8*floor(m/8)) 356 | { 357 | sum += std::abs( dv.at(i,j) ); 358 | count++; 359 | } 360 | double signval = copysign( 1.0, dv.at(i,j) ) * 361 | copysign( 1.0, dv.at(i,j+1) ); 362 | if (signval < 0) 363 | sumz += 1; 364 | } 365 | } 366 | 367 | double bv = sum / count; 368 | double av = (8.0 * mean( cv::abs(dv) )[0] - bv) / 7; 369 | double zv = sumz / (n * (m-2)); 370 | 371 | //combined features 372 | double B = (bh + bv) / 2; 373 | double A = (ah + av) / 2; 374 | double Z = (zh + zv) / 2; 375 | 376 | // Quality Prediction 377 | const double alpha = -245.8909; 378 | const double beta = 261.9373; 379 | const double gamma1 = -239.8886; 380 | const double gamma2 = 160.1664; 381 | const double gamma3 = 64.2859; 382 | 383 | double score = alpha + beta * 384 | std::pow( B, (gamma1/10000) ) * 385 | std::pow( A, (gamma2/10000) ) * 386 | std::pow( Z, (gamma3/10000) ); 387 | 388 | return VALIDATE(score); 389 | } 390 | 391 | } 392 | #endif 393 | -------------------------------------------------------------------------------- /src/hecate/gflseg.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * GFLSEG Group Fused LASSO Change Point Detection solver 3 | * 4 | * Copyright 2016 Yahoo Inc. 5 | * Licensed under the terms of the Apache 2.0 License. 6 | * See LICENSE file in the project root for terms. 7 | * 8 | * Developer: Yale Song (yalesong@yahoo-inc.com) 9 | * 10 | * Implementation is based on: 11 | * Kevin Bleakley and Jean-Philippe Vert. 12 | * "The group fused lasso for multiple change-point detection." 13 | * arXiv preprint arXiv:1106.4199 (2011). 
14 | */ 15 | 16 | #include "hecate/gflseg.hpp" 17 | 18 | using namespace std; 19 | using namespace cv; 20 | using namespace hecate; 21 | 22 | void Segmenter::gflseg(const Mat& X, vector& jumps, int k, double theta) 23 | { 24 | double epsilon = 1e-9; 25 | 26 | // group fused LASSO solver 27 | vector jmp1; 28 | vector lambda; 29 | gflars( X, jmp1, lambda, k, epsilon ); 30 | 31 | // DP optimizer 32 | vector > jmp2; 33 | Mat rse; 34 | int kbest = 0; 35 | dpseg( X, jmp1, jmp2, rse, kbest, theta, -1 ); 36 | 37 | // return the optimal solution 38 | jumps.resize( (int)jmp2[kbest].size(), 0 ); 39 | for( size_t i=0; i& cp, vector >& jumps, 46 | Mat& rse, int& kbest, double theta, int kmax ) 47 | { 48 | int n = X.rows; // the length of the signal X 49 | int p = X.cols; // the dimension of the signal X 50 | 51 | // Prevent the likelihood from diverging too much 52 | if( kmax<0 ) 53 | kmax = min((int)cp.size(), (int)floor(n/10)); 54 | 55 | vector::iterator it; 56 | vector cp_srt = cp; 57 | std::sort( cp_srt.begin(), cp_srt.end() ); 58 | 59 | // Compute boundaries of the smallest intervals considered 60 | // b = sort(union([0],union([n],option.candidatechangepoints))); 61 | vector b(2+cp.size()); 62 | vector edge = {-1, n-1}; 63 | it = set_union( edge.begin(), edge.end(), cp_srt.begin(), cp_srt.end(), b.begin() ); 64 | b.resize( it-b.begin() ); 65 | std::sort( b.begin(), b.end() ); 66 | 67 | 68 | // k is the number of such intervals 69 | int k = (int)b.size()-1; 70 | 71 | 72 | // Compute the k-by-k matrix J such that J(i,j) for i<=j is the RSE when 73 | // intervals i-to-j are merged. 74 | // 75 | // According to Picard, 76 | // "J(i,j) is the cost of the path connecting i-to-j in k steps (k segments)" 77 | // http://pbil.univ-lyon1.fr/members/fpicard/franckpicard_fichiers/pdf/aCGH-stat.pdf 78 | Mat J( k, k, X.type(), Scalar(0,0,0)[0] ); 79 | Mat S( n+1, p, X.type(), Scalar(0,0,0)[0] ); // cumsum of the rows of X 80 | Mat v( n+1, 1, X.type(), Scalar(0,0,0)[0] ); // cumsum of squared norm of the rows of X 81 | 82 | // S = [zeros(1,size(X,2));cumsum(X)]; % cumsum of the rows 83 | Mat Ssub = S( Rect(0,1,p,n) ); cumsum( X, Ssub ); 84 | 85 | // v = [0;cumsum(sum(X.*X,2))]; % cumsum of squared norm of the rows 86 | Mat vsub = v( Rect(0,1,1,n) ); 87 | cv::reduce( X.mul(X), vsub, 1, CV_REDUCE_SUM ); 88 | cumsum( vsub, vsub ); 89 | 90 | for( int i=0; i(i,j) = v.at(Iend+1)-v.at(Istart)-penalty; 97 | } 98 | } 99 | 100 | // 101 | // DP recursion 102 | // 103 | 104 | // V(i,j) is the best RSE for segmenting intervals 1 to j with at most i-1 change-points 105 | Mat V( kmax+1, k, X.type(), Scalar(0,0,0)[0] ); 106 | Mat jumpmat( kmax, k, CV_32S, Scalar(-1,0,0)[0] ); 107 | 108 | // With no change-points, V(1,j) is just the precomputed RSE for intervals 1 to j 109 | J.row(0).copyTo( V.row(0) ); 110 | 111 | // The recursive formula 112 | // NOTE: minidx/maxidx in minMaxIdx are very, very confusing to use! 
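dpseg() above precomputes cumulative sums of the rows (S) and of their squared norms (v) so that the residual squared error of fitting a constant to any interval can be read off in O(1): RSE(a,b) = sum of x_t^2 over [a,b] minus (sum of x_t)^2 / (b-a+1). A 1-D check of that identity on synthetic data; the numbers are made up, but merging across the obvious change point should cost far more than staying inside a segment.

```
// 1-D check of the prefix-sum RSE identity used when filling J(i,j) in dpseg().
#include <cstdio>
#include <vector>

int main() {
  std::vector<double> x = {1, 2, 2, 1, 8, 9, 8, 9};   // change point after index 3
  const int n = (int)x.size();

  // Prefix sums: S[t] = sum of x[0..t-1], V[t] = sum of x[0..t-1]^2
  std::vector<double> S(n + 1, 0.0), V(n + 1, 0.0);
  for (int t = 0; t < n; ++t) {
    S[t + 1] = S[t] + x[t];
    V[t + 1] = V[t] + x[t] * x[t];
  }

  auto rse = [&](int a, int b) {            // inclusive interval [a,b]
    const double s = S[b + 1] - S[a];
    const double len = b - a + 1;
    return (V[b + 1] - V[a]) - s * s / len;
  };

  std::printf("RSE(0,3)=%.2f  RSE(4,7)=%.2f  RSE(0,7)=%.2f\n",
              rse(0, 3), rse(4, 7), rse(0, 7));   // 1.00, 1.00, 100.00
  return 0;
}
```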
113 | int minidx[2]; double minval; 114 | for( int ki=0; ki(ki+1,j) = minval; 119 | jumpmat.at(ki,j) = minidx[1]+ki; 120 | } 121 | } 122 | 123 | 124 | // Optimal segmentations 125 | for( int ki=0; ki jump_ki(ki+1, 0); 128 | jump_ki[ki] = jumpmat.at( ki,k-1 ); 129 | for( int i=ki-1; i>=0; i-- ) 130 | jump_ki[i] = jumpmat.at(i,jump_ki[i+1]); 131 | jumps.push_back( jump_ki ); 132 | } 133 | 134 | 135 | // Convert interval index back to the last position before the jump 136 | for( int ki=0; ki> JJtild = (JJ(Km)-JJ) / (JJ(Km)-JJ(1)) * (Km-1) + 1; 154 | int Km = JJ.rows; 155 | Mat JJtild = (JJ.at(Km-1)-JJ) / (JJ.at(Km-1)-JJ.at(0)) * (Km-1) + 1; 156 | 157 | 158 | // find the inflexion point 159 | // >> res.kbest = max(find(diff(diff(Jtild))>option.threshold))+1; 160 | Mat dJJtild(Km-1,1,X.type(),Scalar(0,0,0)[0]); 161 | Mat ddJJtild(Km-2,1,X.type(),Scalar(0,0,0)[0]); 162 | for(int i=0; i(i) = JJtild.at(i+1) - JJtild.at(i); 164 | for(int i=0; i(i) = dJJtild.at(i+1) - dJJtild.at(i); 166 | 167 | kbest = 0; 168 | for( int i=0; i(i)>theta && i>kbest ) 170 | kbest = i; 171 | }; 172 | 173 | 174 | void Segmenter::gflars(const Mat& X, vector& jumps, 175 | vector& lambda, const int k, const double epsilon) 176 | { 177 | int n = X.rows; // the length of the signal X 178 | int p = X.cols; // the dimension of the signal X 179 | 180 | jumps.clear(); 181 | lambda.clear(); 182 | 183 | // Default weight w(i) = sqrt(n/(i*(n-i))) 184 | Mat weights( n-1, 1, X.type() ); 185 | for( int i=0; i(i)= sqrt((double)n/((i+1)*(n-i-1))); 187 | 188 | // Auxilary variable to use MATLAB-like sort function. The variable vjmps 189 | // should always be synced with vector jumps. 190 | vector vsrtval; // contains sorted values 191 | vector vsrtidx; // contains sorted indices 192 | 193 | // 194 | // Initialize cHat = W'*X 195 | Mat cHat; 196 | leftmultiplybyXt(X,weights,cHat); 197 | 198 | vector A; // Active set indices (sorted in an ascending order) 199 | Mat cHatSub; // Used to access cHat(A,:) 200 | 201 | // 202 | // Main loop to find the successive jumps 203 | for( int iter=0; iter> cHatSquareNorm = sum(cHat.^2,2); 207 | Mat cHatSquareNorm; 208 | norm2sq( cHat, cHatSquareNorm, 1 ); // 0:col, 1:row-wise 209 | 210 | // >> [bigcHat,besti]=max(cHatSquareNorm); 211 | int besti[2]; 212 | double bigcHat; 213 | minMaxIdx( cHatSquareNorm, 0, &bigcHat, 0, besti ); // col matrix 214 | 215 | // 216 | // In the first iteration, we add the most correlated feature to the 217 | // active set. For the other iterations, this is already done at the 218 | // end of the previous iteration 219 | if( iter==0 ) { 220 | jumps.push_back( besti[0] ); 221 | } 222 | 223 | // Resize active set vector and cHatSub matrix 224 | A.resize( iter+1, 0 ); 225 | cHatSub = Mat( iter+1, p, X.type(), Scalar(0,0,0)[0] ); 226 | 227 | 228 | // 229 | // Compute the descent direction W = inv(X(:,A)'*X(:,A))*cHat(A,:) 230 | Mat W; // size of (iter+1)-by-p 231 | 232 | // >> [A,I]=sort(res.jump(1:iter)); 233 | hecate::sort( jumps, A, vsrtidx ); 234 | get_submatrix_row( cHat, cHatSub, A ); 235 | 236 | // >> w = leftmultiplybyinvXAtXA(n,A,cHat(A,:),weights); 237 | leftmultiplybyinvXAtXA(cHatSub,A,weights,n,W); 238 | 239 | // >> B = multiplyXtXbysparse(n,A,w,weights); 240 | Mat B; 241 | multiplyXtXbysparse(W,A,weights,n,B); 242 | 243 | // 244 | // Compute the descent step 245 | // For each i we find the largest possible step alpha by solving: 246 | // norm(cHat(i,:)-alpha*B(i,:)) = norm(cHat(j,:)-alpha*B(j,:)) 247 | // where j is in the active set. 
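The model-selection step at the end of dpseg() rescales a criterion curve derived from the per-k residuals to the range [1, Km], takes second differences, and keeps the largest k whose curvature still exceeds theta (the Picard et al. rule cited above). The sketch below mirrors that logic on a synthetic, already-computed criterion vector; it is an illustration of the selection rule only, not a drop-in replacement for the member function.

```
// Sketch of the inflexion-point selection rule used at the end of dpseg().
#include <cstdio>
#include <vector>

int select_kbest(const std::vector<double>& crit, double theta) {
  const int Km = (int)crit.size();
  std::vector<double> tild(Km);
  for (int i = 0; i < Km; ++i)
    tild[i] = (crit[Km - 1] - crit[i]) / (crit[Km - 1] - crit[0]) * (Km - 1) + 1;

  int kbest = 0;
  for (int i = 0; i + 2 < Km; ++i) {
    // second difference: tild[i+2] - 2*tild[i+1] + tild[i]
    const double dd = (tild[i + 2] - tild[i + 1]) - (tild[i + 1] - tild[i]);
    if (dd > theta && i > kbest) kbest = i;
  }
  return kbest;   // index of the selected segmentation, as in the C++ code above
}

int main() {
  // Criterion drops sharply for the first few change points, then flattens.
  std::vector<double> crit = {100, 40, 15, 6, 5.5, 5.2, 5.1, 5.05};
  std::printf("kbest = %d\n", select_kbest(crit, 0.5));
  return 0;
}
```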
248 | // We write it as a second order polynomial 249 | // a1(i)*alpha^2 - 2* a2(i)*alpha + a3(i) 250 | 251 | Mat a1,a2,a3; 252 | Mat a1sub, a2sub, a3sub; 253 | Mat tmp1, tmp2, tmp3; 254 | vector subset; 255 | 256 | // >> a1 = bigcHat - sum(B.^2,2); 257 | cv::reduce( B.mul(B), a1, 1, CV_REDUCE_SUM ); 258 | a1 = bigcHat - a1; 259 | 260 | // >> a2 = bigcHat - sum(B.*cHat,2); 261 | cv::reduce( B.mul(cHat), a2, 1, CV_REDUCE_SUM ); 262 | a2 = bigcHat - a2; 263 | 264 | // >> a3 = bigcHat - cHatSquareNorm; 265 | a3 = bigcHat - cHatSquareNorm; 266 | 267 | // 268 | // Now we solve it 269 | // >> gammaTemp = zeros(2*(n-1),1); 270 | Mat gammaTemp( 2*(n-1), 1, X.type(), Scalar(0,0,0)[0] ); 271 | 272 | // First, those where we really have a second-order polynomial 273 | // >> subset = find(a1 > EPSILON); 274 | for( int i=0; i(i)>epsilon ) 276 | subset.push_back(i); 277 | 278 | if( !subset.empty() ) 279 | { 280 | get_subvector( a1, a1sub, subset ); 281 | get_subvector( a2, a2sub, subset ); 282 | get_subvector( a3, a3sub, subset ); 283 | 284 | // tmp1 = sqrt( a2(subset).^2 - a1(subset).*a3(subset) ) 285 | cv::sqrt( a2sub.mul(a2sub)-a1sub.mul(a3sub), tmp1); 286 | 287 | // >> gammaTemp(subset) 288 | // = (a2(subset) + sqrt( a2(subset).^2 - a1(subset).*a3(subset) )) ./ a1(subset); 289 | cv::divide( a2sub + tmp1, a1sub, tmp2); 290 | for( size_t i=0; i(subset[i]) = tmp2.at(i); 292 | 293 | // >> gammaTemp(subset+n-1) 294 | // = (a2(subset) - sqrt( a2(subset).^2 - a1(subset).*a3(subset) )) ./ a1(subset); 295 | cv::divide( a2sub - tmp1, a1sub, tmp2); 296 | for( size_t i=0; i(subset[i]+n-1) = tmp2.at(i); 298 | 299 | subset.clear(); 300 | } 301 | 302 | // 303 | // then those where the quadratic term vanishes and we have a 304 | // first-order polynomial 305 | // >> subset = find((a1 <= EPSILON) & (a2 > EPSILON)); 306 | for( int i=0; i(i)<=epsilon && a2.at(i)>epsilon ) 308 | subset.push_back(i); 309 | 310 | if( !subset.empty() ) 311 | { 312 | get_subvector( a1, a1sub, subset ); 313 | get_subvector( a2, a2sub, subset ); 314 | get_subvector( a3, a3sub, subset ); 315 | 316 | // >> gammaTemp(subset) = a3(subset) ./ (2*a2(subset)); 317 | // >> gammaTemp(subset+n-1) = a3(subset) ./ (2*a2(subset)); 318 | cv::divide( a3sub, 2*a2sub, tmp2 ); 319 | for( size_t i=0; i(subset[i]) = gammaTemp.at(subset[i]+n-1) = tmp2.at(i); 321 | 322 | subset.clear(); 323 | } 324 | 325 | 326 | // 327 | // Finally the active set should not be taken into account, as well as 328 | // those for which the computation gives dummy solutions 329 | // >> maxg=max(gammaTemp)+1; 330 | double maxg; minMaxIdx(gammaTemp, 0, &maxg); maxg+=1.0; 331 | 332 | // >> subset = find((a1 <= EPSILON) & (a2 <= EPSILON)); 333 | for( int i=0; i(i)<=epsilon && a2.at(i)<=epsilon ) 335 | subset.push_back(i); 336 | 337 | if( !subset.empty() ) 338 | { 339 | // >> gammaTemp(subset) = maxg; 340 | // >> gammaTemp(n+subset) = maxg; 341 | for( size_t i=0; i(subset[i]) = gammaTemp.at(subset[i]+n) = maxg; 343 | subset.clear(); 344 | } 345 | 346 | // >> gammaTemp(A) = maxg; 347 | // >> gammaTemp(n+A-1) = maxg; 348 | for( size_t i=0; i(A[i]) = gammaTemp.at(A[i]+n-1) = maxg; 350 | 351 | // >> gammaTemp(gammaTemp<=0)=maxg; 352 | // >> gammaTemp(imag(gammaTemp)~=0) = maxg; 353 | for( int i=0; i(i)<=0 || gammaTemp.at(i)!=gammaTemp.at(i) ) 355 | gammaTemp.at(i) = maxg; 356 | 357 | // 358 | // Now we can take the minimum 359 | // >> [gamma,nexttoadd]=min(gammaTemp); 360 | double gamma; 361 | int nexttoadd[2]; 362 | minMaxIdx( gammaTemp, &gamma, 0, nexttoadd, 0); // col matrix 363 | 364 | // 
365 | // Update 366 | // >> res.value{iter} = zeros(iter,p); 367 | // >> res.value{iter}(I,:) = gamma*w; 368 | // >> if iter>1 369 | // >> res.value{iter}(1:(iter-1),:) = res.value{iter}(1:(iter-1),:) + res.value{iter-1}; 370 | // >> end 371 | 372 | // >> res.lambda(iter)=sqrt(bigcHat); 373 | lambda.push_back( sqrt(bigcHat) ); 374 | 375 | // >> if iter> res.jump(iter+1) = 1+mod(nexttoadd-1,n-1); 377 | // >> cHat = cHat-gamma*a; 378 | // >> end 379 | if( iter+1(r) = (double)(r+1); 402 | 403 | Mat lt = llt*U.row(n-1)/n - U(Rect(0,0,p,n-1)); 404 | for( int c=0; c& ind, 412 | const Mat& w, const int n, Mat& R) 413 | { 414 | int a = X.rows; 415 | int p = X.cols; 416 | R = Mat( a, p, X.type(), Scalar(0,0,0)[0] ); 417 | 418 | if( a>0 ) 419 | { 420 | // >> u = diff([0;ind;n]) 421 | // Note: we convert C++ index system (zero-base) to MATLAB (one-base) 422 | Mat u( a+1, 1, X.type() ); 423 | u.at(0) = ind[0]+1; // [2 0 1] becomes [3 1 2] 424 | u.at(a) = n - (ind[a-1]+1); 425 | for( int i=1; i(i) = ind[i] - ind[i-1]; 427 | 428 | // >> val = val ./ w(ind,ones(1,p)) 429 | Mat val; X.copyTo(val); 430 | Mat wsub; get_subvector(w, wsub, ind); 431 | for( int c=0; c> delta = diff( [zeros(1,p); val; zeros(1,p)] ) ./ u(:,ones(1,p)) 435 | Mat delta( a+1, p, X.type(), Scalar(0,0,0)[0] ); 436 | delta.row(0) = val.row(0)+0.0; 437 | delta.row(a) = -val.row(a-1); 438 | for( int r=1; r> R = - diff( delta ) 444 | for(int r=0; r> R = R ./ w(ind,ones(1,p)) 448 | for(int c=0; c& ind, 456 | const Mat& w, const int n, Mat& R) 457 | { 458 | int a = X.rows; 459 | int p = X.cols; 460 | 461 | R = Mat( n-1, p, X.type(), Scalar(0,0,0)[0] ); 462 | if( a>0 ) 463 | { 464 | Mat wsub; get_subvector(w, wsub, ind); 465 | Mat val; X.copyTo(val); 466 | 467 | // First multiply beta by the weights 468 | // >> val = val .* w(ind,ones(1,p)); 469 | for(int c=0; c> S = zeros(n-1,p); 474 | Mat S( n-1, p, X.type(), Scalar(0,0,0)[0] ); 475 | 476 | // >> S(ind,:) = val; 477 | for( size_t i=0; i> S = flipud(cumsum(flipud(S))); 481 | flip(S,S,0); cumsum(S,S); flip(S,S,0); 482 | 483 | // >> u = ind' * val; // don't forget to add ones 484 | Mat indv( 1, a, X.type() ); 485 | for( size_t i=0; i(i) = (double)(ind[i]+1); 487 | Mat u = indv * val; 488 | 489 | // >> S = S - u(ones(n-1,1),:)/n; 490 | for( int r=0; r> R = cumsum(S); 495 | cumsum( S, R ); 496 | 497 | // then multiply the rows by the weights 498 | // >> R = R .* w(:,ones(1,p)); 499 | for( int c=0; c& v_shot_range, 19 | const Mat& X, const Mat& diff, 20 | vector& v_highlight_range) 21 | { 22 | if( opt.gif && opt.mov ) { 23 | fprintf( stderr, "Fatal Error in detect_highlight_shots():" 24 | "opt.gif and opt.mov cannot be true at the same time.\n" ); 25 | exit(-1); 26 | } 27 | 28 | v_highlight_range.clear(); 29 | 30 | const int minK = 5; 31 | const int maxK = 30; 32 | 33 | //////////////////////////////////////////////////////////////////////////// 34 | // 35 | // Discard shots that are too short or too static 36 | // 37 | //////////////////////////////////////////////////////////////////////////// 38 | 39 | int min_shot_len, max_shot_len, min_num_shot; 40 | if( opt.mov ) { 41 | double addrate_min = 0.5 * sqrt(max(0.0,(double)opt.lmov-15.0)/45.0); 42 | double addrate_max = sqrt(max(0.0,(double)opt.lmov-15.0)/45.0); 43 | min_shot_len = floor((3.0+addrate_min) * meta.fps / opt.step_sz); 44 | max_shot_len = round((1.0+addrate_max) * min_shot_len); 45 | min_num_shot = ceil( opt.lmov / 3.0 ); 46 | if( opt.debug ) { 47 | printf("detect_highlight_shots(): " 48 | "min_shot_len=%d, max_shot_len=%d, 
min_num_shot=%d, " 49 | "addrate_min=%f, addrate_max=%f\n", 50 | min_shot_len, max_shot_len, min_num_shot, 51 | addrate_min, addrate_max); 52 | } 53 | } 54 | else { 55 | min_shot_len = floor(1.5 * meta.fps / opt.step_sz); 56 | max_shot_len = ceil (4.5 * meta.fps / opt.step_sz); 57 | min_num_shot = opt.ngif; 58 | } 59 | 60 | // Active set containing candidates for highlight shots 61 | vector v_candidates; 62 | for(size_t i=0; i(i); 69 | avg_diff/=shot.length(); 70 | 71 | // Discard if too short. 72 | if( shot_len < min_shot_len ) { 73 | //printf("Discard SHORT (shot_len=%d, avg_diff=%f) ", shot_len, avg_diff); shot.print(); 74 | continue; 75 | } 76 | 77 | // Discard if too static 78 | if( avg_diff < 0.05 ) { 79 | //printf("Discard STATIC (shot_len=%d, avg_diff=%f) ", shot_len, avg_diff); shot.print(); 80 | continue; 81 | } 82 | 83 | // Add the shot 84 | v_candidates.push_back( shot ); 85 | } 86 | 87 | // If there's not enough shots, merged adjacent shots and add them 88 | if( (int)v_candidates.size() < min_num_shot ) { 89 | if( opt.debug ) { 90 | printf("detect_highlight_shots(): " 91 | "Not enough candidate shots (current=%d/%d, min=%d)\n", 92 | (int)v_candidates.size(), (int)v_shot_range.size(), min_num_shot); 93 | } 94 | for(size_t i=0; i= shot.start && 113 | v_candidates[k].start <= shot.end ) { 114 | exists = true; break; 115 | } 116 | } 117 | if( !exists && shot.length() >= min_shot_len ) { 118 | v_candidates.push_back( shot ); 119 | } 120 | i = j+1; 121 | } 122 | 123 | // Sort candidate shots in a chronogical order 124 | vector v_candidates_tmp; 125 | vector v_shot_nsrt; // shots in non-sorted order 126 | for( size_t i=0; i v_srt_idx; // contains idx 130 | vector v_srt_val; // contains value (start frame idx) 131 | hecate::sort( v_shot_nsrt, v_srt_val, v_srt_idx ); 132 | for(size_t i=0; i max_shot_len ) { 145 | while( shot_len > max_shot_len ) { 146 | Scalar mu, sigma, mu2, sigma2; 147 | meanStdDev( diff(Rect(0,shot.start+1,1,shot.length()-1)), mu, sigma ); 148 | meanStdDev( diff(Rect(0,shot.start, 1,shot.length()-1)), mu2, sigma2 ); 149 | if( sigma[0] < sigma2[0] ) 150 | shot.start++; 151 | else 152 | shot.end--; 153 | shot_len = shot.length(); 154 | } 155 | } 156 | v_candidates[i] = shot; 157 | } 158 | 159 | 160 | //////////////////////////////////////////////////////////////////////////// 161 | // 162 | // Group visually simimlar shots 163 | // 164 | // Represent each shot as a piece-wise constant multi-dimensional signal 165 | // Then run kmeans (k=#nshots_highlight). 
166 | // 167 | //////////////////////////////////////////////////////////////////////////// 168 | 169 | // Prepare data for kmeans 170 | // Compute piece-wise constant feature representation 171 | Mat km_data( (int)v_candidates.size(), X.cols, X.type() ); 172 | for( size_t shotid=0; shotid v_shotlen; 186 | for(size_t i=0; i clust_sz(km_k,0); 190 | for(int i=0; i(i) ] += v_shotlen[i]; 192 | 193 | // sort wrt cluster size in an ascending order 194 | vector v_srt_idx; // contains cluster id 195 | vector v_srt_val; // contains cluster size 196 | hecate::sort( clust_sz, v_srt_val, v_srt_idx ); 197 | 198 | 199 | //////////////////////////////////////////////////////////////////////////// 200 | // 201 | // Shot evaluation 202 | // 203 | // Criteria: prefer the most dynamic shot among others within the same cluster 204 | // 205 | //////////////////////////////////////////////////////////////////////////// 206 | 207 | // Pre-compute per-shot diff score avg & stddev 208 | Mat shot_diff_avg(v_candidates.size(), 1, diff.type(), Scalar(0,0,0)[0]); 209 | Mat shot_diff_std(v_candidates.size(), 1, diff.type(), Scalar(0,0,0)[0]); 210 | for( size_t shotid=0; shotid(shotid) = (double) mu[0]; 215 | shot_diff_std.at(shotid) = (double) sigma[0]; 216 | } 217 | 218 | vector v_shot_len; 219 | vector v_shot_score; 220 | for(size_t i=0; i::max(); 231 | 232 | // Collect IDs & scores of the shots in the k-th cluster 233 | vector v_tmp_shotid; 234 | vector v_tmp_score; 235 | for(int shotid=0; shotid(shotid); 237 | if( lbl==k ) { 238 | double val = shot_diff_avg.at(shotid); 239 | if( val > best_val ) { 240 | best_range = v_candidates[shotid]; 241 | best_val = val; 242 | } 243 | v_tmp_shotid.push_back( shotid ); 244 | v_tmp_score.push_back( val ); 245 | } 246 | } 247 | 248 | // If GIF mode, add shots here 249 | if( opt.gif ) { 250 | if( best_range.start>=0 && best_range.length()>0 ) 251 | v_highlight_range.push_back( best_range ); 252 | if( (int)v_highlight_range.size() >= opt.ngif ) 253 | break; 254 | } 255 | 256 | // If MOV mode, store shot scores 257 | if( opt.mov ) { 258 | // Sort wrt avg_diff score, ascending order 259 | vector v_srt_idx2; 260 | vector v_srt_val2; 261 | hecate::sort( v_tmp_score, v_srt_val2, v_srt_idx2 ); 262 | 263 | // Record normalized scores 264 | for( size_t i=0; i sol; 294 | hecate::solve_01knapsack( v_shot_score, v_shot_len, budget, sol ); 295 | 296 | // merge shots if only they are only half a second apart 297 | int merge_thrsh = ceil(0.5 * meta.fps / opt.step_sz); 298 | for( size_t i=0; itarget ) { 340 | // Pick a random shot 341 | int idx = rand() % (int) v_highlight_range.size(); 342 | hecate::Range shot = v_highlight_range[idx]; 343 | // Remove frame that reduces stddev of the shot by the most 344 | Scalar mu, sigma, mu2, sigma2; 345 | meanStdDev( diff(Rect(0,shot.start+1,1,shot.length()-1)), mu, sigma ); 346 | meanStdDev( diff(Rect(0,shot.start,1,shot.length()-1)), mu2, sigma2 ); 347 | if( sigma[0] < sigma2[0] ) 348 | v_highlight_range[idx].start++; 349 | else 350 | v_highlight_range[idx].end--; 351 | curlen--; 352 | } 353 | // If video is longer than target, but summary is too short 354 | int shot_edge_buf = 10; 355 | int failed_attempt = 0; 356 | while( curlen 365 | v_highlight_range[idx].end+1+shot_edge_buf ) { 366 | v_highlight_range[idx].end++; 367 | } 368 | else { 369 | failed = true; 370 | } 371 | } 372 | else if( v_highlight_range[idx].end>meta.nframes-shot_edge_buf ) { 373 | if( idx>0 && v_highlight_range[idx-1].end < 374 | v_highlight_range[idx].start-1-shot_edge_buf ) { 375 | 
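In MOV mode the scored candidate shots are packed into the summary-length budget with hecate::solve_01knapsack(), as in the call above. A hedged usage sketch follows; the scores, lengths, and budget are made up, and the element types (double scores, int lengths, bool selection flags) are inferred from how the call site above uses them, so treat them as an assumption.

```
// Hedged usage sketch of the knapsack-based shot selection step above.
#include <cstdio>
#include <vector>
#include "hecate/knapsack.hpp"

int main() {
  std::vector<double> score  = {0.9, 0.4, 0.7, 0.2, 0.8};  // per-shot value
  std::vector<int>    length = {45,  30,  60,  20,  50};   // per-shot frames
  const int budget = 120;                                  // summary budget in frames

  std::vector<bool> keep;   // keep[i] == true if shot i is selected
  hecate::solve_01knapsack(score, length, budget, keep);

  int total = 0;
  for (size_t i = 0; i < keep.size(); ++i)
    if (keep[i]) { std::printf("keep shot %zu\n", i); total += length[i]; }
  std::printf("total length: %d / %d\n", total, budget);
  return 0;
}
```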
v_highlight_range[idx].start--; 376 | } 377 | else { 378 | failed = true; 379 | } 380 | } 381 | else { 382 | Scalar mu, sigma, mu2, sigma2; 383 | meanStdDev( diff(Rect(0,shot.start-1,1,shot.length()+1)), mu, sigma ); 384 | meanStdDev( diff(Rect(0,shot.start,1,shot.length()+1)), mu2, sigma2 ); 385 | if( sigma[0] < sigma2[0] ) { 386 | if( v_highlight_range[idx-1].end < 387 | v_highlight_range[idx].start-1-shot_edge_buf ) { 388 | v_highlight_range[idx].start--; 389 | } 390 | else { 391 | failed = true; 392 | } 393 | } 394 | else { 395 | if( v_highlight_range[idx+1].start > 396 | v_highlight_range[idx].end+1+shot_edge_buf) { 397 | v_highlight_range[idx].end++; 398 | } 399 | else { 400 | failed = true; 401 | } 402 | } 403 | } 404 | if( !failed ) 405 | curlen++; 406 | else 407 | failed_attempt++; 408 | if( failed_attempt>10*(int)v_highlight_range.size() ) { 409 | if( opt.debug ) { 410 | printf("Failed to meet the target length in video summary\n"); 411 | } 412 | break; 413 | } 414 | } 415 | } 416 | //curlen = 0; 417 | //for(size_t i=0; i& v_highlight_range ) 431 | { 432 | if( opt.gif && opt.mov ) { 433 | fprintf( stderr, "Fatal Error in generate_highlight_clips():" 434 | "opt.gif and opt.mov cannot be true at the same time.\n" ); 435 | exit(-1); 436 | } 437 | 438 | // prefix for hiddden files generated during execution 439 | const char *cdot = "__tmp__"; 440 | 441 | string filename = hecate::get_filename( std::string(opt.in_video) ); 442 | 443 | VideoCapture vr( opt.in_video ); 444 | double fps = vr.get(CV_CAP_PROP_FPS); 445 | vr.release(); 446 | 447 | // Sort shots in chronological order 448 | vector v_shot_nsrt; // shots in non-sorted order 449 | for( size_t i=0; i v_srt_idx; // contains idx 454 | vector v_srt_val; // contains value (start frame idx) 455 | hecate::sort( v_shot_nsrt, v_srt_val, v_srt_idx ); 456 | 457 | // For concatenating vieos clips 458 | char filelist[512]; 459 | sprintf( filelist, "%s/%s%s_seg.txt", 460 | opt.out_dir.c_str(), cdot, filename.c_str() ); 461 | FILE *ptr_filelist = fopen( filelist, "w+" ); 462 | 463 | // Parse ttml-format caption, if provided 464 | vector ttml; 465 | if( !opt.caption.empty() ) { 466 | parse_ttml( opt.caption, ttml ); 467 | if( ttml.empty() ) { 468 | fprintf( stderr, "generate_animated_gifs: " 469 | "Caption file %s cannot be read\n", opt.caption.c_str() ); 470 | } 471 | // Convert second to frame index 472 | for(size_t i=0; i VideoParser::parse_video(const string& in_video, 40 | hecate::parser_params opt) 41 | /*-----------------------------------------------------------------------*/ 42 | { 43 | _debug = opt.debug; 44 | 45 | int ret = read_video( in_video, opt.step_sz, opt.max_duration, 46 | opt.ignore_rest ); 47 | if( ret<0 ) { 48 | fprintf( stderr, "VideoParser: Failed to open input video: %s\n", 49 | in_video.c_str()); 50 | return vector(); 51 | } 52 | 53 | // Record metadata 54 | meta.nframes = _nfrm_total; 55 | meta.width = _video_width; 56 | meta.height = _video_height; 57 | meta.fps = _video_fps; 58 | meta.duration = _video_sec; 59 | 60 | // Frame filtering 61 | if( opt.fltr_begin_sec>.0 || opt.fltr_end_sec>.0 ) 62 | filter_heuristic(opt.fltr_begin_sec, opt.fltr_end_sec); 63 | 64 | if( opt.fltr_lq ) 65 | filter_low_quality(); 66 | 67 | filter_transition(); 68 | 69 | // Extract feature representation 70 | extract_histo_features(); 71 | 72 | // Post-process (break up shots if too long) 73 | double min_shot_len_sec = 2.0; 74 | post_process(min_shot_len_sec, opt.gfl); 75 | 76 | release_memory(); 77 | 78 | // Store shot information 79 | 
update_shot_ranges(); 80 | 81 | // Subshot detection; subshot info is stored during this step 82 | if( opt.fltr_rdt ) 83 | filter_redundant_and_obtain_subshots(); 84 | 85 | if( _display ) 86 | play_video_filtered( in_video, 2, 360 ); 87 | 88 | // Produce result 89 | return _v_shot_ranges; 90 | } 91 | 92 | /*------------------------------------------------------------------------ 93 | Read input video and store as RGB and GRAY formats. 94 | ------------------------------------------------------------------------*/ 95 | int VideoParser::read_video( const string& in_video, int step_sz, 96 | double max_duration, bool ignore_rest, 97 | int max_frm_len) 98 | /*-----------------------------------------------------------------------*/ 99 | { 100 | if( _debug ) 101 | printf("VideoParser: read_video(\"%s\", \n\tstep_sz=%d, " 102 | "max_duration=%.2f, max_frm_len=%d, ignore_rest=%d)\n", 103 | in_video.c_str(), step_sz, max_duration, max_frm_len, ignore_rest); 104 | 105 | VideoCapture vr( in_video ); 106 | if( !vr.isOpened() ) { 107 | return -1; 108 | } 109 | 110 | _nfrm_total = vr.get(CV_CAP_PROP_FRAME_COUNT); 111 | _video_width = vr.get(CV_CAP_PROP_FRAME_WIDTH); 112 | _video_height = vr.get(CV_CAP_PROP_FRAME_HEIGHT); 113 | _video_fps = max(1.0, vr.get(CV_CAP_PROP_FPS)); 114 | if( _video_fps!=_video_fps ) 115 | _video_fps = 29.97; 116 | 117 | int max_nfrms = (max_duration<0) 118 | ? _nfrm_total : round(max_duration*_video_fps); 119 | 120 | // need to store _step_sz for computing min_shot_len in sbg_gflseg() 121 | _step_sz = step_sz; 122 | if( step_sz>10 ) { 123 | _step_sz = 10; 124 | fprintf( stderr, "VideoParser: The maximum step size is 10" 125 | " (provided is %d)\n", step_sz ); 126 | } 127 | // if video is too long, and ignore_rest is false, adjust step size 128 | if( !ignore_rest && max_nfrms > 0 && _nfrm_total > max_nfrms ) { 129 | _step_sz = ceil( _nfrm_total/max_nfrms ); 130 | fprintf( stderr, "VideoParser: Video too long (%d frames)," 131 | "increasing step size to %d\n", _nfrm_total, step_sz ); 132 | } 133 | _step_sz = max(1, _step_sz); 134 | 135 | int maxlen = max( _video_width, _video_height ); 136 | double rsz_ratio = (maxlen>max_frm_len) 137 | ? 
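read_video() decodes with cv::VideoCapture, keeps every step_sz-th frame, and downscales so the longer side stays within a limit. A standalone sketch of that reading loop; the path, step size, and size cap are placeholders.

```
// Standalone sketch of the frame-reading pattern in VideoParser::read_video().
#include <cstdio>
#include <vector>
#include <algorithm>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/videoio.hpp>

int main() {
  cv::VideoCapture vr("examples/video.mp4");   // placeholder path
  if (!vr.isOpened()) { std::fprintf(stderr, "cannot open video\n"); return -1; }

  const int step_sz = 2;          // keep every 2nd frame
  const int max_frm_len = 160;    // cap the longer side at 160 px

  std::vector<cv::Mat> frames;
  int nfrm_total = 0;
  cv::Mat frm;
  while (vr.read(frm)) {
    if (nfrm_total % step_sz == 0) {
      int maxlen = std::max(frm.cols, frm.rows);
      if (maxlen > max_frm_len) {
        double r = (double)max_frm_len / maxlen;
        cv::resize(frm, frm, cv::Size(), r, r, cv::INTER_LINEAR);
      }
      frames.push_back(frm.clone());   // clone: VideoCapture may reuse its buffer
    }
    ++nfrm_total;
  }
  std::printf("decoded %d frames, kept %zu\n", nfrm_total, frames.size());
  return 0;
}
```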
(double) max_frm_len / maxlen : -1.0; 138 | 139 | // Read RGB frames 140 | _nfrm_total=0; // reset because vr.get is known to be inaccurate 141 | while( true ) 142 | { 143 | Mat frm; 144 | vr >> frm; 145 | if( frm.empty() ) break; 146 | 147 | if( _nfrm_total % _step_sz == 0 ) { 148 | if( rsz_ratio>0 ) 149 | resize( frm, frm, Size(), rsz_ratio, rsz_ratio, CV_INTER_LINEAR ); 150 | _v_frm_rgb.push_back( frm ); 151 | 152 | // if video is too long, and ignore_rest is true, cut the rest 153 | if( ignore_rest && _nfrm_total>=max_nfrms ) { 154 | if( _debug ) 155 | printf("VideoParser: video too long, " 156 | "cutting at (%d)-th frame\n", _nfrm_total); 157 | break; 158 | } 159 | } 160 | _nfrm_total++; 161 | } 162 | vr.release(); 163 | 164 | _nfrm_given = (int) _v_frm_rgb.size(); 165 | _video_sec = (double)_nfrm_total/_video_fps; 166 | 167 | // RGB2GRAY 168 | _v_frm_gray.assign( _nfrm_given, Mat() ); 169 | 170 | #pragma omp parallel for 171 | for( int i=0; i<_nfrm_given; i++ ) 172 | { 173 | Mat frm_gray; 174 | cvtColor( _v_frm_rgb[i], frm_gray, CV_BGR2GRAY ); 175 | GaussianBlur( frm_gray, frm_gray, Size(3,3), 0, 0 ); 176 | frm_gray.copyTo( _v_frm_gray[i] ); 177 | } 178 | 179 | _v_frm_valid.assign( _nfrm_given, true ); 180 | _v_frm_log.assign( _nfrm_given, " " ); 181 | 182 | _X_diff = Mat( _nfrm_given, 1, CV_64F, Scalar(0,0,0)[0] ); 183 | _X_ecr = Mat( _nfrm_given, 1, CV_64F, Scalar(0,0,0)[0] ); 184 | 185 | return 0; 186 | } 187 | 188 | 189 | /*------------------------------------------------------------------------ 190 | Manually invalidate first and last frames (heuristic) 191 | ------------------------------------------------------------------------*/ 192 | void VideoParser::filter_heuristic(double fltr_begin_sec, double fltr_end_sec) 193 | /*-----------------------------------------------------------------------*/ 194 | { 195 | int fltr_begin_nfrms = ceil(fltr_begin_sec * _video_fps / (double)_step_sz); 196 | int fltr_end_nfrms = ceil(fltr_end_sec * _video_fps / (double)_step_sz); 197 | 198 | for(int i=0; i v_brightness(_nfrm_given,0.0); 217 | vector v_sharpness(_nfrm_given,0.0); 218 | vector v_uniformity(_nfrm_given,0.0); 219 | 220 | #pragma omp parallel for 221 | for( int i=0; i<_nfrm_given; i++ ) { 222 | v_brightness[i] = hecate::calc_brightness( _v_frm_rgb[i] ); 223 | v_sharpness[i] = hecate::calc_sharpness( _v_frm_gray[i] ); 224 | v_uniformity[i] = hecate::calc_uniformity( _v_frm_gray[i] ); 225 | } 226 | 227 | vector v_srt_idx; // contains sorted indices 228 | vector v_srt_val; // contains sorted values 229 | 230 | // DARK frame detection 231 | hecate::sort( v_brightness, v_srt_val, v_srt_idx ); 232 | for( int i=0; i= thrsh_uniform ) 246 | mark_invalid(_v_frm_valid, _v_frm_log, v_srt_idx[_nfrm_given-i-1], "[Uniform]"); 247 | } 248 | 249 | 250 | /*------------------------------------------------------------------------ 251 | Frames around shot boundaries are usually low-quality, filter them out 252 | Use two SBD methods: frame-by-frame difference and ECR 253 | ------------------------------------------------------------------------*/ 254 | void VideoParser::filter_transition( double thrsh_diff, double thrsh_ecr ) 255 | /*-----------------------------------------------------------------------*/ 256 | { 257 | int nfrm_nperc = (int)(0.10*_nfrm_given); // n percent of the total frames 258 | 259 | vector v_diff(_nfrm_given, 0.0); 260 | vector v_ecr(_nfrm_given, 0.0); 261 | 262 | // sort wrt cluster size in an ascending order 263 | vector v_srt_idx; // contains sorted indices 264 | vector 
v_srt_val; // contains sorted values 265 | 266 | // compute the first-order derivative frame-by-frame difference 267 | int img_sz = _v_frm_gray[0].cols * _v_frm_gray[0].rows; 268 | #pragma omp parallel for 269 | for( int i=1; i<_nfrm_given-1; i++ ) { 270 | v_diff[i] = (double) (cv::norm( (_v_frm_rgb[i]-_v_frm_rgb[i-1]) ) 271 | + cv::norm( (_v_frm_rgb[i]-_v_frm_rgb[i+1]) )) / (2.0*img_sz); 272 | _X_diff.at(i) = v_diff[i]; 273 | } 274 | 275 | // compute edge-change-ratio (ECR) 276 | { 277 | int dl_sz = 5; // dilute size 278 | Mat dl_elm = getStructuringElement(MORPH_CROSS, 279 | Size(2*dl_sz+1, 2*dl_sz+1), 280 | Point(dl_sz, dl_sz)); 281 | 282 | // Pre-compute edge & edge dilation 283 | vector v_edge( _nfrm_given, Mat() ); // edge images 284 | vector v_edge_dl( _nfrm_given, Mat() ); // edge-diluted images 285 | 286 | #pragma omp parallel for 287 | for(int i=0; i<_nfrm_given; i++) 288 | { 289 | Mat tmp; 290 | double theta = threshold(_v_frm_gray[i],tmp,0,255,CV_THRESH_BINARY|CV_THRESH_OTSU); 291 | Canny( _v_frm_gray[i], v_edge[i], theta, 1.2*theta); 292 | dilate( v_edge[i], v_edge_dl[i], dl_elm ); 293 | v_edge[i] -= 254; v_edge_dl[i] -= 254; 294 | } 295 | 296 | // Transition detection using ECR (edge change ratio) 297 | #pragma omp parallel for 298 | for(int i=1; i<_nfrm_given; i++) 299 | { 300 | double rho_out, rho_in; 301 | rho_out = 1.0 - min(1.0,sum(v_edge[i-1].mul(v_edge_dl[i]))[0]/max(1e-6,sum(v_edge[i-1])[0])); 302 | rho_in = 1.0 - min(1.0,sum(v_edge_dl[i-1].mul(v_edge[i]))[0]/max(1e-6,sum(v_edge[i-1])[0])); 303 | 304 | v_ecr[i] = max(rho_out,rho_in); // edge change ratio 305 | _X_ecr.at(i) = v_ecr[i]; 306 | } 307 | } 308 | 309 | // CUT detection 310 | hecate::sort( v_diff, v_srt_val, v_srt_idx ); 311 | for( int i=0; i= thrsh_diff ) 313 | mark_invalid(_v_frm_valid, _v_frm_log, v_srt_idx[_nfrm_given-i-1], "[Cut]" ); 314 | 315 | // TRANSITION detection (cut, fade, dissolve, wipe) 316 | hecate::sort( v_ecr, v_srt_val, v_srt_idx ); 317 | for( int i=0; i= thrsh_ecr ) 319 | mark_invalid(_v_frm_valid, _v_frm_log, v_srt_idx[_nfrm_given-i-1], "[ECR]" ); 320 | } 321 | 322 | 323 | 324 | /*------------------------------------------------------------------------ 325 | Perform change point detection either by solving group-fused LASSO of 326 | "The group fused Lasso for multiple change-point detection" (2011) by 327 | Kevin Bleakley and Jean-Philippe Vert, or by using heuristics 328 | -----------------------------------------------------------------------*/ 329 | void VideoParser::post_process(double min_shot_sec, bool gfl) 330 | /*-----------------------------------------------------------------------*/ 331 | { 332 | hecate::Segmenter seg; 333 | int start_idx=-1, end_idx=-1, shotlen=-1; 334 | int min_shot_len = min_shot_sec * _video_fps / _step_sz; 335 | int max_shot_len = 3 * min_shot_len; 336 | double thrsh_gfl = 0.25; 337 | 338 | for( size_t i=0; i<_v_frm_valid.size(); i++ ) 339 | { 340 | if( start_idx<0 && _v_frm_valid[i] ) { 341 | start_idx = i; 342 | } 343 | if( start_idx>=0 && (!_v_frm_valid[i] || i+1==_v_frm_valid.size()) ) 344 | { 345 | end_idx = i; 346 | shotlen = end_idx-start_idx+1; 347 | if( shotlen >= max_shot_len ) 348 | { 349 | int njumps = floor(shotlen/min_shot_len); 350 | vector jump; 351 | 352 | // Solve group-fused LASSO 353 | if( gfl ) { 354 | Mat Xsub( shotlen, _X_feat.cols, _X_feat.type() ); 355 | for( int r=start_idx; r<=end_idx; r++ ) { 356 | _X_feat.row(r).copyTo( Xsub.row(r-start_idx) ); 357 | } 358 | Xsub.convertTo(Xsub,CV_64F); 359 | 360 | seg.gflseg( Xsub, jump, 
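filter_transition() above flags shot boundaries with two signals: a frame-by-frame color difference and the edge change ratio (ECR) computed from Canny edges with an Otsu-derived threshold and a dilated mask. The sketch below shows the ECR measure for one frame pair using the textbook normalization (rho_in divided by the current frame's edge count); it is a simplified illustration, not a copy of the member function, and the 0/1 edge encoding here differs from the code's internal scaling.

```
// Sketch of the edge-change-ratio (ECR) measure used in filter_transition().
#include <algorithm>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

double edge_change_ratio(const cv::Mat& gray_prev, const cv::Mat& gray_cur) {
  const int dl = 5;
  cv::Mat elem = cv::getStructuringElement(cv::MORPH_CROSS,
                                           cv::Size(2 * dl + 1, 2 * dl + 1),
                                           cv::Point(dl, dl));
  auto edges = [&](const cv::Mat& g, cv::Mat& e, cv::Mat& e_dil) {
    cv::Mat tmp;
    double t = cv::threshold(g, tmp, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);
    cv::Canny(g, e, t, 1.2 * t);
    cv::dilate(e, e_dil, elem);
    e.convertTo(e, CV_64F, 1.0 / 255.0);      // edge pixels -> 1, background -> 0
    e_dil.convertTo(e_dil, CV_64F, 1.0 / 255.0);
  };

  cv::Mat e0, e0d, e1, e1d;
  edges(gray_prev, e0, e0d);
  edges(gray_cur, e1, e1d);

  double n0 = std::max(1e-6, cv::sum(e0)[0]);   // #edge pixels in previous frame
  double n1 = std::max(1e-6, cv::sum(e1)[0]);   // #edge pixels in current frame
  double rho_out = 1.0 - std::min(1.0, cv::sum(e0.mul(e1d))[0] / n0);  // edges that vanish
  double rho_in  = 1.0 - std::min(1.0, cv::sum(e0d.mul(e1))[0] / n1);  // edges that appear
  return std::max(rho_out, rho_in);
}
```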
361 |         }
362 |         // Use heuristics
363 |         else {
364 |           vector<double> v_diff;
365 |           for(int i=start_idx; i<=end_idx; i++) {
366 |             v_diff.push_back( _X_diff.at<double>(i) );
367 |           }
368 |           sbd_heuristic( v_diff, jump, njumps, min_shot_len );
369 |         }
370 | 
371 | for(size_t k=0; k v_diff, vector& jump,
390 |                                   int njumps, int min_shot_len )
391 | /*-----------------------------------------------------------------------*/
392 | {
393 |   vector<int> v_srt_idx;
394 |   vector<double> v_srt_val; // contains sorted values
395 |   hecate::sort( v_diff, v_srt_val, v_srt_idx );
396 |   for(int i=(int)v_srt_val.size()-1; i>=0; i--) {
397 |     bool add = true;
398 |     if (((unsigned) v_srt_idx[i] + 1 < min_shot_len) ||
399 |         ((unsigned) v_diff.size() - v_srt_idx[i] < min_shot_len)) {
400 |       add = false;
401 |     } else {
402 |       for (size_t j = 0; j < jump.size(); j++) {
403 |         int len = abs(jump[j] - (int) v_srt_idx[i]) + 1;
404 |         if (len < min_shot_len) {
405 |           add = false;
406 |           break;
407 |         }
408 |       }
409 |     }
410 |     if( add ) {
411 |       jump.push_back( (int)v_srt_idx[i] );
412 |     }
413 |     if( (int)jump.size() == njumps ) {
414 |       break;
415 |     }
416 |   }
417 | };
418 | 
419 | /*-----------------------------------------------------------------------*/
420 | void VideoParser::extract_histo_features(int pyr_level, bool omit_filtered,
421 |                                          int nbin_color, int nbin_edge_ori,
422 |                                          int nbin_edge_mag)
423 | /*-----------------------------------------------------------------------*/
424 | {
425 |   int npatches = 0;
426 | for(int l=0; l v_idxmap( nfrm_valid, 0 );
470 | 
471 |   int row=0;
472 |   for(int i=0; i<_nfrm_given; i++)
473 |   {
474 |     if( _v_frm_valid[i] ) {
475 |       _X_feat.row(i).copyTo(km_data.row(row));
476 |       v_idxmap[row] = i;
477 |       row++;
478 |     }
479 |   }
480 | 
481 |   //
482 |   // Perform k-means
483 |   int ncluster = min(nfrm_valid/2, (int)_v_shot_ranges.size());
484 |   Mat km_lbl; // integer row vector; stores cluster IDs for every sample.
485 |   Mat km_ctr; // One row per each cluster center.
486 |   hecate::perform_kmeans( km_data, km_lbl, km_ctr, ncluster );
487 | 
488 |   //
489 |   // convert km_lbl to v_frm_clusterid
490 |   vector<int> v_frm_clusterid(_nfrm_given,-1);
491 |   for(int i=0; i<nfrm_valid; i++)
492 |     v_frm_clusterid[ v_idxmap[i] ] = km_lbl.at<int>(i);
493 | 
494 |   //
495 |   // Pick the most "still" frame from every sub-shot within each shot
496 |   for(size_t shotid=0; shotid<_v_shot_ranges.size(); shotid++)
497 |   {
498 |     int sb0 = _v_shot_ranges[shotid].start;
499 |     int sb1 = _v_shot_ranges[shotid].end;
500 | 
501 |     // Identify sub-shots (per kmeans result)
502 |     int ssb0=-1, ssb1=-1, lbl=-1;
503 |     for( int j=sb0; j<=sb1; j++ )
504 |     {
505 |       if( _v_frm_valid[j] ) {
506 |         if( ssb0<0 ) { // enter a new sub-shot area
507 |           ssb0 = j;
508 |           lbl = v_frm_clusterid[j];
509 |         }
510 |         ssb1 = j;
511 |       }
512 |       if( ssb0>=0 && (v_frm_clusterid[j]!=lbl || j==sb1) ) // exit the current sub-shot area
513 |       {
514 |         int diff_min_idx=-1;
515 |         double diff_min_val = numeric_limits<double>::max();
516 |         for( int k=ssb0; k<=ssb1; k++ )
517 |         {
518 |           double diff_k = _X_diff.at<double>(k);
519 |           if( diff_k < diff_min_val )
520 |           {
521 |             diff_min_idx = k;
522 |             diff_min_val = diff_k;
523 |           }
524 |         }
525 | 
526 |         // Store subshot with keyframe index
527 |         hecate::Range r(ssb0, ssb1);
528 |         r.v_idx.push_back( diff_min_idx );
529 |         _v_shot_ranges[shotid].v_idx.push_back( diff_min_idx );
530 |         _v_shot_ranges[shotid].v_range.push_back( r );
531 | 
532 |         // Filter out redundant frames
533 |         for( int k=ssb0; k<=ssb1; k++ )
534 |           if( k!=diff_min_idx )
535 |             mark_invalid(_v_frm_valid, _v_frm_log, k, "[Redundant]");
536 | 
537 |         // reset sub-shot area
538 |         ssb0 = ssb1 = lbl = -1;
539 |       }
540 |     }
541 | 
542 |     // reset shot area
543 |     sb0 = sb1 = -1;
544 |   }
545 | }
546 | 
547 | 
548 | /*-----------------------------------------------------------------------*/
549 | void VideoParser::update_shot_ranges( int min_shot_len )
550 | /*-----------------------------------------------------------------------*/
551 | {
552 |   _v_shot_ranges.clear();
553 | 
554 |   int sb0=-1, sb1=-1;
555 |   for( int i=0; i<_nfrm_given; i++ )
556 |   {
557 |     if( _v_frm_valid[i] ) {
558 |       // enter a new shot area
559 |       if( sb0<0 ) sb0 = i;
560 |       sb1 = i;
561 |     }
562 | 
563 |     // exit the current shot area
564 |     if( sb0>=0 && sb1>=0 && (!_v_frm_valid[i] || i+1==_nfrm_given) )
565 |     {
566 |       hecate::ShotRange r( sb0, sb1 );
567 |       if( r.length()>min_shot_len ) {
568 |         _v_shot_ranges.push_back( r );
569 |       }
570 |       else {
571 |         for(int j=sb0; j<=sb1; j++ )
572 |           mark_invalid( _v_frm_valid, _v_frm_log, j, "[Short]" );
573 |       }
574 |       sb0 = sb1 = -1;
575 |     }
576 |   }
577 | }
578 | 
579 | 
580 | /*-----------------------------------------------------------------------*/
581 | int VideoParser::get_nfrm_valid()
582 | /*-----------------------------------------------------------------------*/
583 | {
584 |   return (int) accumulate( _v_frm_valid.begin(), _v_frm_valid.end(), 0 );
585 | }
586 | 
587 | 
588 | /*-----------------------------------------------------------------------*/
589 | void VideoParser::mark_invalid( vector<bool>& vec, int idx, int wnd_sz )
590 | /*-----------------------------------------------------------------------*/
591 | {
592 |   int vec_len = (int)vec.size();
593 |   for(int i=max(0,idx-wnd_sz); i<=min(vec_len-1,idx+wnd_sz); i++) {
594 |     vec[i] = false;
595 |   }
596 | }
597 | 
598 | /*-----------------------------------------------------------------------*/
599 | void VideoParser::mark_invalid( vector<bool>& vec, vector<string>& vec2,
600 |                                 int idx, const string msg, int wnd_sz )
601 | /*-----------------------------------------------------------------------*/
602 | {
603 |   int vec_len = (int)vec.size();
604 |   for(int i=max(0,idx-wnd_sz); i<=min(vec_len-1,idx+wnd_sz); i++) {
605 |     vec[i] = false;
606 |     vec2[i] = msg;
607 |   }
608 | }
609 | 
610 | 
611 | 
612 | /*-----------------------------------------------------------------------*/
613 | void VideoParser::play_video_filtered( const string& in_video,
614 |                                        int step_sz, int max_frm_len )
615 | /*-----------------------------------------------------------------------*/
616 | {
617 |   printf("\nDebug mode: Displaying shot segmentation results...\n");
618 | 
619 |   // Debug visualization
620 |   const char* WND = "DEBUG";
621 |   namedWindow( WND ,1 );
622 | 
623 |   Mat frm;
624 |   double debug_rsz_ratio = (double)max_frm_len/_video_width;
625 | 
626 |   VideoCapture vr( in_video );
627 |   vr >> frm;
628 |   resize( frm, frm, Size(), debug_rsz_ratio, debug_rsz_ratio, CV_INTER_LINEAR );
629 |   vr.set( CV_CAP_PROP_POS_FRAMES, 0 );
630 | 
631 |   Size sz = frm.size();
632 |   Mat frm_lr( sz.height, 2*sz.width, CV_8UC3 );
633 |   Mat frm_l(frm_lr, Rect(0,0,sz.width,sz.height));
634 |   Mat frm_r(frm_lr, Rect(sz.width,0,sz.width,sz.height));
635 |   frm_lr.setTo(Scalar(0));
636 | 
637 |   for( int i=0; i<_nfrm_given; i++)
638 |   {
639 |     for( int j=0; j<step_sz; j++ ) {
640 |       vr >> frm;
641 |       if( frm.empty() ) break;
642 |     }
643 |     if( frm.empty() ) break;
644 |     resize( frm, frm, Size(), debug_rsz_ratio, debug_rsz_ratio, CV_INTER_LINEAR );
645 | 
646 |     std::stringstream s;
647 |     s << _v_frm_log[i];
648 |     putText(frm, s.str(), Point2f(5,30), FONT_HERSHEY_PLAIN, 1.3, Scalar(255,0,255,255), 2);
649 | 
650 |     if( _v_frm_log[i].length()<2 ) { // valid frames carry a blank log entry
651 |       frm.copyTo( frm_l );
652 |     }
653 |     else {
654 |       frm.copyTo( frm_r );
655 |     }
656 | 
657 |     imshow( WND, frm_lr );
658 |     if( waitKey(_video_fps)>=0 ) break;
659 |   }
660 |   vr.release();
661 |   destroyWindow( WND );
662 | }
663 | 
664 | 
--------------------------------------------------------------------------------
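Note: the edge change ratio (ECR) used in VideoParser::filter_transition can be illustrated in isolation. The sketch below is not part of the library; compute_ecr is a hypothetical helper name, and the Otsu-derived Canny thresholds and the dilation radius simply mirror the choices visible in that function. It follows the standard ECR definition: rho_out is the fraction of edge pixels of the previous frame with no nearby edge in the current frame, rho_in is the fraction of edge pixels of the current frame with no nearby edge in the previous frame, and ECR = max(rho_in, rho_out), with rho_in normalized by the current frame's edge count.

// Standalone ECR sketch (illustrative only; not hecate code).
#include <algorithm>
#include <opencv2/opencv.hpp>

double compute_ecr( const cv::Mat& prev_gray, const cv::Mat& curr_gray, int dl_radius = 5 )
{
  CV_Assert( prev_gray.type()==CV_8UC1 && curr_gray.type()==CV_8UC1 );

  // Edge maps with Otsu-derived Canny thresholds
  cv::Mat tmp, e_prev, e_curr;
  double t_prev = cv::threshold( prev_gray, tmp, 0, 255, cv::THRESH_BINARY|cv::THRESH_OTSU );
  double t_curr = cv::threshold( curr_gray, tmp, 0, 255, cv::THRESH_BINARY|cv::THRESH_OTSU );
  cv::Canny( prev_gray, e_prev, t_prev, 1.2*t_prev );
  cv::Canny( curr_gray, e_curr, t_curr, 1.2*t_curr );

  // Dilated edge maps: an edge pixel is "covered" if the other frame has an edge nearby
  cv::Mat elm = cv::getStructuringElement( cv::MORPH_CROSS,
                                           cv::Size(2*dl_radius+1, 2*dl_radius+1),
                                           cv::Point(dl_radius, dl_radius) );
  cv::Mat d_prev, d_curr;
  cv::dilate( e_prev, d_prev, elm );
  cv::dilate( e_curr, d_curr, elm );

  double n_prev = std::max( 1.0, (double)cv::countNonZero(e_prev) ); // #edge pixels, previous frame
  double n_curr = std::max( 1.0, (double)cv::countNonZero(e_curr) ); // #edge pixels, current frame

  // Outgoing edges: edges of the previous frame not covered by the current frame
  double rho_out = 1.0 - cv::countNonZero( e_prev & d_curr ) / n_prev;
  // Incoming edges: edges of the current frame not covered by the previous frame
  double rho_in  = 1.0 - cv::countNonZero( e_curr & d_prev ) / n_curr;

  return std::max( rho_out, rho_in ); // in [0,1]; large values indicate cuts or transitions
}

In filter_transition, a frame whose ECR against its predecessor lands in the top few percent of all frames and exceeds thrsh_ecr is treated as part of a transition and invalidated.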
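For the heuristic branch of VideoParser::post_process, the arithmetic is simple: with illustrative numbers min_shot_sec = 2, a 30 fps video, and _step_sz = 1, min_shot_len = 60 and max_shot_len = 180 sampled frames, so a 400-frame run of valid frames gets njumps = floor(400/60) = 6 candidate split points. sbd_heuristic then picks boundaries greedily, largest frame difference first, skipping any candidate closer than min_shot_len to the run's ends or to an already chosen boundary. A compact standalone restatement of that selection rule (illustrative only; select_boundaries is a hypothetical name, and it takes a plain std::vector rather than the parser's internal buffers):

// Greedy boundary-selection sketch (illustrative only; not hecate code).
#include <algorithm>
#include <cstdlib>
#include <numeric>
#include <vector>

std::vector<int> select_boundaries( const std::vector<double>& diff,
                                    int njumps, int min_shot_len )
{
  // Frame indices ordered by descending difference value.
  std::vector<int> order( diff.size() );
  std::iota( order.begin(), order.end(), 0 );
  std::sort( order.begin(), order.end(),
             [&diff](int a, int b) { return diff[a] > diff[b]; } );

  std::vector<int> jumps;
  for( int idx : order ) {
    // Accept a candidate only if it stays min_shot_len away from both
    // ends of the segment and from every boundary chosen so far.
    bool ok = ( idx + 1 >= min_shot_len ) &&
              ( (int)diff.size() - idx >= min_shot_len );
    for( int j : jumps )
      ok = ok && ( std::abs( j - idx ) + 1 >= min_shot_len );
    if( ok )
      jumps.push_back( idx );
    if( (int)jumps.size() == njumps )
      break;
  }
  return jumps;
}

Selecting in descending difference order favors the strongest boundaries, while the spacing test enforces the minimum shot length, which is why the loop stops as soon as njumps boundaries have been accepted.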