├── .gitattributes
├── .gitignore
├── LICENSE
├── Performance Evaluation.png
├── README.md
├── Similarity Evaluation.png
├── hadoop-pot-assembly
│   ├── pom.xml
│   └── src
│       └── main
│           └── assembly
│               └── assembly.xml
├── hadoop-pot-core
│   ├── pom.xml
│   └── src
│       └── main
│           └── java
│               └── org
│                   └── pooledtimeseries
│                       ├── Deduplicate.java
│                       ├── FormatOutput.java
│                       ├── GradientTimeSeries.java
│                       ├── MeanChiSquareDistanceCalculation.java
│                       ├── OpticalTimeSeries.java
│                       ├── SimilarityCalculation.java
│                       ├── cartesian
│                       │   ├── CartesianInputFormat.java
│                       │   └── CartesianRecordReader.java
│                       ├── healthcheck
│                       │   └── CheckCartesianProductSeqFile.java
│                       ├── seqfile
│                       │   ├── FullFileInputFormat.java
│                       │   ├── FullFileRecordReader.java
│                       │   ├── PoTVideoPathFilter.java
│                       │   └── TextVectorsToSequenceFile.java
│                       └── util
│                           ├── ClassScope.java
│                           ├── HadoopFileUtil.java
│                           ├── PoTConstants.java
│                           ├── PoTSerialiser.java
│                           ├── PoTUtil.java
│                           └── ReadSeqFileUtil.java
├── hadoop-pot-video
│   ├── pom.xml
│   └── src
│       └── main
│           └── java
│               └── org
│                   └── pooledtimeseries
│                       ├── FeatureVector.java
│                       ├── PoT.java
│                       ├── PoTException.java
│                       └── healthcheck
│                           └── CheckOpenCV.java
├── pom.xml
├── src
│   └── main
│       ├── bin
│       │   ├── pooled-time-series
│       │   └── pooled-time-series-hadoop
│       └── resources
│           └── tika-config.xml
└── visualization
    ├── circlepacking.html
    ├── cluster-d3.html
    ├── css
    │   ├── dashboard.css
    │   └── style.css
    ├── dashboard.html
    ├── data
    │   ├── formatted_similarity_calc.csv
    │   ├── similarity_cluster.json
    │   ├── similarity_cluster.png
    │   └── similarity_heatmap.png
    ├── favicon.ico
    ├── index.html
    ├── js
    │   ├── dashboard.js
    │   └── matrix.js
    └── py
        ├── evaluate_hmdb.py
        ├── similarity_cluster.py
        ├── similarity_heatmap.py
        └── video_duration.py

/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 | 
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
18 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | 
3 | # Windows image file caches
4 | Thumbs.db
5 | ehthumbs.db
6 | 
7 | # Folder config file
8 | Desktop.ini
9 | 
10 | # Recycle Bin used on file shares
11 | $RECYCLE.BIN/
12 | 
13 | # Windows Installer files
14 | *.cab
15 | *.msi
16 | *.msm
17 | *.msp
18 | 
19 | # Windows shortcuts
20 | *.lnk
21 | 
22 | # =========================
23 | # Operating System Files
24 | # =========================
25 | 
26 | # OSX
27 | # =========================
28 | 
29 | .DS_Store
30 | .AppleDouble
31 | .LSOverride
32 | 
33 | # Thumbnails
34 | ._*
35 | 
36 | # Files that might appear on external disk
37 | .Spotlight-V100
38 | .Trashes
39 | 
40 | # Directories potentially created on remote AFP share
41 | .AppleDB
42 | .AppleDesktop
43 | Network Trash Folder
44 | Temporary Items
45 | .apdisk
46 | /.classpath
47 | /.project
48 | /.settings/
49 | /data/
50 | /similarity.txt
51 | 
52 | *.mp4
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | 
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {Michael S. Ryoo} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /Performance Evaluation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USCDataScience/hadoop-pot/c3e7d1dda74ce56c25f574795c09feb9a6429c62/Performance Evaluation.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Hadoop implementation of the Pooled Time Series (PoT) algorithm 2 | =============================================================== 3 | PoT java implementation using Apache Hadoop. 4 | 5 | # Dependencies 6 | * Maven (Version shouldn't matter much. Tested with 2.x and 3.x.) 7 | * OpenCV 2.4.x (Tested with 2.4.9 and 2.4.11) 8 | 9 | # Pre-requisites 10 | If you get any errors running brew install opencv related to numpy, please run: 11 | 12 | 1. `pip install numpy` 13 | 14 | Now move on to OpenCV (More detailed instructions in [wiki/Installing-opencv](https://github.com/USCDataScience/hadoop-pot/wiki/Installing-opencv)) 15 | 1. `brew install opencv --with-java` 16 | 17 | The above should leave you with a: 18 | 19 | /usr/local/Cellar/opencv//share/OpenCV/java 20 | 21 | Directory which contains the associated dylib OpenCV dynamic library along with the OpenCV jar file. 22 | 23 | # Getting started 24 | 1. `cd hadoop-pot-assembly` 25 | 2. `mvn install assembly:assembly` 26 | 3. Set OPENCV_JAVA_HOME, e.g., to `export OPENCV_JAVA_HOME=/usr/local/Cellar/opencv/2.4.9/share/OpenCV/java` 27 | 4. Set POOLED_TIME_SERIES_HOME, e.g., to `export POOLED_TIME_SERIES_HOME=$HOME/hadoop-pot/src/main` 28 | 5. Run `pooled-time-series`, e.g., by creating an alias, `alias pooled-time-series="$POOLED_TIME_SERIES_HOME/bin/pooled-time-series"` 29 | 30 | The above should produce: 31 | 32 | ``` 33 | usage: pooled_time_series 34 | -d,--dir A directory with image files in it 35 | -f,--file Path to a single file 36 | -h,--help Print this message. 37 | -j,--json Set similarity output format to JSON. 38 | Defaults to .txt 39 | -o,--outputfile File containing similarity results. 40 | Defaults to ./similarity.txt 41 | -p,--pathfile A file containing full absolute paths to 42 | videos. 
Previous default was 43 | memex-index_temp.txt 44 | ``` 45 | 46 | So, to call the code e.g., on a directory of files called `data`, you would run (e.g., with OpenCV 2.4.9): 47 | 48 | ``` 49 | pooled-times-series -d data 50 | ``` 51 | 52 | Alternatively you can create (independently of this tool) a file with absolute file paths to video files, 1 per line, and then pass it with the `-p` file to the above program. 53 | 54 | ## Running Hadoop Jobs 55 | ### Config and Getting Started 56 | Add the following to your .bashrc 57 | ``` 58 | export HADOOP_OPTS="-Djava.library.path= -Dmapred.map.child.java.opts=-Djava.library.path=" 59 | alias pooled-time-series-hadoop="$POOLED_TIME_SERIES_HOME/bin/pooled-time-series-hadoop" 60 | ``` 61 | 62 | Build and clean up the jar for running 63 | ``` 64 | # Compile everything 65 | mvn install assembly:assembly 66 | 67 | # Drop the LICENSE file from our jar that will give us headaches otherwise 68 | zip -d target/pooled-time-series-1.0-SNAPSHOT-jar-with-dependencies.jar META-INF/LICENSE 69 | 70 | ``` 71 | 72 | # Documentation moving to the wiki 73 | 74 | We are moving our documentation to the wiki. Please bear with us and report issues as you find them. 75 | 76 | * [Getting up and running with Hadoop - Individual MR commands](https://github.com/USCDataScience/hadoop-pot/wiki/Individual-MR-job-commands) 77 | 78 | # Research Background and Detail 79 | This is a source code used in the following conference paper [1]. 80 | It includes the pooled time series (PoT) representation framework as well as basic per-frame descriptor extractions including histogram of optical flows (HOF) and histogram of oriented gradients (HOG). 81 | For more detailed information on the approach, please check the paper. 82 | 83 | If you take advantage of this code for any academic purpose, please do cite: 84 | 85 | ``` 86 | [1] Mattmann, Chris A., and Madhav Sharan. "Scalable Hadoop-Based Pooled Time Series of Big Video Data from the Deep Web." Proceedings of the 2017 ACM on International Conference on Multimedia Retrieval. ACM, 2017. 87 | [2] M. S. Ryoo, B. Rothrock, and L. Matthies, "Pooled Motion Features for First-Person Videos", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2015. 88 | 89 | https://arxiv.org/abs/1610.06669 90 | http://arxiv.org/pdf/1412.6505v2.pdf 91 | 92 | @inproceedings{mattmann2017scalable, 93 | title={Scalable Hadoop-Based Pooled Time Series of Big Video Data from the Deep Web}, 94 | author={Mattmann, Chris A and Sharan, Madhav}, 95 | booktitle={Proceedings of the 2017 ACM on International Conference on Multimedia Retrieval}, 96 | pages={117--120}, 97 | year={2017}, 98 | organization={ACM} 99 | } 100 | 101 | @inproceedings{ryoo2015pot, 102 | title={Pooled Motion Features for First-Person Videos}, 103 | author={M. S. Ryoo and B. Rothrock and L. 
Matthies}, 104 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 105 | year={2015}, 106 | month={June}, 107 | address={Boston, MA}, 108 | } 109 | ``` 110 | 111 | # Evaluation 112 | ![](https://raw.githubusercontent.com/USCDataScience/hadoop-pot/master/Similarity%20Evaluation.png) 113 | 114 | HMDB Dataset - http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/ 115 | 116 | -------------------------------------------------------------------------------- /Similarity Evaluation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USCDataScience/hadoop-pot/c3e7d1dda74ce56c25f574795c09feb9a6429c62/Similarity Evaluation.png -------------------------------------------------------------------------------- /hadoop-pot-assembly/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | gov.nasa.jpl.memex 6 | hadoop-pot 7 | 1.0-SNAPSHOT 8 | 9 | pooled-time-series 10 | hadoop-pot-assembly 11 | 12 | 13 | 14 | org.apache.maven.plugins 15 | maven-compiler-plugin 16 | 3.3 17 | 18 | 1.7 19 | 1.7 20 | 21 | 22 | 23 | 26 | maven-assembly-plugin 27 | 2.5.4 28 | 29 | 30 | jar-with-dependencies 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | ${project.groupId} 43 | hadoop-pot-core 44 | ${project.version} 45 | 46 | 47 | -------------------------------------------------------------------------------- /hadoop-pot-assembly/src/main/assembly/assembly.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | jar-with-dependencies 6 | 7 | jar 8 | 9 | false 10 | 11 | 12 | / 13 | true 14 | true 15 | runtime 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /hadoop-pot-core/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | gov.nasa.jpl.memex 5 | hadoop-pot 6 | 1.0-SNAPSHOT 7 | 8 | hadoop-pot-core 9 | 10 | 11 | ${project.groupId} 12 | hadoop-pot-video 13 | ${project.version} 14 | 15 | 16 | org.apache.hadoop 17 | hadoop-common 18 | 2.7.2 19 | 20 | 21 | org.apache.hadoop 22 | hadoop-mapreduce-client-core 23 | 2.7.2 24 | 25 | 26 | org.apache.hadoop 27 | hadoop-mapreduce-client-jobclient 28 | 2.7.2 29 | 30 | 31 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/Deduplicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package org.pooledtimeseries; 18 | 19 | import java.io.BufferedReader; 20 | import java.io.File; 21 | import java.io.FileReader; 22 | import java.io.FileWriter; 23 | import java.io.IOException; 24 | import java.io.PrintWriter; 25 | import java.util.Arrays; 26 | import java.util.HashSet; 27 | import java.util.Set; 28 | 29 | public class Deduplicate { 30 | private static final double DEFAULT_THRESHOLD = 0.99d; 31 | 32 | public static void main(String[] args) throws IOException { 33 | if (args.length < 3) { 34 | System.err.println("Improper usage. Execute with below 2 arguments- "); 35 | System.err.println("args[0] - path to CSV file to write the deduped similarity calc csv "); 36 | System.err.println("args[1] - path to List of video names "); 37 | System.err.println("args[2] - Video pairs with similarity score- 'vid1,vid2\t0.5' "); 38 | System.err.println("args[3] - similarity threshold. Default 0.99 "); 39 | throw new RuntimeException("Insufficient Input"); 40 | } 41 | File outFile = new File(args[0]);// CSV file to write the output deduped_similarity_calc.csv 42 | File outFileNames = new File(args[1]);// List of video names 43 | 44 | if (outFile.exists() || outFileNames.exists()) { 45 | throw new RuntimeException(String.format("Some output file already esists - %s , %s ", outFile.getAbsolutePath() 46 | , outFileNames.getAbsolutePath()) ); 47 | } 48 | 49 | File simFile = new File(args[2]);// Video pairs with similarity score 50 | double threshold = args.length == 4 ? Double.parseDouble(args[3]) : DEFAULT_THRESHOLD; 51 | // All videos to discard 52 | Set videosToDelete = new HashSet<>(); 53 | // One video from each similar set will be kept 54 | Set videosToKeep = new HashSet<>(); 55 | 56 | BufferedReader br = new BufferedReader(new FileReader(simFile)); 57 | String simLine = null; 58 | while ((simLine = br.readLine()) != null) { 59 | storeVideosToDelete(videosToDelete, videosToKeep, simLine, threshold); 60 | 61 | } 62 | br.close(); 63 | 64 | // Write output in outFile 65 | PrintWriter similarity = new PrintWriter(new FileWriter(outFile, true)); 66 | //reset videosToKeep for outputting video names 67 | videosToKeep = new HashSet<>(); 68 | 69 | br = new BufferedReader(new FileReader(simFile)); 70 | simLine = null; 71 | while ((simLine = br.readLine()) != null) { 72 | String[] pairAndScore = simLine.split("\t"); 73 | String[] pair = pairAndScore[0].split(","); 74 | boolean vid1InDelete = videosToDelete.contains(pair[0]); 75 | boolean vid2InDelete = videosToDelete.contains(pair[1]); 76 | if (vid1InDelete || vid2InDelete) { 77 | continue; 78 | }else{ 79 | videosToKeep.addAll(Arrays.asList(pair)); 80 | similarity.println(simLine); 81 | } 82 | 83 | } 84 | br.close(); 85 | similarity.close(); 86 | 87 | PrintWriter listOfFile = new PrintWriter(new FileWriter(outFileNames, true)); 88 | for (String videos : videosToKeep){ 89 | listOfFile.println(videos); 90 | } 91 | listOfFile.close(); 92 | 93 | System.out.println("Stored results in: " + outFile.getAbsolutePath()); 94 | 95 | } 96 | 97 | private static void storeVideosToDelete(Set videosToDelete, Set videosToKeep, String simLine, double threshold) { 98 | String[] pairAndScore = simLine.split("\t"); 99 | double score = Double.parseDouble(pairAndScore[1]); 100 | if (score >= threshold) { 101 | String[] pair = pairAndScore[0].split(","); 102 | String vid1 = pair[0]; 103 | String vid2 = pair[1]; 104 | if(vid1.equals(vid2)){ 105 | return; 106 | } 107 | boolean vid1InKeep = videosToKeep.contains(pair[0]); 108 | boolean vid2InKeep = 
videosToKeep.contains(pair[1]); 109 | boolean vid1InDelete = videosToDelete.contains(pair[0]); 110 | boolean vid2InDelete = videosToDelete.contains(pair[1]); 111 | 112 | if (vid1InDelete || vid2InDelete) { 113 | return; 114 | } 115 | // None of the video is kept 116 | if (!vid1InKeep && !vid2InKeep) { 117 | videosToKeep.add(vid1); 118 | videosToDelete.add(vid2); 119 | } else if (vid1InKeep && vid2InKeep) {// Both of the video are kept 120 | videosToDelete.add(vid1);// delete any one of them 121 | videosToKeep.remove(vid1); 122 | } else if (vid1InKeep) { // Only vid1 is in keep 123 | videosToDelete.add(vid2); 124 | } else if (vid2InKeep) { // Only vid2 is in keep 125 | videosToDelete.add(vid1); 126 | } 127 | 128 | } 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/FormatOutput.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.pooledtimeseries; 19 | 20 | import java.io.BufferedReader; 21 | import java.io.File; 22 | import java.io.FileReader; 23 | import java.io.FileWriter; 24 | import java.io.IOException; 25 | import java.io.PrintWriter; 26 | import java.nio.file.Files; 27 | import java.nio.file.Path; 28 | import java.nio.file.Paths; 29 | import java.text.DecimalFormat; 30 | import java.util.List; 31 | 32 | import com.google.common.base.Charsets; 33 | 34 | public class FormatOutput { 35 | 36 | /** 37 | * Sample output- 38 | * ,1.mp4, 2.mp4, 3.mp4,
39 | * 1.mp4, 1.0, 0.677986882429, 0.514423983869,
40 | * 2.mp4, , 1.0, 0.12525353988,
41 | * 3.mp4, , , 1.0,
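* (As the sample shows, only the upper triangle of the matrix is populated:
* the diagonal is 1.00 and each mirrored pair below the diagonal is left blank.)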
42 | *
43 | * @param args
44 | * args[0] - path of the CSV file to which the formatted similarity matrix is written
45 | * args[1] - path to the file of video pairs with similarity scores, one 'vid1,vid2\t0.5'-style record per line
46 | * args[2] - path to the file listing all videos, one name per line
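*
* Example invocation (the jar comes from the README build step; the file names
* here are illustrative, not defaults):
*   java -cp pooled-time-series-1.0-SNAPSHOT-jar-with-dependencies.jar \
*        org.pooledtimeseries.FormatOutput formatted_similarity_calc.csv similarity.txt video_list.txt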
47 | * 48 | * @throws IOException 49 | */ 50 | public static void main(String[] args) throws IOException { 51 | if (args.length < 3) { 52 | System.err.println("Improper usage. Execute with below 3 arguments- "); 53 | System.err.println("args[0] - path to CSV file to write the formatted similarity calc csv "); 54 | System.err.println("args[1] - Video pairs with similarity score- 'vid1,vid2\t0.5' "); 55 | System.err.println("args[2] - List of all videos "); 56 | throw new RuntimeException("Insufficient Input"); 57 | } 58 | File outFile = new File(args[0]);//CSV file to write the output formatted_similarity_calc.csv 59 | if (outFile.exists()) { 60 | throw new RuntimeException("Output file already exists-" + outFile.getAbsolutePath()); 61 | } 62 | 63 | File simFile = new File(args[1]);//Video pairs with similarity score 64 | 65 | Path inputList = Paths.get(args[2]);// List of all videos 66 | 67 | List videoList = Files.readAllLines(inputList, Charsets.UTF_8); 68 | //adding a blank at first position to match output 69 | videoList.add(0,""); 70 | //Result is a 2D square matrix of size video count + 1 71 | //additional 1 is for storing video file name 72 | String[][] resultMatrix = new String [videoList.size()][videoList.size()]; 73 | System.out.println("Initialised input files and resultMatrix"); 74 | 75 | //init first row with just video name 76 | resultMatrix[0]=videoList.toArray(new String[videoList.size()]); 77 | //init first col with just video name 78 | for (int i=1;i videoList) { 115 | 116 | DecimalFormat df = new DecimalFormat("0.00"); 117 | 118 | String score = ""; 119 | 120 | int indexOfvid1 = 0; 121 | int indexOfvid2 = 0; 122 | 123 | { 124 | // scoped under a brace to limit scope of temp variables 125 | String[] pairAndScore = simLine.split("\t"); 126 | 127 | score = df.format(Double.parseDouble(pairAndScore[1]) ); 128 | String[] pair = pairAndScore[0].split(","); 129 | indexOfvid1 = videoList.indexOf(pair[0]); 130 | indexOfvid2 = videoList.indexOf(pair[1]); 131 | } 132 | 133 | //if this video is not present in input list of video skip it from matrix 134 | //This is used when we create output for a subset of videos 135 | if(indexOfvid2 == -1 || indexOfvid1 == -1) 136 | return; 137 | 138 | //Fill only upper matrix 139 | if (indexOfvid1 < indexOfvid2) { 140 | resultMatrix[indexOfvid1][indexOfvid2]=score; 141 | } else { 142 | //equal score will be one anyway 143 | resultMatrix[indexOfvid2][indexOfvid1]=score; 144 | } 145 | 146 | } 147 | 148 | } 149 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/GradientTimeSeries.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.pooledtimeseries; 19 | 20 | import java.io.File; 21 | import java.io.IOException; 22 | import java.io.StringWriter; 23 | import java.util.ArrayList; 24 | import java.util.logging.Level; 25 | import java.util.logging.Logger; 26 | 27 | import org.apache.hadoop.conf.Configuration; 28 | import org.apache.hadoop.fs.Path; 29 | import org.apache.hadoop.io.LongWritable; 30 | import org.apache.hadoop.io.Text; 31 | import org.apache.hadoop.mapred.FileInputFormat; 32 | import org.apache.hadoop.mapred.FileOutputFormat; 33 | import org.apache.hadoop.mapred.JobClient; 34 | import org.apache.hadoop.mapred.JobConf; 35 | import org.apache.hadoop.mapred.MapReduceBase; 36 | import org.apache.hadoop.mapred.Mapper; 37 | import org.apache.hadoop.mapred.OutputCollector; 38 | import org.apache.hadoop.mapred.Reporter; 39 | import org.apache.hadoop.mapred.TextInputFormat; 40 | import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat; 41 | import org.opencv.core.Core; 42 | import org.pooledtimeseries.util.HadoopFileUtil; 43 | import org.pooledtimeseries.util.PoTUtil; 44 | 45 | public class GradientTimeSeries { 46 | private static final Logger LOG = Logger.getLogger(GradientTimeSeries.class.getName()); 47 | 48 | public static class Map extends MapReduceBase implements Mapper { 49 | @Override 50 | public void configure(JobConf job) { 51 | super.configure(job); 52 | PoTUtil.loadOpenCV(); 53 | } 54 | 55 | public void map(LongWritable key, Text value, OutputCollector output, Reporter reporter) throws IOException { 56 | 57 | try { 58 | File tempFile = new HadoopFileUtil().copyToTempDir(value.toString()); 59 | double[][] series1 = PoT.getGradientTimeSeries(tempFile.toPath(), 5, 5, 8); 60 | tempFile.delete(); 61 | 62 | String ofVector = saveVectors(series1); 63 | output.collect(value, new Text(ofVector)); 64 | } catch (Exception e) { 65 | LOG.log(Level.SEVERE, "Exception while calling PoT.getGradientTimeSeries", e); 66 | } 67 | } 68 | 69 | private static String saveVectors(double[][] vectors) { 70 | int d = vectors[0].length; 71 | 72 | ArrayList temp_hists = new ArrayList(); 73 | 74 | for (int i = 0; i < vectors.length; i++) { 75 | double[][][] temp_hist = new double[1][1][d]; 76 | temp_hist[0][0] = vectors[i]; 77 | 78 | temp_hists.add(temp_hist); 79 | } 80 | 81 | return getSaveHistogramsOutput(temp_hists); 82 | } 83 | 84 | private static String getSaveHistogramsOutput(ArrayList hists) { 85 | int w_d = hists.get(0).length; 86 | int h_d = hists.get(0)[0].length; 87 | int o_d = hists.get(0)[0][0].length; 88 | 89 | int i, j, k, l; 90 | 91 | StringWriter writer = new StringWriter(); 92 | String head = String.format("%d %d", hists.size(), w_d * h_d * o_d); 93 | writer.write(head); 94 | writer.write("\n"); 95 | 96 | for (l = 0; l < (int) hists.size(); l++) { 97 | double[][][] hist = hists.get(l); 98 | 99 | for (i = 0; i < hist.length; i++) { 100 | for (j = 0; j < hist[0].length; j++) { 101 | for (k = 0; k < hist[0][0].length; k++) { // optical_bins+1 102 | writer.write(String.format("%f ", hist[i][j][k])); 103 | } 104 | } 105 | } 106 | 107 | writer.write("\n"); 108 | } 109 | 110 | return writer.toString(); 111 | } 112 | } 113 | 114 | public static class MultiFileOutput extends MultipleTextOutputFormat { 115 | protected String generateFileNameForKeyValue(Text key, Text value, String name) { 116 | String[] splitPath = key.toString().split("/"); 117 | String fileName = 
splitPath[splitPath.length - 1]; 118 | String fName =fileName + ".hog.txt"; 119 | File file = new File(fName); 120 | if(file.exists()) 121 | file.delete(); 122 | return fName; 123 | } 124 | 125 | protected Text generateActualKey(Text key, Text value) { 126 | return null; 127 | } 128 | } 129 | 130 | public static void main(String[] args) throws Exception { 131 | System.loadLibrary(Core.NATIVE_LIBRARY_NAME); 132 | 133 | Configuration baseConf = new Configuration(); 134 | baseConf.set("mapred.reduce.tasks", "0"); 135 | JobConf conf = new JobConf(baseConf, GradientTimeSeries.class); 136 | 137 | conf.setJobName("gradient_time_series"); 138 | 139 | conf.setOutputKeyClass(Text.class); 140 | conf.setOutputValueClass(Text.class); 141 | 142 | conf.setMapperClass(Map.class); 143 | 144 | conf.setInputFormat(TextInputFormat.class); 145 | conf.setOutputFormat(MultiFileOutput.class); 146 | 147 | FileInputFormat.setInputPaths(conf, new Path(args[0])); 148 | FileOutputFormat.setOutputPath(conf, new Path(args[1])); 149 | 150 | JobClient.runJob(conf); 151 | } 152 | } 153 | 154 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/MeanChiSquareDistanceCalculation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries; 19 | 20 | import java.io.IOException; 21 | import java.util.Iterator; 22 | import java.util.List; 23 | import java.util.logging.Logger; 24 | 25 | import org.apache.hadoop.conf.Configuration; 26 | import org.apache.hadoop.fs.Path; 27 | import org.apache.hadoop.io.DoubleWritable; 28 | import org.apache.hadoop.io.IntWritable; 29 | import org.apache.hadoop.io.NullWritable; 30 | import org.apache.hadoop.io.BytesWritable; 31 | import org.apache.hadoop.io.Text; 32 | import org.apache.hadoop.mapred.FileOutputFormat; 33 | import org.apache.hadoop.mapred.JobClient; 34 | import org.apache.hadoop.mapred.JobConf; 35 | import org.apache.hadoop.mapred.MapReduceBase; 36 | import org.apache.hadoop.mapred.Mapper; 37 | import org.apache.hadoop.mapred.OutputCollector; 38 | import org.apache.hadoop.mapred.Reducer; 39 | import org.apache.hadoop.mapred.Reporter; 40 | import org.apache.hadoop.mapred.SequenceFileInputFormat; 41 | import org.apache.hadoop.mapred.TextOutputFormat; 42 | import org.pooledtimeseries.cartesian.CartesianInputFormat; 43 | import org.pooledtimeseries.util.PoTSerialiser; 44 | import org.pooledtimeseries.util.ReadSeqFileUtil; 45 | 46 | public class MeanChiSquareDistanceCalculation { 47 | private static final Logger LOG = Logger.getLogger(MeanChiSquareDistanceCalculation.class.getName()); 48 | static int videos=0; 49 | public static class Map extends MapReduceBase implements Mapper { 50 | 51 | @Override 52 | public void map(Text key, BytesWritable value, OutputCollector output, Reporter reporter) throws IOException { 53 | videos++; 54 | System.out.println(videos); 55 | LOG.info("Processing pair - " + key); 56 | long startTime = System.currentTimeMillis(); 57 | 58 | String[] videoFiles = ReadSeqFileUtil.getFileNames(key); 59 | 60 | // If we're looking at a pair of videos where the videos are the same 61 | // we don't include them in the meanChiSquareDistance calculation. 
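// For distinct pairs, dimension i of the two feature vectors is compared with
// PoT.chiSquareDistance and emitted under key i; the reducer below then averages
// these per-dimension distances over all pairs. (Sketch, assuming the
// conventional definition: for histograms u and v the chi-square distance is
// sum_i (u_i - v_i)^2 / (u_i + v_i).)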
62 | if (videoFiles[0].equals(videoFiles[1])) 63 | return; 64 | 65 | List fvList = (List) PoTSerialiser.getObject(value.getBytes()) ; 66 | 67 | LOG.info("Loaded Time Series for pair in - " + (System.currentTimeMillis() - startTime)); 68 | 69 | for (int i = 0; i < fvList.get(0).numDim(); i++) { 70 | 71 | output.collect(new IntWritable(i), new DoubleWritable( 72 | PoT.chiSquareDistance( 73 | fvList.get(0).feature.get(i), 74 | fvList.get(1).feature.get(i) 75 | ) 76 | )); 77 | } 78 | 79 | LOG.info("Completed processing pair - " + key); 80 | LOG.info("Time taken to complete job - " + (System.currentTimeMillis() - startTime)); 81 | } 82 | } 83 | 84 | public static class Reduce extends MapReduceBase implements Reducer{ 85 | 86 | public void reduce(IntWritable key, Iterator values, 87 | OutputCollector output, Reporter reporter) throws IOException { 88 | double sum = 0; 89 | int count = 0; 90 | 91 | while (values.hasNext()){ 92 | sum += values.next().get(); 93 | count++; 94 | } 95 | 96 | output.collect(null, new DoubleWritable(sum / (double) count)); 97 | } 98 | 99 | } 100 | 101 | public static void main(String[] args) throws Exception { 102 | 103 | Configuration baseConf = new Configuration(); 104 | baseConf.set("mapreduce.job.maps", "96"); 105 | baseConf.set("mapred.tasktracker.map.tasks.maximum", "96"); 106 | 107 | JobConf conf = new JobConf(baseConf, MeanChiSquareDistanceCalculation.class); 108 | System.out.println("Before Map:"+ conf.getNumMapTasks()); 109 | conf.setNumMapTasks(96); 110 | System.out.println("After Map:"+ conf.getNumMapTasks()); 111 | 112 | 113 | conf.setJobName("mean_chi_square_calculation"); 114 | 115 | System.out.println("Track:" + baseConf.get("mapred.job.tracker")); 116 | System.out.println("Job Name- "+conf.getJobName()); 117 | System.out.println(baseConf.get("mapreduce.job.maps")); 118 | 119 | conf.setMapOutputKeyClass(IntWritable.class); 120 | conf.setMapOutputValueClass(DoubleWritable.class); 121 | conf.setOutputKeyClass(IntWritable.class); 122 | conf.setOutputValueClass(DoubleWritable.class); 123 | 124 | conf.setOutputFormat(TextOutputFormat.class); 125 | 126 | conf.setInputFormat(CartesianInputFormat.class); 127 | CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, 128 | args[0]); 129 | CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, 130 | args[0]); 131 | 132 | 133 | FileOutputFormat.setOutputPath(conf, new Path(args[1])); 134 | 135 | conf.setMapperClass(Map.class); 136 | conf.setReducerClass(Reduce.class); 137 | 138 | JobClient.runJob(conf); 139 | } 140 | } 141 | 142 | 143 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/OpticalTimeSeries.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.pooledtimeseries; 19 | 20 | 21 | 22 | import java.io.File; 23 | import java.io.IOException; 24 | import java.io.StringWriter; 25 | import java.util.ArrayList; 26 | import java.util.logging.Level; 27 | import java.util.logging.Logger; 28 | 29 | import org.apache.hadoop.conf.Configuration; 30 | import org.apache.hadoop.fs.Path; 31 | import org.apache.hadoop.io.LongWritable; 32 | import org.apache.hadoop.io.Text; 33 | import org.apache.hadoop.mapred.FileInputFormat; 34 | import org.apache.hadoop.mapred.FileOutputFormat; 35 | import org.apache.hadoop.mapred.JobClient; 36 | import org.apache.hadoop.mapred.JobConf; 37 | import org.apache.hadoop.mapred.MapReduceBase; 38 | import org.apache.hadoop.mapred.Mapper; 39 | import org.apache.hadoop.mapred.OutputCollector; 40 | import org.apache.hadoop.mapred.Reporter; 41 | import org.apache.hadoop.mapred.TextInputFormat; 42 | import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat; 43 | import org.opencv.core.Core; 44 | import org.pooledtimeseries.util.HadoopFileUtil; 45 | import org.pooledtimeseries.util.PoTUtil; 46 | 47 | public class OpticalTimeSeries { 48 | private static final Logger LOG = Logger.getLogger(OpticalTimeSeries.class.getName()); 49 | 50 | public static class Map extends MapReduceBase implements Mapper { 51 | @Override 52 | public void configure(JobConf job) { 53 | super.configure(job); 54 | PoTUtil.loadOpenCV(); 55 | } 56 | 57 | public void map(LongWritable key, Text value, OutputCollector output, Reporter reporter) throws IOException { 58 | 59 | try { 60 | File tempFile = new HadoopFileUtil().copyToTempDir(value.toString()); 61 | double[][] series1 = PoT.getOpticalTimeSeries(tempFile.toPath(), 5, 5, 8); 62 | tempFile.delete(); 63 | 64 | String ofVector = saveVectors(series1); 65 | output.collect(value, new Text(ofVector)); 66 | } catch (Exception e) { 67 | e.printStackTrace(); 68 | LOG.log(Level.SEVERE, "Exception while calling PoT.getOpticalTimeSeries", e); 69 | } 70 | } 71 | 72 | private static String saveVectors(double[][] vectors) { 73 | int d = vectors[0].length; 74 | 75 | ArrayList temp_hists = new ArrayList(); 76 | 77 | for (int i = 0; i < vectors.length; i++) { 78 | double[][][] temp_hist = new double[1][1][d]; 79 | temp_hist[0][0] = vectors[i]; 80 | 81 | temp_hists.add(temp_hist); 82 | } 83 | 84 | return getSaveHistogramsOutput(temp_hists); 85 | } 86 | 87 | private static String getSaveHistogramsOutput(ArrayList hists) { 88 | int w_d = hists.get(0).length; 89 | int h_d = hists.get(0)[0].length; 90 | int o_d = hists.get(0)[0][0].length; 91 | 92 | int i, j, k, l; 93 | 94 | StringWriter writer = new StringWriter(); 95 | String head = String.format("%d %d", hists.size(), w_d * h_d * o_d); 96 | writer.write(head); 97 | writer.write("\n"); 98 | 99 | for (l = 0; l < (int) hists.size(); l++) { 100 | double[][][] hist = hists.get(l); 101 | 102 | for (i = 0; i < hist.length; i++) { 103 | for (j = 0; j < hist[0].length; j++) { 104 | for (k = 0; k < hist[0][0].length; k++) { // optical_bins+1 105 | writer.write(String.format("%f ", hist[i][j][k])); 106 | } 107 | } 108 | 
} 109 | 110 | writer.write("\n"); 111 | } 112 | 113 | return writer.toString(); 114 | } 115 | } 116 | 117 | public static class MultiFileOutput extends MultipleTextOutputFormat { 118 | protected String generateFileNameForKeyValue(Text key, Text value, String name) { 119 | String[] splitPath = key.toString().split("/"); 120 | String fileName = splitPath[splitPath.length - 1]; 121 | String fName =fileName + ".of.txt"; 122 | File file = new File(fName); 123 | if(file.exists()) 124 | file.delete(); 125 | return fName; 126 | } 127 | 128 | protected Text generateActualKey(Text key, Text value) { 129 | return null; 130 | } 131 | } 132 | 133 | public static void main(String[] args) throws Exception { 134 | System.loadLibrary(Core.NATIVE_LIBRARY_NAME); 135 | LOG.info("Loaded- " + Core.NATIVE_LIBRARY_NAME); 136 | 137 | Configuration baseConf = new Configuration(); 138 | baseConf.set("mapred.reduce.tasks", "0"); 139 | JobConf conf = new JobConf(baseConf, OpticalTimeSeries.class); 140 | 141 | conf.setJobName("optical_time_series"); 142 | 143 | conf.setOutputKeyClass(Text.class); 144 | conf.setOutputValueClass(Text.class); 145 | 146 | conf.setMapperClass(Map.class); 147 | 148 | conf.setInputFormat(TextInputFormat.class); 149 | conf.setOutputFormat(MultiFileOutput.class); 150 | 151 | FileInputFormat.setInputPaths(conf, new Path(args[0])); 152 | FileOutputFormat.setOutputPath(conf, new Path(args[1])); 153 | 154 | JobClient.runJob(conf); 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/SimilarityCalculation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries; 19 | 20 | import java.io.File; 21 | import java.io.IOException; 22 | import java.io.InputStream; 23 | import java.util.ArrayList; 24 | import java.util.List; 25 | import java.util.Scanner; 26 | import java.util.logging.Logger; 27 | 28 | import org.apache.commons.lang.ArrayUtils; 29 | import org.apache.hadoop.fs.Path; 30 | import org.apache.hadoop.io.BytesWritable; 31 | import org.apache.hadoop.io.Text; 32 | import org.apache.hadoop.mapred.FileOutputFormat; 33 | import org.apache.hadoop.mapred.JobClient; 34 | import org.apache.hadoop.mapred.JobConf; 35 | import org.apache.hadoop.mapred.MapReduceBase; 36 | import org.apache.hadoop.mapred.Mapper; 37 | import org.apache.hadoop.mapred.OutputCollector; 38 | import org.apache.hadoop.mapred.Reporter; 39 | import org.apache.hadoop.mapred.SequenceFileInputFormat; 40 | import org.apache.hadoop.mapred.TextOutputFormat; 41 | import org.pooledtimeseries.cartesian.CartesianInputFormat; 42 | import org.pooledtimeseries.util.HadoopFileUtil; 43 | import org.pooledtimeseries.util.PoTSerialiser; 44 | import org.pooledtimeseries.util.ReadSeqFileUtil; 45 | 46 | public class SimilarityCalculation { 47 | 48 | private static final Logger LOG = Logger.getLogger(SimilarityCalculation.class.getName()); 49 | 50 | static int videos = 0; 51 | 52 | public static class Map extends MapReduceBase implements Mapper { 53 | 54 | double[] meanDists = null; 55 | 56 | @Override 57 | public void configure(JobConf conf) { 58 | super.configure(conf); 59 | String meanDistsPath = conf.get("meanDistsFilePath"); 60 | List meanDistsList = new ArrayList(); 61 | InputStream in = null; 62 | try { 63 | in = HadoopFileUtil.getInputStreamFromHDFS(meanDistsPath); 64 | Scanner scin = new Scanner(in) ; 65 | while (scin.hasNextDouble()) { 66 | meanDistsList.add(scin.nextDouble()); 67 | } 68 | scin.close(); 69 | } catch (IOException e) { 70 | e.printStackTrace(); 71 | } finally { 72 | if(in !=null){ 73 | try { 74 | in.close(); 75 | } catch (IOException e) {} 76 | } 77 | } 78 | 79 | this.meanDists = ArrayUtils.toPrimitive(meanDistsList.toArray(new Double[0])); 80 | LOG.info("Loaded meanDist of length - " + meanDists.length); 81 | } 82 | 83 | @Override 84 | public void map(Text key, BytesWritable value, OutputCollector output, Reporter reporter) 85 | throws IOException { 86 | videos++; 87 | LOG.info("Processing pair - " + key); 88 | long startTime = System.currentTimeMillis(); 89 | 90 | String[] videoPaths = ReadSeqFileUtil.getFileNames(key); 91 | 92 | List fvList = (List) PoTSerialiser.getObject(value.getBytes()) ; 93 | LOG.info("Loaded Time Series for pair in - " + (System.currentTimeMillis() - startTime)); 94 | 95 | double similarity = PoT.kernelDistance(fvList.get(0), fvList.get(1), meanDists); 96 | 97 | File p1 = new File(videoPaths[0]); 98 | File p2 = new File(videoPaths[1]); 99 | output.collect(new Text(p1.getName() + ',' + p2.getName()), new Text(String.valueOf(similarity))); 100 | 101 | LOG.info("Completed processing pair - " + key); 102 | LOG.info("Time taken to complete job - " + (System.currentTimeMillis() - startTime)); 103 | } 104 | } 105 | 106 | public static void main(String[] args) throws Exception { 107 | 108 | JobConf conf = new JobConf(); 109 | System.out.println("Before Map:" + conf.getNumMapTasks()); 110 | conf.setNumMapTasks(196); 111 | System.out.println("After Map:" + conf.getNumMapTasks()); 112 | conf.setJobName("similarity_calc"); 113 | 114 | conf.set("meanDistsFilePath", args[2]); 115 | 116 | System.out.println("Job Name: " + 
conf.getJobName()); 117 | conf.setJarByClass(SimilarityCalculation.class); 118 | 119 | conf.setOutputKeyClass(Text.class); 120 | conf.setOutputValueClass(Text.class); 121 | 122 | conf.setInputFormat(CartesianInputFormat.class); 123 | CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, 124 | args[0]); 125 | CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, 126 | args[0]); 127 | 128 | conf.setOutputFormat(TextOutputFormat.class); 129 | 130 | FileOutputFormat.setOutputPath(conf, new Path(args[1])); 131 | 132 | conf.setMapperClass(Map.class); 133 | 134 | JobClient.runJob(conf); 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/cartesian/CartesianInputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.pooledtimeseries.cartesian; 19 | 20 | import java.io.IOException; 21 | 22 | import org.apache.commons.logging.Log; 23 | import org.apache.commons.logging.LogFactory; 24 | import org.apache.hadoop.mapred.FileInputFormat; 25 | import org.apache.hadoop.mapred.InputSplit; 26 | import org.apache.hadoop.mapred.JobConf; 27 | import org.apache.hadoop.mapred.RecordReader; 28 | import org.apache.hadoop.mapred.Reporter; 29 | import org.apache.hadoop.mapred.join.CompositeInputSplit; 30 | import org.apache.hadoop.util.ReflectionUtils; 31 | 32 | public class CartesianInputFormat extends FileInputFormat { 33 | 34 | public static final Log LOG = LogFactory.getLog(CartesianInputFormat.class); 35 | 36 | public static final String LEFT_INPUT_FORMAT = "cart.left.inputformat"; 37 | public static final String LEFT_INPUT_PATH = "cart.left.path"; 38 | public static final String RIGHT_INPUT_FORMAT = "cart.right.inputformat"; 39 | public static final String RIGHT_INPUT_PATH = "cart.right.path"; 40 | 41 | public static void setLeftInputInfo(JobConf conf, Class inputFormat, String inputPath) { 42 | conf.set(LEFT_INPUT_FORMAT, inputFormat.getCanonicalName()); 43 | conf.set(LEFT_INPUT_PATH, inputPath); 44 | } 45 | 46 | public static void setRightInputInfo(JobConf job, Class inputFormat, String inputPath) { 47 | job.set(RIGHT_INPUT_FORMAT, inputFormat.getCanonicalName()); 48 | job.set(RIGHT_INPUT_PATH, inputPath); 49 | } 50 | 51 | @Override 52 | public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException { 53 | 54 | try { 55 | // Get the input splits from both the left and right data sets 56 | InputSplit[] leftSplits = getInputSplits(conf, conf.get(LEFT_INPUT_FORMAT), conf.get(LEFT_INPUT_PATH), 57 | numSplits); 58 | InputSplit[] rightSplits = 
getInputSplits(conf, conf.get(RIGHT_INPUT_FORMAT), conf.get(RIGHT_INPUT_PATH), 59 | numSplits); 60 | 61 | // Create our CartesianInputSplits, size equal to left.length * 62 | // right.length 63 | CompositeInputSplit[] returnSplits = new CompositeInputSplit[((leftSplits.length * (rightSplits.length - 1)) 64 | / 2) + leftSplits.length]; 65 | 66 | int i = 0; 67 | // For each of the left input splits 68 | for (int leftLoop = 0; leftLoop < leftSplits.length; leftLoop++) { 69 | InputSplit left = leftSplits[leftLoop]; 70 | // For each of the right input splits 71 | 72 | for (int rightLoop = leftLoop; rightLoop < rightSplits.length; rightLoop++) { 73 | InputSplit right = rightSplits[rightLoop]; 74 | // Create a new composite input split composing of the two 75 | 76 | returnSplits[i] = new CompositeInputSplit(2); 77 | returnSplits[i].add(left); 78 | returnSplits[i].add(right); 79 | ++i; 80 | } 81 | } 82 | 83 | // Return the composite splits 84 | LOG.info("Total splits to process: " + returnSplits.length); 85 | return returnSplits; 86 | } catch (ClassNotFoundException e) { 87 | e.printStackTrace(); 88 | throw new IOException(e); 89 | } 90 | } 91 | 92 | @Override 93 | public RecordReader getRecordReader(InputSplit split, JobConf conf, Reporter reporter) throws IOException { 94 | // create a new instance of the Cartesian record reader 95 | return new CartesianRecordReader((CompositeInputSplit) split, conf, reporter); 96 | } 97 | 98 | private InputSplit[] getInputSplits(JobConf conf, String inputFormatClass, String inputPath, int numSplits) 99 | throws ClassNotFoundException, IOException { 100 | // Create a new instance of the input format 101 | FileInputFormat inputFormat = (FileInputFormat) ReflectionUtils.newInstance(Class.forName(inputFormatClass), 102 | conf); 103 | 104 | // Set the input path for the left data set 105 | inputFormat.setInputPaths(conf, inputPath); 106 | 107 | // Get the left input splits 108 | return inputFormat.getSplits(conf, numSplits); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/cartesian/CartesianRecordReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries.cartesian; 19 | 20 | import java.io.IOException; 21 | import java.util.List; 22 | 23 | import org.apache.hadoop.io.BytesWritable; 24 | import org.apache.hadoop.io.Text; 25 | import org.apache.hadoop.mapred.FileInputFormat; 26 | import org.apache.hadoop.mapred.InputSplit; 27 | import org.apache.hadoop.mapred.JobConf; 28 | import org.apache.hadoop.mapred.RecordReader; 29 | import org.apache.hadoop.mapred.Reporter; 30 | import org.apache.hadoop.mapred.join.CompositeInputSplit; 31 | import org.apache.hadoop.util.ReflectionUtils; 32 | import org.pooledtimeseries.FeatureVector; 33 | import org.pooledtimeseries.util.PoTSerialiser; 34 | 35 | public class CartesianRecordReader 36 | implements RecordReader { 37 | 38 | // Record readers to get key value pairs 39 | private RecordReader leftRR = null, rightRR = null; 40 | 41 | // Store configuration to re-create the right record reader 42 | private FileInputFormat rightFIF; 43 | private JobConf rightConf; 44 | private InputSplit rightIS; 45 | private Reporter rightReporter; 46 | // if left and right are same splits this flag is set 47 | // It's used to avoid repeated pairs 48 | // for l=1,2 r =1,2 pair=11,12,22 49 | private boolean pairWithItself; 50 | 51 | // Helper variables 52 | private K1 lkey; 53 | private V1 lvalue; 54 | private K2 rkey; 55 | private V2 rvalue; 56 | private boolean goToNextLeft = true, alldone = false; 57 | private int rightShiftCount = 1; 58 | 59 | /** 60 | * Creates a new instance of the CartesianRecordReader 61 | * 62 | * @param split 63 | * @param conf 64 | * @param reporter 65 | * @throws IOException 66 | */ 67 | public CartesianRecordReader(CompositeInputSplit split, JobConf conf, Reporter reporter) throws IOException { 68 | this.rightConf = conf; 69 | this.rightIS = split.get(1); 70 | this.rightReporter = reporter; 71 | 72 | try { 73 | // Create left record reader 74 | FileInputFormat leftFIF = (FileInputFormat) ReflectionUtils 75 | .newInstance(Class.forName(conf.get(CartesianInputFormat.LEFT_INPUT_FORMAT)), conf); 76 | 77 | leftRR = leftFIF.getRecordReader(split.get(0), conf, reporter); 78 | 79 | // Create right record reader 80 | rightFIF = (FileInputFormat) ReflectionUtils 81 | .newInstance(Class.forName(conf.get(CartesianInputFormat.RIGHT_INPUT_FORMAT)), conf); 82 | 83 | rightRR = rightFIF.getRecordReader(rightIS, rightConf, rightReporter); 84 | } catch (ClassNotFoundException e) { 85 | 86 | e.printStackTrace(); 87 | throw new IOException(e); 88 | } 89 | 90 | // Create key value pairs for parsing 91 | lkey = (K1) this.leftRR.createKey(); 92 | lvalue = (V1) this.leftRR.createValue(); 93 | 94 | rkey = (K2) this.rightRR.createKey(); 95 | rvalue = (V2) this.rightRR.createValue(); 96 | } 97 | 98 | @Override 99 | public Text createKey() { 100 | return new Text(); 101 | } 102 | 103 | @Override 104 | public BytesWritable createValue() { 105 | return new BytesWritable(); 106 | } 107 | 108 | @Override 109 | public long getPos() throws IOException { 110 | return leftRR.getPos(); 111 | } 112 | 113 | @Override 114 | public boolean next(Text key, BytesWritable value) throws IOException { 115 | 116 | do { 117 | // If we are to go to the next left key/value pair 118 | if (goToNextLeft) { 119 | // Read the next key value pair, false means no more pairs 120 | if (!leftRR.next(lkey, lvalue)) { 121 | // If no more, then this task is nearly finished 122 | alldone = true; 123 | break; 124 | } else { 125 | // If we aren't done, set the value to the key and set 126 | // our flags 127 | 
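// goToNextLeft = false: keep pairing the current left record with right records;
// alldone = false: the left reader still has records, so this split is not finished.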
goToNextLeft = alldone = false; 128 | 129 | // Reset the right record reader 130 | this.rightRR = this.rightFIF.getRecordReader(this.rightIS, this.rightConf, this.rightReporter); 131 | } 132 | 133 | if (this.pairWithItself) { 134 | // shifting right data set to avoid repeated pairs 135 | // we consider a,b == b,a 136 | for (int i = 0; i < rightShiftCount; i++) { 137 | rightRR.next(rkey, rvalue); 138 | } 139 | rightShiftCount++; 140 | } 141 | } 142 | 143 | // Read the next key value pair from the right data set 144 | if (rightRR.next(rkey, rvalue)) { 145 | // If success, set key and value for left and right splits 146 | key.set(lkey.toString() + "~" + rkey.toString()); 147 | // Merge FeatureVector of both videos 148 | // Order is important and should be same as order of key 149 | List featureList = (List)PoTSerialiser.getObject(lvalue.getBytes()); 150 | featureList.addAll((List) PoTSerialiser.getObject(rvalue.getBytes())); 151 | byte[] featureListBytes = PoTSerialiser.getBytes(featureList); 152 | value.set(featureListBytes, 0, featureListBytes.length); 153 | 154 | // This assumes that key will always be unique among all splits 155 | if (lkey.toString().equals(rkey.toString())) { 156 | this.pairWithItself = true; 157 | } 158 | } else { 159 | // Otherwise, this right data set is complete 160 | // and we should go to the next left pair 161 | goToNextLeft = true; 162 | } 163 | 164 | // This loop will continue if we finished reading key/value 165 | // pairs from the right data set 166 | } while (goToNextLeft); 167 | 168 | if (alldone) { 169 | // reset shift counter 170 | rightShiftCount = 1; 171 | this.pairWithItself = false; 172 | } 173 | // Return true if a key/value pair was read, false otherwise 174 | return !alldone; 175 | } 176 | 177 | public void close() throws IOException { 178 | leftRR.close(); 179 | rightRR.close(); 180 | } 181 | 182 | public float getProgress() throws IOException { 183 | return leftRR.getProgress(); 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/healthcheck/CheckCartesianProductSeqFile.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries.healthcheck; 19 | 20 | import java.io.IOException; 21 | import java.util.Iterator; 22 | import java.util.List; 23 | 24 | import org.apache.hadoop.fs.Path; 25 | import org.apache.hadoop.io.BytesWritable; 26 | import org.apache.hadoop.io.IntWritable; 27 | import org.apache.hadoop.io.Text; 28 | import org.apache.hadoop.mapred.JobClient; 29 | import org.apache.hadoop.mapred.JobConf; 30 | import org.apache.hadoop.mapred.MapReduceBase; 31 | import org.apache.hadoop.mapred.Mapper; 32 | import org.apache.hadoop.mapred.OutputCollector; 33 | import org.apache.hadoop.mapred.Reducer; 34 | import org.apache.hadoop.mapred.Reporter; 35 | import org.apache.hadoop.mapred.RunningJob; 36 | import org.apache.hadoop.mapred.SequenceFileInputFormat; 37 | import org.apache.hadoop.mapred.TextOutputFormat; 38 | import org.apache.hadoop.util.GenericOptionsParser; 39 | import org.pooledtimeseries.FeatureVector; 40 | import org.pooledtimeseries.cartesian.CartesianInputFormat; 41 | import org.pooledtimeseries.seqfile.TextVectorsToSequenceFile; 42 | import org.pooledtimeseries.util.PoTSerialiser; 43 | import org.pooledtimeseries.util.ReadSeqFileUtil; 44 | 45 | /** 46 | * Program for verifying Sequence File generated by {@link TextVectorsToSequenceFile}
47 | * If the SeqFile is correct, the logs for this job will have printed the expected keys and sizes.
48 | * The output of this job will have two records:
49 | * - Number of pairs with similar key 50 | * - Number of pairs with different keys 51 | */ 52 | public class CheckCartesianProductSeqFile { 53 | 54 | public static class CartesianMapper extends MapReduceBase implements Mapper { 55 | 56 | private Text simkey = new Text("simkey"); 57 | private Text diskey = new Text("diskey"); 58 | private static final IntWritable one = new IntWritable(1); 59 | 60 | public void map(Text key, BytesWritable value, OutputCollector output, Reporter reporter) 61 | throws IOException { 62 | // System.out.println(value); 63 | System.out.println(key); 64 | System.out.println(""); 65 | 66 | System.out.println("Size- "+ ((List) PoTSerialiser.getObject(value.getBytes()) ).size() ); 67 | 68 | System.out.println(); 69 | // If the two values are equal add one to output 70 | String[] files = ReadSeqFileUtil.getFileNames(key); 71 | if (files[0].equals(files[1])){ 72 | output.collect(simkey, one); 73 | }else{ 74 | output.collect(diskey, one); 75 | } 76 | 77 | 78 | } 79 | } 80 | 81 | public static class CartesianReducer extends MapReduceBase implements Reducer { 82 | private Text outputVal = new Text(); 83 | 84 | public void reduce(Text key, Iterator values, OutputCollector output, 85 | Reporter reporter) throws IOException { 86 | int sum = 0; 87 | while (values.hasNext()) { 88 | sum += values.next().get(); 89 | } 90 | outputVal.set("" + sum); 91 | output.collect(key, outputVal); 92 | } 93 | 94 | } 95 | 96 | public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { 97 | 98 | long start = System.currentTimeMillis(); 99 | JobConf conf = new JobConf("Cartesian Product"); 100 | String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); 101 | if (otherArgs.length != 2) { 102 | System.err.println("Usage: CheckCartesianProductSeqFile "); 103 | System.exit(1); 104 | } 105 | 106 | // Configure the join type 107 | conf.setJarByClass(CheckCartesianProductSeqFile.class); 108 | 109 | conf.setMapperClass(CartesianMapper.class); 110 | conf.setReducerClass(CartesianReducer.class); 111 | 112 | conf.setInputFormat(CartesianInputFormat.class); 113 | CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]); 114 | CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]); 115 | 116 | TextOutputFormat.setOutputPath(conf, new Path(otherArgs[1])); 117 | 118 | conf.setOutputKeyClass(Text.class); 119 | conf.setOutputValueClass(IntWritable.class); 120 | 121 | RunningJob job = JobClient.runJob(conf); 122 | while (!job.isComplete()) { 123 | Thread.sleep(1000); 124 | } 125 | 126 | long finish = System.currentTimeMillis(); 127 | 128 | System.out.println("Time in ms: " + (finish - start)); 129 | 130 | System.exit(job.isSuccessful() ? 0 : 2); 131 | } 132 | 133 | } 134 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/seqfile/FullFileInputFormat.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.pooledtimeseries.seqfile; 19 | 20 | import java.io.IOException; 21 | 22 | import org.apache.hadoop.fs.Path; 23 | import org.apache.hadoop.io.BytesWritable; 24 | import org.apache.hadoop.io.Text; 25 | import org.apache.hadoop.mapreduce.InputSplit; 26 | import org.apache.hadoop.mapreduce.JobContext; 27 | import org.apache.hadoop.mapreduce.RecordReader; 28 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 29 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 30 | 31 | public class FullFileInputFormat extends 32 | FileInputFormat { 33 | @Override 34 | protected boolean isSplitable(JobContext context, Path file) { 35 | return false; 36 | } 37 | 38 | @Override 39 | public RecordReader createRecordReader( 40 | InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { 41 | FullFileRecordReader reader = new FullFileRecordReader(); 42 | reader.initialize(split, context); 43 | return reader; 44 | } 45 | } -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/seqfile/FullFileRecordReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries.seqfile; 19 | 20 | import java.io.IOException; 21 | 22 | import org.apache.hadoop.conf.Configuration; 23 | import org.apache.hadoop.io.BytesWritable; 24 | import org.apache.hadoop.io.Text; 25 | import org.apache.hadoop.mapreduce.InputSplit; 26 | import org.apache.hadoop.mapreduce.RecordReader; 27 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 28 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 29 | import org.pooledtimeseries.util.PoTConstants; 30 | import org.pooledtimeseries.util.PoTSerialiser; 31 | import org.pooledtimeseries.util.ReadSeqFileUtil; 32 | 33 | public class FullFileRecordReader extends RecordReader { 34 | public static final byte[] VECTOR_SEPERATOR = PoTConstants.VECTOR_SEPERATOR.getBytes(); 35 | 36 | private FileSplit fileSplit; 37 | private Configuration conf; 38 | private BytesWritable value = new BytesWritable(); 39 | private Text key = new Text(); 40 | 41 | private boolean processed = false; 42 | 43 | @Override 44 | public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { 45 | this.fileSplit = (FileSplit) split; 46 | this.conf = context.getConfiguration(); 47 | } 48 | 49 | @Override 50 | public boolean nextKeyValue() throws IOException, InterruptedException { 51 | if (!processed) { 52 | 53 | String files[] = new String[2]; 54 | files[0] = fileSplit.getPath().toString() + ".of.txt"; 55 | files[1] = fileSplit.getPath().toString() + ".hog.txt"; 56 | 57 | byte[] listFeatures = PoTSerialiser.getBytes(ReadSeqFileUtil.computeFeatureFromSeries(files) ); 58 | 59 | value.set(listFeatures, 0, listFeatures.length ); 60 | key.set(fileSplit.getPath().toString()); 61 | processed = true; 62 | return true; 63 | } 64 | return false; 65 | } 66 | 67 | 68 | @Override 69 | public Text getCurrentKey() throws IOException, InterruptedException { 70 | return key; 71 | } 72 | 73 | @Override 74 | public BytesWritable getCurrentValue() throws IOException, InterruptedException { 75 | return value; 76 | } 77 | 78 | @Override 79 | public float getProgress() throws IOException { 80 | return processed ? 1.0f : 0.0f; 81 | } 82 | 83 | @Override 84 | public void close() throws IOException { 85 | // do nothing 86 | } 87 | } -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/seqfile/PoTVideoPathFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries.seqfile; 19 | 20 | import java.io.IOException; 21 | 22 | import org.apache.hadoop.conf.Configuration; 23 | import org.apache.hadoop.conf.Configured; 24 | import org.apache.hadoop.fs.FileSystem; 25 | import org.apache.hadoop.fs.Path; 26 | import org.apache.hadoop.fs.PathFilter; 27 | 28 | public class PoTVideoPathFilter extends Configured implements PathFilter{ 29 | Configuration conf; 30 | FileSystem fs; 31 | 32 | @Override 33 | public boolean accept(Path path) { 34 | try { 35 | if (fs.isDirectory(path)) { 36 | return true; 37 | } else { 38 | //only accept files with mp4 39 | if (path.getName().endsWith(".mp4")) { 40 | return true; 41 | } 42 | } 43 | } catch (IOException e) { 44 | e.printStackTrace(); 45 | } 46 | return false; 47 | } 48 | 49 | @Override 50 | public void setConf(Configuration conf) { 51 | this.conf = conf; 52 | if (conf != null) { 53 | try { 54 | fs = FileSystem.get(conf); 55 | } catch (IOException e) { 56 | e.printStackTrace(); 57 | } 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/seqfile/TextVectorsToSequenceFile.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries.seqfile; 19 | 20 | import java.io.IOException; 21 | import java.util.logging.Logger; 22 | 23 | import org.apache.hadoop.conf.Configuration; 24 | import org.apache.hadoop.conf.Configured; 25 | import org.apache.hadoop.fs.Path; 26 | import org.apache.hadoop.io.BytesWritable; 27 | import org.apache.hadoop.io.Text; 28 | import org.apache.hadoop.mapreduce.InputSplit; 29 | import org.apache.hadoop.mapreduce.Job; 30 | import org.apache.hadoop.mapreduce.Mapper; 31 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 32 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 33 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 34 | import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; 35 | 36 | public class TextVectorsToSequenceFile extends Configured { 37 | static class SequenceFileMapper extends 38 | Mapper { 39 | private static final Logger LOG = Logger.getLogger(TextVectorsToSequenceFile.class.getName()); 40 | 41 | private Text filename; 42 | 43 | @Override 44 | protected void setup(Context context) throws IOException, 45 | InterruptedException { 46 | InputSplit split = context.getInputSplit(); 47 | Path path = ((FileSplit) split).getPath(); 48 | filename = new Text(path.toString()); 49 | } 50 | 51 | @Override 52 | protected void map(Text key, BytesWritable value, 53 | Context context) throws IOException, InterruptedException { 54 | LOG.info("Processing filename- " + filename); 55 | context.write(filename, value); 56 | } 57 | } 58 | 59 | 60 | public static void main(String[] args) throws Exception { 61 | Configuration conf = new Configuration(); 62 | Job job = Job.getInstance(conf); 63 | job.setJarByClass(TextVectorsToSequenceFile.class); 64 | job.setJobName("smallfilestoseqfile"); 65 | job.setInputFormatClass(FullFileInputFormat.class); 66 | job.setOutputFormatClass(SequenceFileOutputFormat.class); 67 | 68 | job.setNumReduceTasks(1); 69 | FullFileInputFormat.setInputPaths(job, new Path(args[0])); 70 | FileInputFormat.setInputPathFilter(job, PoTVideoPathFilter.class); 71 | 72 | FileOutputFormat.setOutputPath(job, new Path(args[1])); 73 | 74 | job.setOutputKeyClass(Text.class); 75 | job.setOutputValueClass(BytesWritable.class); 76 | job.setMapperClass(SequenceFileMapper.class); 77 | job.waitForCompletion(true); 78 | 79 | } 80 | 81 | 82 | } -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/util/ClassScope.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries.util; 19 | 20 | import java.util.Vector; 21 | import java.util.logging.Logger; 22 | 23 | public class ClassScope { 24 | private static java.lang.reflect.Field LIBRARIES; 25 | private static final Logger LOG = Logger.getLogger(ClassScope.class.getName()); 26 | 27 | static { 28 | try { 29 | LIBRARIES = ClassLoader.class.getDeclaredField("loadedLibraryNames"); 30 | } catch (Exception e) { 31 | LIBRARIES = null; 32 | e.printStackTrace(); 33 | } 34 | LIBRARIES.setAccessible(true); 35 | } 36 | 37 | private static Vector getLoadedLibraries(final ClassLoader loader) throws Exception { 38 | final Vector libraries = (Vector) LIBRARIES.get(loader); 39 | return libraries; 40 | } 41 | 42 | public static boolean isLibraryLoaded(String library) { 43 | try { 44 | final Vector libraries = ClassScope.getLoadedLibraries(ClassLoader.getSystemClassLoader()); 45 | LOG.info("Libraries found - " + libraries); 46 | return libraries.contains(library); 47 | } catch (Exception e) { 48 | e.printStackTrace(); 49 | return false; 50 | } 51 | 52 | } 53 | 54 | } -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/util/HadoopFileUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries.util; 19 | 20 | import java.io.File; 21 | import java.io.FileOutputStream; 22 | import java.io.IOException; 23 | import java.io.InputStream; 24 | import java.io.OutputStream; 25 | import java.net.URI; 26 | import java.util.Arrays; 27 | import java.util.logging.Level; 28 | import java.util.logging.Logger; 29 | 30 | import org.apache.hadoop.conf.Configuration; 31 | import org.apache.hadoop.fs.FSDataInputStream; 32 | import org.apache.hadoop.fs.FileSystem; 33 | import org.apache.hadoop.fs.Path; 34 | import org.apache.hadoop.io.IOUtils; 35 | 36 | import com.google.common.io.Files; 37 | 38 | public class HadoopFileUtil { 39 | private static final Logger LOG = Logger.getLogger(HadoopFileUtil.class.getName()); 40 | 41 | /** 42 | * Copies file to a temporary directory and return File object to temporary file 43 | */ 44 | public File copyToTempDir(String value) throws IOException { 45 | Path videoPath = new Path(value.toString()); 46 | videoPath.getFileSystem(new Configuration()); 47 | 48 | LOG.info("Reading file from - " + videoPath); 49 | 50 | File tempDir = Files.createTempDir(); 51 | 52 | // Get the filesystem - HDFS 53 | FileSystem fs = FileSystem.get(URI.create(value.toString()), new Configuration()); 54 | 55 | // Open the path mentioned in HDFS 56 | FSDataInputStream in = null; 57 | OutputStream out = null; 58 | LOG.info("Copying file to a TempDir - " + tempDir.getPath()); 59 | try { 60 | in = fs.open(videoPath); 61 | LOG.info("Available byte - " + in.available()); 62 | out = new FileOutputStream(tempDir.getAbsolutePath() + "/" + videoPath.getName()); 63 | IOUtils.copyBytes(in, out, new Configuration()); 64 | 65 | } catch (Exception e) { 66 | LOG.log(Level.SEVERE, "Error while copying to TempDir", e); 67 | return null; 68 | } finally { 69 | try { 70 | in.close(); 71 | out.close(); 72 | } catch (Exception e) {} 73 | } 74 | LOG.info("Available files - " + Arrays.asList(tempDir.listFiles()) ); 75 | 76 | return new File(tempDir.getAbsolutePath() + "/" + videoPath.getName()); 77 | } 78 | 79 | public static InputStream getInputStreamFromHDFS(String pathToHDFS) throws IOException{ 80 | Path videoPath = new Path(pathToHDFS.toString()); 81 | return videoPath.getFileSystem(new Configuration()).open(videoPath); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/util/PoTConstants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries.util; 19 | 20 | public class PoTConstants { 21 | public static final String VECTOR_SEPERATOR = "|"; 22 | public static final String FILE_SEPERATOR = "~"; 23 | public static final String VECTOR_SEPERATOR_REGEX = "\\|"; 24 | 25 | } 26 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/util/PoTSerialiser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.pooledtimeseries.util; 19 | 20 | import java.io.ByteArrayInputStream; 21 | import java.io.ByteArrayOutputStream; 22 | import java.io.ObjectInput; 23 | import java.io.ObjectInputStream; 24 | import java.io.ObjectOutput; 25 | import java.io.ObjectOutputStream; 26 | import java.util.logging.Level; 27 | import java.util.logging.Logger; 28 | 29 | public class PoTSerialiser { 30 | 31 | private static final Logger LOG = Logger.getLogger(PoTSerialiser.class.getName()); 32 | 33 | public static byte[] getBytes(Object value) { 34 | long start = System.currentTimeMillis(); 35 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 36 | ObjectOutput out = null; 37 | byte[] byteArr = null; 38 | try { 39 | out = new ObjectOutputStream(bos); 40 | out.writeObject(value); 41 | byteArr = bos.toByteArray(); 42 | LOG.fine("Time taken serializing - " + (System.currentTimeMillis() - start)); 43 | } catch (Exception e) { 44 | LOG.log(Level.SEVERE, "Unable to serialize", e); 45 | 46 | } finally { 47 | try { 48 | if (out != null) { 49 | out.close(); 50 | } 51 | } catch (Exception ex) { 52 | // ignore close exception 53 | } 54 | try { 55 | bos.close(); 56 | } catch (Exception ex) { 57 | // ignore close exception 58 | } 59 | } 60 | 61 | return byteArr; 62 | } 63 | 64 | public static Object getObject(byte[] byteArr) { 65 | 66 | if(byteArr == null || byteArr.length == 0){ 67 | return null; 68 | } 69 | long start = System.currentTimeMillis(); 70 | ByteArrayInputStream bis = new ByteArrayInputStream(byteArr); 71 | ObjectInput in = null; 72 | try { 73 | in = new ObjectInputStream(bis); 74 | LOG.fine("Time taken deserializing - " + (System.currentTimeMillis() - start)); 75 | return in.readObject(); 76 | } catch (Exception e) { 77 | LOG.log(Level.SEVERE, "Unable to deserialize", e); 78 | return null; 79 | } finally { 80 | try { 81 | bis.close(); 82 | } catch (Exception ex) { 83 | // ignore close exception 84 | } 85 | try { 86 | if (in != null) { 87 | in.close(); 88 | } 89 | } catch (Exception ex) { 90 | // ignore close exception 91 | } 92 | } 93 | 94 | } 95 | 96 | } 97 | -------------------------------------------------------------------------------- 
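PoTSerialiser's two static helpers are the glue between the Hadoop layer and the video layer: getBytes(Object) produces the byte[] stored in each BytesWritable, and getObject(byte[]) restores it. A minimal round-trip sketch follows; the class name and main() harness are illustrative, not part of the repo.

import java.util.ArrayList;
import java.util.List;

import org.pooledtimeseries.FeatureVector;
import org.pooledtimeseries.util.PoTSerialiser;

public class PoTSerialiserRoundTrip {
    public static void main(String[] args) {
        // FeatureVector is Serializable, so a list of them can round-trip.
        List<FeatureVector> fvList = new ArrayList<FeatureVector>();
        fvList.add(new FeatureVector());

        // Object -> byte[]: the payload CartesianRecordReader packs into
        // the BytesWritable value of each pair.
        byte[] payload = PoTSerialiser.getBytes(fvList);

        // byte[] -> Object: how SimilarityCalculation.Map recovers the
        // list before computing kernelDistance.
        @SuppressWarnings("unchecked")
        List<FeatureVector> restored =
                (List<FeatureVector>) PoTSerialiser.getObject(payload);

        System.out.println("restored " + restored.size() + " feature vector(s)");
    }
}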
/hadoop-pot-core/src/main/java/org/pooledtimeseries/util/PoTUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.pooledtimeseries.util; 19 | 20 | import java.util.logging.Logger; 21 | 22 | import org.opencv.core.Core; 23 | import org.pooledtimeseries.SimilarityCalculation; 24 | 25 | public class PoTUtil { 26 | private static final String DEFAULT_LIB_PATH = "/mnt/apps/opencv-2.4.11/release/lib/libopencv_java2411.so"; 27 | private static final Logger LOG = Logger.getLogger(SimilarityCalculation.class.getName()); 28 | 29 | public static void loadOpenCV(String libraryPath){ 30 | 31 | if (!ClassScope.isLibraryLoaded(Core.NATIVE_LIBRARY_NAME)) { 32 | LOG.info("Trying to load - " + Core.NATIVE_LIBRARY_NAME); 33 | try{ 34 | System.loadLibrary(Core.NATIVE_LIBRARY_NAME); 35 | }catch (java.lang.UnsatisfiedLinkError e){ 36 | System.load(libraryPath); 37 | } 38 | } 39 | } 40 | 41 | public static void loadOpenCV(){ 42 | loadOpenCV(DEFAULT_LIB_PATH); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /hadoop-pot-core/src/main/java/org/pooledtimeseries/util/ReadSeqFileUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.pooledtimeseries.util; 19 | 20 | import java.io.IOException; 21 | import java.util.ArrayList; 22 | import java.util.List; 23 | import java.util.logging.Level; 24 | import java.util.logging.Logger; 25 | 26 | import org.apache.hadoop.io.Text; 27 | import org.pooledtimeseries.FeatureVector; 28 | import org.pooledtimeseries.MeanChiSquareDistanceCalculation; 29 | import org.pooledtimeseries.PoT; 30 | 31 | public class ReadSeqFileUtil { 32 | private static final Logger LOG = Logger.getLogger(MeanChiSquareDistanceCalculation.class.getName()); 33 | 34 | /** 35 | * Takes HDFS path to time series and convert them to {@link FeatureVector} 36 | * @param files - path to of.txt and hog.txt 37 | * @return List of {@link FeatureVector} 38 | */ 39 | public static List computeFeatureFromSeries(String[] files) { 40 | 41 | ArrayList tws = PoT.getTemporalWindows(4); 42 | ArrayList fvList = new ArrayList(); 43 | 44 | ArrayList multiSeries = new ArrayList(); 45 | 46 | long startIoTime = System.currentTimeMillis(); 47 | 48 | try { 49 | multiSeries.add(PoT.loadTimeSeries(HadoopFileUtil.getInputStreamFromHDFS(files[0])) ); 50 | multiSeries.add(PoT.loadTimeSeries(HadoopFileUtil.getInputStreamFromHDFS(files[1])) ); 51 | } catch (IOException e) { 52 | LOG.log(Level.SEVERE,"Unable to read series from filesysytem ",e); 53 | throw new RuntimeException("Unable to read series from filesysytem",e); 54 | } 55 | 56 | LOG.info("Read both series in - " + (System.currentTimeMillis() - startIoTime)); 57 | 58 | FeatureVector fv = new FeatureVector(); 59 | for (int i = 0; i < multiSeries.size(); i++) { 60 | fv.feature.add(PoT.computeFeaturesFromSeries(multiSeries.get(i), tws, 1)); 61 | fv.feature.add(PoT.computeFeaturesFromSeries(multiSeries.get(i), tws, 2)); 62 | fv.feature.add(PoT.computeFeaturesFromSeries(multiSeries.get(i), tws, 5)); 63 | } 64 | fvList.add(fv); 65 | 66 | return fvList; 67 | 68 | } 69 | 70 | public static String[] getFileNames(Text key) { 71 | 72 | return key.toString().split(PoTConstants.FILE_SEPERATOR); 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /hadoop-pot-video/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | gov.nasa.jpl.memex 5 | hadoop-pot 6 | 1.0-SNAPSHOT 7 | 8 | hadoop-pot-video 9 | 10 | 11 | org.openpnp 12 | opencv 13 | 2.4.11-2 14 | 15 | 16 | junit 17 | junit 18 | 19 | 20 | commons-io 21 | commons-io 22 | 23 | 24 | commons-cli 25 | commons-cli 26 | 27 | 28 | com.googlecode.json-simple 29 | json-simple 30 | 31 | 32 | -------------------------------------------------------------------------------- /hadoop-pot-video/src/main/java/org/pooledtimeseries/FeatureVector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.pooledtimeseries; 19 | 20 | import java.io.Serializable; 21 | import java.util.ArrayList; 22 | 23 | public class FeatureVector implements Serializable{ 24 | 25 | private static final long serialVersionUID = 1L; 26 | 27 | public ArrayList> feature; 28 | 29 | public FeatureVector() { 30 | feature = new ArrayList>(); 31 | } 32 | 33 | public FeatureVector(ArrayList> f) { 34 | feature = f; 35 | } 36 | 37 | public int numDim() { 38 | return feature.size(); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /hadoop-pot-video/src/main/java/org/pooledtimeseries/PoT.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.pooledtimeseries; 19 | 20 | import java.io.BufferedReader; 21 | import java.io.BufferedWriter; 22 | import java.io.File; 23 | import java.io.FileOutputStream; 24 | import java.io.FileWriter; 25 | import java.io.IOException; 26 | import java.io.InputStream; 27 | import java.io.InputStreamReader; 28 | import java.io.OutputStreamWriter; 29 | import java.nio.file.Files; 30 | import java.nio.file.Path; 31 | import java.nio.file.Paths; 32 | import java.util.ArrayList; 33 | import java.util.List; 34 | import java.util.Scanner; 35 | import java.util.logging.Level; 36 | import java.util.logging.Logger; 37 | 38 | import org.apache.commons.cli.CommandLine; 39 | import org.apache.commons.cli.CommandLineParser; 40 | import org.apache.commons.cli.GnuParser; 41 | import org.apache.commons.cli.HelpFormatter; 42 | import org.apache.commons.cli.Option; 43 | import org.apache.commons.cli.OptionBuilder; 44 | import org.apache.commons.cli.Options; 45 | import org.apache.commons.cli.ParseException; 46 | import org.apache.commons.io.FileUtils; 47 | import org.apache.commons.io.filefilter.TrueFileFilter; 48 | import org.json.simple.JSONObject; 49 | import org.opencv.core.Core; 50 | import org.opencv.core.Mat; 51 | import org.opencv.core.MatOfPoint2f; 52 | import org.opencv.core.Point; 53 | import org.opencv.core.Size; 54 | import org.opencv.highgui.VideoCapture; 55 | import org.opencv.imgproc.Imgproc; 56 | import org.opencv.video.Video; 57 | 58 | /** 59 | * 60 | * Pooled Time Series Similarity Metric. 
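 * A typical standalone invocation (illustrative; assumes the project
 * classes and the OpenCV native library are available, since main()
 * first calls System.loadLibrary):
 *
 *   java org.pooledtimeseries.PoT -d /path/to/videos -o similarity.txt
 *
 * where -d names a directory of video files and -o the similarity
 * output file (defaults to ./similarity.txt); add -j for JSON output.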
61 | * 62 | */ 63 | public class PoT { 64 | 65 | public static int frame_width = 320; 66 | public static int frame_height = 240; 67 | 68 | private static String outputFile = "similarity.txt"; 69 | 70 | private static enum OUTPUT_FORMATS {TXT, JSON} 71 | private static OUTPUT_FORMATS outputFormat = OUTPUT_FORMATS.TXT; 72 | 73 | private static final Logger LOG = Logger.getLogger(PoT.class.getName()); 74 | 75 | public static void main(String[] args) { 76 | System.loadLibrary(Core.NATIVE_LIBRARY_NAME); 77 | Option fileOpt = OptionBuilder.withArgName("file").hasArg() 78 | .withLongOpt("file") 79 | .withDescription("Path to a single file").create('f'); 80 | 81 | Option dirOpt = OptionBuilder.withArgName("directory").hasArg() 82 | .withLongOpt("dir") 83 | .withDescription("A directory with image files in it").create('d'); 84 | 85 | Option helpOpt = OptionBuilder.withLongOpt("help") 86 | .withDescription("Print this message.").create('h'); 87 | 88 | Option pathFileOpt = OptionBuilder 89 | .withArgName("path file") 90 | .hasArg() 91 | .withLongOpt("pathfile") 92 | .withDescription( 93 | "A file containing full absolute paths to videos. Previous default was memex-index_temp.txt") 94 | .create('p'); 95 | 96 | Option outputFileOpt = OptionBuilder 97 | .withArgName("output file") 98 | .withLongOpt("outputfile") 99 | .hasArg() 100 | .withDescription("File containing similarity results. Defaults to ./similarity.txt") 101 | .create('o'); 102 | 103 | Option jsonOutputFlag = OptionBuilder 104 | .withArgName("json output") 105 | .withLongOpt("json") 106 | .withDescription("Set similarity output format to JSON. Defaults to .txt") 107 | .create('j'); 108 | 109 | Option similarityFromFeatureVectorsOpt = OptionBuilder 110 | .withArgName("similarity from FeatureVectors directory") 111 | .withLongOpt("similarityFromFeatureVectorsDirectory") 112 | .hasArg() 113 | .withDescription("calculate similarity matrix from given directory of feature vectors") 114 | .create('s'); 115 | 116 | Options options = new Options(); 117 | options.addOption(dirOpt); 118 | options.addOption(pathFileOpt); 119 | options.addOption(fileOpt); 120 | options.addOption(helpOpt); 121 | options.addOption(outputFileOpt); 122 | options.addOption(jsonOutputFlag); 123 | options.addOption(similarityFromFeatureVectorsOpt); 124 | 125 | // create the parser 126 | CommandLineParser parser = new GnuParser(); 127 | 128 | try { 129 | // parse the command line arguments 130 | CommandLine line = parser.parse(options, args); 131 | String directoryPath = null; 132 | String pathFile = null; 133 | String singleFilePath = null; 134 | String similarityFromFeatureVectorsDirectory = null; 135 | ArrayList videoFiles = null; 136 | 137 | if (line.hasOption("dir")) { 138 | directoryPath = line.getOptionValue("dir"); 139 | } 140 | 141 | if (line.hasOption("pathfile")) { 142 | pathFile = line.getOptionValue("pathfile"); 143 | } 144 | 145 | if (line.hasOption("file")) { 146 | singleFilePath = line.getOptionValue("file"); 147 | } 148 | 149 | if (line.hasOption("outputfile")) { 150 | outputFile = line.getOptionValue("outputfile"); 151 | } 152 | 153 | if (line.hasOption("json")) { 154 | outputFormat = OUTPUT_FORMATS.JSON; 155 | } 156 | 157 | if (line.hasOption("similarityFromFeatureVectorsDirectory")) { 158 | similarityFromFeatureVectorsDirectory = line.getOptionValue("similarityFromFeatureVectorsDirectory"); 159 | } 160 | 161 | if (line.hasOption("help") 162 | || (line.getOptions() == null || (line.getOptions() != null && line 163 | .getOptions().length == 0)) 164 | || 
(directoryPath != null && pathFile != null 165 | && !directoryPath.equals("") && !pathFile.equals(""))) { 166 | HelpFormatter formatter = new HelpFormatter(); 167 | formatter.printHelp("pooled_time_series", options); 168 | System.exit(1); 169 | } 170 | 171 | if (directoryPath != null) { 172 | File dir = new File(directoryPath); 173 | List files = (List) FileUtils.listFiles(dir, 174 | TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE); 175 | videoFiles = new ArrayList(files.size()); 176 | 177 | for (File file : files) { 178 | String filePath = file.toString(); 179 | 180 | // When given a directory to load videos from we need to ensure that we 181 | // don't try to load the of.txt and hog.txt intermediate result files 182 | // that results from previous processing runs. 183 | if (!filePath.contains(".txt")) { 184 | videoFiles.add(file.toPath()); 185 | } 186 | } 187 | 188 | LOG.info("Added " + videoFiles.size() + " video files from " 189 | + directoryPath); 190 | 191 | } 192 | 193 | if (pathFile != null) { 194 | Path list_file = Paths.get(pathFile); 195 | videoFiles = loadFiles(list_file); 196 | LOG.info("Loaded " + videoFiles.size() + " video files from " 197 | + pathFile); 198 | } 199 | 200 | if (singleFilePath != null) { 201 | Path singleFile = Paths.get(singleFilePath); 202 | LOG.info("Loaded file: " + singleFile); 203 | videoFiles = new ArrayList(1); 204 | videoFiles.add(singleFile); 205 | } 206 | 207 | if (similarityFromFeatureVectorsDirectory != null) { 208 | File dir = new File(similarityFromFeatureVectorsDirectory); 209 | List files = (List) FileUtils.listFiles(dir, 210 | TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE); 211 | videoFiles = new ArrayList(files.size()); 212 | 213 | for (File file : files) { 214 | String filePath = file.toString(); 215 | 216 | // We need to load only the *.of.txt and *.hog.txt values 217 | if (filePath.endsWith(".of.txt")) { 218 | videoFiles.add(file.toPath()); 219 | } 220 | 221 | if (filePath.endsWith(".hog.txt")) { 222 | videoFiles.add(file.toPath()); 223 | } 224 | } 225 | 226 | LOG.info("Added " + videoFiles.size() + " feature vectors from " 227 | + similarityFromFeatureVectorsDirectory); 228 | evaluateSimilarity(videoFiles, 1); 229 | } 230 | else { 231 | evaluateSimilarity(videoFiles, 0); 232 | } 233 | LOG.info("done."); 234 | 235 | } catch (ParseException exp) { 236 | // oops, something went wrong 237 | System.err.println("Parsing failed. 
Reason: " + exp.getMessage()); 238 | } 239 | 240 | } 241 | 242 | public static void evaluateSimilarity(ArrayList files, int save_mode) { 243 | // PoT level set 244 | ArrayList tws = getTemporalWindows(4); 245 | 246 | // computing feature vectors 247 | ArrayList fv_list = new ArrayList(); 248 | 249 | for (int k = 0; k < files.size(); k++) { 250 | try { 251 | LOG.fine(files.get(k).toString()); 252 | 253 | ArrayList multi_series = new ArrayList(); 254 | Path file = files.get(k); 255 | 256 | // optical flow descriptors 257 | String series_name1 = file.toString(); 258 | if ((!series_name1.endsWith(".of.txt")) && (!series_name1.endsWith(".hog.txt"))) { 259 | series_name1 += ".of.txt"; 260 | } 261 | Path series_path1 = Paths.get(series_name1); 262 | double[][] series1; 263 | 264 | if (save_mode == 0) { 265 | series1 = getOpticalTimeSeries(file, 5, 5, 8); 266 | saveVectors(series1, series_path1); 267 | 268 | } else { 269 | series1 = loadTimeSeries(series_path1); 270 | } 271 | 272 | multi_series.add(series1); 273 | 274 | // gradients descriptors 275 | String series_name2 = file.toString(); 276 | if ((!series_name2.endsWith(".hog.txt")) && (!series_name2.endsWith(".of.txt"))) { 277 | series_name2 += ".hog.txt"; 278 | } 279 | Path series_path2 = Paths.get(series_name2); 280 | double[][] series2; 281 | 282 | if (save_mode == 0) { 283 | series2 = getGradientTimeSeries(file, 5, 5, 8); 284 | saveVectors(series2, series_path2); 285 | } else { 286 | series2 = loadTimeSeries(series_path2); 287 | } 288 | 289 | multi_series.add(series2); 290 | 291 | // computing features from series of descriptors 292 | FeatureVector fv = new FeatureVector(); 293 | 294 | for (int i = 0; i < multi_series.size(); i++) { 295 | fv.feature.add(computeFeaturesFromSeries(multi_series.get(i), tws, 1)); 296 | fv.feature.add(computeFeaturesFromSeries(multi_series.get(i), tws, 2)); 297 | fv.feature.add(computeFeaturesFromSeries(multi_series.get(i), tws, 5)); 298 | } 299 | LOG.info( (k+1)+"/"+files.size()+" files done. " + "Finished processing file: " + file.getFileName()); 300 | fv_list.add(fv); 301 | 302 | } catch (PoTException e) { 303 | LOG.severe("PoTException occurred: " + e.message + ": Skipping file " + files.get(k)); 304 | continue; 305 | } 306 | } 307 | double[][] similarities = calculateSimilarities(fv_list); 308 | writeSimilarityOutput(files, similarities); 309 | } 310 | 311 | public static double[][] calculateSimilarities(ArrayList fv_list) { 312 | // feature vector similarity measure 313 | if (fv_list.size() < 1) { 314 | LOG.info("Feature Vector list is empty. Nothing to calculate. 
Exiting..."); 315 | System.exit(1); 316 | } 317 | double[] mean_dists = new double[fv_list.get(0).numDim()]; 318 | for (int i = 0; i < fv_list.get(0).numDim(); i++) 319 | mean_dists[i] = meanChiSquareDistances(fv_list, i); 320 | 321 | System.out.print("mean-chi-square-distances: "); 322 | for (int i = 0; i < fv_list.get(0).numDim(); i++) 323 | System.out.format("%f ", mean_dists[i]); 324 | System.out.println(""); 325 | 326 | double[][] sims = new double[fv_list.size()][fv_list.size()]; 327 | for (int i = 0; i < fv_list.size(); i++) { 328 | for (int j = 0; j < fv_list.size(); j++) { 329 | sims[i][j] = kernelDistance(fv_list.get(i), fv_list.get(j), mean_dists); 330 | } 331 | } 332 | 333 | return sims; 334 | } 335 | 336 | private static void writeSimilarityOutput(ArrayList files, double[][] similarities) { 337 | if (outputFormat == OUTPUT_FORMATS.TXT) { 338 | writeSimilarityToTextFile(similarities); 339 | } else if (outputFormat == OUTPUT_FORMATS.JSON) { 340 | writeSimilarityToJSONFile(files, similarities); 341 | } else { 342 | LOG.severe("Invalid output format. Skipping similarity dump."); 343 | } 344 | } 345 | 346 | private static void writeSimilarityToTextFile(double[][] similarities) { 347 | try { 348 | FileOutputStream fos = new FileOutputStream(outputFile); 349 | BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos)); 350 | 351 | for (int i = 0; i < similarities.length; i++) { 352 | for (int j = 0; j < similarities[0].length; j++) { 353 | writer.write(String.format("%f,", similarities[i][j])); 354 | } 355 | writer.newLine(); 356 | } 357 | 358 | writer.close(); 359 | fos.close(); 360 | } catch (IOException e) { 361 | e.printStackTrace(); 362 | } 363 | } 364 | 365 | private static void writeSimilarityToJSONFile(ArrayList files, double[][] similarities) { 366 | JSONObject root_json_obj = new JSONObject(); 367 | 368 | for (int i = 0; i < similarities.length; i++) { 369 | JSONObject fileJsonObj = new JSONObject(); 370 | 371 | for (int j = 0; j < similarities[0].length; j++) { 372 | fileJsonObj.put(files.get(j).getFileName(), similarities[i][j]); 373 | } 374 | 375 | root_json_obj.put(files.get(i).getFileName(), fileJsonObj); 376 | } 377 | 378 | try { 379 | outputFile = outputFile.substring(0, outputFile.lastIndexOf('.')) + ".json"; 380 | FileWriter file = new FileWriter(outputFile); 381 | file.write(root_json_obj.toJSONString()); 382 | file.flush(); 383 | file.close(); 384 | } catch (IOException e) { 385 | e.printStackTrace(); 386 | } 387 | } 388 | 389 | public static ArrayList loadFiles(Path list_file) { 390 | ArrayList filenames = new ArrayList(); 391 | 392 | try (InputStream in = Files.newInputStream(list_file); 393 | BufferedReader reader = new BufferedReader(new InputStreamReader(in))) { 394 | String line = null; 395 | while ((line = reader.readLine()) != null) { 396 | filenames.add(Paths.get(line)); 397 | } 398 | } catch (IOException x) { 399 | System.err.println(x); 400 | } 401 | 402 | return filenames; 403 | } 404 | 405 | public static double[][] getOpticalTimeSeries(Path filename, int w_d, 406 | int h_d, int o_d) throws PoTException { 407 | ArrayList hists = getOpticalHistograms(filename, w_d, h_d, 408 | o_d); 409 | double[][] vectors = new double[hists.size()][]; 410 | 411 | for (int i = 0; i < hists.size(); i++) { 412 | vectors[i] = histogramToVector(hists.get(i)); 413 | } 414 | 415 | return vectors; 416 | } 417 | 418 | static double[] histogramToVector(double[][][] hist) { 419 | int d1 = hist.length; 420 | int d2 = hist[0].length; 421 | int d3 = hist[0][0].length; 
422 | double[] vector = new double[d1 * d2 * d3]; 423 | 424 | for (int i = 0; i < d1; i++) { 425 | for (int j = 0; j < d2; j++) { 426 | for (int k = 0; k < d3; k++) { 427 | vector[d3 * d2 * i + d3 * j + k] = hist[i][j][k]; 428 | } 429 | } 430 | } 431 | 432 | return vector; 433 | } 434 | 435 | static ArrayList<double[][][]> getOpticalHistograms(Path filename, int w_d, 436 | int h_d, int o_d) throws PoTException { 437 | ArrayList<double[][][]> histograms = new ArrayList<double[][][]>(); 438 | 439 | try { 440 | LOG.info("opening video file " + filename.toString()); 441 | VideoCapture capture = new VideoCapture(filename.toString()); 442 | 443 | if (!capture.isOpened()) { 444 | LOG.warning("video file " + filename.getFileName() + " could not be opened."); 445 | double[][][] hist = new double[w_d][h_d][o_d]; 446 | histograms.add(hist); 447 | } 448 | else { 449 | // variables for processing images 450 | Mat original_frame = new Mat(); 451 | 452 | Mat frame = new Mat(); 453 | Mat frame_gray = new Mat(); 454 | Mat prev_frame_gray = new Mat(); 455 | MatOfPoint2f flow = new MatOfPoint2f(); 456 | 457 | // computing a list of histograms of optical flows (i.e. a list of 5*5*8 458 | // arrays) 459 | for (int frame_index = 0;; frame_index++) { 460 | // capturing the video images 461 | capture.read(original_frame); 462 | 463 | if (original_frame.empty()) { 464 | if (frame_index == 0) { 465 | throw new PoTException("Could not read the video file"); 466 | } 467 | else 468 | break; 469 | } 470 | else { 471 | // resizing the captured frame and converting it to the gray scale 472 | // image. 473 | Imgproc.resize(original_frame, frame, new Size(frame_width, 474 | frame_height)); 475 | Imgproc.cvtColor(frame, frame_gray, Imgproc.COLOR_BGR2GRAY); 476 | 477 | double[][][] hist = new double[w_d][h_d][o_d]; 478 | histograms.add(hist); 479 | 480 | // from frame #2 481 | if (frame_index > 0) { 482 | // calculate optical flows 483 | Video.calcOpticalFlowFarneback(prev_frame_gray, frame_gray, flow, 484 | 0.5, 1, 10, 2, 7, 1.5, 0); // 0.5, 1, 15, 2, 7, 1.5, 0 485 | 486 | // update histogram of optical flows 487 | updateOpticalHistogram(histograms.get(frame_index), flow); 488 | } 489 | 490 | Mat temp_frame = prev_frame_gray; // swap buffers: the current gray frame becomes the previous one 491 | prev_frame_gray = frame_gray; 492 | frame_gray = temp_frame; 493 | } 494 | } 495 | 496 | capture.release(); 497 | } 498 | } catch (Exception e) { 499 | e.printStackTrace(); 500 | LOG.log(Level.SEVERE, "Exception in getOpticalHistograms ", e); 501 | } 502 | return histograms; 503 | } 504 | 505 | static void updateOpticalHistogram(double[][][] hist, Mat flow) { 506 | int d1 = hist.length; 507 | int d2 = hist[0].length; 508 | int d3 = hist[0][0].length; 509 | 510 | int step = 4; // 5; 511 | 512 | for (int x = 0; x < frame_width; x += step) { 513 | int x_type = (int) (x * d1 / frame_width); 514 | 515 | for (int y = 0; y < frame_height; y += step) { 516 | int y_type = (int) (y * d2 / frame_height); 517 | 518 | Point fxy = new Point(flow.get(y, x)); 519 | 520 | double size = (fxy.x + fxy.y) * (fxy.x + fxy.y); // note: (x + y)^2, not the usual squared magnitude x^2 + y^2 521 | 522 | if (size < 9) { 523 | continue; // 25 524 | } else { 525 | int f_type = opticalFlowType(fxy, d3); 526 | 527 | hist[x_type][y_type][f_type]++; 528 | } 529 | } 530 | } 531 | } 532 | 533 | static int opticalFlowType(Point fxy, int dim) { 534 | double degree = Math.atan2(fxy.y, fxy.x); 535 | int type = 7; // default: last bin (dim is 8 in practice) 536 | 537 | for (int i = 0; i < dim; i++) { 538 | double boundary = (i + 1) * 2 * Math.PI / dim - Math.PI; 539 | 540 | if (degree < boundary) { 541 | type = i; 542 | break; 543 | } 544 | } 545 | 546 | return type; 547 | } 548 |
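For orientation, here is a minimal usage sketch of the optical-flow path above (illustrative only, not part of the repository; the video path is hypothetical, and OpenCV's native library must be on java.library.path):

```java
package org.pooledtimeseries;

import java.nio.file.Paths;

import org.opencv.core.Core;

// Minimal sketch: extract one clip's 5x5x8 optical-flow time series and save
// it in the "numFrames dim" text format written by saveVectors/saveHistograms.
public class OpticalSeriesSketch {
    public static void main(String[] args) throws PoTException {
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME); // load the OpenCV bindings

        // hypothetical input path; any readable .mp4 works
        double[][] series = PoT.getOpticalTimeSeries(Paths.get("data/sample.mp4"), 5, 5, 8);

        // each row is one frame's flattened 5x5x8 histogram (200 values)
        PoT.saveVectors(series, Paths.get("data/sample.mp4.of.txt"));
        System.out.println(series.length + " frames x " + series[0].length + " dims");
    }
}
```

The .of.txt suffix mirrors what the Hadoop driver script further down expects for optical-flow output.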
public static void saveVectors(double[][] vectors, Path outfile) { 550 | int d = vectors[0].length; 551 | 552 | ArrayList<double[][][]> temp_hists = new ArrayList<double[][][]>(); 553 | 554 | for (int i = 0; i < vectors.length; i++) { 555 | double[][][] temp_hist = new double[1][1][d]; 556 | temp_hist[0][0] = vectors[i]; 557 | 558 | temp_hists.add(temp_hist); 559 | } 560 | 561 | saveHistograms(temp_hists, outfile); 562 | } 563 | 564 | static void saveHistograms(ArrayList<double[][][]> hists, Path outfile) { 565 | int w_d = hists.get(0).length; 566 | int h_d = hists.get(0)[0].length; 567 | int o_d = hists.get(0)[0][0].length; 568 | 569 | int i, j, k, l; 570 | 571 | try (FileOutputStream fos = new FileOutputStream(outfile.toFile()); 572 | BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos))) { 573 | String head = String.format("%d %d", hists.size(), w_d * h_d * o_d); 574 | writer.write(head); 575 | writer.newLine(); 576 | 577 | for (l = 0; l < hists.size(); l++) { 578 | double[][][] hist = hists.get(l); 579 | 580 | for (i = 0; i < hist.length; i++) { 581 | for (j = 0; j < hist[0].length; j++) { 582 | for (k = 0; k < hist[0][0].length; k++) { // optical_bins+1 583 | writer.write(String.format("%f ", hist[i][j][k])); 584 | } 585 | } 586 | } 587 | 588 | writer.newLine(); 589 | } 590 | 591 | } catch (IOException x) { 592 | System.err.println(x); 593 | } 594 | } 595 | 596 | public static double[][] loadTimeSeries(Scanner scin) { 597 | double[][] series; 598 | int num_frames = scin.nextInt(); 599 | int dim = scin.nextInt(); 600 | 601 | series = new double[num_frames][dim]; 602 | 603 | for (int i = 0; i < num_frames; i++) { 604 | for (int j = 0; j < dim; j++) { 605 | series[i][j] = scin.nextDouble(); 606 | } 607 | } 608 | scin.close(); 609 | 610 | return series; 611 | } 612 | 613 | public static double[][] loadTimeSeries(InputStream in) { 614 | 615 | Scanner scin = new Scanner(in); 616 | try { 617 | return loadTimeSeries(scin); 618 | } finally { 619 | try { 620 | in.close(); 621 | } catch (IOException e) { 622 | e.printStackTrace(); 623 | } 624 | } 625 | 626 | } 627 | 628 | public static double[][] loadTimeSeries(Path filename) { 629 | try (InputStream in = Files.newInputStream(filename)) { 630 | return loadTimeSeries(in); 631 | } catch (IOException e) { 632 | e.printStackTrace(); 633 | return null; 634 | } 635 | } 636 | 637 | public static double[][] getGradientTimeSeries(Path filename, int w_d, 638 | int h_d, int o_d) throws PoTException { 639 | ArrayList<double[][][]> hists = getGradientHistograms(filename, w_d, h_d, 640 | o_d); 641 | double[][] vectors = new double[hists.size()][]; 642 | 643 | for (int i = 0; i < hists.size(); i++) { 644 | vectors[i] = histogramToVector(hists.get(i)); 645 | } 646 | 647 | return vectors; 648 | } 649 | 650 | static ArrayList<double[][][]> getGradientHistograms(Path filename, int w_d, 651 | int h_d, int o_d) throws PoTException { 652 | ArrayList<double[][][]> histograms = new ArrayList<double[][][]>(); 653 | 654 | VideoCapture capture = new VideoCapture(filename.toString()); 655 | 656 | if (!capture.isOpened()) { 657 | LOG.warning("video file not opened."); 658 | 659 | double[][][] hist = new double[w_d][h_d][o_d]; 660 | histograms.add(hist); 661 | } 662 | else { 663 | // variables for processing images 664 | Mat original_frame = new Mat(); 665 | Mat resized = new Mat(); 666 | Mat resized_gray = new Mat(); 667 | 668 | // initializing a list of histograms of gradients (i.e.
a list of s*s*9 669 | // arrays) 670 | for (int i = 0;; i++) { 671 | // capturing the video images 672 | capture.read(original_frame); 673 | // end of stream: fail if the very first frame is unreadable, otherwise stop 674 | if (original_frame.empty()) { 675 | if (i == 0) { 676 | throw new PoTException("Could not read the video file"); 677 | } 678 | else 679 | break; 680 | } 681 | 682 | 683 | 684 | 685 | double[][][] hist = new double[w_d][h_d][o_d]; 686 | 687 | Imgproc.resize(original_frame, resized, new Size(frame_width, 688 | frame_height)); 689 | Imgproc.cvtColor(resized, resized_gray, Imgproc.COLOR_BGR2GRAY); 690 | 691 | ArrayList<double[][]> gradients = computeGradients(resized_gray, o_d); 692 | updateGradientHistogram(hist, gradients); 693 | 694 | histograms.add(hist); 695 | } 696 | 697 | capture.release(); 698 | } 699 | 700 | return histograms; 701 | } 702 | 703 | static ArrayList<double[][]> computeGradients(Mat frame, int dim) { 704 | byte[] frame_array = new byte[(int) frame.total()]; 705 | frame.get(0, 0, frame_array); 706 | 707 | ArrayList<double[][]> gradients = new ArrayList<double[][]>(); 708 | 709 | for (int k = 0; k < dim; k++) { 710 | double angle = Math.PI * (double) k / (double) dim; 711 | 712 | double dx = Math.cos(angle) * 0.9999999; 713 | double dy = Math.sin(angle) * 0.9999999; 714 | 715 | double[][] grad = new double[frame.width()][frame.height()]; 716 | 717 | for (int i = 0; i < frame.cols(); i++) { 718 | for (int j = 0; j < frame.rows(); j++) { 719 | if (i <= 1 || j <= 1 || i >= frame.cols() - 2 720 | || j >= frame.rows() - 2) { 721 | grad[i][j] = 0; 722 | } else { 723 | double f1 = interpolatePixel(frame_array, frame.cols(), (double) i 724 | + dx, (double) j + dy); 725 | double f2 = interpolatePixel(frame_array, frame.cols(), (double) i 726 | - dx, (double) j - dy); 727 | 728 | double diff = f1 - f2; 729 | if (diff < 0) 730 | diff = diff * -1; 731 | if (diff >= 256) 732 | diff = 255; 733 | 734 | grad[i][j] = diff; 735 | } 736 | } 737 | } 738 | 739 | gradients.add(grad); 740 | } 741 | 742 | return gradients; 743 | } 744 | 745 | static double interpolatePixel(byte[] image, int w, double x, double y) { 746 | double x1 = (double) ((int) x); 747 | double x2 = (double) ((int) x + 1); 748 | double y1 = (double) ((int) y); 749 | double y2 = (double) ((int) y + 1); 750 | 751 | double f11 = (double) (image[(int) y * w + (int) x] & 0xFF); 752 | double f21 = (double) (image[(int) y * w + (int) x + 1] & 0xFF); 753 | double f12 = (double) (image[(int) (y + 1) * w + (int) x] & 0xFF); 754 | double f22 = (double) (image[(int) (y + 1) * w + (int) x + 1] & 0xFF); 755 | 756 | // bilinear interpolation between the four surrounding pixels 757 | double f = f11 * (x2 - x) * (y2 - y) + f21 * (x - x1) * (y2 - y) + f12 * (x2 - x) * (y - y1) + f22 * (x - x1) * (y - y1); 758 | 759 | return f; 760 | } 761 | 762 | static void updateGradientHistogram(double[][][] hist, 763 | ArrayList<double[][]> gradients) { 764 | int d1 = hist.length; 765 | int d2 = hist[0].length; 766 | int d3 = hist[0][0].length; 767 | 768 | int width = gradients.get(0).length; 769 | int height = gradients.get(0)[0].length; 770 | 771 | for (int i = 0; i < width; i++) { 772 | int s1_index = i * d1 / width; 773 | 774 | for (int j = 0; j < height; j++) { 775 | int s2_index = j * d2 / height; 776 | 777 | for (int k = 0; k < d3; k++) { 778 | double val = gradients.get(k)[i][j] / 100; 779 | hist[s1_index][s2_index][k] += val; 780 | } 781 | } 782 | } 783 | } 784 | 785 | public static ArrayList<double[]> getTemporalWindows(int level) { 786 | ArrayList<double[]> fws = new ArrayList<double[]>(); 787 | 788 | for (int l = 0; l < level; l++) { 789 | int cascade_steps = (int) Math.pow((double) 2, (double) l);// 2; 790 | double
step_size = (double) 1 / (double) cascade_steps; 791 | 792 | for (int k = 0; k < cascade_steps; k++) { 793 | double start = step_size * (double) k + 0.000001; 794 | double end = step_size * (double) (k + 1) + 0.000001; 795 | 796 | double[] wind = new double[2]; 797 | wind[0] = start; 798 | wind[1] = end; 799 | 800 | fws.add(wind); 801 | } 802 | } 803 | 804 | return fws; 805 | } 806 | 807 | public static ArrayList<Double> computeFeaturesFromSeries(double[][] series, 808 | ArrayList<double[]> time_windows_list, int feature_mode) { 809 | int start = 0; 810 | int end = series.length - 1; 811 | 812 | ArrayList<Double> feature = new ArrayList<Double>(); 813 | 814 | for (int j = 0; j < time_windows_list.size(); j++) { 815 | int duration = end - start; 816 | 817 | for (int i = 0; i < series[0].length; i++) { 818 | if (duration < 0) { 819 | if (feature_mode == 2 || feature_mode == 4) { 820 | feature.add(0.0); 821 | feature.add(0.0); 822 | } else 823 | feature.add(0.0); 824 | 825 | continue; 826 | } 827 | 828 | int window_start = start 829 | + (int) (duration * time_windows_list.get(j)[0] + 0.5); 830 | int window_end = start 831 | + (int) (duration * time_windows_list.get(j)[1] + 0.5); 832 | 833 | if (feature_mode == 1) { // Sum pooling 834 | double sum = 0; 835 | 836 | for (int t = window_start; t <= window_end; t++) { 837 | if (t < 0) 838 | continue; 839 | 840 | sum += series[t][i]; 841 | } 842 | 843 | feature.add(sum); 844 | } else if (feature_mode == 2) { // Gradient pooling1 845 | double positive_gradients = 0; 846 | double negative_gradients = 0; 847 | 848 | for (int t = window_start; t <= window_end; t++) { 849 | int look = 2; 850 | 851 | if (t - look < 0) 852 | continue; 853 | else { 854 | double dif = series[t][i] - series[t - look][i]; 855 | 856 | if (dif > 0.01) { // 0.01 for optical 857 | positive_gradients++; 858 | } else if (dif < -0.01) { // if (dif<-10) 859 | negative_gradients++; 860 | } 861 | } 862 | } 863 | 864 | feature.add(positive_gradients); 865 | feature.add(negative_gradients); 866 | } else if (feature_mode == 4) { // Gradient pooling2 867 | double positive_gradients = 0; 868 | double negative_gradients = 0; 869 | 870 | for (int t = window_start; t <= window_end; t++) { 871 | int look = 2; 872 | 873 | if (t - look < 0) 874 | continue; 875 | else { 876 | double dif = series[t][i] - series[t - look][i]; 877 | 878 | if (dif > 0) { 879 | positive_gradients += dif; 880 | } else { 881 | negative_gradients += -dif; 882 | } 883 | } 884 | } 885 | 886 | feature.add(positive_gradients); 887 | feature.add(negative_gradients); 888 | } else if (feature_mode == 5) { // Max pooling 889 | double max = -1000000; 890 | 891 | for (int t = window_start; t <= window_end; t++) { 892 | if (t < 0) 893 | continue; 894 | 895 | if (series[t][i] > max) 896 | max = series[t][i]; 897 | } 898 | 899 | feature.add(max); 900 | } 901 | } 902 | } 903 | 904 | return feature; 905 | } 906 | 907 | public static void normalizeFeatureL1(ArrayList<Double> sample) { 908 | double sum = 0; // double, not int: an int accumulator would truncate fractional feature values 909 | 910 | for (int i = 0; i < sample.size(); i++) { 911 | double val = sample.get(i); 912 | if (val < 0) 913 | val = -1 * val; 914 | 915 | sum += val; 916 | } 917 | 918 | for (int i = 0; i < sample.size(); i++) { 919 | double v; 920 | if (sum == 0) 921 | v = 0; 922 | else 923 | v = sample.get(i) / sum;// *100; 924 | 925 | sample.set(i, v); 926 | } 927 | } 928 | 929 | static double chiSquareDistance(ArrayList<Double> feature1, 930 | ArrayList<Double> feature2) { 931 | if (feature1.size() != feature2.size()) 932 | LOG.warning("feature vector dimension mismatch."); 933 | 934 | double score = 0;
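// (illustrative note, not part of the original source) the loop below implements
// the chi-square distance: score = 0.5 * sum_i (h1_i - h2_i)^2 / (h1_i + h2_i).
// For example, h1 = (1, 2) and h2 = (3, 2) give 0.5 * (2^2 / 4) = 0.5; the
// h1 == h2 branch skips equal bins, so the 0/0 case never arises.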
935 | 936 | for (int i = 0; i < feature1.size(); i++) { 937 | double h1 = feature1.get(i); 938 | double h2 = feature2.get(i); 939 | 940 | if (h1 < 0 || h2 < 0) { 941 | LOG.warning("A negative feature value. The chi square kernel " 942 | + "does not work with negative values. Please try shifting " 943 | + "the vector to make all its elements positive."); 944 | } 945 | 946 | if (h1 == h2) 947 | continue; 948 | else 949 | score += (h1 - h2) * (h1 - h2) / (h1 + h2); 950 | } 951 | 952 | return 0.5 * score; 953 | } 954 | 955 | static double meanChiSquareDistances(ArrayList<FeatureVector> samples, int d) { 956 | double mean_dist = 0; 957 | 958 | double sum = 0; 959 | int count = 0; 960 | 961 | for (int i = 0; i < samples.size(); i++) { 962 | for (int j = i + 1; j < samples.size(); j++) { 963 | count++; 964 | 965 | sum += chiSquareDistance(samples.get(i).feature.get(d), 966 | samples.get(j).feature.get(d)); 967 | } 968 | } 969 | 970 | mean_dist = sum / (double) count; 971 | 972 | return mean_dist; 973 | } 974 | 975 | static double kernelDistance(FeatureVector sample1, FeatureVector sample2, 976 | double[] mean_dists) { 977 | double distance = 0; 978 | 979 | for (int d = 0; d < sample1.numDim(); d++) { 980 | double weight = 1; 981 | 982 | double chi = chiSquareDistance(sample1.feature.get(d), 983 | sample2.feature.get(d)); 984 | // guard against division by zero when the mean distance for this feature type is 0 985 | double val = (mean_dists[d] == 0) ? chi / 1000000.0 986 | : chi / mean_dists[d] * weight; 987 | 988 | distance = distance + val; 989 | } 990 | 991 | double final_score = Math.exp(-1 * distance / 10); // 10000 10 992 | 993 | return final_score; 994 | } 995 | } 996 | -------------------------------------------------------------------------------- /hadoop-pot-video/src/main/java/org/pooledtimeseries/PoTException.java: -------------------------------------------------------------------------------- 1 | package org.pooledtimeseries; 2 | 3 | /** 4 | * Created by Aditya on 10/29/15. 5 | */ 6 | public class PoTException extends Exception { 7 | // Parameterless constructor 8 | private PoTException() {} 9 | 10 | // Constructor that accepts a message 11 | public PoTException(String message) 12 | { 13 | super(message); 14 | this.message = message; 15 | } 16 | 17 | public String message; 18 | } 19 | -------------------------------------------------------------------------------- /hadoop-pot-video/src/main/java/org/pooledtimeseries/healthcheck/CheckOpenCV.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | 18 | package org.pooledtimeseries.healthcheck; 19 | 20 | import org.opencv.core.Core; 21 | import org.opencv.core.CvType; 22 | import org.opencv.core.Mat; 23 | import org.opencv.highgui.VideoCapture; 24 | 25 | public class CheckOpenCV { 26 | 27 | public static void main(String[] args) { 28 | System.loadLibrary(Core.NATIVE_LIBRARY_NAME); 29 | Mat mat = Mat.eye(3, 3, CvType.CV_8UC1); 30 | System.out.println("mat = " + mat.dump()); 31 | 32 | String filename = args[0]; 33 | 34 | System.out.println("opening video file " + filename); 35 | VideoCapture capture = new VideoCapture(filename.toString()); 36 | 37 | if (!capture.isOpened()) { 38 | System.out.println("video file " + filename + " could not be opened."); 39 | 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | gov.nasa.jpl.memex 5 | hadoop-pot 6 | pom 7 | 1.0-SNAPSHOT 8 | hadoop-pot 9 | http://maven.apache.org 10 | 11 | 12 | 13 | org.apache.maven.plugins 14 | maven-compiler-plugin 15 | 3.3 16 | 17 | 1.7 18 | 1.7 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | junit 27 | junit 28 | 3.8.1 29 | test 30 | 31 | 32 | commons-io 33 | commons-io 34 | 2.4 35 | 36 | 37 | commons-cli 38 | commons-cli 39 | 1.2 40 | 41 | 42 | com.googlecode.json-simple 43 | json-simple 44 | 1.1.1 45 | 46 | 47 | 48 | 49 | hadoop-pot-video 50 | hadoop-pot-core 51 | hadoop-pot-assembly 52 | 53 | 54 | -------------------------------------------------------------------------------- /src/main/bin/pooled-time-series: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | export ORIG_DIR=`pwd` 19 | export DIR=`dirname $0` 20 | cd $DIR 21 | export DIR_PATH=`pwd` 22 | cd $ORIG_DIR 23 | 24 | java -Djava.library.path=$OPENCV_JAVA_HOME -cp $DIR_PATH/../../../hadoop-pot-assembly/target/pooled-time-series-1.0-SNAPSHOT-jar-with-dependencies.jar org.pooledtimeseries.PoT "$@" 25 | -------------------------------------------------------------------------------- /src/main/bin/pooled-time-series-hadoop: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | MAIN_PTS_DIR=$POOLED_TIME_SERIES_HOME/../.. 19 | HDFS_PTS_DIR=/user/pts/output 20 | 21 | TIME_SERIES_INPUT=$HDFS_PTS_DIR/OpticalAndGradientTimeSeriesInput 22 | SIMILARITY_INPUT=$HDFS_PTS_DIR/SequenceVideoVectors 23 | 24 | PTS_TIME_SERIES_INPUT=$POOLED_TIME_SERIES_HOME/OpticalAndGradientTimeSeriesInput 25 | PTS_SIMILARITY_INPUT=$POOLED_TIME_SERIES_HOME/MeanChiSquareAndSimilarityInput 26 | PTS_MEAN_CHI_OUTPUT=$POOLED_TIME_SERIES_HOME/MeanChiSquareOutput 27 | 28 | echo "*****************" 29 | echo "initialized, removing previous input directories" 30 | hadoop fs -rm -r $TIME_SERIES_INPUT 31 | hadoop fs -rm -r $SIMILARITY_INPUT 32 | echo "*****************" 33 | 34 | hadoop fs -mkdir $HDFS_PTS_DIR 35 | hadoop fs -mkdir $TIME_SERIES_INPUT 36 | mkdir $PTS_TIME_SERIES_INPUT 37 | mkdir $PTS_SIMILARITY_INPUT 38 | mkdir $PTS_MEAN_CHI_OUTPUT 39 | 40 | echo "*****************" 41 | echo "directories created" 42 | echo "*****************" 43 | 44 | # list full file names, omitting ls metadata columns (permissions, owner, size, date) 45 | hadoop fs -ls $2 | sed '1d;s/ */ /g' | cut -d\ -f8 | grep '\.mp4$' > $PTS_TIME_SERIES_INPUT/original_videos.txt 46 | echo "*****************" 47 | echo "Checking- " $PTS_TIME_SERIES_INPUT/original_videos.txt 48 | ls -lrt $PTS_TIME_SERIES_INPUT/original_videos.txt 49 | echo "*****************" 50 | 51 | 52 | mkdir $PTS_TIME_SERIES_INPUT/split 53 | split -l 1000 $PTS_TIME_SERIES_INPUT/original_videos.txt $PTS_TIME_SERIES_INPUT/split/original_videos.txt_ 54 | 55 | # Create temp space for storing batch results of OTS and GTS 56 | hadoop fs -mkdir $HDFS_PTS_DIR/OTSOutput_batch 57 | hadoop fs -mkdir $HDFS_PTS_DIR/GTSOutput_batch 58 | 59 | # Loop over all the files in $PTS_TIME_SERIES_INPUT/split/ 60 | # process 1000 videos at a time 61 | FILES=$PTS_TIME_SERIES_INPUT/split/* 62 | for f in $FILES 63 | do 64 | # Remove old original_videos.txt 65 | hadoop fs -rm $TIME_SERIES_INPUT/original_videos.txt 66 | # copy new batch of video input to hdfs 67 | ls $f 68 | hadoop fs -put $f $TIME_SERIES_INPUT/original_videos.txt 69 | # checking copied input 70 | hadoop fs -ls $TIME_SERIES_INPUT 71 | hadoop fs -cat $TIME_SERIES_INPUT/original_videos.txt | wc -l 72 | hadoop fs -cat $TIME_SERIES_INPUT/original_videos.txt | head 73 | 74 | # Optical and Gradient Time Series Calcs 75 | hadoop jar hadoop-pot-assembly/target/pooled-time-series-1.0-SNAPSHOT-jar-with-dependencies.jar org.pooledtimeseries.OpticalTimeSeries $TIME_SERIES_INPUT $HDFS_PTS_DIR/OTSOutput 76 | 77 | echo "*****************" 78 | echo "Completed OpticalTimeSeries. Output in - " $HDFS_PTS_DIR/OTSOutput 79 | hadoop fs -ls $HDFS_PTS_DIR/OTSOutput 80 | echo "*****************" 81 | hadoop jar hadoop-pot-assembly/target/pooled-time-series-1.0-SNAPSHOT-jar-with-dependencies.jar org.pooledtimeseries.GradientTimeSeries $TIME_SERIES_INPUT $HDFS_PTS_DIR/GTSOutput 82 | 83 | echo "*****************" 84 | echo "Completed GradientTimeSeries.
Output in - " $HDFS_PTS_DIR/GTSOutput 85 | hadoop fs -ls $HDFS_PTS_DIR/GTSOutput 86 | echo "*****************" 87 | 88 | hadoop fs -cp $HDFS_PTS_DIR/OTSOutput/*.of.txt $HDFS_PTS_DIR/OTSOutput_batch 89 | hadoop fs -cp $HDFS_PTS_DIR/GTSOutput/*.hog.txt $HDFS_PTS_DIR/GTSOutput_batch 90 | 91 | hadoop fs -rm -r $HDFS_PTS_DIR/OTSOutput 92 | hadoop fs -rm -r $HDFS_PTS_DIR/GTSOutput 93 | 94 | done 95 | 96 | hadoop fs -rm $2/*.of.txt 97 | hadoop fs -rm $2/*.hog.txt 98 | 99 | hadoop fs -cp $HDFS_PTS_DIR/OTSOutput_batch/*.of.txt $2 100 | hadoop fs -cp $HDFS_PTS_DIR/GTSOutput_batch/*.hog.txt $2 101 | 102 | echo "*****************" 103 | echo "Copied OTSOutput, GTSOutput to : " $2 104 | hadoop fs -ls $2 | head 105 | echo "*****************" 106 | 107 | hadoop fs -rm -r $HDFS_PTS_DIR/OTSOutput_batch 108 | hadoop fs -rm -r $HDFS_PTS_DIR/GTSOutput_batch 109 | 110 | echo "*****************" 111 | echo "Removed batch outputs after copying to : " $2 112 | echo "*****************" 113 | 114 | 115 | 116 | # Converting of.txt and hog.txt into a single sequence file 117 | hadoop jar hadoop-pot-assembly/target/pooled-time-series-1.0-SNAPSHOT-jar-with-dependencies.jar org.pooledtimeseries.seqfile.TextVectorsToSequenceFile $2 $SIMILARITY_INPUT 118 | echo "*****************" 119 | echo "Completed sequence file generation" 120 | hadoop fs -ls $HDFS_PTS_DIR/SequenceVideoVectors 121 | echo "*****************" 122 | 123 | 124 | # MeanChiSquareDistance Vector Calc 125 | hadoop jar hadoop-pot-assembly/target/pooled-time-series-1.0-SNAPSHOT-jar-with-dependencies.jar org.pooledtimeseries.MeanChiSquareDistanceCalculation $SIMILARITY_INPUT $HDFS_PTS_DIR/MeanChiSquaredCalcOutput 126 | hadoop fs -getmerge $HDFS_PTS_DIR/MeanChiSquaredCalcOutput $PTS_MEAN_CHI_OUTPUT/mean_dists.txt 127 | hadoop fs -put $PTS_MEAN_CHI_OUTPUT/mean_dists.txt $HDFS_PTS_DIR/ 128 | 129 | echo "*****************" 130 | echo "Expecting output in " $HDFS_PTS_DIR/MeanChiSquaredCalcOutput 131 | hadoop fs -ls $HDFS_PTS_DIR/MeanChiSquaredCalcOutput 132 | echo "Copied merged MeanChiSquaredCalcOutput output to " $PTS_MEAN_CHI_OUTPUT/mean_dists.txt 133 | ls -lrt $PTS_MEAN_CHI_OUTPUT/mean_dists.txt 134 | echo "Copied merged to hdfs " $HDFS_PTS_DIR/ 135 | hadoop fs -ls $HDFS_PTS_DIR/mean_dists.txt 136 | echo "*****************" 137 | 138 | # Similarity Calc 139 | hadoop fs -rm -r $HDFS_PTS_DIR/SimilarityCalc 140 | hadoop jar hadoop-pot-assembly/target/pooled-time-series-1.0-SNAPSHOT-jar-with-dependencies.jar org.pooledtimeseries.SimilarityCalculation $SIMILARITY_INPUT $HDFS_PTS_DIR/SimilarityCalc/ $HDFS_PTS_DIR/mean_dists.txt 141 | hadoop fs -getmerge $HDFS_PTS_DIR/SimilarityCalc $PTS_SIMILARITY_INPUT/similarity_calc.txt 142 | 143 | echo "Output in - " $PTS_SIMILARITY_INPUT/similarity_calc.txt 144 | 145 | 146 | -------------------------------------------------------------------------------- /src/main/resources/tika-config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | video/mp4 5 | application/mp4 6 | video/quicktime 7 | 8 | 9 | 10 | video/mp4 11 | application/mp4 12 | video/quicktime 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /visualization/circlepacking.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 22 | 23 | 24 | 61 | -------------------------------------------------------------------------------- /visualization/cluster-d3.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | Flare Dendrogram 4 | 64 | 65 | 66 | 67 | 68 | 171 | 172 | 173 | -------------------------------------------------------------------------------- /visualization/css/dashboard.css: -------------------------------------------------------------------------------- 1 | table, th, td { 2 | border: 1px solid black; 3 | padding:3px !important; 4 | } 5 | table { 6 | border-collapse: collapse; 7 | margin:10px; 8 | } 9 | html, body{ 10 | height: 100%; 11 | } 12 | .upper-div{ 13 | overflow-y:scroll; 14 | overflow-x:scroll; 15 | overflow: -moz-scrollbars-vertical; 16 | } 17 | .lower-div{ 18 | 19 | } 20 | ::-webkit-scrollbar { 21 | -webkit-appearance: none; 22 | width: 7px; 23 | } 24 | ::-webkit-scrollbar-thumb { 25 | border-radius: 4px; 26 | background-color: rgba(0, 0, 0, .5); 27 | -webkit-box-shadow: 0 0 1px rgba(255, 255, 255, .5); 28 | } 29 | .break-words { 30 | word-break:break-all 31 | } 32 | 33 | .well{ 34 | padding: 5px; 35 | margin-bottom: 2px; 36 | } -------------------------------------------------------------------------------- /visualization/css/style.css: -------------------------------------------------------------------------------- 1 | body{ 2 | font-size: 14px; 3 | background-color: whitesmoke; 4 | } 5 | h4{ 6 | padding-left: 100px; 7 | } 8 | -------------------------------------------------------------------------------- /visualization/dashboard.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 |
11 |
12 |
13 | Min -   14 | Max -   15 | Click on any cell to view videos!   16 | 18 | 19 | 20 | 23 | 24 |
{{ y | range:filters.min : filters.max }}
25 |      26 |
27 |
28 |
29 | Score - {{score}} 30 |
31 |
32 | Comments on this pair - 33 |
34 |
35 | 36 |
37 |
38 |
39 |
40 |
41 | 42 |
43 |
{{ feedback_response }}
44 |
45 |
46 |
{{videoId1}} - {{ video1 }} 47 |
51 |
{{videoId2}} - {{ video2 }} 52 |
56 | 57 | 58 |
59 |
60 | 61 | 65 | 66 | 67 | 68 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /visualization/data/formatted_similarity_calc.csv: -------------------------------------------------------------------------------- 1 | ,1.mp4,2.mp4,3.mp4,4.mp4,5.mp4,6.mp4,7.mp4, 2 | 1.mp4,1.0,0.677986882429,0.514423983869,0.303814588993,0.05396806823,0.691531696688,0.220586417979, 3 | 2.mp4,,1.0,0.12525353988,0.106469279664,0.497897205246,0.98017123923,0.450498041365, 4 | 3.mp4,,,1.0,0.364850515997,0.440085467283,0.166097710583,0.730772512002, 5 | 4.mp4,,,,1.0,0.0413008435665,0.250465685569,0.782336621617, 6 | 5.mp4,,,,,1.0,0.623467310099,0.182500077256, 7 | 6.mp4,,,,,,1.0,0.922439590582, 8 | 7.mp4,,,,,,,1.0, -------------------------------------------------------------------------------- /visualization/data/similarity_cluster.json: -------------------------------------------------------------------------------- 1 | {"children": [{"color": "#9e0142", "children": [{"name": "01091b9db35e6b57e9b2f9e41e67afd9_1-48988ee434340f9c61ba35ff26bf3b.mp4"}], "name": "cluster0"}, {"color": "#d8434e", "children": [{"name": "01167b79bb5926d6d1d9966ee78fb154_1-34e63d37ed1d2c55940d73dee88470.mp4"}], "name": "cluster1"}, {"color": "#f67a49", "children": [{"name": "011e50d1a95d4cbdc2e12e01060f5ffd_1-f89de38d12f1bcd0098a76140d117e.mp4"}, {"name": "01282698e48306b9f04da78d5388eaad_1-a86d3077108f51dc09c2db79ca8ca9.mp4"}, {"name": "012f564a40cdcc70a113f5143e9c1d28_1-6980c9cef71b9037693dca74d1e7dc.mp4"}, {"name": "013c4d37b8c7ff356df135d0f726263d_1-a9ed23f91c92cb25ac42629ffe3c83.mp4"}, {"name": "014dba5d8b4d950d3e8ca9f392365ea3_1-5bf7500286057453a143ac20e91095.mp4"}, {"name": "015d287a8a951567dc10218bb7141e07_1-2c1bcfd0c0704290df2792c8a0781f.mp4"}, {"name": "0169b72d1dec21850bb9a8df92507c12_1-61239f4f7f48122521996e0369e3b6.mp4"}, {"name": "016cebe78f399424bd923cc6968be490_1-d55689535b1a0f2c847a1332001e17.mp4"}, {"name": "01b720fcb93da59c32a0254f21ade0ae_1-9c9a93a5aed41546d4852ddcafd0b3.mp4"}, {"name": "01b98267a85406778d081213649c09e5_1-273d732e768dab37679b866a50fe58.mp4"}], "name": "cluster2"}, {"color": "#fdbf6f", "children": [{"name": "0143219c1a764fff93b9b487b8bc0132_1-d4d0011a28b479c5921cd29c09d346.mp4"}, {"name": "01b281631cd2b6aa9295cfaae6ee7f4c_1-41ebe9635716b926b329f530391713.mp4"}], "name": "cluster3"}, {"color": "#feeda1", "children": [{"name": "0167b5419cdabe121cb4a65e4a6bc9fa_1-c3193d9003c17d35c9823b71802039.mp4"}, {"name": "018a13837514bf46295802244586525e_1-c42c68c7e75c1161b3d5e77816e14c.mp4"}], "name": "cluster4"}, {"color": "#f1f9a9", "children": [{"name": "01934d5c7d3e7120f33ac34ac24c4d8b_1-eed0305ebc491848b4369e88b4721d.mp4"}], "name": "cluster5"}, {"color": "#bfe5a0", "children": [{"name": "01ab1348d3186a1336dcd1fdf98af3df_1-15f748b50d25bb646e423f790820b3.mp4"}], "name": "cluster6"}, {"color": "#74c7a5", "children": [{"name": "01cc7e5af3c6ed3e81dcf99ff2ec4a56_1-09c23856b5ff2dfd72cb13c502080d.mp4"}], "name": "cluster7"}, {"color": "#378ebb", "children": [{"name": "01d57beec61cdf4b20f37f1af1fbfe23_1-d93bfdce1c419f4204836789bb565f.mp4"}], "name": "cluster8"}, {"color": "#5e4fa2", "children": [{"name": "01e050e25f96845b62e5b7e08e8431cb_1-8d0e0383f1ec94beb40042bb3feb1a.mp4"}], "name": "cluster9"}], "name": "clusters"} -------------------------------------------------------------------------------- /visualization/data/similarity_cluster.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/USCDataScience/hadoop-pot/c3e7d1dda74ce56c25f574795c09feb9a6429c62/visualization/data/similarity_cluster.png -------------------------------------------------------------------------------- /visualization/data/similarity_heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USCDataScience/hadoop-pot/c3e7d1dda74ce56c25f574795c09feb9a6429c62/visualization/data/similarity_heatmap.png -------------------------------------------------------------------------------- /visualization/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/USCDataScience/hadoop-pot/c3e7d1dda74ce56c25f574795c09feb9a6429c62/visualization/favicon.ico -------------------------------------------------------------------------------- /visualization/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Similarity for Video files 4 | 5 | 6 | 7 | 8 | 9 | 10 |

Range for similarity:

11 |
12 | 0 < - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > 1
13 |
14 |
15 |
16 |
17 | 18 |
19 |
27 | 28 | -------------------------------------------------------------------------------- /visualization/js/dashboard.js: -------------------------------------------------------------------------------- 1 | angular.module('myApp', []) //main controller 2 | .controller('myCtrl',['$scope','$http', 3 | function ($scope, $http) { 4 | $scope.legends ={} 5 | var VIDEO_PATH = "data/ht_video_pot_test_set/"; 6 | //FILL link here 7 | var GOOGLE_FORMS_URL=""; 8 | $scope.video1 = ""; 9 | $scope.video2 = ""; 10 | $scope.score = 0.0; 11 | $scope.videoId1 = 0; 12 | $scope.videoId2 = 0; 13 | //Add paths to different data set here 14 | $scope.dataSet = ['data/formatted_similarity_calc.csv','data/formatted_similarity_calc_6.csv']; 15 | 16 | $scope.readCSV = function() { 17 | // http get request to read CSV file content 18 | if($scope.selectedDataSet){ 19 | $http.get($scope.selectedDataSet).success($scope.processData); 20 | }else{ 21 | $http.get($scope.dataSet[0]).success($scope.processData); 22 | } 23 | 24 | }; 25 | 26 | $scope.processData = function(allText) { 27 | // split content based on new line 28 | var allTextLines = allText.split(/\r\n|\n/); 29 | var headers = allTextLines[0].split(','); 30 | var lines = []; 31 | 32 | for ( var i = 0; i < allTextLines.length; i++) { 33 | // split content based on comma 34 | var data = allTextLines[i].split(','); 35 | if (data.length == headers.length) { 36 | var tarr = []; 37 | for ( var j = 0; j < headers.length; j++) { 38 | if(i==0){ 39 | tarr.push(j); 40 | $scope.legends[j]=data[j] 41 | continue; 42 | } 43 | if(j==0){ 44 | tarr.push(i); 45 | continue; 46 | } 47 | 48 | // tarr.push((data[j]*100).toFixed(0) + "%"); 49 | tarr.push(data[j]); 50 | } 51 | lines.push(tarr); 52 | } 53 | } 54 | 55 | $scope.data = lines; 56 | }; 57 | 58 | $scope.showVideos = function(vid1, vid2, score){ 59 | $scope.videoId1=vid1; 60 | $scope.videoId2=vid2; 61 | $scope.video1 = VIDEO_PATH + $scope.legends[vid1] 62 | $scope.video2 = VIDEO_PATH + $scope.legends[vid2] 63 | $scope.score = score; 64 | 65 | $scope.playVideo(document.getElementById("video1")); 66 | $scope.playVideo(document.getElementById("video2")); 67 | 68 | } 69 | 70 | $scope.playVideo = function(video) { 71 | video.addEventListener('loadeddata', function() { 72 | video.play() 73 | }, false); 74 | 75 | } 76 | 77 | $scope.recordFeedback = function(){ 78 | $scope.feedback_response="Posting.."; 79 | 80 | $http({ 81 | url:GOOGLE_FORMS_URL, 82 | headers: { 'Content-Type': 'application/x-www-form-urlencoded' }, 83 | params: { 84 | "entry.1986871126" : $scope.video1.substr(VIDEO_PATH.length), 85 | "entry.489660422" : $scope.video2.substr(VIDEO_PATH.length), 86 | "entry.1932134194" : $scope.score, 87 | "entry.19555886": $scope.comments 88 | }, 89 | method:"POST", 90 | }).error(function(data, status) { 91 | //Error is expected as it's cross domain 92 | //But if status is 0 then form was posted fine 93 | if(status == 0){ 94 | $scope.feedback_response="Posted. Thanks!" 95 | }else{ 96 | $scope.feedback_response="Error! 
Contact support" 97 | } 98 | 99 | }); 100 | } 101 | $scope.readCSV(); 102 | }]) //Filter for percentage in css 103 | .filter('percentage', ['$filter', function ($filter) { 104 | return function (input, decimals) { 105 | // works only for fractions 106 | if(input>1){ 107 | return 0; 108 | } 109 | return $filter('number')(input * 100, decimals) + '%'; 110 | }; 111 | }]) //Filter for range 112 | .filter('range', function() { 113 | return function(input, min, max) { 114 | // works only for fractions 115 | if (input > 1) { 116 | return input; 117 | } 118 | if (input >= min && input <= max) { 119 | return input; 120 | } 121 | return ""; 122 | }; 123 | }); 124 | 125 | -------------------------------------------------------------------------------- /visualization/js/matrix.js: -------------------------------------------------------------------------------- 1 | var header; 2 | var head=0; 3 | window.onload = function () { 4 | 5 | d3.csv("data/formatted_similarity_calc.csv", function (error,data) { 6 | console.log("in"); 7 | if (error) 8 | throw error; 9 | var label_col_full = Object.keys(data[0]); 10 | header = d3.keys(data[0]); 11 | var label_row = []; 12 | var rows = []; 13 | var row = []; 14 | var temp; 15 | for (var i = 0; i < data.length; i++) { 16 | temp=data[i][label_col_full[0]]; 17 | label_row.push(temp); 18 | row = []; 19 | 20 | for (var j = 1; j < label_col_full.length; j++) { 21 | 22 | temp=parseFloat(data[i][label_col_full[j]]); 23 | row.push(temp); 24 | 25 | } 26 | rows.push(row); 27 | 28 | } 29 | 30 | d3.select("svg").remove(); 31 | d3.select("rowLabelg").remove(); 32 | main(rows, label_col_full.slice(1), label_row); 33 | 34 | }); 35 | }; 36 | 37 | var mapsize = 2000; 38 | var pixelsize = 20; 39 | var cellsize = pixelsize-1; 40 | 41 | d3.select('.tooltip').style('padding',' 10px') 42 | .style('background',' white') 43 | .style('border-radius',' 10px') 44 | .style('box-shadow',' 4px 4px 10px rgba(0, 0, 0, 0.4)'); 45 | 46 | var main = function (corr, label_col, label_row) { 47 | 48 | var transition_time = 1500; 49 | var body = d3.select('body'); 50 | body.select('g.legend').style('position','absolute') 51 | .style('height','25px') 52 | .style('width','400px').style('margin','auto').style('margin-left','100px') 53 | .style('background','linear-gradient(to right,#c8f2b9,#db3db6)'); 54 | var tooltip = body.select('div.tooltip'); 55 | var svg = body.select('#chart').append('svg') 56 | .attr('width', mapsize*3-500) 57 | .attr('height', mapsize-1400).style('margin','auto').style('margin-top','-50px').style('margin-left','150px'); 58 | 59 | 60 | var row = corr; 61 | var col = d3.transpose(corr); 62 | var total_len; 63 | 64 | var indexify = function (mat) { 65 | var res = [], temp; // declare temp locally to avoid an implicit global 66 | total_len = mat.length; 67 | console.log(total_len); 68 | for (var i = 0; i < mat.length; i++) { 69 | for (var j = 0; j < mat[0].length; j++) { 70 | if(isNaN(mat[i][j])) 71 | temp = 0; 72 | else 73 | temp=mat[i][j]; 74 | res.push({ 75 | i: i, 76 | j: j, 77 | val: temp 78 | 79 | }); 80 | 81 | } 82 | 83 | } 84 | return res; 85 | }; 86 | 87 | var corr_data = indexify(corr); 88 | var order_col = d3.range(label_col.length + 1); 89 | var order_row = d3.range(label_row.length + 1); 90 | 91 | var color = d3.scale.linear() 92 | .domain([ 0, 1]) 93 | .range(['#c8f2b9', '#db3db6']); 94 | 95 | var scale = d3.scale.linear() 96 | .domain([0, d3.min([50, d3.max([label_col.length, label_row.length, 4])])]) 97 | .range([0, parseFloat(1) * 250]); 98 | 99 | 100 | 101 | var label_space = 50; 102 | 103 | var matrix = svg.append('g') 104 |
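// (annotation, not part of the original matrix.js) the chain below renders one
// (pixelsize - 1)px square per matrix cell at x = j*pixelsize + label_space and
// y = i*pixelsize + label_space - 5, filled by the linear color scale running
// from #c8f2b9 (similarity 0) to #db3db6 (similarity 1), with hover tooltips
// showing the two file names and the similarity value.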
.attr('class', 'matrix') 105 | .attr('height',mapsize-1400) 106 | .attr('width',mapsize*3-500) 107 | .attr('transform', 'translate(' + (label_space + 10) + ',' + (label_space + 10) + ')') 108 | .selectAll('rect.pixel').data(corr_data) 109 | .enter().append('rect') 110 | .attr('class', 'pixel') 111 | .attr('width', cellsize) 112 | .attr('height', cellsize) 113 | .attr('position','absolute') 114 | .attr('y',function(d){return d.i*pixelsize+ label_space-5}) 115 | .attr('x',function(d){return d.j*pixelsize + label_space}) 116 | .style('fill', function (d) { 117 | return color(d.val); 118 | }) 119 | .on('mouseover', function (d) { 120 | tooltip.style("opacity", 0.8) 121 | .style('position', 'absolute') 122 | .style("left", (d3.event.pageX + 35) + 'px') 123 | .style("top", (d3.event.pageY + 30) + 'px') 124 | .html('File: ' + header[d.i+1] + "<br>" + "File: " + header[d.j+1] + "<br>" + "Value: " + d.val.toFixed(3)); 125 | 126 | 127 | d3.select(this).style("opacity", 0.5); 128 | }) 129 | .on('mouseout', function (d) { 130 | tooltip.style("opacity", 1e-6); 131 | d3.select(this).style("opacity", 1); 132 | }); 133 | 134 | 135 | rowLabel = [] 136 | colLabel = [] 137 | 138 | for(var head=1; head=3: 42 | limit_smallest_cluster = int(sys.argv[2]) 43 | else: 44 | limit_smallest_cluster = None 45 | 46 | print path_to_sim_mat 47 | print num_videos 48 | print "Filter -", limit_smallest_cluster 49 | # load data from formatted_similarity_calc.csv 50 | # skip header 51 | # skip the first (label) column, so usecols=range(1, num_videos+1) 52 | # the empty lower half is filled with zeros (filling_values=0) 53 | data = np.genfromtxt(path_to_sim_mat, 54 | delimiter=",", skip_header=1, usecols=range(1, num_videos+1), 55 | filling_values=0) 56 | 57 | ## add the matrix to its transpose to fill the lower half 58 | data = np.triu(data).T + np.triu(data) 59 | ## the diagonal is added to itself above, so reset it to 1 60 | np.fill_diagonal(data, 1) 61 | # We have a similarity matrix; to make it a distance matrix we 62 | # subtract the similarity score from 1 63 | data = 1 - data 64 | print "Data loaded" 65 | 66 | db = DBSCAN(eps=0.2).fit(data) 67 | with open(path_to_sim_mat) as f: 68 | videos = f.readline().strip().split(",")[1:] 69 | 70 | ## each index stores its cluster label 71 | clusters = db.labels_ 72 | 73 | ## map of cluster label to the set of videos it contains 74 | video_clusters = {} 75 | for cluster, video in zip(clusters, videos): 76 | if cluster not in video_clusters: 77 | video_clusters[cluster] = [] 78 | 79 | video_clusters[cluster].append(video) 80 | 81 | 82 | print "clusters calculated" 83 | 84 | ############################################################################## 85 | # Plot result 86 | 87 | core_samples_mask = np.zeros_like(clusters, dtype=bool) 88 | core_samples_mask[db.core_sample_indices_] = True 89 | 90 | # Black is removed from the palette and used for noise instead. 91 | unique_labels = set(clusters) 92 | colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels))) 93 | 94 | labelCtr = Counter(clusters) 95 | 96 | # The slices for the pie chart will be ordered and plotted counter-clockwise.
97 | fracs = [] 98 | 99 | figure1 = plt.figure() 100 | ax1 = figure1.add_axes([0.07,0.25,0.90,0.70]) 101 | 102 | # initialize d3-hierarchy json 103 | clusterJson = {"children":[],"name": "clusters"} 104 | # initialize FILTERED d3-hierarchy json 105 | clusterJsonFiltered = {"children":[],"name": "clusters"} 106 | 107 | # Single loop for forming the pie chart, d3 JSON, and cluster image 108 | for k, col in zip(unique_labels, colors): 109 | ## setting frac (slice size) for the pie chart 110 | fracs.append(labelCtr[k]) 111 | 112 | ## d3 json 113 | clusterJsonChild = {"children":[],"name": "cluster"+str(k),"color":lib_colors.rgb2hex(col)} 114 | clusterJsonChildFiltered = {"children":[],"name": "cluster"+str(k),"color":lib_colors.rgb2hex(col)} 115 | 116 | for video in video_clusters[k]: 117 | clusterJsonChild["children"].append({"name": video}) 118 | 119 | # check if the filter is enabled and the cluster qualifies 120 | if(limit_smallest_cluster and len(video_clusters[k]) >= limit_smallest_cluster): 121 | clusterJsonChildFiltered["children"].append({"name": video}) 122 | 123 | 124 | clusterJson["children"].append(clusterJsonChild) 125 | 126 | # check if the filter is enabled and there are nodes after filtering 127 | if(limit_smallest_cluster and len(clusterJsonChildFiltered["children"])>0): 128 | clusterJsonFiltered["children"].append(clusterJsonChildFiltered) 129 | 130 | 131 | ## cluster image 132 | if k == -1: 133 | # Black used for noise. 134 | col = 'k' 135 | 136 | class_member_mask = (clusters == k) 137 | 138 | xy = data[class_member_mask & core_samples_mask] 139 | ax1.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, 140 | markeredgecolor='k', markersize=14) 141 | 142 | xy = data[class_member_mask & ~core_samples_mask] 143 | ax1.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col, 144 | markeredgecolor='k', markersize=6) 145 | 146 | plt.title('Estimated number of clusters: %d' % len(unique_labels)) 147 | 148 | 149 | # make a square figure and axes 150 | ax2 = figure1.add_axes([0.4,0.0,0.20,0.20]) 151 | 152 | ax2.pie(fracs, startangle=90, colors=colors) 153 | 154 | plt.savefig('../data/similarity_cluster.png') 155 | 156 | 157 | with open('../data/similarity_cluster.json', 'w') as fp: 158 | json.dump(clusterJson, fp) 159 | 160 | if(limit_smallest_cluster): 161 | with open('../data/similarity_cluster_filtered_'+str(limit_smallest_cluster)+'.json', 'w') as fp: 162 | json.dump(clusterJsonFiltered, fp) 163 | 164 | -------------------------------------------------------------------------------- /visualization/py/similarity_heatmap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License.
17 | 18 | import matplotlib 19 | matplotlib.use('Agg') 20 | 21 | import numpy as np 22 | import pylab as pl 23 | import sys 24 | 25 | pl.ioff() 26 | 27 | if len(sys.argv) != 3: 28 | print "Usage - " 29 | print "python similarity_heatmap.py /path/to/formatted/similarity number_of_videos" 30 | print "number_of_videos can be less than or equal to the number of videos in the similarity matrix" 31 | sys.exit() 32 | 33 | 34 | path_to_sim_mat = sys.argv[1] 35 | num_videos = int(sys.argv[2]) 36 | print path_to_sim_mat 37 | print num_videos 38 | 39 | # load data from formatted_similarity_calc.csv 40 | # skip header 41 | # skip the first (label) column, so usecols=range(1, num_videos+1) 42 | # paint only the upper half; the rest is filled with zeros (filling_values=0) 43 | data = np.genfromtxt(path_to_sim_mat, 44 | delimiter=",", skip_header=1, usecols=range(1, num_videos+1), 45 | filling_values=0) 46 | 47 | print "Data loaded" 48 | 49 | # use a single-hue blue colormap 50 | pl.imshow(data, cmap=pl.cm.Blues, interpolation="nearest") 51 | 52 | # show the color scale 53 | pl.colorbar() 54 | 55 | # toggle to pl.show() to just view the image 56 | pl.savefig('../data/similarity_heatmap.png') 57 | print "saved in ../data/similarity_heatmap.png" 58 | -------------------------------------------------------------------------------- /visualization/py/video_duration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License.
17 | 18 | ''' 19 | Takes a directory path and outputs stats of video duration 20 | - Total length of all videos in seconds 21 | - Mean video length 22 | - Standard deviation of length 23 | 24 | Install pymediainfo - https://github.com/sbraz/pymediainfo 25 | ''' 26 | 27 | from pymediainfo import MediaInfo 28 | 29 | import os 30 | import sys 31 | import numpy as np 32 | 33 | if len(sys.argv) < 2: 34 | print "Usage -" 35 | print "\t python video_duration.py <dir_path>" 36 | sys.exit() 37 | 38 | file_path = sys.argv[1] 39 | 40 | print "Finding length of all files in ", file_path 41 | 42 | durations = [] 43 | for f in os.listdir(file_path): 44 | if not f.endswith(".mp4"): 45 | continue 46 | media_info = MediaInfo.parse(file_path+"/"+f) 47 | # duration is reported in milliseconds 48 | 49 | # Only if MediaInfo was able to open the video file 50 | if len(media_info.tracks) > 0 and media_info.tracks[0].duration: 51 | duration_in_ms = media_info.tracks[0].duration 52 | 53 | durations.append(1.0*duration_in_ms/1000) 54 | else: 55 | print "Can't open ", file_path+"/"+f 56 | 57 | print "" 58 | print "**********************************" 59 | print "Total Duration - ", sum(durations) 60 | print "Average Duration - ", np.average(durations) 61 | print "Standard deviation of whole set - ", np.std(durations) 62 | print "**********************************" 63 | 64 | --------------------------------------------------------------------------------
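To close the Java side of this listing, here is a small self-contained sketch (all numbers invented) of the similarity math in PoT.kernelDistance above: per-feature chi-square distances are normalized by their dataset-wide means, summed, and squashed into a (0, 1] score via exp(-distance / 10), which is what the similarity CSV/JSON files consumed by the visualization pages contain.

```java
// Self-contained sketch of the kernelDistance math from PoT.java above.
// All numbers below are made up for illustration.
public class KernelMathSketch {
    public static void main(String[] args) {
        double[] chiSquareDists = {0.5, 0.2}; // per-feature-type chi-square distances
        double[] meanDists = {0.4, 0.4};      // dataset-wide mean distance per feature type

        double distance = 0;
        for (int d = 0; d < chiSquareDists.length; d++) {
            // mirrors kernelDistance's guard: divide by 1000000.0 when the mean is 0
            double denom = (meanDists[d] == 0) ? 1000000.0 : meanDists[d];
            distance += chiSquareDists[d] / denom;
        }

        // exp(-(1.25 + 0.5) / 10) = exp(-0.175), roughly 0.839
        System.out.println("similarity = " + Math.exp(-1 * distance / 10));
    }
}
```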