├── .gitignore ├── LICENSE ├── README.md └── mergevec.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Blake Wulfe 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | mergevec 2 | ======== 3 | 4 | Update (20/05/15): This approach worked for my purposes, but a few people have raised concerns that using artificial data is not the best way to train a classifier [1]. As such, I would recommend approaching this method with a fair amount of skepticism. 5 | 6 | Mergevec.py is used to merge .vec files for use in training a cascade classifier with openCV's opencv_traincascade. This code was made as replacement for mergevec.cpp (created by Naotoshi Seo. See: http://note.sonots.com/SciSoftware/haartraining/mergevec.cpp.html) in order to avoid recompiling opencv with mergevec.cpp. 7 | 8 | #To use mergevec.py: 9 | 10 | (1) Place all .vec files to be merged in a single directory ("vec_directory"). 11 | (2) Download mergevec.py 12 | (3) Navigate to mergevec.py in your CLI (terminal or cmd) and enter "python mergevec.py -v your_vec_directory -o your_output_filename.vec". 13 | 14 | #To test the output of mergevec.py: 15 | 16 | (1) Install openCV. 17 | (2) Navigate to the output file in your CLI (terminal or cmd). 18 | (3) Type "opencv_createsamples -w img_width -h img_height -vec your_output_filename.vec". This should show the .vec files in sequence. 19 | 20 | The aggregate .vec output from mergevec.py has successfully been used to train a cascade. See the below for resources on training cascade classifiers. 21 | #opencv_traincascade Resources 22 | 23 | [1] Counterargument: http://answers.opencv.org/question/55879/opencv-mergevec-haartraining-issues/ 24 | 25 | [2] OpenCV: http://docs.opencv.org/doc/user_guide/ug_traincascade.html 26 | 27 | [3] Naotoshi Seo: http://note.sonots.com/SciSoftware/haartraining.html 28 | 29 | [4] Coding Robin: http://coding-robin.de/2013/07/22/train-your-own-opencv-haar-classifier.html 30 | 31 | [5] StackOverflow: http://stackoverflow.com/questions/16058080/how-to-train-cascade-properly 32 | -------------------------------------------------------------------------------- /mergevec.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2014, Blake Wulfe 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in 12 | # all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | # THE SOFTWARE. 21 | ############################################################################### 22 | 23 | """ 24 | File: mergevec.py 25 | Author: blake.w.wulfe@gmail.com 26 | Date: 6/13/2014 27 | File Description: 28 | 29 | This file contains a function that merges .vec files called "merge_vec_files". 30 | I made it as a replacement for mergevec.cpp (created by Naotoshi Seo. 31 | See: http://note.sonots.com/SciSoftware/haartraining/mergevec.cpp.html) 32 | in order to avoid recompiling openCV with mergevec.cpp. 33 | 34 | To use the function: 35 | (1) Place all .vec files to be merged in a single directory (vec_directory). 36 | (2) Navigate to this file in your CLI (terminal or cmd) and type "python mergevec.py -v your_vec_directory -o your_output_filename". 37 | 38 | The first argument (-v) is the name of the directory containing the .vec files 39 | The second argument (-o) is the name of the output file 40 | 41 | To test the output of the function: 42 | (1) Install openCV. 43 | (2) Navigate to the output file in your CLI (terminal or cmd). 44 | (2) Type "opencv_createsamples -w img_width -h img_height -vec output_filename". 45 | This should show the .vec files in sequence. 46 | 47 | """ 48 | 49 | import sys 50 | import glob 51 | import struct 52 | import argparse 53 | import traceback 54 | 55 | 56 | def exception_response(e): 57 | exc_type, exc_value, exc_traceback = sys.exc_info() 58 | lines = traceback.format_exception(exc_type, exc_value, exc_traceback) 59 | for line in lines: 60 | print(line) 61 | 62 | def get_args(): 63 | parser = argparse.ArgumentParser() 64 | parser.add_argument('-v', dest='vec_directory') 65 | parser.add_argument('-o', dest='output_filename') 66 | args = parser.parse_args() 67 | return (args.vec_directory, args.output_filename) 68 | 69 | def merge_vec_files(vec_directory, output_vec_file): 70 | """ 71 | Iterates throught the .vec files in a directory and combines them. 72 | 73 | (1) Iterates through files getting a count of the total images in the .vec files 74 | (2) checks that the image sizes in all files are the same 75 | 76 | The format of a .vec file is: 77 | 78 | 4 bytes denoting number of total images (int) 79 | 4 bytes denoting size of images (int) 80 | 2 bytes denoting min value (short) 81 | 2 bytes denoting max value (short) 82 | 83 | ex: 6400 0000 4605 0000 0000 0000 84 | 85 | hex 6400 0000 4605 0000 0000 0000 86 | # images size of h * w min max 87 | dec 100 1350 0 0 88 | 89 | :type vec_directory: string 90 | :param vec_directory: Name of the directory containing .vec files to be combined. 91 | Do not end with slash. Ex: '/Users/username/Documents/vec_files' 92 | 93 | :type output_vec_file: string 94 | :param output_vec_file: Name of aggregate .vec file for output. 95 | Ex: '/Users/username/Documents/aggregate_vec_file.vec' 96 | 97 | """ 98 | 99 | # Check that the .vec directory does not end in '/' and if it does, remove it. 100 | if vec_directory.endswith('/'): 101 | vec_directory = vec_directory[:-1] 102 | # Get .vec files 103 | files = glob.glob('{0}/*.vec'.format(vec_directory)) 104 | 105 | # Check to make sure there are .vec files in the directory 106 | if len(files) <= 0: 107 | print('Vec files to be mereged could not be found from directory: {0}'.format(vec_directory)) 108 | sys.exit(1) 109 | # Check to make sure there are more than one .vec files 110 | if len(files) == 1: 111 | print('Only 1 vec file was found in directory: {0}. Cannot merge a single file.'.format(vec_directory)) 112 | sys.exit(1) 113 | 114 | 115 | # Get the value for the first image size 116 | prev_image_size = 0 117 | try: 118 | with open(files[0], 'rb') as vecfile: 119 | content = b''.join((line) for line in vecfile.readlines()) 120 | val = struct.unpack('