├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── dt_example1 ├── car.data ├── car.names ├── car.test ├── car.train └── decisiontree.cpp ├── dt_example2 ├── decisiontree.cpp ├── wdbc.data ├── wdbc.names ├── wdbc.test └── wdbc.train ├── handwritten_ex ├── decisiontree.cpp ├── neuralnetwork.cpp ├── semeion.names ├── semeion.test ├── semeion.train └── svm.cpp ├── opticaldigits_ex ├── boosttree.cpp ├── decisiontree.cpp ├── extremerandomforest.cpp ├── knn.cpp ├── knn_weighted.cpp ├── neuralnetwork.cpp ├── normalbayes.cpp ├── optdigits.names ├── optdigits.test ├── optdigits.train ├── randomforest.cpp └── svm.cpp ├── other_ex ├── normalbayes.cpp ├── wdbc.data ├── wdbc.names ├── wdbc.test └── wdbc.train ├── speech_ex ├── decisiontree.cpp ├── isolet1+2+3+4.train ├── isolet5.test └── svm.cpp └── tools ├── dt_varimportance.cc ├── ex_tree.xml ├── randomize.cc ├── selectlines.cc ├── tree.yml └── typechecker.cc /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this file is setup to build everything for the ML examples 2 | 3 | cmake_minimum_required (VERSION 2.6) 4 | cmake_policy(SET CMP0037 OLD) 5 | set( CMAKE_CXX_FLAGS "-O3 -Wall ${CMAKE_CXX_FLAGS}" ) 6 | # linux specific stuff 7 | 8 | IF ( UNIX ) 9 | set( CMAKE_PREFIX_PATH "/opt/opencv-2.4" ) 10 | set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS TRUE) 11 | MESSAGE( 
"LINUX CONFIG" ) 12 | ENDIF ( UNIX ) 13 | 14 | # windows (inc. 64-bit specific stuff) 15 | 16 | IF ( WIN32 ) 17 | set( CMAKE_PREFIX_PATH "C:/OpenCV2.4/build" ) 18 | set( OpenCV_DIR "C:/OpenCV2.4/build" ) 19 | MESSAGE( "WINDOWS CONFIG" ) 20 | ENDIF ( WIN32 ) 21 | 22 | find_package( OpenCV 2.4.13 REQUIRED ) 23 | # MESSAGE ( "OPENCV CONFIG" ) 24 | # MESSAGE ( ${OpenCV_LIBS} ) 25 | 26 | project(decisiontree) 27 | add_executable(./handwritten_ex/decisiontree ./handwritten_ex/decisiontree.cpp) 28 | target_link_libraries( ./handwritten_ex/decisiontree ${OpenCV_LIBS} ) 29 | 30 | project(neuralnetwork) 31 | add_executable(./handwritten_ex/neuralnetwork ./handwritten_ex/neuralnetwork.cpp) 32 | target_link_libraries( ./handwritten_ex/neuralnetwork ${OpenCV_LIBS} ) 33 | 34 | project(svm) 35 | add_executable(./handwritten_ex/svm ./handwritten_ex/svm.cpp) 36 | target_link_libraries( ./handwritten_ex/svm ${OpenCV_LIBS} ) 37 | 38 | project(ga_interface) 39 | add_executable(./ga_ex/ga_interface ./ga_ex/ga_interface.cpp) 40 | target_link_libraries( ./ga_ex/ga_interface ${OpenCV_LIBS} ) 41 | 42 | project(decisiontree) 43 | add_executable(./dt_example1/decisiontree ./dt_example1/decisiontree.cpp) 44 | target_link_libraries( ./dt_example1/decisiontree ${OpenCV_LIBS} ) 45 | 46 | project(decisiontree2) 47 | add_executable(./dt_example2/decisiontree ./dt_example2/decisiontree.cpp) 48 | target_link_libraries( ./dt_example2/decisiontree ${OpenCV_LIBS} ) 49 | 50 | project(boosttree) 51 | add_executable(./opticaldigits_ex/boosttree ./opticaldigits_ex/boosttree.cpp) 52 | set_target_properties(./opticaldigits_ex/boosttree PROPERTIES COMPILE_FLAGS "-fpermissive") 53 | target_link_libraries( ./opticaldigits_ex/boosttree ${OpenCV_LIBS} ) 54 | 55 | project(decisiontree3) 56 | add_executable(./opticaldigits_ex/decisiontree ./opticaldigits_ex/decisiontree.cpp) 57 | target_link_libraries( ./opticaldigits_ex/decisiontree ${OpenCV_LIBS} ) 58 | 59 | project(extremerandomforest3) 60 | 
# -- opticaldigits_ex : one executable per classifier example ------------------
# N.B. the redundant per-target project() declarations have been removed: a
# CMake build needs only the single top-level project() (declared at the top of
# this file); re-declaring one per executable added nothing and reset
# per-project variables each time. Target names are prefixed with their
# sub-directory path so each binary builds in-place next to its data files
# (this relies on cmake_policy(SET CMP0037 OLD) set at the top of this file).

add_executable(./opticaldigits_ex/extremerandomforest ./opticaldigits_ex/extremerandomforest.cpp)
target_link_libraries( ./opticaldigits_ex/extremerandomforest ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/randomforest ./opticaldigits_ex/randomforest.cpp)
target_link_libraries( ./opticaldigits_ex/randomforest ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/svm ./opticaldigits_ex/svm.cpp)
target_link_libraries( ./opticaldigits_ex/svm ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/knn ./opticaldigits_ex/knn.cpp)
target_link_libraries( ./opticaldigits_ex/knn ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/knn_weighted ./opticaldigits_ex/knn_weighted.cpp)
target_link_libraries( ./opticaldigits_ex/knn_weighted ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/normalbayes ./opticaldigits_ex/normalbayes.cpp)
target_link_libraries( ./opticaldigits_ex/normalbayes ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/neuralnetwork ./opticaldigits_ex/neuralnetwork.cpp)
target_link_libraries( ./opticaldigits_ex/neuralnetwork ${OpenCV_LIBS} )

# -- other_ex ------------------------------------------------------------------
add_executable(./other_ex/normalbayes ./other_ex/normalbayes.cpp)
target_link_libraries( ./other_ex/normalbayes ${OpenCV_LIBS} )

# -- speech_ex -----------------------------------------------------------------
add_executable(./speech_ex/decisiontree ./speech_ex/decisiontree.cpp)
target_link_libraries( ./speech_ex/decisiontree ${OpenCV_LIBS} )

add_executable(./speech_ex/svm ./speech_ex/svm.cpp)
target_link_libraries( ./speech_ex/svm ${OpenCV_LIBS} )

# -- tools (dt_varimportance needs OpenCV; the remaining tools, whose
#    add_executable() calls follow below, are plain C++ with no libraries) -----
add_executable(./tools/dt_varimportance ./tools/dt_varimportance.cc)
target_link_libraries( ./tools/dt_varimportance ${OpenCV_LIBS} )
104 | add_executable(./tools/randomize tools/randomize.cc) 105 | 106 | project(selectlines) 107 | add_executable(./tools/selectlines tools/selectlines.cc) 108 | 109 | project(typechecker) 110 | add_executable(./tools/typechecker tools/typechecker.cc) 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # C++ Machine Learning OpenCV 2.4.x Teaching Examples 2 | 3 | OpenCV C/C++ Interface Machine Learning legacy 2.4.x interface examples used for teaching, instruction and reference over the years (2010-2013) - 4 | 5 | **Uses older C++ interface to OpenCV ML library, with additional code** - _as these examples pre-date the new C++ OpenCV 3.x ML interface._ 6 | 7 | All tested with OpenCV 2.4.x and GCC (Linux) and known to work with MS Visual Studio 200x on Win32 / Win64. 8 | 9 | N.B. due to changes in the OpenCV API _these do not generically work with OpenCV > 2.4.x_ by default (except for the genetic algorithm (GA) example). 10 | 11 | --- 12 | 13 | ### Background: 14 | 15 | If I taught you between 2010 and 2013 at [Cranfield University](http://www.cranfield.ac.uk) or [ESTIA](http://www.estia.fr) - these are the C++ examples from class. 16 | 17 | Demo source code is provided _"as is"_ to aid your learning and understanding. 
18 | 19 | _For a long time, in the absence of other fully worked examples for the OpenCV machine learning components, these became the de facto reference for the use of these OpenCV routines (any conceptual errors or bad choices of parameters made here have propagated widely)._ 20 | 21 | --- 22 | 23 | ### How to download and run: 24 | 25 | In each sub-directory: 26 | 27 | + .cpp file(s) - code for the example 28 | + .name file - an explanation of the data and its source 29 | + .data file - the original and complete set of data (CSV file format) 30 | + .train file - the data to be used for training (CSV file format) 31 | + .test file - the data to be used for testing (CSV file format) 32 | + .xml, .yml - example data files for testing some tools 33 | 34 | All dataset examples are taken and reproduced from the [UCI Machine Learning Repository](http://archive.ics.uci.edu/ml/). 35 | 36 | Download each file as needed, or to download the entire repository and run each example try: 37 | 38 | ``` 39 | git clone https://github.com/tobybreckon/cpp-examples-ml.git 40 | cd cpp-examples-ml 41 | cmake . 42 | make 43 | cd 44 | ./ 45 | ``` 46 | 47 | The genetic algorithm (GA; inside directory ga_ex/) example runs with a webcam connected or from a command line supplied video file of a format OpenCV supports on your system (otherwise edit the code to provide your own image source). _N.B._ you may need to change the line near the top that specifies the camera device to use on this example - change "0" if you have one webcam, I have it set to "1" to skip my built-in laptop webcam and use the connected USB camera. 48 | 49 | --- 50 | 51 | If referencing these examples in your own work please use: 52 | ``` 53 | @TechReport{breckon2010, 54 | author = {Breckon, T.P. 
and Barnes, S.E.}, 55 | title = {Machine Learning - MSc Course Notes}, 56 | institution = {Cranfield University}, 57 | year = {2010}, 58 | address = {Bedfordshire, UK}, 59 | } 60 | ``` 61 | 62 | --- 63 | 64 | If you find any bugs please raise an issue (or better still submit a pull request, please) - toby.breckon@durham.ac.uk 65 | 66 | _"may the source be with you"_ - anon. 67 | -------------------------------------------------------------------------------- /dt_example1/car.names: -------------------------------------------------------------------------------- 1 | 1. Title: Car Evaluation Database 2 | 3 | 2. Sources: 4 | (a) Creator: Marko Bohanec 5 | (b) Donors: Marko Bohanec (marko.bohanec@ijs.si) 6 | Blaz Zupan (blaz.zupan@ijs.si) 7 | (c) Date: June, 1997 8 | 9 | 3. Past Usage: 10 | 11 | The hierarchical decision model, from which this dataset is 12 | derived, was first presented in 13 | 14 | M. Bohanec and V. Rajkovic: Knowledge acquisition and explanation for 15 | multi-attribute decision making. In 8th Intl Workshop on Expert 16 | Systems and their Applications, Avignon, France. pages 59-78, 1988. 17 | 18 | Within machine-learning, this dataset was used for the evaluation 19 | of HINT (Hierarchy INduction Tool), which was proved to be able to 20 | completely reconstruct the original hierarchical model. This, 21 | together with a comparison with C4.5, is presented in 22 | 23 | B. Zupan, M. Bohanec, I. Bratko, J. Demsar: Machine learning by 24 | function decomposition. ICML-97, Nashville, TN. 1997 (to appear) 25 | 26 | 4. Relevant Information Paragraph: 27 | 28 | Car Evaluation Database was derived from a simple hierarchical 29 | decision model originally developed for the demonstration of DEX 30 | (M. Bohanec, V. Rajkovic: Expert system for decision 31 | making. Sistemica 1(1), pp. 145-157, 1990.). The model evaluates 32 | cars according to the following concept structure: 33 | 34 | CAR car acceptability 35 | . PRICE overall price 36 | . . 
buying buying price 37 | . . maint price of the maintenance 38 | . TECH technical characteristics 39 | . . COMFORT comfort 40 | . . . doors number of doors 41 | . . . persons capacity in terms of persons to carry 42 | . . . lug_boot the size of luggage boot 43 | . . safety estimated safety of the car 44 | 45 | Input attributes are printed in lowercase. Besides the target 46 | concept (CAR), the model includes three intermediate concepts: 47 | PRICE, TECH, COMFORT. Every concept is in the original model 48 | related to its lower level descendants by a set of examples (for 49 | these examples sets see http://www-ai.ijs.si/BlazZupan/car.html). 50 | 51 | The Car Evaluation Database contains examples with the structural 52 | information removed, i.e., directly relates CAR to the six input 53 | attributes: buying, maint, doors, persons, lug_boot, safety. 54 | 55 | Because of known underlying concept structure, this database may be 56 | particularly useful for testing constructive induction and 57 | structure discovery methods. 58 | 59 | 5. Number of Instances: 1728 60 | (instances completely cover the attribute space) 61 | 62 | 6. Number of Attributes: 6 63 | 64 | 7. Attribute Values: 65 | 66 | buying v-high, high, med, low 67 | maint v-high, high, med, low 68 | doors 2, 3, 4, 5-more 69 | persons 2, 4, more 70 | lug_boot small, med, big 71 | safety low, med, high 72 | 73 | 8. Missing Attribute Values: none 74 | 75 | 9. 
Class Distribution (number of instances per class) 76 | 77 | class N N[%] 78 | ----------------------------- 79 | unacc 1210 (70.023 %) 80 | acc 384 (22.222 %) 81 | good 69 ( 3.993 %) 82 | v-good 65 ( 3.762 %) 83 | -------------------------------------------------------------------------------- /dt_example1/car.test: -------------------------------------------------------------------------------- 1 | high,high,3,2,med,high,unacc 2 | vhigh,high,4,more,small,high,unacc 3 | high,low,5more,2,med,high,unacc 4 | med,med,5more,more,small,low,unacc 5 | low,low,3,4,big,med,good 6 | high,med,5more,more,small,med,unacc 7 | low,low,4,more,med,low,unacc 8 | high,med,5more,2,small,low,unacc 9 | vhigh,vhigh,2,2,small,high,unacc 10 | high,high,3,more,small,low,unacc 11 | high,low,3,4,small,low,unacc 12 | vhigh,high,2,4,med,high,unacc 13 | med,vhigh,5more,more,med,med,acc 14 | high,vhigh,2,2,med,med,unacc 15 | vhigh,high,2,more,small,med,unacc 16 | med,high,2,4,small,high,acc 17 | vhigh,med,5more,4,big,med,acc 18 | med,low,4,2,med,low,unacc 19 | high,low,5more,2,small,med,unacc 20 | low,low,5more,2,big,med,unacc 21 | low,med,5more,more,small,high,good 22 | med,low,4,more,small,low,unacc 23 | low,vhigh,3,4,med,low,unacc 24 | med,vhigh,3,4,med,med,unacc 25 | vhigh,vhigh,4,2,big,low,unacc 26 | med,low,2,4,big,med,good 27 | med,high,4,more,med,high,acc 28 | high,low,5more,4,small,med,unacc 29 | high,vhigh,5more,4,small,high,unacc 30 | med,vhigh,5more,4,big,low,unacc 31 | vhigh,vhigh,2,4,small,med,unacc 32 | vhigh,vhigh,4,2,big,med,unacc 33 | low,med,4,2,big,high,unacc 34 | med,low,5more,more,small,high,good 35 | low,vhigh,5more,2,big,med,unacc 36 | vhigh,vhigh,2,more,med,med,unacc 37 | med,vhigh,5more,2,big,med,unacc 38 | high,low,3,more,med,high,acc 39 | low,low,5more,more,small,high,good 40 | high,low,3,4,big,low,unacc 41 | high,med,2,4,small,med,unacc 42 | vhigh,vhigh,2,more,small,high,unacc 43 | low,vhigh,2,4,med,med,unacc 44 | vhigh,med,4,2,big,low,unacc 45 | 
high,high,2,more,big,high,acc 46 | high,vhigh,2,2,big,high,unacc 47 | high,low,5more,more,big,low,unacc 48 | low,med,3,more,small,med,acc 49 | vhigh,low,2,4,med,low,unacc 50 | med,med,5more,2,big,med,unacc 51 | med,med,5more,4,med,high,vgood 52 | vhigh,low,3,more,big,low,unacc 53 | low,vhigh,5more,4,small,high,acc 54 | low,vhigh,2,4,small,med,unacc 55 | vhigh,low,2,more,small,low,unacc 56 | low,low,4,more,med,med,good 57 | vhigh,high,2,more,big,med,unacc 58 | high,vhigh,4,4,med,med,unacc 59 | vhigh,low,2,2,med,med,unacc 60 | med,med,5more,2,big,low,unacc 61 | med,high,5more,more,small,high,acc 62 | low,low,5more,4,big,high,vgood 63 | high,high,5more,2,small,high,unacc 64 | high,vhigh,4,4,big,low,unacc 65 | med,med,3,more,med,high,vgood 66 | med,low,4,4,big,high,vgood 67 | low,high,4,more,med,high,vgood 68 | low,vhigh,4,4,small,high,acc 69 | med,high,4,more,big,low,unacc 70 | high,vhigh,2,4,big,low,unacc 71 | high,low,3,4,med,high,acc 72 | high,vhigh,5more,2,med,low,unacc 73 | vhigh,high,5more,4,small,high,unacc 74 | med,med,4,4,small,med,acc 75 | vhigh,vhigh,3,more,med,high,unacc 76 | med,high,4,2,small,low,unacc 77 | high,med,2,more,med,high,acc 78 | med,high,5more,4,big,low,unacc 79 | med,low,5more,more,small,low,unacc 80 | low,low,5more,2,small,high,unacc 81 | low,low,3,2,small,high,unacc 82 | low,high,2,4,big,high,vgood 83 | med,high,3,more,med,med,acc 84 | vhigh,low,5more,4,big,high,acc 85 | vhigh,vhigh,2,more,med,low,unacc 86 | low,high,5more,more,big,low,unacc 87 | med,med,2,more,big,high,vgood 88 | low,med,3,4,big,med,good 89 | med,low,5more,2,small,high,unacc 90 | high,low,5more,2,med,med,unacc 91 | vhigh,vhigh,4,more,small,low,unacc 92 | med,high,2,more,big,med,acc 93 | vhigh,vhigh,5more,2,med,high,unacc 94 | vhigh,vhigh,5more,4,med,low,unacc 95 | vhigh,high,3,4,med,low,unacc 96 | vhigh,vhigh,2,2,big,high,unacc 97 | med,vhigh,3,2,big,med,unacc 98 | high,med,5more,4,small,med,unacc 99 | low,med,3,more,med,high,vgood 100 | low,vhigh,4,more,big,low,unacc 101 
| low,vhigh,3,4,big,med,acc 102 | med,med,3,4,small,med,acc 103 | vhigh,med,3,4,big,med,acc 104 | med,med,5more,4,big,high,vgood 105 | low,vhigh,4,more,small,med,unacc 106 | vhigh,vhigh,5more,4,big,high,unacc 107 | high,high,5more,4,med,high,acc 108 | med,low,2,4,big,low,unacc 109 | vhigh,high,4,2,big,med,unacc 110 | med,low,3,more,big,low,unacc 111 | low,med,4,2,med,low,unacc 112 | vhigh,vhigh,2,2,small,med,unacc 113 | med,vhigh,5more,4,med,med,acc 114 | med,low,5more,2,big,med,unacc 115 | low,high,3,more,small,low,unacc 116 | high,vhigh,3,more,small,med,unacc 117 | med,med,4,2,big,med,unacc 118 | vhigh,low,4,more,med,low,unacc 119 | low,high,4,4,med,med,acc 120 | med,vhigh,2,4,big,high,acc 121 | high,high,3,2,big,low,unacc 122 | vhigh,vhigh,5more,2,med,low,unacc 123 | high,high,3,more,med,high,acc 124 | low,high,3,more,med,low,unacc 125 | med,med,2,more,med,high,acc 126 | med,med,5more,4,med,med,acc 127 | vhigh,med,2,more,big,high,acc 128 | med,med,2,more,small,high,unacc 129 | vhigh,med,3,2,big,low,unacc 130 | low,low,5more,2,big,low,unacc 131 | vhigh,med,5more,4,small,med,unacc 132 | high,high,4,more,med,med,acc 133 | high,low,2,2,small,med,unacc 134 | low,high,5more,2,med,med,unacc 135 | low,med,3,2,small,med,unacc 136 | med,low,2,4,med,low,unacc 137 | low,vhigh,2,more,med,med,unacc 138 | med,vhigh,2,more,big,low,unacc 139 | vhigh,med,3,2,small,med,unacc 140 | vhigh,vhigh,5more,2,big,med,unacc 141 | low,high,5more,4,big,med,acc 142 | vhigh,high,3,4,small,high,unacc 143 | high,vhigh,5more,4,big,high,unacc 144 | med,med,4,more,small,low,unacc 145 | med,high,2,more,med,med,unacc 146 | med,high,4,2,med,low,unacc 147 | med,high,5more,more,med,low,unacc 148 | med,low,2,2,small,high,unacc 149 | high,vhigh,3,4,big,med,unacc 150 | high,high,5more,more,small,low,unacc 151 | high,low,3,2,big,low,unacc 152 | vhigh,med,2,4,big,med,acc 153 | low,vhigh,5more,more,small,low,unacc 154 | vhigh,low,2,more,big,high,acc 155 | low,med,2,more,small,med,unacc 156 | 
low,med,5more,more,big,low,unacc 157 | med,low,5more,4,small,high,good 158 | vhigh,med,5more,more,small,med,unacc 159 | med,high,5more,2,small,low,unacc 160 | vhigh,high,5more,4,big,med,unacc 161 | low,low,5more,4,small,med,acc 162 | med,high,5more,4,big,high,acc 163 | med,high,2,2,small,low,unacc 164 | low,low,4,2,big,high,unacc 165 | high,high,2,4,small,high,acc 166 | high,low,5more,more,small,med,unacc 167 | vhigh,med,5more,more,med,low,unacc 168 | vhigh,low,4,2,small,med,unacc 169 | high,low,4,4,big,high,acc 170 | low,low,5more,4,med,high,vgood 171 | low,vhigh,5more,2,small,high,unacc 172 | high,high,4,4,med,low,unacc 173 | med,low,3,2,small,med,unacc 174 | vhigh,med,4,4,small,high,acc 175 | low,med,2,more,big,med,good 176 | vhigh,med,2,4,small,high,acc 177 | high,vhigh,5more,2,small,high,unacc 178 | med,med,2,2,small,high,unacc 179 | low,low,4,4,small,med,acc 180 | high,low,5more,more,med,high,acc 181 | med,high,2,more,small,med,unacc 182 | high,high,3,4,med,low,unacc 183 | vhigh,med,4,4,med,med,acc 184 | med,med,2,4,small,low,unacc 185 | high,low,5more,2,small,low,unacc 186 | vhigh,vhigh,5more,more,big,med,unacc 187 | high,high,5more,more,small,high,acc 188 | med,med,3,2,big,med,unacc 189 | high,med,4,more,med,high,acc 190 | low,vhigh,3,more,med,med,acc 191 | high,med,3,2,big,med,unacc 192 | high,high,4,2,small,high,unacc 193 | med,med,5more,more,med,low,unacc 194 | low,vhigh,4,2,small,high,unacc 195 | low,high,4,2,small,low,unacc 196 | med,vhigh,4,2,med,low,unacc 197 | low,med,3,2,small,low,unacc 198 | vhigh,high,5more,more,med,low,unacc 199 | low,med,2,more,med,high,good 200 | vhigh,vhigh,3,4,big,low,unacc 201 | vhigh,low,5more,2,med,low,unacc 202 | low,low,4,4,med,low,unacc 203 | vhigh,high,3,2,small,med,unacc 204 | high,low,4,4,med,high,acc 205 | high,low,4,2,small,high,unacc 206 | low,low,3,2,med,high,unacc 207 | vhigh,med,3,4,med,high,acc 208 | vhigh,low,3,2,big,high,unacc 209 | low,med,5more,2,med,med,unacc 210 | med,low,2,4,med,med,acc 211 | 
vhigh,high,5more,4,small,low,unacc 212 | low,med,2,4,big,med,good 213 | low,high,3,more,med,high,vgood 214 | low,high,4,more,big,low,unacc 215 | low,low,5more,2,med,med,unacc 216 | vhigh,med,3,more,big,low,unacc 217 | low,vhigh,4,more,big,med,acc 218 | vhigh,med,4,4,small,med,unacc 219 | vhigh,low,5more,2,small,low,unacc 220 | med,vhigh,3,2,big,low,unacc 221 | high,low,5more,2,small,high,unacc 222 | low,med,2,more,med,med,acc 223 | med,vhigh,2,4,med,med,unacc 224 | vhigh,high,4,more,big,med,unacc 225 | high,high,5more,4,big,low,unacc 226 | high,low,5more,more,med,low,unacc 227 | high,med,4,more,big,med,acc 228 | low,vhigh,3,2,big,med,unacc 229 | low,low,2,4,small,med,acc 230 | vhigh,vhigh,3,more,small,high,unacc 231 | vhigh,high,4,2,med,low,unacc 232 | high,low,2,2,big,med,unacc 233 | high,med,4,more,big,high,acc 234 | low,high,2,more,big,low,unacc 235 | med,vhigh,3,more,big,low,unacc 236 | vhigh,high,3,more,small,high,unacc 237 | high,med,3,more,med,med,acc 238 | high,high,4,2,big,low,unacc 239 | high,med,2,more,small,med,unacc 240 | low,vhigh,5more,2,med,high,unacc 241 | high,med,2,more,big,low,unacc 242 | low,vhigh,2,more,small,med,unacc 243 | low,high,5more,2,small,high,unacc 244 | med,vhigh,4,2,small,low,unacc 245 | low,med,4,2,med,med,unacc 246 | vhigh,low,5more,4,med,med,acc 247 | vhigh,high,5more,more,big,med,unacc 248 | high,low,5more,4,small,high,acc 249 | high,high,2,2,big,low,unacc 250 | high,med,5more,more,med,low,unacc 251 | low,high,3,more,big,low,unacc 252 | med,low,3,4,big,high,vgood 253 | med,med,2,4,med,med,acc 254 | high,med,5more,2,big,low,unacc 255 | high,low,4,more,med,med,acc 256 | high,med,3,2,med,high,unacc 257 | high,low,3,more,big,low,unacc 258 | high,med,2,4,big,low,unacc 259 | vhigh,high,3,more,med,high,unacc 260 | med,high,4,4,big,high,acc 261 | low,high,5more,more,med,med,acc 262 | vhigh,low,5more,more,med,low,unacc 263 | med,high,4,4,small,med,unacc 264 | med,high,5more,2,med,high,unacc 265 | vhigh,vhigh,4,more,med,med,unacc 266 | 
high,vhigh,3,4,small,low,unacc 267 | med,med,4,4,med,med,acc 268 | high,high,2,2,big,high,unacc 269 | low,med,5more,4,med,low,unacc 270 | med,high,3,more,big,low,unacc 271 | vhigh,vhigh,3,2,med,med,unacc 272 | low,high,2,2,med,med,unacc 273 | med,low,2,2,med,high,unacc 274 | med,high,5more,2,big,med,unacc 275 | low,vhigh,4,4,med,med,acc 276 | med,vhigh,3,more,small,low,unacc 277 | vhigh,high,2,4,big,med,unacc 278 | med,low,4,2,big,med,unacc 279 | vhigh,low,2,2,small,low,unacc 280 | vhigh,med,5more,4,big,low,unacc 281 | vhigh,high,2,2,big,low,unacc 282 | high,low,2,4,small,low,unacc 283 | med,high,3,more,big,med,acc 284 | med,med,3,more,big,low,unacc 285 | med,vhigh,3,4,med,high,acc 286 | vhigh,med,5more,2,big,low,unacc 287 | high,high,4,2,big,med,unacc 288 | high,vhigh,3,more,small,low,unacc 289 | low,low,5more,4,big,low,unacc 290 | vhigh,vhigh,2,4,big,low,unacc 291 | low,vhigh,4,2,big,med,unacc 292 | high,med,5more,4,small,high,acc 293 | low,med,4,more,small,high,good 294 | high,low,2,2,small,high,unacc 295 | high,high,5more,more,med,high,acc 296 | low,med,4,2,small,med,unacc 297 | med,vhigh,5more,more,big,low,unacc 298 | vhigh,low,5more,2,med,med,unacc 299 | high,low,5more,4,med,low,unacc 300 | med,low,3,4,med,high,good 301 | vhigh,vhigh,2,more,big,low,unacc 302 | vhigh,vhigh,5more,more,small,high,unacc 303 | low,med,3,2,med,low,unacc 304 | vhigh,med,2,more,med,low,unacc 305 | vhigh,med,3,2,small,low,unacc 306 | low,low,5more,4,small,low,unacc 307 | high,vhigh,5more,more,med,high,unacc 308 | vhigh,med,2,more,med,high,acc 309 | low,vhigh,2,4,med,low,unacc 310 | low,vhigh,4,4,med,low,unacc 311 | med,med,5more,more,big,low,unacc 312 | vhigh,low,3,4,small,med,unacc 313 | med,low,4,2,small,low,unacc 314 | low,low,3,4,big,high,vgood 315 | low,high,5more,more,small,med,acc 316 | vhigh,low,3,2,small,high,unacc 317 | vhigh,high,4,2,big,high,unacc 318 | med,low,2,more,big,low,unacc 319 | low,med,4,more,med,high,vgood 320 | med,vhigh,4,2,small,med,unacc 321 | 
high,med,3,more,big,low,unacc 322 | vhigh,vhigh,3,4,big,high,unacc 323 | med,vhigh,4,more,big,high,acc 324 | low,vhigh,2,2,big,low,unacc 325 | high,med,3,4,med,low,unacc 326 | low,vhigh,3,2,big,low,unacc 327 | low,vhigh,2,more,med,high,acc 328 | vhigh,med,5more,more,med,med,acc 329 | low,low,4,4,small,low,unacc 330 | med,med,2,4,big,high,vgood 331 | vhigh,vhigh,5more,4,med,med,unacc 332 | med,high,3,2,small,med,unacc 333 | high,high,2,4,small,low,unacc 334 | low,high,2,4,small,med,acc 335 | vhigh,low,3,more,med,low,unacc 336 | med,med,4,2,big,high,unacc 337 | med,high,5more,4,med,low,unacc 338 | vhigh,low,4,2,big,high,unacc 339 | med,vhigh,5more,more,small,high,acc 340 | med,vhigh,5more,more,small,med,unacc 341 | high,med,5more,2,small,high,unacc 342 | high,med,2,2,small,med,unacc 343 | low,high,5more,more,small,high,acc 344 | vhigh,high,4,4,med,low,unacc 345 | vhigh,med,3,2,med,high,unacc 346 | -------------------------------------------------------------------------------- /dt_example1/decisiontree.cpp: -------------------------------------------------------------------------------- 1 | // Example : decision tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : dt_example1 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2010 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 1383 22 | #define ATTRIBUTES_PER_SAMPLE 6 // not the last as this is the class 23 | #define NUMBER_OF_TESTING_SAMPLES 345 24 | 25 | #define 
NUMBER_OF_CLASSES 4 // classes 0->3 26 | static char* CLASSES[NUMBER_OF_CLASSES] = 27 | {(char *) "unacc", (char *) "acc", (char *) "good", (char *) "vgood"}; 28 | 29 | /******************************************************************************/ 30 | 31 | // a basic hash function from: http://www.cse.yorku.ca/~oz/hash.html 32 | 33 | int hash(char *str) 34 | { 35 | int hash = 5381; 36 | int c; 37 | 38 | while ((c = (*str++))) 39 | { 40 | hash = ((hash << 5) + hash) + c; 41 | } 42 | 43 | return hash; 44 | } 45 | 46 | /******************************************************************************/ 47 | 48 | // loads the sample database from file (which is a CSV text file) 49 | 50 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 51 | int n_samples ) 52 | { 53 | char tmp_buf[10]; 54 | int i = 0; 55 | char c; 56 | 57 | // if we can't read the input file then return 0 58 | FILE* f = fopen( filename, "r" ); 59 | if( !f ) 60 | { 61 | printf("ERROR: cannot read file %s\n", filename); 62 | return 0; // all not OK 63 | } 64 | 65 | // for each sample in the file 66 | 67 | for(int line = 0; line < n_samples; line++) 68 | { 69 | 70 | // for each attribute on the line in the file 71 | 72 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 73 | { 74 | // last attribute is the class 75 | 76 | if (attribute == 6) 77 | { 78 | c = '\0'; 79 | for(i=0; c != '\n'; i++) 80 | { 81 | c = fgetc(f); 82 | tmp_buf[i] = c; 83 | } 84 | tmp_buf[i - 1] = '\0'; 85 | //printf("%s\n", tmp_buf); 86 | 87 | // find the class number and record this 88 | 89 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 90 | { 91 | if (strcmp(CLASSES[i], tmp_buf) == 0) 92 | { 93 | classes.at(line, 0) = (float) i; 94 | } 95 | } 96 | } 97 | else 98 | { 99 | 100 | // for all other attributes just read in the string value 101 | // and use a hash function to convert to to a float 102 | // (N.B. openCV uses a floating point decision tree implementation!) 
103 | 104 | c = '\0'; 105 | for(i=0; c != ','; i++) 106 | { 107 | c = fgetc(f); 108 | tmp_buf[i] = c; 109 | } 110 | tmp_buf[i - 1] = '\0'; 111 | data.at(line, attribute) = (float) hash(tmp_buf); 112 | 113 | //printf("%s,", tmp_buf); 114 | } 115 | } 116 | } 117 | 118 | fclose(f); 119 | 120 | return 1; // all OK 121 | } 122 | 123 | /******************************************************************************/ 124 | 125 | int main( int argc, char** argv ) 126 | { 127 | // lets just check the version first 128 | 129 | printf ("OpenCV version %s (%d.%d.%d)\n", 130 | CV_VERSION, 131 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 132 | 133 | // define training data storage matrices (one for attribute examples, one 134 | // for classifications) 135 | 136 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 137 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 138 | 139 | //define testing data storage matrices 140 | 141 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 142 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 143 | 144 | // define all the attributes as categorical (i.e. categories) 145 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 146 | // that can be assigned on a per attribute basis 147 | 148 | // this is a classification problem (i.e. 
predict a discrete number of class 149 | // outputs) so also the last (+1) output var_type element to CV_VAR_CATEGORICAL 150 | 151 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 152 | var_type = Scalar(CV_VAR_CATEGORICAL); // all inputs are categorical 153 | 154 | CvDTreeNode* resultNode; // node returned from a prediction 155 | 156 | // load training and testing data sets 157 | 158 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 159 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 160 | { 161 | // define the parameters for training the decision tree 162 | 163 | float priors[] = { 1, 1, 1, 1 }; // weights of each classification for classes 164 | //float priors[] = { 70, 22, 4, 4 }; // weights of each classification for classes 165 | 166 | CvDTreeParams params = CvDTreeParams(25, // max depth 167 | 10, // min sample count 168 | 0, // regression accuracy: N/A here 169 | false, // compute surrogate split, no missing data 170 | 25, // max number of categories (use sub-optimal algorithm for larger numbers) 171 | 10, // the number of cross-validation folds 172 | true, // use 1SE rule => smaller tree 173 | false, // throw away the pruned tree branches 174 | priors // the array of priors, the bigger weight, the more attention 175 | // to the maligant cases 176 | // (i.e. 
a case will be judjed to be maligant with bigger chance) 177 | ); 178 | 179 | 180 | // train decision tree classifier (using training data) 181 | 182 | printf( "\nUsing training database: %s\n\n", argv[1]); 183 | CvDTree* dtree = new CvDTree; 184 | 185 | dtree->train(training_data, CV_ROW_SAMPLE, training_classifications, 186 | Mat(), Mat(), var_type, Mat(), params); 187 | 188 | // perform classifier testing and report results 189 | 190 | Mat test_sample; 191 | int correct_class = 0; 192 | int wrong_class = 0; 193 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0}; 194 | 195 | printf( "\nUsing testing database: %s\n\n", argv[2]); 196 | 197 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 198 | { 199 | 200 | // extract a row from the testing matrix 201 | 202 | test_sample = testing_data.row(tsample); 203 | 204 | // run decision tree prediction 205 | 206 | resultNode = dtree->predict(test_sample, Mat(), false); 207 | 208 | printf("Testing Sample %i -> class result %s\n", tsample, CLASSES[(int) (resultNode->value)]); 209 | 210 | // if the prediction and the (true) testing classification are the same 211 | // (N.B. openCV uses a floating point decision tree implementation!) 
212 | 213 | if (fabs(resultNode->value - testing_classifications.at(tsample, 0)) 214 | >= FLT_EPSILON) 215 | { 216 | // if they differ more than floating point error => wrong class 217 | 218 | wrong_class++; 219 | 220 | false_positives[(int) resultNode->value]++; 221 | 222 | } 223 | else 224 | { 225 | 226 | // otherwise correct 227 | 228 | correct_class++; 229 | } 230 | } 231 | 232 | printf( "\nResults on the testing database: %s\n" 233 | "\tCorrect classification: %d (%g%%)\n" 234 | "\tWrong classifications: %d (%g%%)\n", 235 | argv[2], 236 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 237 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 238 | 239 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 240 | { 241 | printf( "\tClass %s false postives %d (%g%%)\n", CLASSES[i], 242 | false_positives[i], 243 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 244 | } 245 | 246 | // all matrix memory free by destructors 247 | 248 | // all OK : main returns 0 249 | 250 | return 0; 251 | } 252 | 253 | // not OK : main returns -1 254 | 255 | printf("usage: %s training_data_file testing_data_file\n", argv[0]); 256 | return -1; 257 | } 258 | /******************************************************************************/ 259 | -------------------------------------------------------------------------------- /dt_example2/decisiontree.cpp: -------------------------------------------------------------------------------- 1 | // Example : decision tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : dt_example2 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2010 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ 
"cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 449 22 | #define ATTRIBUTES_PER_SAMPLE 30 // not the first two as patient ID and class 23 | #define NUMBER_OF_TESTING_SAMPLES 120 24 | 25 | static char CLASSES[2] = {'B', 'M'}; // class B = 0, class M = 1 26 | 27 | /******************************************************************************/ 28 | 29 | // loads the sample database from file (which is a CSV text file) 30 | 31 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 32 | int n_samples ) 33 | { 34 | char tmpc; 35 | float tmpf; 36 | 37 | // if we can't read the input file then return 0 38 | FILE* f = fopen( filename, "r" ); 39 | if( !f ) 40 | { 41 | printf("ERROR: cannot read file %s\n", filename); 42 | return 0; // all not OK 43 | } 44 | 45 | // for each sample in the file 46 | 47 | for(int line = 0; line < n_samples; line++) 48 | { 49 | 50 | // for each attribute on the line in the file 51 | 52 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 2); attribute++) 53 | { 54 | if (attribute == 0) 55 | { 56 | fscanf(f, "%f,", &tmpf); 57 | 58 | // ignore attribute 0 (as it's the patient ID) 59 | 60 | continue; 61 | } 62 | else if (attribute == 1) 63 | { 64 | 65 | // attribute 2 (in the database) is the classification 66 | // record 1 = M = malignant 67 | // record 0 = B = benign 68 | 69 | fscanf(f, "%c,", &tmpc); 70 | 71 | switch(tmpc) 72 | { 73 | case 'M': 74 | classes.at(line, 0) = 1.0; 75 | break; 76 | case 'B': 77 | classes.at(line, 0) = 0.0; 78 | break; 79 | default: 80 | printf("ERROR: unexpected class in file %s\n", filename); 81 | return 0; // all not OK 82 | } 83 | 84 | // printf("%c,", tmpc); 85 | } 86 | else 87 | { 88 | fscanf(f, "%f,", &tmpf); 89 | data.at(line, (attribute - 2)) = (float) tmpf; 90 | // printf("%f,", data.at(line, (attribute - 2))); 
91 | } 92 | } 93 | fscanf(f, "\n"); 94 | // printf("\n"); 95 | } 96 | 97 | fclose(f); 98 | 99 | return 1; // all OK 100 | } 101 | 102 | /******************************************************************************/ 103 | 104 | int main( int argc, char** argv ) 105 | { 106 | // lets just check the version first 107 | 108 | printf ("OpenCV version %s (%d.%d.%d)\n", 109 | CV_VERSION, 110 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 111 | 112 | // define training data storage matrices (one for attribute examples, one 113 | // for classifications) 114 | 115 | Mat training_data = 116 | Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 117 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 118 | 119 | //define testing data storage matrices 120 | 121 | Mat testing_data = 122 | Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 123 | Mat testing_classifications = 124 | Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 125 | 126 | // define all the attributes as numerical 127 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 128 | // that can be assigned on a per attribute basis 129 | 130 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 131 | var_type = Scalar(CV_VAR_NUMERICAL); // all inputs are numerical 132 | 133 | // this is a classification problem (i.e. 
predict a discrete number of class 134 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 135 | 136 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 137 | 138 | CvDTreeNode* resultNode; // node returned from a prediction 139 | 140 | // load training and testing data sets 141 | 142 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 143 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 144 | { 145 | // define the parameters for training the decision tree 146 | 147 | float priors[] = { 1, 1 }; // weights of each classification for classes 148 | // 0 = B = benign, 1 = M = malignant 149 | 150 | CvDTreeParams params = CvDTreeParams(8, // max depth 151 | 5, // min sample count 152 | 0, // regression accuracy: N/A here 153 | false, // compute surrogate split, no missing data 154 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 155 | 10, // the number of cross-validation folds 156 | true, // use 1SE rule => smaller tree 157 | false, // throw away the pruned tree branches 158 | priors // the array of priors, the bigger weight, the more attention 159 | // to the maligant cases 160 | // (i.e. 
a case will be judjed to be maligant with bigger chance) 161 | ); 162 | 163 | 164 | // train decision tree classifier (using training data) 165 | 166 | printf( "\nUsing training database: %s\n\n", argv[1]); 167 | CvDTree* dtree = new CvDTree; 168 | 169 | dtree->train(training_data, CV_ROW_SAMPLE, 170 | training_classifications, 171 | Mat(), Mat(), var_type, Mat(), params); 172 | 173 | // perform classifier testing and report results 174 | 175 | Mat test_sample; 176 | int correct_class = 0; 177 | int wrong_class = 0; 178 | int m_class_fp = 0; 179 | int b_class_fp = 0; 180 | 181 | printf( "\nUsing testing database: %s\n\n", argv[2]); 182 | 183 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 184 | { 185 | 186 | // extract a row from the testing matrix 187 | 188 | test_sample = testing_data.row(tsample); 189 | 190 | // run decision tree prediction 191 | 192 | resultNode = dtree->predict(test_sample, Mat(), false); 193 | 194 | printf("Testing Sample %i -> class result %c\n", tsample, CLASSES[(int) (resultNode->value)]); 195 | 196 | // if the prediction and the (true) testing classification are the same 197 | // (N.B. openCV uses a floating point decision tree implementation!) 
198 | 199 | if (fabs(resultNode->value - testing_classifications.at(tsample, 0)) 200 | >= FLT_EPSILON) 201 | { 202 | // if they differ more than floating point error => wrong class 203 | 204 | wrong_class++; 205 | 206 | // if the result class is different from 1.0 (M class label) by 207 | // more than floating point error => B class false +ve 208 | 209 | if (fabs(resultNode->value - 1.0) >= FLT_EPSILON) 210 | { 211 | b_class_fp++; 212 | } 213 | else 214 | { 215 | 216 | // otherwise it's an 217 | 218 | m_class_fp++; 219 | } 220 | 221 | } 222 | else 223 | { 224 | 225 | // otherwise correct 226 | 227 | correct_class++; 228 | } 229 | } 230 | 231 | printf( "\nResults on the testing database: %s\n" 232 | "\tCorrect classification: %d (%g%%)\n" 233 | "\tWrong classifications: %d (%g%%)\n" 234 | "\tM false +ve classifications: %d (%g%%)\n" 235 | "\tB false +ve classifications: %d (%g%%)\n", 236 | argv[2], 237 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 238 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES, 239 | m_class_fp, (double) m_class_fp*100/NUMBER_OF_TESTING_SAMPLES, 240 | b_class_fp, (double) b_class_fp*100/NUMBER_OF_TESTING_SAMPLES ); 241 | 242 | // all matrix memory free by destructors 243 | 244 | 245 | // all OK : main returns 0 246 | 247 | return 0; 248 | } 249 | 250 | // not OK : main returns -1 251 | 252 | return -1; 253 | } 254 | /******************************************************************************/ 255 | -------------------------------------------------------------------------------- /dt_example2/wdbc.names: -------------------------------------------------------------------------------- 1 | 1. Title: Wisconsin Diagnostic Breast Cancer (WDBC) 2 | 3 | 2. Source Information 4 | 5 | a) Creators: 6 | 7 | Dr. William H. Wolberg, General Surgery Dept., University of 8 | Wisconsin, Clinical Sciences Center, Madison, WI 53792 9 | wolberg@eagle.surgery.wisc.edu 10 | 11 | W. 
Nick Street, Computer Sciences Dept., University of 12 | Wisconsin, 1210 West Dayton St., Madison, WI 53706 13 | street@cs.wisc.edu 608-262-6619 14 | 15 | Olvi L. Mangasarian, Computer Sciences Dept., University of 16 | Wisconsin, 1210 West Dayton St., Madison, WI 53706 17 | olvi@cs.wisc.edu 18 | 19 | b) Donor: Nick Street 20 | 21 | c) Date: November 1995 22 | 23 | 3. Past Usage: 24 | 25 | first usage: 26 | 27 | W.N. Street, W.H. Wolberg and O.L. Mangasarian 28 | Nuclear feature extraction for breast tumor diagnosis. 29 | IS&T/SPIE 1993 International Symposium on Electronic Imaging: Science 30 | and Technology, volume 1905, pages 861-870, San Jose, CA, 1993. 31 | 32 | OR literature: 33 | 34 | O.L. Mangasarian, W.N. Street and W.H. Wolberg. 35 | Breast cancer diagnosis and prognosis via linear programming. 36 | Operations Research, 43(4), pages 570-577, July-August 1995. 37 | 38 | Medical literature: 39 | 40 | W.H. Wolberg, W.N. Street, and O.L. Mangasarian. 41 | Machine learning techniques to diagnose breast cancer from 42 | fine-needle aspirates. 43 | Cancer Letters 77 (1994) 163-171. 44 | 45 | W.H. Wolberg, W.N. Street, and O.L. Mangasarian. 46 | Image analysis and machine learning applied to breast cancer 47 | diagnosis and prognosis. 48 | Analytical and Quantitative Cytology and Histology, Vol. 17 49 | No. 2, pages 77-87, April 1995. 50 | 51 | W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. 52 | Computerized breast cancer diagnosis and prognosis from fine 53 | needle aspirates. 54 | Archives of Surgery 1995;130:511-516. 55 | 56 | W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. 57 | Computer-derived nuclear features distinguish malignant from 58 | benign breast cytology. 59 | Human Pathology, 26:792--796, 1995. 
60 | 61 | See also: 62 | http://www.cs.wisc.edu/~olvi/uwmp/mpml.html 63 | http://www.cs.wisc.edu/~olvi/uwmp/cancer.html 64 | 65 | Results: 66 | 67 | - predicting field 2, diagnosis: B = benign, M = malignant 68 | - sets are linearly separable using all 30 input features 69 | - best predictive accuracy obtained using one separating plane 70 | in the 3-D space of Worst Area, Worst Smoothness and 71 | Mean Texture. Estimated accuracy 97.5% using repeated 72 | 10-fold crossvalidations. Classifier has correctly 73 | diagnosed 176 consecutive new patients as of November 74 | 1995. 75 | 76 | 4. Relevant information 77 | 78 | Features are computed from a digitized image of a fine needle 79 | aspirate (FNA) of a breast mass. They describe 80 | characteristics of the cell nuclei present in the image. 81 | A few of the images can be found at 82 | http://www.cs.wisc.edu/~street/images/ 83 | 84 | Separating plane described above was obtained using 85 | Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree 86 | Construction Via Linear Programming." Proceedings of the 4th 87 | Midwest Artificial Intelligence and Cognitive Science Society, 88 | pp. 97-101, 1992], a classification method which uses linear 89 | programming to construct a decision tree. Relevant features 90 | were selected using an exhaustive search in the space of 1-4 91 | features and 1-3 separating planes. 92 | 93 | The actual linear program used to obtain the separating plane 94 | in the 3-dimensional space is that described in: 95 | [K. P. Bennett and O. L. Mangasarian: "Robust Linear 96 | Programming Discrimination of Two Linearly Inseparable Sets", 97 | Optimization Methods and Software 1, 1992, 23-34]. 98 | 99 | 100 | This database is also available through the UW CS ftp server: 101 | 102 | ftp ftp.cs.wisc.edu 103 | cd math-prog/cpo-dataset/machine-learn/WDBC/ 104 | 105 | 5. Number of instances: 569 106 | 107 | 6. Number of attributes: 32 (ID, diagnosis, 30 real-valued input features) 108 | 109 | 7. 
Attribute information 110 | 111 | 1) ID number 112 | 2) Diagnosis (M = malignant, B = benign) 113 | 3-32) 114 | 115 | Ten real-valued features are computed for each cell nucleus: 116 | 117 | a) radius (mean of distances from center to points on the perimeter) 118 | b) texture (standard deviation of gray-scale values) 119 | c) perimeter 120 | d) area 121 | e) smoothness (local variation in radius lengths) 122 | f) compactness (perimeter^2 / area - 1.0) 123 | g) concavity (severity of concave portions of the contour) 124 | h) concave points (number of concave portions of the contour) 125 | i) symmetry 126 | j) fractal dimension ("coastline approximation" - 1) 127 | 128 | Several of the papers listed above contain detailed descriptions of 129 | how these features are computed. 130 | 131 | The mean, standard error, and "worst" or largest (mean of the three 132 | largest values) of these features were computed for each image, 133 | resulting in 30 features. For instance, field 3 is Mean Radius, field 134 | 13 is Radius SE, field 23 is Worst Radius. 135 | 136 | All feature values are recorded with four significant digits. 137 | 138 | 8. Missing attribute values: none 139 | 140 | 9.
Class distribution: 357 benign, 212 malignant -------------------------------------------------------------------------------- /dt_example2/wdbc.test: -------------------------------------------------------------------------------- 1 | 86517,M,18.66,17.12,121.4,1077,0.1054,0.11,0.1457,0.08665,0.1966,0.06213,0.7128,1.581,4.895,90.47,0.008102,0.02101,0.03342,0.01601,0.02045,0.00457,22.25,24.9,145.4,1549,0.1503,0.2291,0.3272,0.1674,0.2894,0.08456 2 | 84501001,M,12.46,24.04,83.97,475.9,0.1186,0.2396,0.2273,0.08543,0.203,0.08243,0.2976,1.599,2.039,23.94,0.007149,0.07217,0.07743,0.01432,0.01789,0.01008,15.09,40.68,97.65,711.4,0.1853,1.058,1.105,0.221,0.4366,0.2075 3 | 889403,M,15.61,19.38,100,758.6,0.0784,0.05616,0.04209,0.02847,0.1547,0.05443,0.2298,0.9988,1.534,22.18,0.002826,0.009105,0.01311,0.005174,0.01013,0.001345,17.91,31.67,115.9,988.6,0.1084,0.1807,0.226,0.08568,0.2683,0.06829 4 | 8911163,M,17.93,24.48,115.2,998.9,0.08855,0.07027,0.05699,0.04744,0.1538,0.0551,0.4212,1.433,2.765,45.81,0.005444,0.01169,0.01622,0.008522,0.01419,0.002751,20.92,34.69,135.1,1320,0.1315,0.1806,0.208,0.1136,0.2504,0.07948 5 | 90944601,B,13.78,15.79,88.37,585.9,0.08817,0.06718,0.01055,0.009937,0.1405,0.05848,0.3563,0.4833,2.235,29.34,0.006432,0.01156,0.007741,0.005657,0.01227,0.002564,15.27,17.5,97.9,706.6,0.1072,0.1071,0.03517,0.03312,0.1859,0.0681 6 | 892604,B,12.46,19.89,80.43,471.3,0.08451,0.1014,0.0683,0.03099,0.1781,0.06249,0.3642,1.04,2.579,28.32,0.00653,0.03369,0.04712,0.01403,0.0274,0.004651,13.46,23.07,88.13,551.3,0.105,0.2158,0.1904,0.07625,0.2685,0.07764 7 | 898690,B,11.47,16.03,73.02,402.7,0.09076,0.05886,0.02587,0.02322,0.1634,0.06372,0.1707,0.7615,1.09,12.25,0.009191,0.008548,0.0094,0.006315,0.01755,0.003009,12.51,20.79,79.67,475.8,0.1531,0.112,0.09823,0.06548,0.2851,0.08763 8 | 
899147,B,11.95,14.96,77.23,426.7,0.1158,0.1206,0.01171,0.01787,0.2459,0.06581,0.361,1.05,2.455,26.65,0.0058,0.02417,0.007816,0.01052,0.02734,0.003114,12.81,17.72,83.09,496.2,0.1293,0.1885,0.03122,0.04766,0.3124,0.0759 9 | 895633,M,16.26,21.88,107.5,826.8,0.1165,0.1283,0.1799,0.07981,0.1869,0.06532,0.5706,1.457,2.961,57.72,0.01056,0.03756,0.05839,0.01186,0.04022,0.006187,17.73,25.21,113.7,975.2,0.1426,0.2116,0.3344,0.1047,0.2736,0.07953 10 | 926954,M,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,0.159,0.05648,0.4564,1.075,3.425,48.55,0.005903,0.03731,0.0473,0.01557,0.01318,0.003892,18.98,34.12,126.7,1124,0.1139,0.3094,0.3403,0.1418,0.2218,0.0782 11 | 9113239,B,13.24,20.13,86.87,542.9,0.08284,0.1223,0.101,0.02833,0.1601,0.06432,0.281,0.8135,3.369,23.81,0.004929,0.06657,0.07683,0.01368,0.01526,0.008133,15.44,25.5,115,733.5,0.1201,0.5646,0.6556,0.1357,0.2845,0.1249 12 | 894326,M,18.22,18.87,118.7,1027,0.09746,0.1117,0.113,0.0795,0.1807,0.05664,0.4041,0.5503,2.547,48.9,0.004821,0.01659,0.02408,0.01143,0.01275,0.002451,21.84,25,140.9,1485,0.1434,0.2763,0.3853,0.1776,0.2812,0.08198 13 | 894329,B,9.042,18.9,60.07,244.5,0.09968,0.1972,0.1975,0.04908,0.233,0.08743,0.4653,1.911,3.769,24.2,0.009845,0.0659,0.1027,0.02527,0.03491,0.007877,10.06,23.4,68.62,297.1,0.1221,0.3748,0.4609,0.1145,0.3135,0.1055 14 | 896864,B,12.98,19.35,84.52,514,0.09579,0.1125,0.07107,0.0295,0.1761,0.0654,0.2684,0.5664,2.465,20.65,0.005727,0.03255,0.04393,0.009811,0.02751,0.004572,14.42,21.95,99.21,634.3,0.1288,0.3253,0.3439,0.09858,0.3596,0.09166 15 | 9012000,M,22.01,21.9,147.2,1482,0.1063,0.1954,0.2448,0.1501,0.1824,0.0614,1.008,0.6999,7.561,130.2,0.003978,0.02821,0.03576,0.01471,0.01518,0.003796,27.66,25.8,195,2227,0.1294,0.3885,0.4756,0.2432,0.2741,0.08574 16 | 884948,M,20.94,23.56,138.9,1364,0.1007,0.1606,0.2712,0.131,0.2205,0.05898,1.004,0.8208,6.372,137.9,0.005283,0.03908,0.09518,0.01864,0.02401,0.005002,25.58,27,165.3,2010,0.1211,0.3172,0.6991,0.2105,0.3126,0.07849 17 | 
91550,B,11.74,14.69,76.31,426,0.08099,0.09661,0.06726,0.02639,0.1499,0.06758,0.1924,0.6417,1.345,13.04,0.006982,0.03916,0.04017,0.01528,0.0226,0.006822,12.45,17.6,81.25,473.8,0.1073,0.2793,0.269,0.1056,0.2604,0.09879 18 | 88466802,B,10.65,25.22,68.01,347,0.09657,0.07234,0.02379,0.01615,0.1897,0.06329,0.2497,1.493,1.497,16.64,0.007189,0.01035,0.01081,0.006245,0.02158,0.002619,12.25,35.19,77.98,455.7,0.1499,0.1398,0.1125,0.06136,0.3409,0.08147 19 | 89346,B,9,14.4,56.36,246.3,0.07005,0.03116,0.003681,0.003472,0.1788,0.06833,0.1746,1.305,1.144,9.789,0.007389,0.004883,0.003681,0.003472,0.02701,0.002153,9.699,20.07,60.9,285.5,0.09861,0.05232,0.01472,0.01389,0.2991,0.07804 20 | 86561,B,13.85,17.21,88.44,588.7,0.08785,0.06136,0.0142,0.01141,0.1614,0.0589,0.2185,0.8561,1.495,17.91,0.004599,0.009169,0.009127,0.004814,0.01247,0.001708,15.49,23.58,100.3,725.9,0.1157,0.135,0.08115,0.05104,0.2364,0.07182 21 | 9111805,M,19.59,25,127.7,1191,0.1032,0.09871,0.1655,0.09063,0.1663,0.05391,0.4674,1.375,2.916,56.18,0.0119,0.01929,0.04907,0.01499,0.01641,0.001807,21.44,30.96,139.8,1421,0.1528,0.1845,0.3977,0.1466,0.2293,0.06091 22 | 893526,B,13.5,12.71,85.69,566.2,0.07376,0.03614,0.002758,0.004419,0.1365,0.05335,0.2244,0.6864,1.509,20.39,0.003338,0.003746,0.00203,0.003242,0.0148,0.001566,14.97,16.94,95.48,698.7,0.09023,0.05836,0.01379,0.0221,0.2267,0.06192 23 | 90602302,M,15.5,21.08,102.9,803.1,0.112,0.1571,0.1522,0.08481,0.2085,0.06864,1.37,1.213,9.424,176.5,0.008198,0.03889,0.04493,0.02139,0.02018,0.005815,23.17,27.65,157.1,1748,0.1517,0.4002,0.4211,0.2134,0.3003,0.1048 24 | 89813,B,14.42,16.54,94.15,641.2,0.09751,0.1139,0.08007,0.04223,0.1912,0.06412,0.3491,0.7706,2.677,32.14,0.004577,0.03053,0.0384,0.01243,0.01873,0.003373,16.67,21.51,111.4,862.1,0.1294,0.3371,0.3755,0.1414,0.3053,0.08764 25 | 
917080,B,12.75,16.7,82.51,493.8,0.1125,0.1117,0.0388,0.02995,0.212,0.06623,0.3834,1.003,2.495,28.62,0.007509,0.01561,0.01977,0.009199,0.01805,0.003629,14.45,21.74,93.63,624.1,0.1475,0.1979,0.1423,0.08045,0.3071,0.08557 26 | 902976,B,13.88,16.16,88.37,596.6,0.07026,0.04831,0.02045,0.008507,0.1607,0.05474,0.2541,0.6218,1.709,23.12,0.003728,0.01415,0.01988,0.007016,0.01647,0.00197,15.51,19.97,99.66,745.3,0.08484,0.1233,0.1091,0.04537,0.2542,0.06623 27 | 911366,B,11.62,18.18,76.38,408.8,0.1175,0.1483,0.102,0.05564,0.1957,0.07255,0.4101,1.74,3.027,27.85,0.01459,0.03206,0.04961,0.01841,0.01807,0.005217,13.36,25.4,88.14,528.1,0.178,0.2878,0.3186,0.1416,0.266,0.0927 28 | 8953902,M,16.27,20.71,106.9,813.7,0.1169,0.1319,0.1478,0.08488,0.1948,0.06277,0.4375,1.232,3.27,44.41,0.006697,0.02083,0.03248,0.01392,0.01536,0.002789,19.28,30.38,129.8,1121,0.159,0.2947,0.3597,0.1583,0.3103,0.082 29 | 88199202,B,11.32,27.08,71.76,395.7,0.06883,0.03813,0.01633,0.003125,0.1869,0.05628,0.121,0.8927,1.059,8.605,0.003653,0.01647,0.01633,0.003125,0.01537,0.002052,12.08,33.75,79.82,452.3,0.09203,0.1432,0.1089,0.02083,0.2849,0.07087 30 | 859575,M,18.94,21.31,123.6,1130,0.09009,0.1029,0.108,0.07951,0.1582,0.05461,0.7888,0.7975,5.486,96.05,0.004444,0.01652,0.02269,0.0137,0.01386,0.001698,24.86,26.58,165.9,1866,0.1193,0.2336,0.2687,0.1789,0.2551,0.06589 31 | 898431,M,19.68,21.68,129.9,1194,0.09797,0.1339,0.1863,0.1103,0.2082,0.05715,0.6226,2.284,5.173,67.66,0.004756,0.03368,0.04345,0.01806,0.03756,0.003288,22.75,34.66,157.6,1540,0.1218,0.3458,0.4734,0.2255,0.4045,0.07918 32 | 913102,B,14.64,16.85,94.21,666,0.08641,0.06698,0.05192,0.02791,0.1409,0.05355,0.2204,1.006,1.471,19.98,0.003535,0.01393,0.018,0.006144,0.01254,0.001219,16.46,25.44,106,831,0.1142,0.207,0.2437,0.07828,0.2455,0.06596 33 | 
853612,M,11.84,18.7,77.93,440.6,0.1109,0.1516,0.1218,0.05182,0.2301,0.07799,0.4825,1.03,3.475,41,0.005551,0.03414,0.04205,0.01044,0.02273,0.005667,16.82,28.12,119.4,888.7,0.1637,0.5775,0.6956,0.1546,0.4761,0.1402 34 | 927241,M,20.6,29.33,140.1,1265,0.1178,0.277,0.3514,0.152,0.2397,0.07016,0.726,1.595,5.772,86.22,0.006522,0.06158,0.07117,0.01664,0.02324,0.006185,25.74,39.42,184.6,1821,0.165,0.8681,0.9387,0.265,0.4087,0.124 35 | 9010018,M,15.08,25.74,98,716.6,0.1024,0.09769,0.1235,0.06553,0.1647,0.06464,0.6534,1.506,4.174,63.37,0.01052,0.02431,0.04912,0.01746,0.0212,0.004867,18.51,33.22,121.2,1050,0.166,0.2356,0.4029,0.1526,0.2654,0.09438 36 | 913535,M,16.69,20.2,107.1,857.6,0.07497,0.07112,0.03649,0.02307,0.1846,0.05325,0.2473,0.5679,1.775,22.95,0.002667,0.01446,0.01423,0.005297,0.01961,0.0017,19.18,26.56,127.3,1084,0.1009,0.292,0.2477,0.08737,0.4677,0.07623 37 | 925292,B,14.05,27.15,91.38,600.4,0.09929,0.1126,0.04462,0.04304,0.1537,0.06171,0.3645,1.492,2.888,29.84,0.007256,0.02678,0.02071,0.01626,0.0208,0.005304,15.3,33.17,100.2,706.7,0.1241,0.2264,0.1326,0.1048,0.225,0.08321 38 | 87164,M,15.46,11.89,102.5,736.9,0.1257,0.1555,0.2032,0.1097,0.1966,0.07069,0.4209,0.6583,2.805,44.64,0.005393,0.02321,0.04303,0.0132,0.01792,0.004168,18.79,17.04,125,1102,0.1531,0.3583,0.583,0.1827,0.3216,0.101 39 | 9111843,B,12,28.23,76.77,442.5,0.08437,0.0645,0.04055,0.01945,0.1615,0.06104,0.1912,1.705,1.516,13.86,0.007334,0.02589,0.02941,0.009166,0.01745,0.004302,13.09,37.88,85.07,523.7,0.1208,0.1856,0.1811,0.07116,0.2447,0.08194 40 | 8911800,B,13.59,17.84,86.24,572.3,0.07948,0.04052,0.01997,0.01238,0.1573,0.0552,0.258,1.166,1.683,22.22,0.003741,0.005274,0.01065,0.005044,0.01344,0.001126,15.5,26.1,98.91,739.1,0.105,0.07622,0.106,0.05185,0.2335,0.06263 41 | 9012315,M,16.35,23.29,109,840.4,0.09742,0.1497,0.1811,0.08773,0.2175,0.06218,0.4312,1.022,2.972,45.5,0.005635,0.03917,0.06072,0.01656,0.03197,0.004085,19.38,31.03,129.3,1165,0.1415,0.4665,0.7087,0.2248,0.4824,0.09614 42 | 
90769601,B,11.13,16.62,70.47,381.1,0.08151,0.03834,0.01369,0.0137,0.1511,0.06148,0.1415,0.9671,0.968,9.704,0.005883,0.006263,0.009398,0.006189,0.02009,0.002377,11.68,20.29,74.35,421.1,0.103,0.06219,0.0458,0.04044,0.2383,0.07083 43 | 903483,B,8.734,16.84,55.27,234.3,0.1039,0.07428,0,0,0.1985,0.07098,0.5169,2.079,3.167,28.85,0.01582,0.01966,0,0,0.01865,0.006736,10.17,22.8,64.01,317,0.146,0.131,0,0,0.2445,0.08865 44 | 91903902,B,13.68,16.33,87.76,575.5,0.09277,0.07255,0.01752,0.0188,0.1631,0.06155,0.2047,0.4801,1.373,17.25,0.003828,0.007228,0.007078,0.005077,0.01054,0.001697,15.85,20.2,101.6,773.4,0.1264,0.1564,0.1206,0.08704,0.2806,0.07782 45 | 909220,B,14.04,15.98,89.78,611.2,0.08458,0.05895,0.03534,0.02944,0.1714,0.05898,0.3892,1.046,2.644,32.74,0.007976,0.01295,0.01608,0.009046,0.02005,0.00283,15.66,21.58,101.2,750,0.1195,0.1252,0.1117,0.07453,0.2725,0.07234 46 | 922296,B,13.21,28.06,84.88,538.4,0.08671,0.06877,0.02987,0.03275,0.1628,0.05781,0.2351,1.597,1.539,17.85,0.004973,0.01372,0.01498,0.009117,0.01724,0.001343,14.37,37.17,92.48,629.6,0.1072,0.1381,0.1062,0.07958,0.2473,0.06443 47 | 8511133,M,15.34,14.26,102.5,704.4,0.1073,0.2135,0.2077,0.09756,0.2521,0.07032,0.4388,0.7096,3.384,44.91,0.006789,0.05328,0.06446,0.02252,0.03672,0.004394,18.07,19.08,125.1,980.9,0.139,0.5954,0.6305,0.2393,0.4667,0.09946 48 | 864018,B,11.34,21.26,72.48,396.5,0.08759,0.06575,0.05133,0.01899,0.1487,0.06529,0.2344,0.9861,1.597,16.41,0.009113,0.01557,0.02443,0.006435,0.01568,0.002477,13.01,29.15,83.99,518.1,0.1699,0.2196,0.312,0.08278,0.2829,0.08832 49 | 885429,M,19.73,19.82,130.7,1206,0.1062,0.1849,0.2417,0.0974,0.1733,0.06697,0.7661,0.78,4.115,92.81,0.008482,0.05057,0.068,0.01971,0.01467,0.007259,25.28,25.59,159.8,1933,0.171,0.5955,0.8489,0.2507,0.2749,0.1297 50 | 
8911230,B,11.33,14.16,71.79,396.6,0.09379,0.03872,0.001487,0.003333,0.1954,0.05821,0.2375,1.28,1.565,17.09,0.008426,0.008998,0.001487,0.003333,0.02358,0.001627,12.2,18.99,77.37,458,0.1259,0.07348,0.004955,0.01111,0.2758,0.06386 51 | 8913049,B,11.26,19.96,73.72,394.1,0.0802,0.1181,0.09274,0.05588,0.2595,0.06233,0.4866,1.905,2.877,34.68,0.01574,0.08262,0.08099,0.03487,0.03418,0.006517,11.86,22.33,78.27,437.6,0.1028,0.1843,0.1546,0.09314,0.2955,0.07009 52 | 872608,B,9.904,18.06,64.6,302.4,0.09699,0.1294,0.1307,0.03716,0.1669,0.08116,0.4311,2.261,3.132,27.48,0.01286,0.08808,0.1197,0.0246,0.0388,0.01792,11.26,24.39,73.07,390.2,0.1301,0.295,0.3486,0.0991,0.2614,0.1162 53 | 869224,B,12.9,15.92,83.74,512.2,0.08677,0.09509,0.04894,0.03088,0.1778,0.06235,0.2143,0.7712,1.689,16.64,0.005324,0.01563,0.0151,0.007584,0.02104,0.001887,14.48,21.82,97.17,643.8,0.1312,0.2548,0.209,0.1012,0.3549,0.08118 54 | 88725602,M,15.53,33.56,103.7,744.9,0.1063,0.1639,0.1751,0.08399,0.2091,0.0665,0.2419,1.278,1.903,23.02,0.005345,0.02556,0.02889,0.01022,0.009947,0.003359,18.49,49.54,126.3,1035,0.1883,0.5564,0.5703,0.2014,0.3512,0.1204 55 | 9010258,B,12.56,19.07,81.92,485.8,0.0876,0.1038,0.103,0.04391,0.1533,0.06184,0.3602,1.478,3.212,27.49,0.009853,0.04235,0.06271,0.01966,0.02639,0.004205,13.37,22.43,89.02,547.4,0.1096,0.2002,0.2388,0.09265,0.2121,0.07188 56 | 864685,B,11.93,21.53,76.53,438.6,0.09768,0.07849,0.03328,0.02008,0.1688,0.06194,0.3118,0.9227,2,24.79,0.007803,0.02507,0.01835,0.007711,0.01278,0.003856,13.67,26.15,87.54,583,0.15,0.2399,0.1503,0.07247,0.2438,0.08541 57 | 884689,B,11.52,14.93,73.87,406.3,0.1013,0.07808,0.04328,0.02929,0.1883,0.06168,0.2562,1.038,1.686,18.62,0.006662,0.01228,0.02105,0.01006,0.01677,0.002784,12.65,21.19,80.88,491.8,0.1389,0.1582,0.1804,0.09608,0.2664,0.07809 58 | 
914333,B,14.87,20.21,96.12,680.9,0.09587,0.08345,0.06824,0.04951,0.1487,0.05748,0.2323,1.636,1.596,21.84,0.005415,0.01371,0.02153,0.01183,0.01959,0.001812,16.01,28.48,103.9,783.6,0.1216,0.1388,0.17,0.1017,0.2369,0.06599 59 | 893988,B,11.54,10.72,73.73,409.1,0.08597,0.05969,0.01367,0.008907,0.1833,0.061,0.1312,0.3602,1.107,9.438,0.004124,0.0134,0.01003,0.004667,0.02032,0.001952,12.34,12.87,81.23,467.8,0.1092,0.1626,0.08324,0.04715,0.339,0.07434 60 | 88299702,M,23.21,26.97,153.5,1670,0.09509,0.1682,0.195,0.1237,0.1909,0.06309,1.058,0.9635,7.247,155.8,0.006428,0.02863,0.04497,0.01716,0.0159,0.003053,31.01,34.51,206,2944,0.1481,0.4126,0.582,0.2593,0.3103,0.08677 61 | 911408,B,12.83,15.73,82.89,506.9,0.0904,0.08269,0.05835,0.03078,0.1705,0.05913,0.1499,0.4875,1.195,11.64,0.004873,0.01796,0.03318,0.00836,0.01601,0.002289,14.09,19.35,93.22,605.8,0.1326,0.261,0.3476,0.09783,0.3006,0.07802 62 | 90312,M,19.55,23.21,128.9,1174,0.101,0.1318,0.1856,0.1021,0.1989,0.05884,0.6107,2.836,5.383,70.1,0.01124,0.04097,0.07469,0.03441,0.02768,0.00624,20.82,30.44,142,1313,0.1251,0.2414,0.3829,0.1825,0.2576,0.07602 63 | 857374,B,11.94,18.24,75.71,437.6,0.08261,0.04751,0.01972,0.01349,0.1868,0.0611,0.2273,0.6329,1.52,17.47,0.00721,0.00838,0.01311,0.008,0.01996,0.002635,13.1,21.33,83.67,527.2,0.1144,0.08906,0.09203,0.06296,0.2785,0.07408 64 | 89742801,M,17.06,21,111.8,918.6,0.1119,0.1056,0.1508,0.09934,0.1727,0.06071,0.8161,2.129,6.076,87.17,0.006455,0.01797,0.04502,0.01744,0.01829,0.003733,20.99,33.15,143.2,1362,0.1449,0.2053,0.392,0.1827,0.2623,0.07599 65 | 905557,B,14.99,22.11,97.53,693.7,0.08515,0.1025,0.06859,0.03876,0.1944,0.05913,0.3186,1.336,2.31,28.51,0.004449,0.02808,0.03312,0.01196,0.01906,0.004015,16.76,31.55,110.2,867.1,0.1077,0.3345,0.3114,0.1308,0.3163,0.09251 66 | 
901028,B,13.87,16.21,88.52,593.7,0.08743,0.05492,0.01502,0.02088,0.1424,0.05883,0.2543,1.363,1.737,20.74,0.005638,0.007939,0.005254,0.006042,0.01544,0.002087,15.11,25.58,96.74,694.4,0.1153,0.1008,0.05285,0.05556,0.2362,0.07113 67 | 89864002,B,11.71,15.45,75.03,420.3,0.115,0.07281,0.04006,0.0325,0.2009,0.06506,0.3446,0.7395,2.355,24.53,0.009536,0.01097,0.01651,0.01121,0.01953,0.0031,13.06,18.16,84.16,516.4,0.146,0.1115,0.1087,0.07864,0.2765,0.07806 68 | 87127,B,10.8,9.71,68.77,357.6,0.09594,0.05736,0.02531,0.01698,0.1381,0.064,0.1728,0.4064,1.126,11.48,0.007809,0.009816,0.01099,0.005344,0.01254,0.00212,11.6,12.02,73.66,414,0.1436,0.1257,0.1047,0.04603,0.209,0.07699 69 | 844981,M,13,21.82,87.5,519.8,0.1273,0.1932,0.1859,0.09353,0.235,0.07389,0.3063,1.002,2.406,24.32,0.005731,0.03502,0.03553,0.01226,0.02143,0.003749,15.49,30.73,106.2,739.3,0.1703,0.5401,0.539,0.206,0.4378,0.1072 70 | 907145,B,9.742,19.12,61.93,289.7,0.1075,0.08333,0.008934,0.01967,0.2538,0.07029,0.6965,1.747,4.607,43.52,0.01307,0.01885,0.006021,0.01052,0.031,0.004225,11.21,23.17,71.79,380.9,0.1398,0.1352,0.02085,0.04589,0.3196,0.08009 71 | 854268,M,14.25,21.72,93.63,633,0.09823,0.1098,0.1319,0.05598,0.1885,0.06125,0.286,1.019,2.657,24.91,0.005878,0.02995,0.04815,0.01161,0.02028,0.004022,15.89,30.36,116.2,799.6,0.1446,0.4238,0.5186,0.1447,0.3591,0.1014 72 | 904647,B,11.93,10.91,76.14,442.7,0.08872,0.05242,0.02606,0.01796,0.1601,0.05541,0.2522,1.045,1.649,18.95,0.006175,0.01204,0.01376,0.005832,0.01096,0.001857,13.8,20.14,87.64,589.5,0.1374,0.1575,0.1514,0.06876,0.246,0.07262 73 | 921644,B,14.74,25.42,94.7,668.6,0.08275,0.07214,0.04105,0.03027,0.184,0.0568,0.3031,1.385,2.177,27.41,0.004775,0.01172,0.01947,0.01269,0.0187,0.002626,16.51,32.29,107.4,826.4,0.106,0.1376,0.1611,0.1095,0.2722,0.06956 74 | 
896839,M,16.03,15.51,105.8,793.2,0.09491,0.1371,0.1204,0.07041,0.1782,0.05976,0.3371,0.7476,2.629,33.27,0.005839,0.03245,0.03715,0.01459,0.01467,0.003121,18.76,21.98,124.3,1070,0.1435,0.4478,0.4956,0.1981,0.3019,0.09124 75 | 9113846,B,12.27,29.97,77.42,465.4,0.07699,0.03398,0,0,0.1701,0.0596,0.4455,3.647,2.884,35.13,0.007339,0.008243,0,0,0.03141,0.003136,13.45,38.05,85.08,558.9,0.09422,0.05213,0,0,0.2409,0.06743 76 | 8910720,B,10.71,20.39,69.5,344.9,0.1082,0.1289,0.08448,0.02867,0.1668,0.06862,0.3198,1.489,2.23,20.74,0.008902,0.04785,0.07339,0.01745,0.02728,0.00761,11.69,25.21,76.51,410.4,0.1335,0.255,0.2534,0.086,0.2605,0.08701 77 | 853401,M,18.63,25.11,124.8,1088,0.1064,0.1887,0.2319,0.1244,0.2183,0.06197,0.8307,1.466,5.574,105,0.006248,0.03374,0.05196,0.01158,0.02007,0.00456,23.15,34.01,160.5,1670,0.1491,0.4257,0.6133,0.1848,0.3444,0.09782 78 | 871001501,B,13,20.78,83.51,519.4,0.1135,0.07589,0.03136,0.02645,0.254,0.06087,0.4202,1.322,2.873,34.78,0.007017,0.01142,0.01949,0.01153,0.02951,0.001533,14.16,24.11,90.82,616.7,0.1297,0.1105,0.08112,0.06296,0.3196,0.06435 79 | 904689,B,12.96,18.29,84.18,525.2,0.07351,0.07899,0.04057,0.01883,0.1874,0.05899,0.2357,1.299,2.397,20.21,0.003629,0.03713,0.03452,0.01065,0.02632,0.003705,14.13,24.61,96.31,621.9,0.09329,0.2318,0.1604,0.06608,0.3207,0.07247 80 | 9112085,B,13.38,30.72,86.34,557.2,0.09245,0.07426,0.02819,0.03264,0.1375,0.06016,0.3408,1.924,2.287,28.93,0.005841,0.01246,0.007936,0.009128,0.01564,0.002985,15.05,41.61,96.69,705.6,0.1172,0.1421,0.07003,0.07763,0.2196,0.07675 81 | 8812816,B,13.65,13.16,87.88,568.9,0.09646,0.08711,0.03888,0.02563,0.136,0.06344,0.2102,0.4336,1.391,17.4,0.004133,0.01695,0.01652,0.006659,0.01371,0.002735,15.34,16.35,99.71,706.2,0.1311,0.2474,0.1759,0.08056,0.238,0.08718 82 | 875099,B,9.72,18.22,60.73,288.1,0.0695,0.02344,0,0,0.1653,0.06447,0.3539,4.885,2.23,21.69,0.001713,0.006736,0,0,0.03799,0.001688,9.968,20.83,62.25,303.8,0.07117,0.02729,0,0,0.1909,0.06559 83 | 
861598,B,14.64,15.24,95.77,651.9,0.1132,0.1339,0.09966,0.07064,0.2116,0.06346,0.5115,0.7372,3.814,42.76,0.005508,0.04412,0.04436,0.01623,0.02427,0.004841,16.34,18.24,109.4,803.6,0.1277,0.3089,0.2604,0.1397,0.3151,0.08473 84 | 868682,B,11.43,15.39,73.06,399.8,0.09639,0.06889,0.03503,0.02875,0.1734,0.05865,0.1759,0.9938,1.143,12.67,0.005133,0.01521,0.01434,0.008602,0.01501,0.001588,12.32,22.02,79.93,462,0.119,0.1648,0.1399,0.08476,0.2676,0.06765 85 | 897132,B,11.22,19.86,71.94,387.3,0.1054,0.06779,0.005006,0.007583,0.194,0.06028,0.2976,1.966,1.959,19.62,0.01289,0.01104,0.003297,0.004967,0.04243,0.001963,11.98,25.78,76.91,436.1,0.1424,0.09669,0.01335,0.02022,0.3292,0.06522 86 | 91805,B,8.571,13.1,54.53,221.3,0.1036,0.07632,0.02565,0.0151,0.1678,0.07126,0.1267,0.6793,1.069,7.254,0.007897,0.01762,0.01801,0.00732,0.01592,0.003925,9.473,18.45,63.3,275.6,0.1641,0.2235,0.1754,0.08512,0.2983,0.1049 87 | 867387,B,15.71,13.93,102,761.7,0.09462,0.09462,0.07135,0.05933,0.1816,0.05723,0.3117,0.8155,1.972,27.94,0.005217,0.01515,0.01678,0.01268,0.01669,0.00233,17.5,19.25,114.3,922.8,0.1223,0.1949,0.1709,0.1374,0.2723,0.07071 88 | 884448,B,13.2,17.43,84.13,541.6,0.07215,0.04524,0.04336,0.01105,0.1487,0.05635,0.163,1.601,0.873,13.56,0.006261,0.01569,0.03079,0.005383,0.01962,0.00225,13.94,27.82,88.28,602,0.1101,0.1508,0.2298,0.0497,0.2767,0.07198 89 | 91594602,M,15.05,19.07,97.26,701.9,0.09215,0.08597,0.07486,0.04335,0.1561,0.05915,0.386,1.198,2.63,38.49,0.004952,0.0163,0.02967,0.009423,0.01152,0.001718,17.58,28.06,113.8,967,0.1246,0.2101,0.2866,0.112,0.2282,0.06954 90 | 881046502,M,20.58,22.14,134.7,1290,0.0909,0.1348,0.164,0.09561,0.1765,0.05024,0.8601,1.48,7.029,111.7,0.008124,0.03611,0.05489,0.02765,0.03176,0.002365,23.24,27.84,158.3,1656,0.1178,0.292,0.3861,0.192,0.2909,0.05865 91 | 921092,B,7.729,25.49,47.98,178.8,0.08098,0.04878,0,0,0.187,0.07285,0.3777,1.462,2.492,19.14,0.01266,0.009692,0,0,0.02882,0.006872,9.077,30.92,57.17,248,0.1256,0.0834,0,0,0.3058,0.09938 92 | 
86730502,M,16.16,21.54,106.2,809.8,0.1008,0.1284,0.1043,0.05613,0.216,0.05891,0.4332,1.265,2.844,43.68,0.004877,0.01952,0.02219,0.009231,0.01535,0.002373,19.47,31.68,129.7,1175,0.1395,0.3055,0.2992,0.1312,0.348,0.07619 93 | 877500,M,14.45,20.22,94.49,642.7,0.09872,0.1206,0.118,0.0598,0.195,0.06466,0.2092,0.6509,1.446,19.42,0.004044,0.01597,0.02,0.007303,0.01522,0.001976,18.33,30.12,117.9,1044,0.1552,0.4056,0.4967,0.1838,0.4753,0.1013 94 | 873701,M,15.7,20.31,101.2,766.6,0.09597,0.08799,0.06593,0.05189,0.1618,0.05549,0.3699,1.15,2.406,40.98,0.004626,0.02263,0.01954,0.009767,0.01547,0.00243,20.11,32.82,129.3,1269,0.1414,0.3547,0.2902,0.1541,0.3437,0.08631 95 | 908489,M,13.98,19.62,91.12,599.5,0.106,0.1133,0.1126,0.06463,0.1669,0.06544,0.2208,0.9533,1.602,18.85,0.005314,0.01791,0.02185,0.009567,0.01223,0.002846,17.04,30.8,113.9,869.3,0.1613,0.3568,0.4069,0.1827,0.3179,0.1055 96 | 914769,M,18.49,17.52,121.3,1068,0.1012,0.1317,0.1491,0.09183,0.1832,0.06697,0.7923,1.045,4.851,95.77,0.007974,0.03214,0.04435,0.01573,0.01617,0.005255,22.75,22.88,146.4,1600,0.1412,0.3089,0.3533,0.1663,0.251,0.09445 97 | 9113455,B,13.14,20.74,85.98,536.9,0.08675,0.1089,0.1085,0.0351,0.1562,0.0602,0.3152,0.7884,2.312,27.4,0.007295,0.03179,0.04615,0.01254,0.01561,0.00323,14.8,25.46,100.9,689.1,0.1351,0.3549,0.4504,0.1181,0.2563,0.08174 98 | 887181,M,15.66,23.2,110.2,773.5,0.1109,0.3114,0.3176,0.1377,0.2495,0.08104,1.292,2.454,10.12,138.5,0.01236,0.05995,0.08232,0.03024,0.02337,0.006042,19.85,31.64,143.7,1226,0.1504,0.5172,0.6181,0.2462,0.3277,0.1019 99 | 9011971,M,21.71,17.25,140.9,1546,0.09384,0.08562,0.1168,0.08465,0.1717,0.05054,1.207,1.051,7.733,224.1,0.005568,0.01112,0.02096,0.01197,0.01263,0.001803,30.75,26.44,199.5,3143,0.1363,0.1628,0.2861,0.182,0.251,0.06494 100 | 91813702,B,12.34,12.27,78.94,468.5,0.09003,0.06307,0.02958,0.02647,0.1689,0.05808,0.1166,0.4957,0.7714,8.955,0.003681,0.009169,0.008732,0.00574,0.01129,0.001366,13.61,19.27,87.22,564.9,0.1292,0.2074,0.1791,0.107,0.311,0.07592 
101 | 914101,B,12.46,12.83,78.83,477.3,0.07372,0.04043,0.007173,0.01149,0.1613,0.06013,0.3276,1.486,2.108,24.6,0.01039,0.01003,0.006416,0.007895,0.02869,0.004821,13.19,16.36,83.24,534,0.09439,0.06477,0.01674,0.0268,0.228,0.07028 102 | 87106,B,11.15,13.08,70.87,381.9,0.09754,0.05113,0.01982,0.01786,0.183,0.06105,0.2251,0.7815,1.429,15.48,0.009019,0.008985,0.01196,0.008232,0.02388,0.001619,11.99,16.3,76.25,440.8,0.1341,0.08971,0.07116,0.05506,0.2859,0.06772 103 | 916799,M,18.31,20.58,120.8,1052,0.1068,0.1248,0.1569,0.09451,0.186,0.05941,0.5449,0.9225,3.218,67.36,0.006176,0.01877,0.02913,0.01046,0.01559,0.002725,21.86,26.2,142.2,1493,0.1492,0.2536,0.3759,0.151,0.3074,0.07863 104 | 8711002,B,13.15,15.34,85.31,538.9,0.09384,0.08498,0.09293,0.03483,0.1822,0.06207,0.271,0.7927,1.819,22.79,0.008584,0.02017,0.03047,0.009536,0.02769,0.003479,14.77,20.5,97.67,677.3,0.1478,0.2256,0.3009,0.09722,0.3849,0.08633 105 | 851509,M,21.16,23.04,137.2,1404,0.09428,0.1022,0.1097,0.08632,0.1769,0.05278,0.6917,1.127,4.303,93.99,0.004728,0.01259,0.01715,0.01038,0.01083,0.001987,29.17,35.59,188,2615,0.1401,0.26,0.3155,0.2009,0.2822,0.07526 106 | 86408,B,12.63,20.76,82.15,480.4,0.09933,0.1209,0.1065,0.06021,0.1735,0.0707,0.3424,1.803,2.711,20.48,0.01291,0.04042,0.05101,0.02295,0.02144,0.005891,13.33,25.47,89,527.4,0.1287,0.225,0.2216,0.1105,0.2226,0.08486 107 | 881094802,M,17.42,25.56,114.5,948,0.1006,0.1146,0.1682,0.06597,0.1308,0.05866,0.5296,1.667,3.767,58.53,0.03113,0.08555,0.1438,0.03927,0.02175,0.01256,18.07,28.07,120.4,1021,0.1243,0.1793,0.2803,0.1099,0.1603,0.06818 108 | 869254,B,10.75,14.97,68.26,355.3,0.07793,0.05139,0.02251,0.007875,0.1399,0.05688,0.2525,1.239,1.806,17.74,0.006547,0.01781,0.02018,0.005612,0.01671,0.00236,11.95,20.72,77.79,441.2,0.1076,0.1223,0.09755,0.03413,0.23,0.06769 109 | 
866714,B,12.19,13.29,79.08,455.8,0.1066,0.09509,0.02855,0.02882,0.188,0.06471,0.2005,0.8163,1.973,15.24,0.006773,0.02456,0.01018,0.008094,0.02662,0.004143,13.34,17.81,91.38,545.2,0.1427,0.2585,0.09915,0.08187,0.3469,0.09241 110 | 871641,B,11.08,14.71,70.21,372.7,0.1006,0.05743,0.02363,0.02583,0.1566,0.06669,0.2073,1.805,1.377,19.08,0.01496,0.02121,0.01453,0.01583,0.03082,0.004785,11.35,16.82,72.01,396.5,0.1216,0.0824,0.03938,0.04306,0.1902,0.07313 111 | 863031,B,11.64,18.33,75.17,412.5,0.1142,0.1017,0.0707,0.03485,0.1801,0.0652,0.306,1.657,2.155,20.62,0.00854,0.0231,0.02945,0.01398,0.01565,0.00384,13.14,29.26,85.51,521.7,0.1688,0.266,0.2873,0.1218,0.2806,0.09097 112 | 864877,M,15.78,22.91,105.7,782.6,0.1155,0.1752,0.2133,0.09479,0.2096,0.07331,0.552,1.072,3.598,58.63,0.008699,0.03976,0.0595,0.0139,0.01495,0.005984,20.19,30.5,130.3,1272,0.1855,0.4925,0.7356,0.2034,0.3274,0.1252 113 | 873593,M,21.09,26.57,142.7,1311,0.1141,0.2832,0.2487,0.1496,0.2395,0.07398,0.6298,0.7629,4.414,81.46,0.004253,0.04759,0.03872,0.01567,0.01798,0.005295,26.68,33.48,176.5,2089,0.1491,0.7584,0.678,0.2903,0.4098,0.1284 114 | 862485,B,11.6,12.84,74.34,412.6,0.08983,0.07525,0.04196,0.0335,0.162,0.06582,0.2315,0.5391,1.475,15.75,0.006153,0.0133,0.01693,0.006884,0.01651,0.002551,13.06,17.16,82.96,512.5,0.1431,0.1851,0.1922,0.08449,0.2772,0.08756 115 | 8913,B,12.89,13.12,81.89,515.9,0.06955,0.03729,0.0226,0.01171,0.1337,0.05581,0.1532,0.469,1.115,12.68,0.004731,0.01345,0.01652,0.005905,0.01619,0.002081,13.62,15.54,87.4,577,0.09616,0.1147,0.1186,0.05366,0.2309,0.06915 116 | 881861,M,12.83,22.33,85.26,503.2,0.1088,0.1799,0.1695,0.06861,0.2123,0.07254,0.3061,1.069,2.257,25.13,0.006983,0.03858,0.04683,0.01499,0.0168,0.005617,15.2,30.15,105.3,706,0.1777,0.5343,0.6282,0.1977,0.3407,0.1243 117 | 
88411702,B,13.75,23.77,88.54,590,0.08043,0.06807,0.04697,0.02344,0.1773,0.05429,0.4347,1.057,2.829,39.93,0.004351,0.02667,0.03371,0.01007,0.02598,0.003087,15.01,26.34,98,706,0.09368,0.1442,0.1359,0.06106,0.2663,0.06321 118 | 874158,B,10.08,15.11,63.76,317.5,0.09267,0.04695,0.001597,0.002404,0.1703,0.06048,0.4245,1.268,2.68,26.43,0.01439,0.012,0.001597,0.002404,0.02538,0.00347,11.87,21.18,75.39,437,0.1521,0.1019,0.00692,0.01042,0.2933,0.07697 119 | 92751,B,7.76,24.54,47.92,181,0.05263,0.04362,0,0,0.1587,0.05884,0.3857,1.428,2.548,19.15,0.007189,0.00466,0,0,0.02676,0.002783,9.456,30.37,59.16,268.6,0.08996,0.06444,0,0,0.2871,0.07039 120 | 859471,B,9.029,17.33,58.79,250.5,0.1066,0.1413,0.313,0.04375,0.2111,0.08046,0.3274,1.194,1.885,17.67,0.009549,0.08606,0.3038,0.03322,0.04197,0.009559,10.31,22.65,65.5,324.7,0.1482,0.4365,1.252,0.175,0.4228,0.1175 121 | -------------------------------------------------------------------------------- /handwritten_ex/decisiontree.cpp: -------------------------------------------------------------------------------- 1 | // Example : decision tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : handwritten_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2010 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | 20 | #define NUMBER_OF_TRAINING_SAMPLES 797 21 | #define ATTRIBUTES_PER_SAMPLE 256 22 | #define NUMBER_OF_TESTING_SAMPLES 796 23 | 24 | #define NUMBER_OF_CLASSES 10 25 | 26 | // N.B. 
classes are integer handwritten digits in range 0-9 27 | 28 | /******************************************************************************/ 29 | 30 | // loads the sample database from file (which is a CSV text file) 31 | 32 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 33 | int n_samples ) 34 | { 35 | float tmpf; 36 | 37 | // if we can't read the input file then return 0 38 | FILE* f = fopen( filename, "r" ); 39 | if( !f ) 40 | { 41 | printf("ERROR: cannot read file %s\n", filename); 42 | return 0; // all not OK 43 | } 44 | 45 | // for each sample in the file 46 | 47 | for(int line = 0; line < n_samples; line++) 48 | { 49 | 50 | // for each attribute on the line in the file 51 | 52 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 53 | { 54 | if (attribute < ATTRIBUTES_PER_SAMPLE) 55 | { 56 | 57 | // first 256 elements (0-255) in each line are the attributes 58 | 59 | fscanf(f, "%f,", &tmpf); 60 | data.at(line, attribute) = tmpf; 61 | 62 | } 63 | else if (attribute == ATTRIBUTES_PER_SAMPLE) 64 | { 65 | 66 | // attribute 256 is the class label {0 ... 
9} 67 | 68 | fscanf(f, "%f,", &tmpf); 69 | classes.at(line, 0) = tmpf; 70 | } 71 | } 72 | } 73 | 74 | fclose(f); 75 | 76 | return 1; // all OK 77 | } 78 | 79 | /******************************************************************************/ 80 | 81 | int main( int argc, char** argv ) 82 | { 83 | // lets just check the version first 84 | 85 | printf ("OpenCV version %s (%d.%d.%d)\n", 86 | CV_VERSION, 87 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 88 | 89 | // define training data storage matrices (one for attribute examples, one 90 | // for classifications) 91 | 92 | Mat training_data = 93 | Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 94 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 95 | 96 | //define testing data storage matrices 97 | 98 | Mat testing_data = 99 | Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 100 | Mat testing_classifications = 101 | Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 102 | 103 | // define all the attributes as numerical 104 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 105 | // that can be assigned on a per attribute basis 106 | 107 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 108 | var_type = Scalar(CV_VAR_NUMERICAL); // all inputs are numerical 109 | 110 | // this is a classification problem (i.e. 
predict a discrete number of class 111 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 112 | 113 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 114 | 115 | CvDTreeNode* resultNode; // node returned from a prediction 116 | 117 | // load training and testing data sets 118 | 119 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 120 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 121 | { 122 | // define the parameters for training the decision tree 123 | 124 | float priors[] = {1,1,1,1,1,1,1,1,1,1}; // weights of each classification for classes 125 | // (all equal as equal samples of each digit) 126 | 127 | CvDTreeParams params = CvDTreeParams(25, // max depth 128 | 5, // min sample count 129 | 0, // regression accuracy: N/A here 130 | false, // compute surrogate split, no missing data 131 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 132 | 15, // the number of cross-validation folds 133 | false, // use 1SE rule => smaller tree 134 | false, // throw away the pruned tree branches 135 | priors // the array of priors 136 | ); 137 | 138 | 139 | // train decision tree classifier (using training data) 140 | 141 | printf( "\nUsing training database: %s\n\n", argv[1]); 142 | CvDTree* dtree = new CvDTree; 143 | 144 | dtree->train(training_data, CV_ROW_SAMPLE, 145 | training_classifications, 146 | Mat(), Mat(), var_type, Mat(), params); 147 | 148 | // perform classifier testing and report results 149 | 150 | Mat test_sample; 151 | int correct_class = 0; 152 | int wrong_class = 0; 153 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 154 | 155 | printf( "\nUsing testing database: %s\n\n", argv[2]); 156 | 157 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 158 | { 159 | 160 | // extract a row from the testing matrix 161 | 162 | test_sample = 
testing_data.row(tsample); 163 | 164 | // run decision tree prediction 165 | 166 | resultNode = dtree->predict(test_sample, Mat(), false); 167 | 168 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) (resultNode->value)); 169 | 170 | // if the prediction and the (true) testing classification are the same 171 | // (N.B. openCV uses a floating point decision tree implementation!) 172 | 173 | if (fabs(resultNode->value - testing_classifications.at(tsample, 0)) 174 | >= FLT_EPSILON) 175 | { 176 | // if they differ more than floating point error => wrong class 177 | 178 | wrong_class++; 179 | 180 | false_positives[(int) resultNode->value]++; 181 | 182 | } 183 | else 184 | { 185 | 186 | // otherwise correct 187 | 188 | correct_class++; 189 | } 190 | } 191 | 192 | printf( "\nResults on the testing database: %s\n" 193 | "\tCorrect classification: %d (%g%%)\n" 194 | "\tWrong classifications: %d (%g%%)\n", 195 | argv[2], 196 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 197 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 198 | 199 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 200 | { 201 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 202 | false_positives[i], 203 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 204 | } 205 | 206 | 207 | // all matrix memory free by destructors 208 | 209 | 210 | // all OK : main returns 0 211 | 212 | return 0; 213 | } 214 | 215 | // not OK : main returns -1 216 | 217 | return -1; 218 | } 219 | /******************************************************************************/ 220 | -------------------------------------------------------------------------------- /handwritten_ex/neuralnetwork.cpp: -------------------------------------------------------------------------------- 1 | // Example : neural network learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : handwritten_ex 5 | 6 | // 
Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2010 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 797 22 | #define ATTRIBUTES_PER_SAMPLE 256 23 | #define NUMBER_OF_TESTING_SAMPLES 796 24 | 25 | #define NUMBER_OF_CLASSES 10 26 | 27 | // N.B. classes are integer handwritten digits in range 0-9 28 | 29 | /******************************************************************************/ 30 | 31 | // loads the sample database from file (which is a CSV text file) 32 | 33 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 34 | int n_samples ) 35 | { 36 | 37 | int classlabel; // the class label 38 | float tmpf; 39 | 40 | // if we can't read the input file then return 0 41 | FILE* f = fopen( filename, "r" ); 42 | if( !f ) 43 | { 44 | printf("ERROR: cannot read file %s\n", filename); 45 | return 0; // all not OK 46 | } 47 | 48 | // for each sample in the file 49 | 50 | for(int line = 0; line < n_samples; line++) 51 | { 52 | 53 | // for each attribute on the line in the file 54 | 55 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 56 | { 57 | if (attribute < ATTRIBUTES_PER_SAMPLE) 58 | { 59 | 60 | // first 256 elements (0-255) in each line are the attributes 61 | 62 | fscanf(f, "%f,", &tmpf); 63 | data.at(line, attribute) = tmpf; 64 | 65 | } 66 | else if (attribute == ATTRIBUTES_PER_SAMPLE) 67 | { 68 | 69 | // attribute 256 is the class label {0 ... 
9} 70 | 71 | fscanf(f, "%i,", &classlabel); 72 | classes.at(line, classlabel) = 1.0; 73 | } 74 | } 75 | } 76 | 77 | fclose(f); 78 | 79 | return 1; // all OK 80 | } 81 | 82 | /******************************************************************************/ 83 | 84 | int main( int argc, char** argv ) 85 | { 86 | // lets just check the version first 87 | 88 | printf ("OpenCV version %s (%d.%d.%d)\n", 89 | CV_VERSION, 90 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 91 | 92 | // define training data storage matrices (one for attribute examples, one 93 | // for classifications) 94 | 95 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 96 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, NUMBER_OF_CLASSES, CV_32FC1); 97 | 98 | // define testing data storage matrices 99 | 100 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 101 | Mat testing_classifications = Mat::zeros(NUMBER_OF_TESTING_SAMPLES, NUMBER_OF_CLASSES, CV_32FC1); 102 | 103 | // define classification output vector 104 | 105 | Mat classificationResult = Mat(1, NUMBER_OF_CLASSES, CV_32FC1); 106 | Point max_loc = Point(0,0); 107 | 108 | // load training and testing data sets 109 | 110 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 111 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 112 | { 113 | // define the parameters for the neural network (MLP) 114 | 115 | // set the network to be 3 layer 256->10->10 116 | // - one input node per attribute in a sample 117 | // - 10 hidden nodes 118 | // - one output node per class 119 | 120 | // note that the OpenCV neural network (MLP) implementation does not 121 | // support categorical variables explicitly. 122 | // So, instead of the output class label, we will use 123 | // a binary vector of {0,0 ... 
1,0,0} components (one element by class) 124 | // for training and therefore, MLP will give us a vector of "probabilities" 125 | // at the prediction stage - the highest probability can be accepted 126 | // as the "winning" class label output by the network 127 | 128 | int layers_d[] = { ATTRIBUTES_PER_SAMPLE, 10, NUMBER_OF_CLASSES}; 129 | Mat layers = Mat(1,3,CV_32SC1); 130 | layers.at(0,0) = layers_d[0]; 131 | layers.at(0,1) = layers_d[1]; 132 | layers.at(0,2) = layers_d[2]; 133 | 134 | // create the network using a sigmoid function with alpha and beta 135 | // parameters 0.6 and 1 specified respectively (refer to manual) 136 | 137 | CvANN_MLP* nnetwork = new CvANN_MLP; 138 | nnetwork->create(layers, CvANN_MLP::SIGMOID_SYM, 0.6, 1); 139 | 140 | // set the training parameters 141 | 142 | CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams( 143 | 144 | // terminate the training after either 1000 145 | // iterations or a very small change in the 146 | // network wieghts below the specified value 147 | 148 | cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 0.000001), 149 | 150 | // use backpropogation for training 151 | 152 | CvANN_MLP_TrainParams::BACKPROP, 153 | 154 | // co-efficents for backpropogation training 155 | // (refer to manual) 156 | 157 | 0.1, 158 | 0.1); 159 | 160 | // train the neural network (using training data) 161 | 162 | printf( "\nUsing training database: %s\n", argv[1]); 163 | 164 | int iterations = nnetwork->train(training_data, training_classifications, Mat(), Mat(), params); 165 | 166 | printf( "Training iterations: %i\n\n", iterations); 167 | 168 | // perform classifier testing and report results 169 | 170 | Mat test_sample; 171 | int correct_class = 0; 172 | int wrong_class = 0; 173 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 174 | 175 | printf( "\nUsing testing database: %s\n\n", argv[2]); 176 | 177 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 178 | { 179 | 180 | // extract a row 
from the testing matrix 181 | 182 | test_sample = testing_data.row(tsample); 183 | 184 | // run neural network prediction 185 | 186 | nnetwork->predict(test_sample, classificationResult); 187 | 188 | // The NN gives out a vector of probabilities for each class 189 | // We take the class with the highest "probability" 190 | // for simplicity (but we really should also check separation 191 | // of the different "probabilities" in this vector - what if 192 | // two classes have very similar values ?) 193 | 194 | minMaxLoc(classificationResult, 0, 0, 0, &max_loc); 195 | 196 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, max_loc.x); 197 | 198 | // if the corresponding location in the testing classifications 199 | // is not "1" (i.e. this is the correct class) then record this 200 | 201 | if (!(testing_classifications.at(tsample, max_loc.x))) 202 | { 203 | // if they differ more than floating point error => wrong class 204 | 205 | wrong_class++; 206 | 207 | false_positives[(int) max_loc.x]++; 208 | 209 | } 210 | else 211 | { 212 | 213 | // otherwise correct 214 | 215 | correct_class++; 216 | } 217 | } 218 | 219 | printf( "\nResults on the testing database: %s\n" 220 | "\tCorrect classification: %d (%g%%)\n" 221 | "\tWrong classifications: %d (%g%%)\n", 222 | argv[2], 223 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 224 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 225 | 226 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 227 | { 228 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 229 | false_positives[i], 230 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 231 | } 232 | 233 | // all OK : main returns 0 234 | 235 | return 0; 236 | } 237 | 238 | // not OK : main returns -1 239 | 240 | return -1; 241 | } 242 | /******************************************************************************/ 243 | -------------------------------------------------------------------------------- 
/handwritten_ex/semeion.names: -------------------------------------------------------------------------------- 1 | Title: Semeion Handwritten Digit 2 | 3 | Abstract: 1593 handwritten digits from around 80 persons were scanned, 4 | stretched in a rectangular box 16x16 in a gray scale of 256 values. 5 | 6 | ----------------------------------------------------- 7 | 8 | Data Set Characteristics: Multivariate 9 | Number of Instances: 1593 10 | Area: Computer 11 | Attribute Characteristics: Integer 12 | Number of Attributes: 256 13 | Date Donated: 2008-11-11 14 | Associated Tasks: Classification 15 | Missing Values? N/A 16 | 17 | ----------------------------------------------------- 18 | 19 | Source: 20 | 21 | The dataset was created by Tactile Srl, Brescia, Italy 22 | (http://www.tattile.it/) and donated in 1994 to Semeion Research Center 23 | of Sciences of Communication, Rome, Italy (http://www.semeion.it/), for 24 | machine learning research. 25 | 26 | For any questions, e-mail Massimo Buscema (m.buscema '@' semeion.it) or 27 | Stefano Terzi (s.terzi '@' semeion.it) 28 | 29 | 30 | ----------------------------------------------------- 31 | 32 | Data Set Information: 33 | 34 | 35 | 1593 handwritten digits from around 80 persons were scanned, stretched 36 | in a rectangular box 16x16 in a gray scale of 256 values.Then each pixel 37 | of each image was scaled into a bolean (1/0) value using a fixed 38 | threshold. 39 | 40 | Each person wrote on a paper all the digits from 0 to 9, twice. The 41 | commitment was to write the digit the first time in the normal way 42 | (trying to write each digit accurately) and the second time in a fast 43 | way (with no accuracy). 44 | 45 | The best validation protocol for this dataset seems to be a 5x2CV, 50% 46 | Tune (Train +Test) and completly blind 50% Validation. 
47 | 48 | ----------------------------------------------------- 49 | 50 | Attribute Information: 51 | 52 | This dataset consists of 1593 records (rows) and 256 attributes 53 | (columns). 54 | 55 | Each record represents a handwritten digit, orginally scanned with a 56 | resolution of 256 grays scale (28). 57 | 58 | Each pixel of the each original scanned image was first stretched, and 59 | after scaled between 0 and 1 (setting to 0 every pixel whose value was 60 | under tha value 127 of the grey scale (127 included) and setting to 1 61 | each pixel whose orinal value in the grey scale was over 127). 62 | 63 | Finally, each binary image was scaled again into a 16x16 square box (the 64 | final 256 binary attributes). 65 | 66 | ----------------------------------------------------- 67 | 68 | Relevant Papers: 69 | 70 | M Buscema, MetaNet: The Theory of Independent Judges, in Substance Use & 71 | Misuse 33(2)1998, pp 439-461. 72 | 73 | ----------------------------------------------------- 74 | 75 | Citation Request: 76 | 77 | Semeion Research Center of Sciences of Communication, via Sersale 117, 78 | 00128 Rome, Italy 79 | Tattile Via Gaetano Donizetti, 1-3-5,25030 Mairano (Brescia), Italy. 
80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /handwritten_ex/svm.cpp: -------------------------------------------------------------------------------- 1 | // Example : Support Vector Machine (SVM) learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : handwritten_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | // Version : 0.2 8 | 9 | // Copyright (c) 2011 School of Engineering, Cranfield University 10 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 11 | 12 | #include // opencv general include file 13 | #include // opencv machine learning include file 14 | 15 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 16 | 17 | #include 18 | 19 | /******************************************************************************/ 20 | 21 | // use SVM "grid search" for kernel parameters 22 | 23 | #define USE_OPENCV_GRID_SEARCH_AUTOTRAIN 1 // set to 0 to set SVM parameters manually 24 | 25 | /******************************************************************************/ 26 | 27 | #define NUMBER_OF_TRAINING_SAMPLES 797 28 | #define ATTRIBUTES_PER_SAMPLE 256 29 | #define NUMBER_OF_TESTING_SAMPLES 796 30 | 31 | #define NUMBER_OF_CLASSES 10 32 | 33 | // N.B. 
classes are integer handwritten digits in range 0-9 34 | 35 | /******************************************************************************/ 36 | 37 | // loads the sample database from file (which is a CSV text file) 38 | 39 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 40 | int n_samples ) 41 | { 42 | float tmpf; 43 | 44 | // if we can't read the input file then return 0 45 | FILE* f = fopen( filename, "r" ); 46 | if( !f ) 47 | { 48 | printf("ERROR: cannot read file %s\n", filename); 49 | return 0; // all not OK 50 | } 51 | 52 | // for each sample in the file 53 | 54 | for(int line = 0; line < n_samples; line++) 55 | { 56 | 57 | // for each attribute on the line in the file 58 | 59 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 60 | { 61 | if (attribute < ATTRIBUTES_PER_SAMPLE) 62 | { 63 | 64 | // first 256 elements (0-255) in each line are the attributes 65 | 66 | fscanf(f, "%f,", &tmpf); 67 | data.at(line, attribute) = tmpf; 68 | 69 | } 70 | else if (attribute == ATTRIBUTES_PER_SAMPLE) 71 | { 72 | 73 | // attribute 256 is the class label {0 ... 
9} 74 | 75 | fscanf(f, "%f,", &tmpf); 76 | classes.at(line, 0) = tmpf; 77 | } 78 | } 79 | } 80 | 81 | fclose(f); 82 | 83 | return 1; // all OK 84 | } 85 | 86 | /******************************************************************************/ 87 | 88 | int main( int argc, char** argv ) 89 | { 90 | // lets just check the version first 91 | 92 | printf ("OpenCV version %s (%d.%d.%d)\n", 93 | CV_VERSION, 94 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 95 | 96 | // define training data storage matrices (one for attribute examples, one 97 | // for classifications) 98 | 99 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 100 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 101 | 102 | //define testing data storage matrices 103 | 104 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 105 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 106 | 107 | // load training and testing data sets 108 | 109 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 110 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 111 | { 112 | // define the parameters for training the SVM (kernel + SVMtype type used for auto-training, 113 | // other parameters for manual only) 114 | 115 | CvSVMParams params = CvSVMParams( 116 | CvSVM::C_SVC, // Type of SVM, here N classes (see manual) 117 | CvSVM::LINEAR, // kernel type (see manual) 118 | 0.0, // kernel parameter (degree) for poly kernel only 119 | 0.0, // kernel parameter (gamma) for poly/rbf kernel only 120 | 0.0, // kernel parameter (coef0) for poly/sigmoid kernel only 121 | 10, // SVM optimization parameter C 122 | 0, // SVM optimization parameter nu (not used for N classe SVM) 123 | 0, // SVM optimization parameter p (not used for N classe SVM) 124 | NULL, // class wieghts (or priors) 125 | // Optional 
weights, assigned to particular classes. 126 | // They are multiplied by C and thus affect the misclassification 127 | // penalty for different classes. The larger weight, the larger penalty 128 | // on misclassification of data from the corresponding class. 129 | 130 | // termination criteria for learning algorithm 131 | 132 | cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 0.000001) 133 | 134 | ); 135 | 136 | // train SVM classifier (using training data) 137 | 138 | printf( "\nUsing training database: %s\n\n", argv[1]); 139 | CvSVM* svm = new CvSVM; 140 | 141 | #if (USE_OPENCV_GRID_SEARCH_AUTOTRAIN) 142 | 143 | // train using auto training parameter grid search if it is available 144 | // N.B. this does not search kernel choice 145 | 146 | svm->train_auto(training_data, training_classifications, Mat(), Mat(), params, 10); 147 | params = svm->get_params(); 148 | printf( "\nUsing optimal parameters degree %f, gamma %f, ceof0 %f\n\t C %f, nu %f, p %f\n", 149 | params.degree, params.gamma, params.coef0, params.C, params.nu, params.p); 150 | 151 | #else 152 | 153 | // otherwise use regular training and use parameters manually specified above 154 | 155 | svm->train(training_data, training_classifications, Mat(), Mat(), params); 156 | 157 | #endif 158 | 159 | // get the number of support vectors used to define the SVM decision boundary 160 | 161 | printf("Number of support vectors for trained SVM = %i\n", svm->get_support_vector_count()); 162 | 163 | // perform classifier testing and report results 164 | 165 | Mat test_sample; 166 | int correct_class = 0; 167 | int wrong_class = 0; 168 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 169 | float result; 170 | 171 | printf( "\nUsing testing database: %s\n\n", argv[2]); 172 | 173 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 174 | { 175 | 176 | // extract a row from the testing matrix 177 | 178 | test_sample = testing_data.row(tsample); 179 | 180 | // run SVM classifier 
181 | 182 | result = svm->predict(test_sample); 183 | 184 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result); 185 | 186 | // if the prediction and the (true) testing classification are the same 187 | // (N.B. openCV uses a floating point implementation!) 188 | 189 | if (fabs(result - testing_classifications.at(tsample, 0)) 190 | >= FLT_EPSILON) 191 | { 192 | // if they differ more than floating point error => wrong class 193 | 194 | wrong_class++; 195 | false_positives[(int) testing_classifications.at(tsample, 0)]++; 196 | 197 | } 198 | else 199 | { 200 | 201 | // otherwise correct 202 | 203 | correct_class++; 204 | } 205 | } 206 | 207 | printf( "\nResults on the testing database: %s\n" 208 | "\tCorrect classification: %d (%g%%)\n" 209 | "\tWrong classifications: %d (%g%%)\n", 210 | argv[2], 211 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 212 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 213 | 214 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 215 | { 216 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 217 | false_positives[i], 218 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 219 | } 220 | 221 | 222 | // all OK : main returns 0 223 | 224 | return 0; 225 | } 226 | 227 | // not OK : main returns -1 228 | 229 | return -1; 230 | } 231 | /******************************************************************************/ 232 | -------------------------------------------------------------------------------- /opticaldigits_ex/boosttree.cpp: -------------------------------------------------------------------------------- 1 | // Example : boosted tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2011 School of Engineering, Cranfield University 9 | // License : LGPL - 
http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 3823 22 | #define ATTRIBUTES_PER_SAMPLE 64 23 | #define NUMBER_OF_TESTING_SAMPLES 1797 24 | 25 | #define NUMBER_OF_CLASSES 10 26 | 27 | // N.B. classes are integer handwritten digits in range 0-9 28 | 29 | /******************************************************************************/ 30 | 31 | // loads the sample database from file (which is a CSV text file) 32 | 33 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 34 | int n_samples ) 35 | { 36 | float tmp; 37 | 38 | // if we can't read the input file then return 0 39 | FILE* f = fopen( filename, "r" ); 40 | if( !f ) 41 | { 42 | printf("ERROR: cannot read file %s\n", filename); 43 | return 0; // all not OK 44 | } 45 | 46 | // for each sample in the file 47 | 48 | for(int line = 0; line < n_samples; line++) 49 | { 50 | 51 | // for each attribute on the line in the file 52 | 53 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 54 | { 55 | if (attribute < 64) 56 | { 57 | 58 | // first 64 elements (0-63) in each line are the attributes 59 | 60 | fscanf(f, "%f,", &tmp); 61 | data.at(line, attribute) = tmp; 62 | // printf("%f,", data.at(line, attribute)); 63 | 64 | } 65 | else if (attribute == 64) 66 | { 67 | 68 | // attribute 65 is the class label {0 ... 
9} 69 | 70 | fscanf(f, "%f,", &tmp); 71 | classes.at(line, 0) = tmp; 72 | // printf("%f\n", classes.at(line, 0)); 73 | 74 | } 75 | } 76 | } 77 | 78 | fclose(f); 79 | 80 | return 1; // all OK 81 | } 82 | 83 | /******************************************************************************/ 84 | 85 | int main( int argc, char** argv ) 86 | { 87 | // lets just check the version first 88 | 89 | printf ("OpenCV version %s (%d.%d.%d)\n", 90 | CV_VERSION, 91 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 92 | 93 | // define training data storage matrices (one for attribute examples, one 94 | // for classifications) 95 | 96 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 97 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 98 | 99 | //define testing data storage matrices 100 | 101 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 102 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 103 | 104 | // load training and testing data sets 105 | 106 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 107 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 108 | { 109 | // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 110 | // 111 | // As currently boosted tree classifier in OpenCV can only be trained 112 | // for 2-class problems, we transform the training database by 113 | // "unrolling" each training sample as many times as the number of 114 | // classes (10) that we have. 
115 | // 116 | // In "unrolling" we add an additional attribute to each training 117 | // sample that contains the classification - here 10 new samples 118 | // are added for every original sample, one for each possible class 119 | // but only one with the correct class as an additional attribute 120 | // value has a new binary class of 1, all the rest of the new samples 121 | // have a new binary class of 0. 122 | // 123 | // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 124 | 125 | Mat new_data = Mat(NUMBER_OF_TRAINING_SAMPLES*NUMBER_OF_CLASSES, ATTRIBUTES_PER_SAMPLE + 1, CV_32F ); 126 | Mat new_responses = Mat(NUMBER_OF_TRAINING_SAMPLES*NUMBER_OF_CLASSES, 1, CV_32S ); 127 | 128 | // 1. unroll the training samples 129 | 130 | printf( "\nUnrolling the database..."); 131 | fflush(NULL); 132 | for(int i = 0; i < NUMBER_OF_TRAINING_SAMPLES; i++ ) 133 | { 134 | for(int j = 0; j < NUMBER_OF_CLASSES; j++ ) 135 | { 136 | for(int k = 0; k < ATTRIBUTES_PER_SAMPLE; k++ ) 137 | { 138 | 139 | // copy over the attribute data 140 | 141 | new_data.at((i * NUMBER_OF_CLASSES) + j, k) = training_data.at(i, k); 142 | 143 | } 144 | 145 | // set the new attribute to the original class 146 | 147 | new_data.at((i * NUMBER_OF_CLASSES) + j, ATTRIBUTES_PER_SAMPLE) = (float) j; 148 | 149 | // set the new binary class 150 | 151 | if ( ( (int) training_classifications.at( i, 0)) == j) 152 | { 153 | new_responses.at((i * NUMBER_OF_CLASSES) + j, 0) = 1; 154 | } 155 | else 156 | { 157 | new_responses.at((i * NUMBER_OF_CLASSES) + j, 0) = 0; 158 | } 159 | } 160 | } 161 | printf( "Done\n"); 162 | 163 | // 2. 
Unroll the type mask 164 | 165 | // define all the attributes as numerical 166 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 167 | // that can be assigned on a per attribute basis 168 | 169 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 2, 1, CV_8U ); 170 | var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical 171 | 172 | // this is a classification problem (i.e. predict a discrete number of class 173 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 174 | // *** the last (new) class indicator attribute, as well 175 | // *** as the new (binary) response (class) are categorical 176 | 177 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 178 | var_type.at(ATTRIBUTES_PER_SAMPLE + 1, 0) = CV_VAR_CATEGORICAL; 179 | 180 | // define the parameters for training the boosted trees 181 | 182 | // weights of each classification for classes 183 | // N.B. in the "unrolled" data we have an imbalance in the training examples 184 | 185 | float priors[] = {( NUMBER_OF_CLASSES - 1),1}; 186 | //float priors[] = {1,1}; 187 | 188 | // set the boost parameters 189 | 190 | CvBoostParams params = CvBoostParams(CvBoost::REAL, // boosting type 191 | 100, // number of weak classifiers 192 | 0.95, // trim rate 193 | 194 | // trim rate is a threshold (0->1) 195 | // used to eliminate samples with 196 | // boosting weight < 1.0 - (trim rate) 197 | // from the next round of boosting 198 | // Used for computational saving only. 
199 | 200 | 25, // max depth of trees 201 | false, // compute surrogate split, no missing data 202 | priors ); 203 | 204 | // as CvBoostParams inherits from CvDTreeParams we can also set generic 205 | // parameters of decision trees too (otherwise they use the defaults) 206 | 207 | params.max_categories = 15; // max number of categories (use sub-optimal algorithm for larger numbers) 208 | params.min_sample_count = 5; // min sample count 209 | params.cv_folds = 1; // cross validation folds 210 | params.use_1se_rule = false; // use 1SE rule => smaller tree 211 | params.truncate_pruned_tree = false; // throw away the pruned tree branches 212 | params.regression_accuracy = 0.0; // regression accuracy: N/A here 213 | 214 | 215 | // train boosted tree classifier (using training data) 216 | 217 | printf( "\nUsing training database: %s\n\n", argv[1]); 218 | printf( "Training .... (this may take several minutes) .... "); 219 | fflush(NULL); 220 | 221 | CvBoost* boostTree = new CvBoost; 222 | 223 | boostTree->train( new_data, CV_ROW_SAMPLE, new_responses, Mat(), Mat(), var_type, 224 | Mat(), params, false); 225 | printf( "Done."); 226 | 227 | // perform classifier testing and report results 228 | 229 | Mat test_sample; 230 | int correct_class = 0; 231 | int wrong_class = 0; 232 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 233 | Mat weak_responses = Mat( 1, boostTree->get_weak_predictors()->total, CV_32F ); 234 | Mat new_sample = Mat( 1, ATTRIBUTES_PER_SAMPLE + 1, CV_32F ); 235 | int best_class = 0; // best class returned by weak classifier 236 | double max_sum; // highest score for a given class 237 | 238 | printf( "\nUsing testing database: %s\n\n", argv[2]); 239 | 240 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 241 | { 242 | 243 | // extract a row from the testing matrix 244 | 245 | test_sample = testing_data.row(tsample); 246 | 247 | // convert it to the new "un-rolled" format of input 248 | 249 | for(int k = 0; k < 
ATTRIBUTES_PER_SAMPLE; k++ ) 250 | { 251 | new_sample.at( 0, k) = test_sample.at(0, k); 252 | } 253 | 254 | // run boosted tree prediction (for N classes and take the 255 | // maximal response of all the weak classifiers) 256 | 257 | max_sum = INT_MIN; // maximum starts off as Min. Int. 258 | 259 | for(int c = 0; c < NUMBER_OF_CLASSES; c++ ) 260 | { 261 | // set the additional attribute to original class 262 | 263 | new_sample.at(0, ATTRIBUTES_PER_SAMPLE) = (float) c; 264 | 265 | // run prediction (getting also the responses of the weak classifiers) 266 | // - N.B. here we have to use CvMat() casts and take the address of temporary 267 | // in order to use the available call that gives us the weak responses 268 | // For this reason we also have to pass a NULL pointer for the missing data 269 | 270 | boostTree->predict(&CvMat((new_sample)), NULL, &CvMat(weak_responses)); 271 | 272 | // obtain the sum of the responses from the weak classifiers 273 | 274 | Scalar responseSum = sum( weak_responses ); 275 | 276 | // record the "best class" - i.e. one with maximal response 277 | // from weak classifiers 278 | 279 | if( responseSum.val[0] > max_sum) 280 | { 281 | max_sum = (double) responseSum.val[0]; 282 | best_class = c; 283 | } 284 | } 285 | 286 | 287 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, best_class); 288 | 289 | // if the prediction and the (true) testing classification are the same 290 | // (N.B. openCV uses a floating point decision tree implementation!) 
291 | 292 | if (fabs(((float) (best_class)) - testing_classifications.at( tsample, 0)) 293 | >= FLT_EPSILON) 294 | { 295 | // if they differ more than floating point error => wrong class 296 | 297 | wrong_class++; 298 | 299 | false_positives[best_class]++; 300 | 301 | } 302 | else 303 | { 304 | 305 | // otherwise correct 306 | 307 | correct_class++; 308 | } 309 | } 310 | 311 | printf( "\nResults on the testing database: %s\n" 312 | "\tCorrect classification: %d (%g%%)\n" 313 | "\tWrong classifications: %d (%g%%)\n", 314 | argv[2], 315 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 316 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 317 | 318 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 319 | { 320 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 321 | false_positives[i], 322 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 323 | } 324 | 325 | // all matrix memory free by destructors 326 | 327 | // all OK : main returns 0 328 | 329 | return 0; 330 | } 331 | 332 | // not OK : main returns -1 333 | 334 | return -1; 335 | } 336 | /******************************************************************************/ 337 | -------------------------------------------------------------------------------- /opticaldigits_ex/decisiontree.cpp: -------------------------------------------------------------------------------- 1 | // Example : decision tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2011 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | 
/******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 3823 22 | #define ATTRIBUTES_PER_SAMPLE 64 23 | #define NUMBER_OF_TESTING_SAMPLES 1797 24 | 25 | #define NUMBER_OF_CLASSES 10 26 | 27 | // N.B. classes are integer handwritten digits in range 0-9 28 | 29 | /******************************************************************************/ 30 | 31 | // loads the sample database from file (which is a CSV text file) 32 | 33 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 34 | int n_samples ) 35 | { 36 | float tmp; 37 | 38 | // if we can't read the input file then return 0 39 | FILE* f = fopen( filename, "r" ); 40 | if( !f ) 41 | { 42 | printf("ERROR: cannot read file %s\n", filename); 43 | return 0; // all not OK 44 | } 45 | 46 | // for each sample in the file 47 | 48 | for(int line = 0; line < n_samples; line++) 49 | { 50 | 51 | // for each attribute on the line in the file 52 | 53 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 54 | { 55 | if (attribute < 64) 56 | { 57 | 58 | // first 64 elements (0-63) in each line are the attributes 59 | 60 | fscanf(f, "%f,", &tmp); 61 | data.at(line, attribute) = tmp; 62 | // printf("%f,", data.at(line, attribute)); 63 | 64 | } 65 | else if (attribute == 64) 66 | { 67 | 68 | // attribute 65 is the class label {0 ... 
9} 69 | 70 | fscanf(f, "%f,", &tmp); 71 | classes.at(line, 0) = tmp; 72 | // printf("%f\n", classes.at(line, 0)); 73 | 74 | } 75 | } 76 | } 77 | 78 | fclose(f); 79 | 80 | return 1; // all OK 81 | } 82 | 83 | /******************************************************************************/ 84 | 85 | int main( int argc, char** argv ) 86 | { 87 | // lets just check the version first 88 | 89 | printf ("OpenCV version %s (%d.%d.%d)\n", 90 | CV_VERSION, 91 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 92 | 93 | // define training data storage matrices (one for attribute examples, one 94 | // for classifications) 95 | 96 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 97 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 98 | 99 | //define testing data storage matrices 100 | 101 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 102 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 103 | 104 | // define all the attributes as numerical 105 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 106 | // that can be assigned on a per attribute basis 107 | 108 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 109 | var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical 110 | 111 | // this is a classification problem (i.e. 
predict a discrete number of class 112 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 113 | 114 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 115 | 116 | CvDTreeNode* resultNode; // node returned from a prediction 117 | 118 | // load training and testing data sets 119 | 120 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 121 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 122 | { 123 | // define the parameters for training the decision tree 124 | 125 | float priors[] = {1,1,1,1,1,1,1,1,1,1}; // weights of each classification for classes 126 | // (all equal as equal samples of each digit) 127 | 128 | CvDTreeParams params = CvDTreeParams(25, // max depth 129 | 5, // min sample count 130 | 0, // regression accuracy: N/A here 131 | false, // compute surrogate split, no missing data 132 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 133 | 15, // the number of cross-validation folds 134 | false, // use 1SE rule => smaller tree 135 | false, // throw away the pruned tree branches 136 | priors // the array of priors 137 | ); 138 | 139 | 140 | // train decision tree classifier (using training data) 141 | 142 | printf( "\nUsing training database: %s\n\n", argv[1]); 143 | CvDTree* dtree = new CvDTree; 144 | 145 | dtree->train(training_data, CV_ROW_SAMPLE, training_classifications, 146 | Mat(), Mat(), var_type, Mat(), params); 147 | 148 | // perform classifier testing and report results 149 | 150 | Mat test_sample; 151 | int correct_class = 0; 152 | int wrong_class = 0; 153 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 154 | 155 | printf( "\nUsing testing database: %s\n\n", argv[2]); 156 | 157 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 158 | { 159 | 160 | // extract a row from the testing matrix 161 | 162 | test_sample = 
testing_data.row(tsample); 163 | 164 | // run decision tree prediction 165 | 166 | resultNode = dtree->predict(test_sample, Mat(), false); 167 | 168 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) (resultNode->value)); 169 | 170 | // if the prediction and the (true) testing classification are the same 171 | // (N.B. openCV uses a floating point decision tree implementation!) 172 | 173 | if (fabs(resultNode->value - testing_classifications.at(tsample, 0)) 174 | >= FLT_EPSILON) 175 | 176 | { 177 | // if they differ more than floating point error => wrong class 178 | 179 | wrong_class++; 180 | 181 | false_positives[(int) resultNode->value]++; 182 | 183 | } 184 | else 185 | { 186 | 187 | // otherwise correct 188 | 189 | correct_class++; 190 | } 191 | } 192 | 193 | printf( "\nResults on the testing database: %s\n" 194 | "\tCorrect classification: %d (%g%%)\n" 195 | "\tWrong classifications: %d (%g%%)\n", 196 | argv[2], 197 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 198 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 199 | 200 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 201 | { 202 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 203 | false_positives[i], 204 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 205 | } 206 | 207 | // all matrix memory free by destructors 208 | 209 | // all OK : main returns 0 210 | 211 | return 0; 212 | } 213 | 214 | // not OK : main returns -1 215 | 216 | return -1; 217 | } 218 | /******************************************************************************/ 219 | -------------------------------------------------------------------------------- /opticaldigits_ex/extremerandomforest.cpp: -------------------------------------------------------------------------------- 1 | // Example : extremely random forest (tree) learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : 
opticaldigits_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2012 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 3823 22 | #define ATTRIBUTES_PER_SAMPLE 64 23 | #define NUMBER_OF_TESTING_SAMPLES 1797 24 | 25 | #define NUMBER_OF_CLASSES 10 26 | 27 | // N.B. classes are integer handwritten digits in range 0-9 28 | 29 | /******************************************************************************/ 30 | 31 | // loads the sample database from file (which is a CSV text file) 32 | 33 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 34 | int n_samples ) 35 | { 36 | float tmp; 37 | 38 | // if we can't read the input file then return 0 39 | FILE* f = fopen( filename, "r" ); 40 | if( !f ) 41 | { 42 | printf("ERROR: cannot read file %s\n", filename); 43 | return 0; // all not OK 44 | } 45 | 46 | // for each sample in the file 47 | 48 | for(int line = 0; line < n_samples; line++) 49 | { 50 | 51 | // for each attribute on the line in the file 52 | 53 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 54 | { 55 | if (attribute < 64) 56 | { 57 | 58 | // first 64 elements (0-63) in each line are the attributes 59 | 60 | fscanf(f, "%f,", &tmp); 61 | data.at(line, attribute) = tmp; 62 | // printf("%f,", data.at(line, attribute)); 63 | 64 | } 65 | else if (attribute == 64) 66 | { 67 | 68 | // attribute 65 is the class label {0 ... 
9} 69 | 70 | fscanf(f, "%f,", &tmp); 71 | classes.at(line, 0) = tmp; 72 | // printf("%f\n", classes.at(line, 0)); 73 | 74 | } 75 | } 76 | } 77 | 78 | fclose(f); 79 | 80 | return 1; // all OK 81 | } 82 | 83 | /******************************************************************************/ 84 | 85 | int main( int argc, char** argv ) 86 | { 87 | // lets just check the version first 88 | 89 | printf ("OpenCV version %s (%d.%d.%d)\n", 90 | CV_VERSION, 91 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 92 | 93 | // define training data storage matrices (one for attribute examples, one 94 | // for classifications) 95 | 96 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 97 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 98 | 99 | //define testing data storage matrices 100 | 101 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 102 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 103 | 104 | // define all the attributes as numerical 105 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 106 | // that can be assigned on a per attribute basis 107 | 108 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 109 | var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical 110 | 111 | // this is a classification problem (i.e. 
predict a discrete number of class 112 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 113 | 114 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 115 | 116 | double result; // value returned from a prediction 117 | 118 | // load training and testing data sets 119 | 120 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 121 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 122 | { 123 | // define the parameters for training the random forest (trees) 124 | 125 | float priors[] = {1,1,1,1,1,1,1,1,1,1}; // weights of each classification for classes 126 | // (all equal as equal samples of each digit) 127 | 128 | CvRTParams params = CvRTParams(25, // max depth 129 | 5, // min sample count 130 | 0, // regression accuracy: N/A here 131 | false, // compute surrogate split, no missing data 132 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 133 | priors, // the array of priors 134 | false, // calculate variable importance 135 | 4, // number of variables randomly selected at node and used to find the best split(s). 
136 | 100, // max number of trees in the forest 137 | 0.01f, // forrest accuracy 138 | CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination cirteria 139 | ); 140 | 141 | // train extreme random forest classifier (using training data) 142 | 143 | printf( "\nUsing training database: %s\n\n", argv[1]); 144 | CvERTrees* rtree = new CvERTrees; 145 | 146 | rtree->train(training_data, CV_ROW_SAMPLE, training_classifications, 147 | Mat(), Mat(), var_type, Mat(), params); 148 | 149 | // perform classifier testing and report results 150 | 151 | Mat test_sample; 152 | int correct_class = 0; 153 | int wrong_class = 0; 154 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 155 | 156 | printf( "\nUsing testing database: %s\n\n", argv[2]); 157 | 158 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 159 | { 160 | 161 | // extract a row from the testing matrix 162 | 163 | test_sample = testing_data.row(tsample); 164 | 165 | // run random forest prediction 166 | 167 | result = rtree->predict(test_sample, Mat()); 168 | 169 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result); 170 | 171 | // if the prediction and the (true) testing classification are the same 172 | // (N.B. openCV uses a floating point decision tree implementation!) 
173 | 174 | if (fabs(result - testing_classifications.at(tsample, 0)) 175 | >= FLT_EPSILON) 176 | { 177 | // if they differ more than floating point error => wrong class 178 | 179 | wrong_class++; 180 | 181 | false_positives[(int) result]++; 182 | 183 | } 184 | else 185 | { 186 | 187 | // otherwise correct 188 | 189 | correct_class++; 190 | } 191 | } 192 | 193 | printf( "\nResults on the testing database: %s\n" 194 | "\tCorrect classification: %d (%g%%)\n" 195 | "\tWrong classifications: %d (%g%%)\n", 196 | argv[2], 197 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 198 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 199 | 200 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 201 | { 202 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 203 | false_positives[i], 204 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 205 | } 206 | 207 | 208 | // all matrix memory free by destructors 209 | 210 | 211 | // all OK : main returns 0 212 | 213 | return 0; 214 | } 215 | 216 | // not OK : main returns -1 217 | 218 | return -1; 219 | } 220 | /******************************************************************************/ 221 | -------------------------------------------------------------------------------- /opticaldigits_ex/knn.cpp: -------------------------------------------------------------------------------- 1 | // Example : weighted knn digit classification 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Copyright (c) 2013 Toby Breckon, toby.breckon@durham.ac.uk 7 | // School of Engineering and Computing Sciences, Durham University 8 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 9 | 10 | #include "opencv2/core/core_c.h" 11 | #include "opencv2/ml/ml.hpp" 12 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 13 | 14 | #include 15 | using namespace std; 16 | 17 | 
/******************************************************************************/ 18 | // global definitions 19 | 20 | #define NUMBER_OF_TRAINING_SAMPLES 3823 21 | #define ATTRIBUTES_PER_SAMPLE 64 22 | #define NUMBER_OF_TESTING_SAMPLES 1797 23 | 24 | #define NUMBER_OF_CLASSES 10 // digits 0->9 25 | 26 | // "self load" data from CSV file in Mat() objects 27 | // filename = file to load 28 | // data = training or testing attributes (1 sample per row) 29 | // responses = training or testing classes (1 sample per row) 30 | // n_samples = number of samples in the set 31 | 32 | int read_data_from_csv(const char* filename, Mat &data, Mat &responses, int n_samples ); 33 | 34 | /******************************************************************************/ 35 | 36 | int main( int argc, char** argv ) 37 | { 38 | // define data set objects 39 | 40 | Mat training_data; 41 | Mat training_responses; 42 | 43 | Mat testing_data; 44 | Mat testing_responses; 45 | 46 | // load training and testing data sets (either from command line or *.{test|train} files 47 | 48 | if (((argc > 1) && (!(read_data_from_csv(argv[1], 49 | training_data, training_responses, NUMBER_OF_TRAINING_SAMPLES)) 50 | && !(read_data_from_csv(argv[2], 51 | testing_data, testing_responses, NUMBER_OF_TESTING_SAMPLES)))) 52 | || (!(read_data_from_csv("optdigits.train", 53 | training_data, training_responses, NUMBER_OF_TRAINING_SAMPLES)) 54 | && !(read_data_from_csv("optdigits.test", 55 | testing_data, testing_responses, NUMBER_OF_TESTING_SAMPLES))) 56 | ) 57 | { 58 | 59 | CvKNearest knn; // knn classifier object 60 | 61 | // train kNN classifier (using training data) 62 | 63 | knn.train(training_data, training_responses, Mat(), false, 32, false); 64 | 65 | // perform classifier testing and report results 66 | 67 | Mat test_sample; 68 | int correct_class = 0; 69 | int wrong_class = 0; 70 | Mat false_positives = Mat::zeros(NUMBER_OF_CLASSES, 1, CV_32S); 71 | float result; 72 | 73 | // for each test example i the test 
set 74 | 75 | for (int tsample = 0; tsample < testing_data.rows; tsample++) 76 | { 77 | 78 | // extract a row from the testing matrix 79 | 80 | test_sample = testing_data.row(tsample); 81 | 82 | // run kNN classificaation (for k = 7) 83 | 84 | result = knn.find_nearest(test_sample, 7); 85 | 86 | printf("Test Example %i -> class result (digit %i)\n", 87 | tsample, ((int) result)); 88 | 89 | // if the prediction and the (true) testing classification are the same 90 | // (within the bounds of floating point error for cross-platfom safety) 91 | 92 | if (fabs(result - testing_responses.at(tsample, 0)) 93 | >= FLT_EPSILON) 94 | { 95 | // if they differ more than floating point error => wrong class 96 | 97 | wrong_class++; 98 | false_positives.at((int) result, 0)++; 99 | 100 | } else { 101 | 102 | // otherwise correct 103 | 104 | correct_class++; 105 | } 106 | } 107 | 108 | printf( "\nResults on the testing database: %s\n" 109 | "\tCorrect classification: %d (%g%%)\n" 110 | "\tWrong classification: %d (%g%%)\n", 111 | (argc > 1) ? 
argv[2] : "optdigits.test", 112 | correct_class, (double) correct_class*100/testing_data.rows, 113 | wrong_class, (double) wrong_class*100/testing_data.rows); 114 | 115 | for (unsigned int c = 0; c < NUMBER_OF_CLASSES; c++) 116 | { 117 | printf( "\tClass (digit %i) false positives %d (%g%%)\n", c, 118 | false_positives.at(c,0), 119 | (((double) false_positives.at(c,0))*100) 120 | /testing_data.rows); 121 | } 122 | 123 | // on MS Windows wait to exit prompt 124 | #ifdef WIN32 125 | getchar(); 126 | #endif // WIN32 127 | 128 | // all OK : main returns 0 129 | 130 | return 0; 131 | } 132 | 133 | // not OK : main returns -1 134 | 135 | printf("usage: %s filename.train filename.test\n", argv[0]); 136 | printf("Failed to load training and testing data from specified files\n"); 137 | return -1; 138 | } 139 | /******************************************************************************/ 140 | 141 | // loads the sample database from file (which is a CSV text file) 142 | 143 | int read_data_from_csv(const char* filename, Mat &data, Mat &responses, int n_samples ) 144 | { 145 | data = Mat(n_samples, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 146 | responses = Mat(n_samples, 1, CV_32FC1); 147 | 148 | float tmp; 149 | 150 | // if we can't read the input file then return 0 151 | FILE* f = fopen( filename, "r" ); 152 | if( !f ) 153 | { 154 | printf("ERROR: cannot read file %s\n", filename); 155 | return 1; // all not OK 156 | } 157 | 158 | // for each sample in the file 159 | 160 | for(int line = 0; line < n_samples; line++) 161 | { 162 | 163 | // for each attribute on the line in the file 164 | 165 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 166 | { 167 | if (attribute < ATTRIBUTES_PER_SAMPLE) 168 | { 169 | 170 | // first 64 elements (0-63) in each line are the attributes 171 | 172 | fscanf(f, "%f,", &tmp); 173 | data.at(line, attribute) = tmp; 174 | // printf("%f,", data.at(line, attribute)); 175 | 176 | } 177 | else if (attribute == 
ATTRIBUTES_PER_SAMPLE) 178 | { 179 | 180 | // attribute 65 is the class label {0 ... 9} 181 | 182 | fscanf(f, "%f,", &tmp); 183 | responses.at(line, 0) = tmp; 184 | // printf("%f\n", classes.at(line, 0)); 185 | 186 | } 187 | } 188 | } 189 | 190 | fclose(f); 191 | 192 | return 0; // all OK 193 | } 194 | 195 | /******************************************************************************/ 196 | -------------------------------------------------------------------------------- /opticaldigits_ex/knn_weighted.cpp: -------------------------------------------------------------------------------- 1 | // Example : weighted knn digit classification 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Copyright (c) 2013 Toby Breckon, toby.breckon@durham.ac.uk 7 | // School of Engineering and Computing Sciences, Durham University 8 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 9 | 10 | #include "opencv2/core/core_c.h" 11 | #include "opencv2/ml/ml.hpp" 12 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 13 | 14 | #include 15 | using namespace std; 16 | 17 | /******************************************************************************/ 18 | // global definitions 19 | 20 | #define NUMBER_OF_CLASSES 10 // digits 0->9 21 | 22 | /******************************************************************************/ 23 | 24 | int main( int argc, char** argv ) 25 | { 26 | // define data loading objects 27 | 28 | CvMLData training_loader; 29 | CvMLData testing_loader; 30 | 31 | // load training and testing data sets (either from command line or *.{test|train} files 32 | 33 | if (((argc > 1) && (!(training_loader.read_csv(argv[1])) 34 | && !(testing_loader.read_csv(argv[2])))) 35 | || (!(training_loader.read_csv("optdigits.train")) 36 | && !(testing_loader.read_csv("optdigits.test"))) 37 | ) 38 | { 39 | 40 | CvKNearest knn; // knn classifier object 41 | 42 | // retrieve data from 
data loaders 43 | 44 | Mat training_data = 45 | (Mat(training_loader.get_values())).colRange(0,64); // 0->63 = attributes 46 | 47 | training_loader.set_response_idx(64); // 65th value is the classification 48 | Mat training_responses = training_loader.get_responses(); 49 | 50 | Mat testing_data = 51 | (Mat((testing_loader.get_values())).colRange(0,64)); // 0->63 = attributes 52 | 53 | testing_loader.set_response_idx(64); // 65th value is the classification 54 | Mat testing_responses = testing_loader.get_responses(); 55 | 56 | // train kNN classifier (using training data) 57 | 58 | knn.train(training_data, training_responses, Mat(), false, 32, false); 59 | 60 | // perform classifier testing and report results 61 | 62 | Mat test_sample; 63 | int correct_class = 0; 64 | int wrong_class = 0; 65 | Mat false_positives = Mat::zeros(NUMBER_OF_CLASSES, 1, CV_32S); 66 | Mat neighbourResponses, dists, results, weighted_results; 67 | double minVal, maxVal; // dummy variables for using minMaxLoc() 68 | Point result_class_location; 69 | int result_class; // resulting class with highest weighted knn score 70 | 71 | // for each test example i the test set 72 | 73 | for (int tsample = 0; tsample < testing_data.rows; tsample++) 74 | { 75 | 76 | // extract a row from the testing matrix 77 | 78 | test_sample = testing_data.row(tsample); 79 | 80 | // zero weighted results on each test iteration 81 | 82 | weighted_results = Mat::zeros(NUMBER_OF_CLASSES, 1, CV_32F); 83 | 84 | // run kNN classification (for k = 7) 85 | 86 | knn.find_nearest(test_sample, 7, results, neighbourResponses, dists); 87 | 88 | // perform weighted sum for all the classes that occur in the responses 89 | // from the k nearest neighbours based on distance from query sample 90 | 91 | for(int i=0; i < neighbourResponses.cols; i++) 92 | { 93 | weighted_results.at((int) neighbourResponses.at(0,i), 0) += 1.0 / pow((dists.at(0,i)),2.0); 94 | } 95 | 96 | // find the class with the maximum weighted sum (as the maximal y 
co-ordinate 97 | // of the resulting weighted_results matrix 98 | 99 | minMaxLoc(weighted_results, &minVal, &maxVal, 0, &result_class_location); 100 | result_class = result_class_location.y; // resulting class is in col location 101 | 102 | printf("Test Example %i -> class result (digit %i)\n", 103 | tsample, ((int) result_class)); 104 | 105 | // if the prediction and the (true) testing classification are the same 106 | // (within the bounds of floating point error for cross-platfom safety) 107 | 108 | if (fabs(((float) result_class) - testing_responses.at(tsample, 0)) 109 | >= FLT_EPSILON) 110 | { 111 | // if they differ more than floating point error => wrong class 112 | 113 | wrong_class++; 114 | false_positives.at(result_class, 0)++; 115 | 116 | } else { 117 | 118 | // otherwise correct 119 | 120 | correct_class++; 121 | } 122 | } 123 | 124 | printf( "\nResults on the testing database: %s\n" 125 | "\tCorrect classification: %d (%g%%)\n" 126 | "\tWrong classifications: %d (%g%%)\n", 127 | (argc > 1) ? 
argv[2] : "optdigits.test", 128 | correct_class, (double) correct_class*100/testing_data.rows, 129 | wrong_class, (double) wrong_class*100/testing_data.rows); 130 | 131 | for (unsigned int c = 0; c < NUMBER_OF_CLASSES; c++) 132 | { 133 | printf( "\tClass (digit %i) false positives %d (%g%%)\n", c, 134 | false_positives.at(c,0), 135 | (((double) false_positives.at(c,0))*100) 136 | /testing_data.rows); 137 | } 138 | 139 | // on MS Windows wait to exit prompt 140 | #ifdef WIN32 141 | getchar(); 142 | #endif // WIN32 143 | 144 | // all OK : main returns 0 145 | 146 | return 0; 147 | } 148 | 149 | // not OK : main returns -1 150 | 151 | printf("usage: %s filename.train filename.test\n", argv[0]); 152 | printf("Failed to load training and testing data from specified files\n"); 153 | return -1; 154 | } 155 | /******************************************************************************/ 156 | -------------------------------------------------------------------------------- /opticaldigits_ex/neuralnetwork.cpp: -------------------------------------------------------------------------------- 1 | // Example : neural network learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : optical_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2010 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | 20 | // global definitions (for speed and ease of use) 21 | 22 | #define NUMBER_OF_TRAINING_SAMPLES 3823 23 | #define ATTRIBUTES_PER_SAMPLE 64 24 | #define NUMBER_OF_TESTING_SAMPLES 1797 25 | 26 | #define NUMBER_OF_CLASSES 10 27 | 28 | // N.B. 
classes are integer handwritten digits in range 0-9 29 | 30 | /******************************************************************************/ 31 | 32 | // loads the sample database from file (which is a CSV text file) 33 | 34 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 35 | int n_samples ) 36 | { 37 | float tmp; 38 | 39 | // if we can't read the input file then return 0 40 | FILE* f = fopen( filename, "r" ); 41 | if( !f ) 42 | { 43 | printf("ERROR: cannot read file %s\n", filename); 44 | return 0; // all not OK 45 | } 46 | 47 | // for each sample in the file 48 | 49 | for(int line = 0; line < n_samples; line++) 50 | { 51 | 52 | // for each attribute on the line in the file 53 | 54 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 55 | { 56 | if (attribute < 64) 57 | { 58 | 59 | // first 64 elements (0-63) in each line are the attributes 60 | 61 | fscanf(f, "%f,", &tmp); 62 | data.at(line, attribute) = tmp; 63 | // printf("%f,", data.at(line, attribute)); 64 | 65 | } 66 | else if (attribute == 64) 67 | { 68 | 69 | // attribute 65 is the class label {0 ... 
9} 70 | 71 | fscanf(f, "%f,", &tmp); 72 | classes.at(line, (int) tmp) = 1.0; 73 | // printf("%f\n", classes.at(line, 0)); 74 | 75 | } 76 | } 77 | } 78 | 79 | fclose(f); 80 | 81 | return 1; // all OK 82 | } 83 | 84 | /******************************************************************************/ 85 | 86 | int main( int argc, char** argv ) 87 | { 88 | // lets just check the version first 89 | 90 | printf ("OpenCV version %s (%d.%d.%d)\n", 91 | CV_VERSION, 92 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 93 | 94 | // define training data storage matrices (one for attribute examples, one 95 | // for classifications) 96 | 97 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 98 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, NUMBER_OF_CLASSES, CV_32FC1); 99 | 100 | // define testing data storage matrices 101 | 102 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 103 | Mat testing_classifications = Mat::zeros(NUMBER_OF_TESTING_SAMPLES, NUMBER_OF_CLASSES, CV_32FC1); 104 | 105 | // define classification output vector 106 | 107 | Mat classificationResult = Mat(1, NUMBER_OF_CLASSES, CV_32FC1); 108 | Point max_loc = Point(0,0); 109 | 110 | // load training and testing data sets 111 | 112 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 113 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 114 | { 115 | // define the parameters for the neural network (MLP) 116 | 117 | // set the network to be 3 layer 64->10->10 118 | // - one input node per attribute in a sample 119 | // - 10 hidden nodes 120 | // - one output node per class 121 | 122 | // note that the OpenCV neural network (MLP) implementation does not 123 | // support categorical variables explicitly. 124 | // So, instead of the output class label, we will use 125 | // a binary vector of {0,0 ... 
1,0,0} components (one element by class) 126 | // for training and therefore, MLP will give us a vector of "probabilities" 127 | // at the prediction stage - the highest probability can be accepted 128 | // as the "winning" class label output by the network 129 | 130 | int layers_d[] = { ATTRIBUTES_PER_SAMPLE, 10, NUMBER_OF_CLASSES}; 131 | Mat layers = Mat(1,3,CV_32SC1); 132 | layers.at(0,0) = layers_d[0]; 133 | layers.at(0,1) = layers_d[1]; 134 | layers.at(0,2) = layers_d[2]; 135 | 136 | // create the network using a sigmoid function with alpha and beta 137 | // parameters 0.6 and 1 specified respectively (refer to manual) 138 | 139 | CvANN_MLP* nnetwork = new CvANN_MLP; 140 | nnetwork->create(layers, CvANN_MLP::SIGMOID_SYM, 0.6, 1); 141 | 142 | // set the training parameters 143 | 144 | CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams( 145 | 146 | // terminate the training after either 1000 147 | // iterations or a very small change in the 148 | // network wieghts below the specified value 149 | 150 | cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 10000, 0.000001), 151 | 152 | // use backpropogation for training 153 | 154 | CvANN_MLP_TrainParams::BACKPROP, 155 | 156 | // co-efficents for backpropogation training 157 | // (refer to manual) 158 | 159 | 0.1, 160 | 0.1); 161 | 162 | // train the neural network (using training data) 163 | 164 | printf( "\nUsing training database: %s\n", argv[1]); 165 | 166 | int iterations = nnetwork->train(training_data, training_classifications, Mat(), Mat(), params); 167 | 168 | printf( "Training iterations: %i\n\n", iterations); 169 | 170 | // perform classifier testing and report results 171 | 172 | Mat test_sample; 173 | int correct_class = 0; 174 | int wrong_class = 0; 175 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 176 | 177 | printf( "\nUsing testing database: %s\n\n", argv[2]); 178 | 179 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 180 | { 181 | 182 | // extract a row 
from the testing matrix 183 | 184 | test_sample = testing_data.row(tsample); 185 | 186 | // run neural network prediction 187 | 188 | nnetwork->predict(test_sample, classificationResult); 189 | 190 | // The NN gives out a vector of probabilities for each class 191 | // We take the class with the highest "probability" 192 | // for simplicity (but we really should also check separation 193 | // of the different "probabilities" in this vector - what if 194 | // two classes have very similar values ?) 195 | 196 | minMaxLoc(classificationResult, 0, 0, 0, &max_loc); 197 | 198 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, max_loc.x); 199 | 200 | // if the corresponding location in the testing classifications 201 | // is not "1" (i.e. this is the correct class) then record this 202 | 203 | if (!(testing_classifications.at(tsample, max_loc.x))) 204 | { 205 | // if they differ more than floating point error => wrong class 206 | 207 | wrong_class++; 208 | 209 | false_positives[(int) max_loc.x]++; 210 | 211 | } 212 | else 213 | { 214 | 215 | // otherwise correct 216 | 217 | correct_class++; 218 | } 219 | } 220 | 221 | printf( "\nResults on the testing database: %s\n" 222 | "\tCorrect classification: %d (%g%%)\n" 223 | "\tWrong classifications: %d (%g%%)\n", 224 | argv[2], 225 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 226 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 227 | 228 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 229 | { 230 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 231 | false_positives[i], 232 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 233 | } 234 | 235 | // all OK : main returns 0 236 | 237 | return 0; 238 | } 239 | 240 | // not OK : main returns -1 241 | 242 | return -1; 243 | } 244 | /******************************************************************************/ 245 | -------------------------------------------------------------------------------- 
/opticaldigits_ex/normalbayes.cpp: -------------------------------------------------------------------------------- 1 | // Example : normal / naive bayesian learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets opticaldigits_ex 5 | 6 | // N.B. *** This bayesian Fifier assumes that the attribute (or feature) 7 | // vectors for each class are normally distributed and independent *** 8 | // - see OpenCV manual 9 | 10 | // "It’s "naïve" because it assumes that all the features (attributes) are 11 | // independent from one another even though this is seldom the case 12 | // (e.g., finding one eye usually implies that another eye is lurking nearby). 13 | // Zhang discusses possible reasons for the sometimes surprisingly good 14 | // performance of this classifier [Zhang04]." - Learning OpenCV [Bradski 2009]. 15 | 16 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 17 | 18 | // Copyright (c) 2013 School of Engineering, Cranfield University 19 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 20 | 21 | #include // opencv general include file 22 | #include // opencv machine learning include file 23 | 24 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 25 | 26 | #include 27 | 28 | /******************************************************************************/ 29 | 30 | // global definitions (for speed and ease of use) 31 | 32 | #define NUMBER_OF_TRAINING_SAMPLES 3823 33 | #define ATTRIBUTES_PER_SAMPLE 64 34 | #define NUMBER_OF_TESTING_SAMPLES 1797 35 | 36 | #define NUMBER_OF_CLASSES 10 37 | 38 | // N.B. 
classes are integer handwritten digits in range 0-9 39 | 40 | /******************************************************************************/ 41 | 42 | // loads the sample database from file (which is a CSV text file) 43 | 44 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 45 | int n_samples ) 46 | { 47 | float tmp; 48 | 49 | // if we can't read the input file then return 0 50 | FILE* f = fopen( filename, "r" ); 51 | if( !f ) 52 | { 53 | printf("ERROR: cannot read file %s\n", filename); 54 | return 0; // all not OK 55 | } 56 | 57 | // for each sample in the file 58 | 59 | for(int line = 0; line < n_samples; line++) 60 | { 61 | 62 | // for each attribute on the line in the file 63 | 64 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 65 | { 66 | if (attribute < 64) 67 | { 68 | 69 | // first 64 elements (0-63) in each line are the attributes 70 | 71 | fscanf(f, "%f,", &tmp); 72 | data.at(line, attribute) = tmp; 73 | // printf("%f,", data.at(line, attribute)); 74 | 75 | } 76 | else if (attribute == 64) 77 | { 78 | 79 | // attribute 65 is the class label {0 ... 
9} 80 | 81 | fscanf(f, "%f,", &tmp); 82 | classes.at(line, 0) = tmp; 83 | // printf("%f\n", classes.at(line, 0)); 84 | 85 | } 86 | } 87 | } 88 | 89 | fclose(f); 90 | 91 | return 1; // all OK 92 | } 93 | 94 | /******************************************************************************/ 95 | 96 | int main( int argc, char** argv ) 97 | { 98 | // lets just check the version first 99 | 100 | printf ("OpenCV version %s (%d.%d.%d)\n", 101 | CV_VERSION, 102 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 103 | 104 | // define training data storage matrices (one for attribute examples, one 105 | // for classifications) 106 | 107 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 108 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 109 | 110 | //define testing data storage matrices 111 | 112 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 113 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 114 | 115 | 116 | // load training and testing data sets 117 | 118 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 119 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 120 | { 121 | 122 | // train bayesian classifier (using training data) 123 | 124 | printf( "\nUsing training database: %s\n\n", argv[1]); 125 | CvNormalBayesClassifier *bayes = new CvNormalBayesClassifier; 126 | 127 | bayes->train(training_data, training_classifications, Mat(), Mat(), false); 128 | 129 | // perform classifier testing and report results 130 | 131 | Mat test_sample; 132 | int correct_class = 0; 133 | int wrong_class = 0; 134 | int false_positives [NUMBER_OF_CLASSES]; 135 | float result; 136 | 137 | // zero the false positive counters in a simple loop 138 | 139 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 140 | { 141 | false_positives[i] = 0; 142 | } 143 | 144 
| printf( "\nUsing testing database: %s\n\n", argv[2]); 145 | 146 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 147 | { 148 | 149 | // extract a row from the testing matrix 150 | 151 | test_sample = testing_data.row(tsample); 152 | 153 | // run decision tree prediction 154 | 155 | result = bayes->predict(test_sample); 156 | 157 | printf("Testing Sample %i -> class result (character %i)\n", tsample, 158 | (int) result); 159 | 160 | // if the prediction and the (true) testing classification are the same 161 | // (N.B. openCV uses a floating point decision tree implementation!) 162 | 163 | if (fabs(result - testing_classifications.at(tsample, 0)) 164 | >= FLT_EPSILON) 165 | { 166 | // if they differ more than floating point error => wrong class 167 | 168 | wrong_class++; 169 | 170 | false_positives[((int) result)]++; 171 | 172 | } 173 | else 174 | { 175 | 176 | // otherwise correct 177 | 178 | correct_class++; 179 | } 180 | } 181 | printf( "\nResults on the testing database: %s\n" 182 | "\tCorrect classification: %d (%g%%)\n" 183 | "\tWrong classifications: %d (%g%%)\n", 184 | argv[2], 185 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 186 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 187 | 188 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 189 | { 190 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 191 | false_positives[i], 192 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 193 | } 194 | 195 | 196 | // all matrix memory free by destructors 197 | 198 | 199 | // all OK : main returns 0 200 | 201 | return 0; 202 | } 203 | 204 | // not OK : main returns -1 205 | 206 | return -1; 207 | } 208 | /******************************************************************************/ 209 | -------------------------------------------------------------------------------- /opticaldigits_ex/optdigits.names: -------------------------------------------------------------------------------- 1 
| 2 | 1. Title of Database: Optical Recognition of Handwritten Digits 3 | 4 | 2. Source: 5 | E. Alpaydin, C. Kaynak 6 | Department of Computer Engineering 7 | Bogazici University, 80815 Istanbul Turkey 8 | alpaydin@boun.edu.tr 9 | July 1998 10 | 11 | 3. Past Usage: 12 | C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their 13 | Applications to Handwritten Digit Recognition, 14 | MSc Thesis, Institute of Graduate Studies in Science and 15 | Engineering, Bogazici University. 16 | 17 | E. Alpaydin, C. Kaynak (1998) Cascading Classifiers, Kybernetika, 18 | to appear. ftp://ftp.icsi.berkeley.edu/pub/ai/ethem/kyb.ps.Z 19 | 20 | 4. Relevant Information: 21 | We used preprocessing programs made available by NIST to extract 22 | normalized bitmaps of handwritten digits from a preprinted form. From 23 | a total of 43 people, 30 contributed to the training set and different 24 | 13 to the test set. 32x32 bitmaps are divided into nonoverlapping 25 | blocks of 4x4 and the number of on pixels are counted in each block. 26 | This generates an input matrix of 8x8 where each element is an 27 | integer in the range 0..16. This reduces dimensionality and gives 28 | invariance to small distortions. 29 | 30 | For info on NIST preprocessing routines, see 31 | M. D. Garris, J. L. Blue, G. T. Candela, D. L. Dimmick, J. Geist, 32 | P. J. Grother, S. A. Janet, and C. L. Wilson, NIST Form-Based 33 | Handprint Recognition System, NISTIR 5469, 1994. 34 | 35 | 5. Number of Instances 36 | optdigits.tra Training 3823 37 | optdigits.tes Testing 1797 38 | 39 | The way we used the dataset was to use half of training for 40 | actual training, one-fourth for validation and one-fourth 41 | for writer-dependent testing. The test set was used for 42 | writer-independent testing and is the actual quality measure. 43 | 44 | 6. Number of Attributes 45 | 64 input+1 class attribute 46 | 47 | 7. For Each Attribute: 48 | All input attributes are integers in the range 0..16. 
49 | The last attribute is the class code 0..9 50 | 51 | 8. Missing Attribute Values 52 | None 53 | 54 | 9. Class Distribution 55 | Class: No of examples in training set 56 | 0: 376 57 | 1: 389 58 | 2: 380 59 | 3: 389 60 | 4: 387 61 | 5: 376 62 | 6: 377 63 | 7: 387 64 | 8: 380 65 | 9: 382 66 | 67 | Class: No of examples in testing set 68 | 0: 178 69 | 1: 182 70 | 2: 177 71 | 3: 183 72 | 4: 181 73 | 5: 182 74 | 6: 181 75 | 7: 179 76 | 8: 174 77 | 9: 180 78 | 79 | Accuracy on the testing set with k-nn 80 | using Euclidean distance as the metric 81 | 82 | k = 1 : 98.00 83 | k = 2 : 97.38 84 | k = 3 : 97.83 85 | k = 4 : 97.61 86 | k = 5 : 97.89 87 | k = 6 : 97.77 88 | k = 7 : 97.66 89 | k = 8 : 97.66 90 | k = 9 : 97.72 91 | k = 10 : 97.55 92 | k = 11 : 97.89 93 | 94 | -------------------------------------------------------------------------------- /opticaldigits_ex/randomforest.cpp: -------------------------------------------------------------------------------- 1 | // Example : random forest (tree) learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2011 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 3823 22 | #define ATTRIBUTES_PER_SAMPLE 64 23 | #define NUMBER_OF_TESTING_SAMPLES 1797 24 | 25 | #define NUMBER_OF_CLASSES 10 26 | 27 | // N.B. 
classes are integer handwritten digits in range 0-9 28 | 29 | /******************************************************************************/ 30 | 31 | // loads the sample database from file (which is a CSV text file) 32 | 33 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 34 | int n_samples ) 35 | { 36 | float tmp; 37 | 38 | // if we can't read the input file then return 0 39 | FILE* f = fopen( filename, "r" ); 40 | if( !f ) 41 | { 42 | printf("ERROR: cannot read file %s\n", filename); 43 | return 0; // all not OK 44 | } 45 | 46 | // for each sample in the file 47 | 48 | for(int line = 0; line < n_samples; line++) 49 | { 50 | 51 | // for each attribute on the line in the file 52 | 53 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 54 | { 55 | if (attribute < 64) 56 | { 57 | 58 | // first 64 elements (0-63) in each line are the attributes 59 | 60 | fscanf(f, "%f,", &tmp); 61 | data.at(line, attribute) = tmp; 62 | // printf("%f,", data.at(line, attribute)); 63 | 64 | } 65 | else if (attribute == 64) 66 | { 67 | 68 | // attribute 65 is the class label {0 ... 
9} 69 | 70 | fscanf(f, "%f,", &tmp); 71 | classes.at(line, 0) = tmp; 72 | // printf("%f\n", classes.at(line, 0)); 73 | 74 | } 75 | } 76 | } 77 | 78 | fclose(f); 79 | 80 | return 1; // all OK 81 | } 82 | 83 | /******************************************************************************/ 84 | 85 | int main( int argc, char** argv ) 86 | { 87 | // lets just check the version first 88 | 89 | printf ("OpenCV version %s (%d.%d.%d)\n", 90 | CV_VERSION, 91 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 92 | 93 | // define training data storage matrices (one for attribute examples, one 94 | // for classifications) 95 | 96 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 97 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 98 | 99 | //define testing data storage matrices 100 | 101 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 102 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 103 | 104 | // define all the attributes as numerical 105 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 106 | // that can be assigned on a per attribute basis 107 | 108 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 109 | var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical 110 | 111 | // this is a classification problem (i.e. 
predict a discrete number of class 112 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 113 | 114 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 115 | 116 | double result; // value returned from a prediction 117 | 118 | // load training and testing data sets 119 | 120 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 121 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 122 | { 123 | // define the parameters for training the random forest (trees) 124 | 125 | float priors[] = {1,1,1,1,1,1,1,1,1,1}; // weights of each classification for classes 126 | // (all equal as equal samples of each digit) 127 | 128 | CvRTParams params = CvRTParams(25, // max depth 129 | 5, // min sample count 130 | 0, // regression accuracy: N/A here 131 | false, // compute surrogate split, no missing data 132 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 133 | priors, // the array of priors 134 | false, // calculate variable importance 135 | 4, // number of variables randomly selected at node and used to find the best split(s). 
136 | 100, // max number of trees in the forest 137 | 0.01f, // forrest accuracy 138 | CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination cirteria 139 | ); 140 | 141 | // train random forest classifier (using training data) 142 | 143 | printf( "\nUsing training database: %s\n\n", argv[1]); 144 | CvRTrees* rtree = new CvRTrees; 145 | 146 | rtree->train(training_data, CV_ROW_SAMPLE, training_classifications, 147 | Mat(), Mat(), var_type, Mat(), params); 148 | 149 | // perform classifier testing and report results 150 | 151 | Mat test_sample; 152 | int correct_class = 0; 153 | int wrong_class = 0; 154 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 155 | 156 | printf( "\nUsing testing database: %s\n\n", argv[2]); 157 | 158 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 159 | { 160 | 161 | // extract a row from the testing matrix 162 | 163 | test_sample = testing_data.row(tsample); 164 | 165 | // run random forest prediction 166 | 167 | result = rtree->predict(test_sample, Mat()); 168 | 169 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result); 170 | 171 | // if the prediction and the (true) testing classification are the same 172 | // (N.B. openCV uses a floating point decision tree implementation!) 
173 | 174 | if (fabs(result - testing_classifications.at(tsample, 0)) 175 | >= FLT_EPSILON) 176 | { 177 | // if they differ more than floating point error => wrong class 178 | 179 | wrong_class++; 180 | 181 | false_positives[(int) result]++; 182 | 183 | } 184 | else 185 | { 186 | 187 | // otherwise correct 188 | 189 | correct_class++; 190 | } 191 | } 192 | 193 | printf( "\nResults on the testing database: %s\n" 194 | "\tCorrect classification: %d (%g%%)\n" 195 | "\tWrong classifications: %d (%g%%)\n", 196 | argv[2], 197 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 198 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 199 | 200 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 201 | { 202 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 203 | false_positives[i], 204 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 205 | } 206 | 207 | 208 | // all matrix memory free by destructors 209 | 210 | 211 | // all OK : main returns 0 212 | 213 | return 0; 214 | } 215 | 216 | // not OK : main returns -1 217 | 218 | return -1; 219 | } 220 | /******************************************************************************/ 221 | -------------------------------------------------------------------------------- /opticaldigits_ex/svm.cpp: -------------------------------------------------------------------------------- 1 | // Example : Support Vector Machine (SVM) learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | // Version : 0.2 8 | 9 | // Copyright (c) 2013 School of Engineering, Cranfield University 10 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 11 | 12 | #include // opencv general include file 13 | #include // opencv machine learning include file 14 | 15 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 16 | 17 | #include 18 | 19 | 
/******************************************************************************/ 20 | 21 | // use SVM "grid search" for kernel parameters 22 | 23 | #define USE_OPENCV_GRID_SEARCH_AUTOTRAIN 1 // set to 0 to set SVM parameters manually 24 | 25 | /******************************************************************************/ 26 | // global definitions (for speed and ease of use) 27 | 28 | #define NUMBER_OF_TRAINING_SAMPLES 3823 29 | #define ATTRIBUTES_PER_SAMPLE 64 30 | #define NUMBER_OF_TESTING_SAMPLES 1797 31 | 32 | #define NUMBER_OF_CLASSES 10 33 | 34 | // N.B. classes are integer handwritten digits in range 0-9 35 | 36 | /******************************************************************************/ 37 | 38 | // loads the sample database from file (which is a CSV text file) 39 | 40 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 41 | int n_samples ) 42 | { 43 | float tmp; 44 | 45 | // if we can't read the input file then return 0 46 | FILE* f = fopen( filename, "r" ); 47 | if( !f ) 48 | { 49 | printf("ERROR: cannot read file %s\n", filename); 50 | return 0; // all not OK 51 | } 52 | 53 | // for each sample in the file 54 | 55 | for(int line = 0; line < n_samples; line++) 56 | { 57 | 58 | // for each attribute on the line in the file 59 | 60 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 61 | { 62 | if (attribute < 64) 63 | { 64 | 65 | // first 64 elements (0-63) in each line are the attributes 66 | 67 | fscanf(f, "%f,", &tmp); 68 | data.at(line, attribute) = tmp; 69 | // printf("%f,", data.at(line, attribute)); 70 | 71 | } 72 | else if (attribute == 64) 73 | { 74 | 75 | // attribute 65 is the class label {0 ... 
9} 76 | 77 | fscanf(f, "%f,", &tmp); 78 | classes.at(line, 0) = tmp; 79 | // printf("%f\n", classes.at(line, 0)); 80 | 81 | } 82 | } 83 | } 84 | 85 | fclose(f); 86 | 87 | return 1; // all OK 88 | } 89 | 90 | /******************************************************************************/ 91 | 92 | int main( int argc, char** argv ) 93 | { 94 | // lets just check the version first 95 | 96 | printf ("OpenCV version %s (%d.%d.%d)\n", 97 | CV_VERSION, 98 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 99 | 100 | // define training data storage matrices (one for attribute examples, one 101 | // for classifications) 102 | 103 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 104 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 105 | 106 | //define testing data storage matrices 107 | 108 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 109 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 110 | 111 | // load training and testing data sets 112 | 113 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 114 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 115 | { 116 | // define the parameters for training the SVM (kernel + SVMtype type used for auto-training, 117 | // other parameters for manual only) 118 | 119 | CvSVMParams params = CvSVMParams( 120 | CvSVM::C_SVC, // Type of SVM, here N classes (see manual) 121 | CvSVM::LINEAR, // kernel type (see manual) 122 | 0.0, // kernel parameter (degree) for poly kernel only 123 | 0.0, // kernel parameter (gamma) for poly/rbf kernel only 124 | 0.0, // kernel parameter (coef0) for poly/sigmoid kernel only 125 | 10, // SVM optimization parameter C 126 | 0, // SVM optimization parameter nu (not used for N classe SVM) 127 | 0, // SVM optimization parameter p (not used for N classe SVM) 128 | 
NULL, // class wieghts (or priors) 129 | // Optional weights, assigned to particular classes. 130 | // They are multiplied by C and thus affect the misclassification 131 | // penalty for different classes. The larger weight, the larger penalty 132 | // on misclassification of data from the corresponding class. 133 | 134 | // termination criteria for learning algorithm 135 | 136 | cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 0.000001) 137 | 138 | ); 139 | 140 | // train SVM classifier (using training data) 141 | 142 | printf( "\nUsing training database: %s\n\n", argv[1]); 143 | CvSVM* svm = new CvSVM; 144 | 145 | #if (USE_OPENCV_GRID_SEARCH_AUTOTRAIN) 146 | 147 | // train using auto training parameter grid search if it is available 148 | // N.B. this does not search kernel choice 149 | 150 | svm->train_auto(training_data, training_classifications, Mat(), Mat(), params, 10); 151 | params = svm->get_params(); 152 | printf( "\nUsing optimal parameters degree %f, gamma %f, ceof0 %f\n\t C %f, nu %f, p %f\n", 153 | params.degree, params.gamma, params.coef0, params.C, params.nu, params.p); 154 | 155 | #else 156 | 157 | // otherwise use regular training and use parameters manually specified above 158 | 159 | svm->train(training_data, training_classifications, Mat(), Mat(), params); 160 | 161 | #endif 162 | 163 | // get the number of support vectors used to define the SVM decision boundary 164 | 165 | printf("Number of support vectors for trained SVM = %i\n", svm->get_support_vector_count()); 166 | 167 | // perform classifier testing and report results 168 | 169 | Mat test_sample; 170 | int correct_class = 0; 171 | int wrong_class = 0; 172 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 173 | float result; 174 | 175 | printf( "\nUsing testing database: %s\n\n", argv[2]); 176 | 177 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 178 | { 179 | 180 | // extract a row from the testing matrix 181 | 182 | test_sample = 
testing_data.row(tsample); 183 | 184 | // run SVM classifier 185 | 186 | result = svm->predict(test_sample); 187 | 188 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result); 189 | 190 | // if the prediction and the (true) testing classification are the same 191 | // (N.B. openCV uses a floating point implementation!) 192 | 193 | if (fabs(result - testing_classifications.at(tsample, 0)) 194 | >= FLT_EPSILON) 195 | { 196 | // if they differ more than floating point error => wrong class 197 | 198 | wrong_class++; 199 | false_positives[(int) testing_classifications.at(tsample, 0)]++; 200 | 201 | } 202 | else 203 | { 204 | 205 | // otherwise correct 206 | 207 | correct_class++; 208 | } 209 | } 210 | 211 | printf( "\nResults on the testing database: %s\n" 212 | "\tCorrect classification: %d (%g%%)\n" 213 | "\tWrong classifications: %d (%g%%)\n", 214 | argv[2], 215 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 216 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 217 | 218 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 219 | { 220 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 221 | false_positives[i], 222 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 223 | } 224 | 225 | 226 | // all OK : main returns 0 227 | 228 | return 0; 229 | } 230 | 231 | // not OK : main returns -1 232 | 233 | return -1; 234 | } 235 | /******************************************************************************/ 236 | -------------------------------------------------------------------------------- /other_ex/normalbayes.cpp: -------------------------------------------------------------------------------- 1 | // Example : normal / naive bayesian learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : other_ex/wdbc.{train|test} 5 | 6 | // N.B. 
*** This bayesian classifier assumes that the attribute (or feature) 7 | // vectors for each class are normally distributed and independent *** 8 | // - see OpenCV manual 9 | 10 | // "It’s "naïve" because it assumes that all the features (attributes) are 11 | // independent from one another even though this is seldom the case 12 | // (e.g., finding one eye usually implies that another eye is lurking nearby). 13 | // Zhang discusses possible reasons for the sometimes surprisingly good 14 | // performance of this classifier [Zhang04]." - Learning OpenCV [Bradski 2009]. 15 | 16 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 17 | 18 | // Copyright (c) 2011 School of Engineering, Cranfield University 19 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 20 | 21 | #include // opencv general include file 22 | #include // opencv machine learning include file 23 | 24 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 25 | 26 | #include 27 | 28 | /******************************************************************************/ 29 | // global definitions (for speed and ease of use) 30 | 31 | #define NUMBER_OF_TRAINING_SAMPLES 449 32 | #define ATTRIBUTES_PER_SAMPLE 30 // not the first two as patient ID and class 33 | #define NUMBER_OF_TESTING_SAMPLES 120 34 | 35 | #define NUMBER_OF_CLASSES 2 36 | 37 | static char CLASSES[2] = {'B', 'M'}; // class B = 0, class M = 1 38 | 39 | /******************************************************************************/ 40 | 41 | // loads the sample database from file (which is a CSV text file) 42 | 43 | int read_data_from_csv(const char* filename, Mat data, Mat classes, int n_samples ) 44 | { 45 | char tmpc; 46 | float tmpf; 47 | 48 | // if we can't read the input file then return 0 49 | FILE* f = fopen( filename, "r" ); 50 | if( !f ) 51 | { 52 | printf("ERROR: cannot read file %s\n", filename); 53 | return 0; // all not OK 54 | } 55 | 56 | // for each sample in the file 57 | 58 | for(int line = 0; line < 
n_samples; line++) 59 | { 60 | 61 | // for each attribute on the line in the file 62 | 63 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 2); attribute++) 64 | { 65 | if (attribute == 0) 66 | { 67 | fscanf(f, "%f,", &tmpf); 68 | 69 | // ignore attribute 0 (as it's the patient ID) 70 | 71 | continue; 72 | } 73 | else if (attribute == 1) 74 | { 75 | 76 | // attribute 2 (in the database) is the classification 77 | // record 1 = M = malignant 78 | // record 0 = B = benign 79 | 80 | fscanf(f, "%c,", &tmpc); 81 | 82 | switch(tmpc) 83 | { 84 | case 'M': 85 | classes.at(line, 0) = 1.0; 86 | break; 87 | case 'B': 88 | classes.at(line, 0) = 0.0; 89 | break; 90 | default: 91 | printf("ERROR: unexpected class in file %s\n", filename); 92 | return 0; // all not OK 93 | } 94 | 95 | // printf("%c,", tmpc); 96 | } 97 | else 98 | { 99 | fscanf(f, "%f,", &tmpf); 100 | data.at(line, (attribute - 2)) = tmpf; 101 | //printf("%f,", tmpf); 102 | } 103 | } 104 | fscanf(f, "\n"); 105 | //printf("\n"); 106 | } 107 | 108 | fclose(f); 109 | 110 | return 1; // all OK 111 | } 112 | 113 | /******************************************************************************/ 114 | 115 | int main( int argc, char** argv ) 116 | { 117 | // lets just check the version first 118 | 119 | printf ("OpenCV version %s (%d.%d.%d)\n", 120 | CV_VERSION, 121 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 122 | 123 | // define training data storage matrices (one for attribute examples, one 124 | // for classifications) 125 | 126 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 127 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 128 | 129 | //define testing data storage matrices 130 | 131 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 132 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 133 | 134 | 135 | // load training and testing data sets 136 | 137 | if 
(read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 138 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 139 | { 140 | 141 | // train bayesian classifier (using training data) 142 | 143 | printf( "\nUsing training database: %s\n\n", argv[1]); 144 | CvNormalBayesClassifier *bayes = new CvNormalBayesClassifier; 145 | 146 | bayes->train(training_data, training_classifications, Mat(), Mat(), false); 147 | 148 | // perform classifier testing and report results 149 | 150 | Mat test_sample; 151 | int correct_class = 0; 152 | int wrong_class = 0; 153 | int false_positives [NUMBER_OF_CLASSES]; 154 | float result; 155 | 156 | // zero the false positive counters in a simple loop 157 | 158 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 159 | { 160 | false_positives[i] = 0; 161 | } 162 | 163 | printf( "\nUsing testing database: %s\n\n", argv[2]); 164 | 165 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 166 | { 167 | 168 | // extract a row from the testing matrix 169 | 170 | test_sample = testing_data.row(tsample); 171 | 172 | // run decision tree prediction 173 | 174 | result = bayes->predict(test_sample); 175 | 176 | printf("Testing Sample %i -> class result (character %c)\n", tsample, 177 | CLASSES[((int) result)]); 178 | 179 | // if the prediction and the (true) testing classification are the same 180 | // (N.B. openCV uses a floating point decision tree implementation!) 
181 | 182 | if (fabs(result - testing_classifications.at(tsample, 0)) 183 | >= FLT_EPSILON) 184 | { 185 | // if they differ more than floating point error => wrong class 186 | 187 | wrong_class++; 188 | 189 | false_positives[((int) result)]++; 190 | 191 | } 192 | else 193 | { 194 | 195 | // otherwise correct 196 | 197 | correct_class++; 198 | } 199 | } 200 | 201 | printf( "\nResults on the testing database: %s\n" 202 | "\tCorrect classification: %d (%g%%)\n" 203 | "\tWrong classifications: %d (%g%%)\n", 204 | argv[2], 205 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 206 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 207 | 208 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 209 | { 210 | printf( "\tClass (character %c) false postives %d (%g%%)\n", CLASSES[i], 211 | false_positives[i], 212 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 213 | } 214 | 215 | // all matrix memory free by destructors 216 | 217 | 218 | // all OK : main returns 0 219 | 220 | return 0; 221 | } 222 | 223 | // not OK : main returns -1 224 | 225 | return -1; 226 | } 227 | /******************************************************************************/ 228 | -------------------------------------------------------------------------------- /other_ex/wdbc.names: -------------------------------------------------------------------------------- 1 | 1. Title: Wisconsin Diagnostic Breast Cancer (WDBC) 2 | 3 | 2. Source Information 4 | 5 | a) Creators: 6 | 7 | Dr. William H. Wolberg, General Surgery Dept., University of 8 | Wisconsin, Clinical Sciences Center, Madison, WI 53792 9 | wolberg@eagle.surgery.wisc.edu 10 | 11 | W. Nick Street, Computer Sciences Dept., University of 12 | Wisconsin, 1210 West Dayton St., Madison, WI 53706 13 | street@cs.wisc.edu 608-262-6619 14 | 15 | Olvi L. 
Mangasarian, Computer Sciences Dept., University of 16 | Wisconsin, 1210 West Dayton St., Madison, WI 53706 17 | olvi@cs.wisc.edu 18 | 19 | b) Donor: Nick Street 20 | 21 | c) Date: November 1995 22 | 23 | 3. Past Usage: 24 | 25 | first usage: 26 | 27 | W.N. Street, W.H. Wolberg and O.L. Mangasarian 28 | Nuclear feature extraction for breast tumor diagnosis. 29 | IS&T/SPIE 1993 International Symposium on Electronic Imaging: Science 30 | and Technology, volume 1905, pages 861-870, San Jose, CA, 1993. 31 | 32 | OR literature: 33 | 34 | O.L. Mangasarian, W.N. Street and W.H. Wolberg. 35 | Breast cancer diagnosis and prognosis via linear programming. 36 | Operations Research, 43(4), pages 570-577, July-August 1995. 37 | 38 | Medical literature: 39 | 40 | W.H. Wolberg, W.N. Street, and O.L. Mangasarian. 41 | Machine learning techniques to diagnose breast cancer from 42 | fine-needle aspirates. 43 | Cancer Letters 77 (1994) 163-171. 44 | 45 | W.H. Wolberg, W.N. Street, and O.L. Mangasarian. 46 | Image analysis and machine learning applied to breast cancer 47 | diagnosis and prognosis. 48 | Analytical and Quantitative Cytology and Histology, Vol. 17 49 | No. 2, pages 77-87, April 1995. 50 | 51 | W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. 52 | Computerized breast cancer diagnosis and prognosis from fine 53 | needle aspirates. 54 | Archives of Surgery 1995;130:511-516. 55 | 56 | W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. 57 | Computer-derived nuclear features distinguish malignant from 58 | benign breast cytology. 59 | Human Pathology, 26:792--796, 1995. 
60 | 61 | See also: 62 | http://www.cs.wisc.edu/~olvi/uwmp/mpml.html 63 | http://www.cs.wisc.edu/~olvi/uwmp/cancer.html 64 | 65 | Results: 66 | 67 | - predicting field 2, diagnosis: B = benign, M = malignant 68 | - sets are linearly separable using all 30 input features 69 | - best predictive accuracy obtained using one separating plane 70 | in the 3-D space of Worst Area, Worst Smoothness and 71 | Mean Texture. Estimated accuracy 97.5% using repeated 72 | 10-fold crossvalidations. Classifier has correctly 73 | diagnosed 176 consecutive new patients as of November 74 | 1995. 75 | 76 | 4. Relevant information 77 | 78 | Features are computed from a digitized image of a fine needle 79 | aspirate (FNA) of a breast mass. They describe 80 | characteristics of the cell nuclei present in the image. 81 | A few of the images can be found at 82 | http://www.cs.wisc.edu/~street/images/ 83 | 84 | Separating plane described above was obtained using 85 | Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree 86 | Construction Via Linear Programming." Proceedings of the 4th 87 | Midwest Artificial Intelligence and Cognitive Science Society, 88 | pp. 97-101, 1992], a classification method which uses linear 89 | programming to construct a decision tree. Relevant features 90 | were selected using an exhaustive search in the space of 1-4 91 | features and 1-3 separating planes. 92 | 93 | The actual linear program used to obtain the separating plane 94 | in the 3-dimensional space is that described in: 95 | [K. P. Bennett and O. L. Mangasarian: "Robust Linear 96 | Programming Discrimination of Two Linearly Inseparable Sets", 97 | Optimization Methods and Software 1, 1992, 23-34]. 98 | 99 | 100 | This database is also available through the UW CS ftp server: 101 | 102 | ftp ftp.cs.wisc.edu 103 | cd math-prog/cpo-dataset/machine-learn/WDBC/ 104 | 105 | 5. Number of instances: 569 106 | 107 | 6. Number of attributes: 32 (ID, diagnosis, 30 real-valued input features) 108 | 109 | 7. 
Attribute information 110 | 111 | 1) ID number 112 | 2) Diagnosis (M = malignant, B = benign) 113 | 3-32) 114 | 115 | Ten real-valued features are computed for each cell nucleus: 116 | 117 | a) radius (mean of distances from center to points on the perimeter) 118 | b) texture (standard deviation of gray-scale values) 119 | c) perimeter 120 | d) area 121 | e) smoothness (local variation in radius lengths) 122 | f) compactness (perimeter^2 / area - 1.0) 123 | g) concavity (severity of concave portions of the contour) 124 | h) concave points (number of concave portions of the contour) 125 | i) symmetry 126 | j) fractal dimension ("coastline approximation" - 1) 127 | 128 | Several of the papers listed above contain detailed descriptions of 129 | how these features are computed. 130 | 131 | The mean, standard error, and "worst" or largest (mean of the three 132 | largest values) of these features were computed for each image, 133 | resulting in 30 features. For instance, field 3 is Mean Radius, field 134 | 13 is Radius SE, field 23 is Worst Radius. 135 | 136 | All feature values are recoded with four significant digits. 137 | 138 | 8. Missing attribute values: none 139 | 140 | 9. 
Class distribution: 357 benign, 212 malignant -------------------------------------------------------------------------------- /speech_ex/decisiontree.cpp: -------------------------------------------------------------------------------- 1 | // Example : decision tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : speech_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2011 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | 20 | #define NUMBER_OF_TRAINING_SAMPLES 6238 21 | #define ATTRIBUTES_PER_SAMPLE 617 22 | #define NUMBER_OF_TESTING_SAMPLES 1559 23 | 24 | #define NUMBER_OF_CLASSES 26 25 | 26 | // N.B. 
classes are spoken alphabetric letters A-Z labelled 1 -> 26 27 | 28 | /******************************************************************************/ 29 | 30 | // loads the sample database from file (which is a CSV text file) 31 | 32 | int read_data_from_csv(const char* filename, Mat data, Mat classes, int n_samples ) 33 | { 34 | float tmp; 35 | 36 | // if we can't read the input file then return 0 37 | FILE* f = fopen( filename, "r" ); 38 | if( !f ) 39 | { 40 | printf("ERROR: cannot read file %s\n", filename); 41 | return 0; // all not OK 42 | } 43 | 44 | // for each sample in the file 45 | 46 | for(int line = 0; line < n_samples; line++) 47 | { 48 | 49 | // for each attribute on the line in the file 50 | 51 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 52 | { 53 | if (attribute < ATTRIBUTES_PER_SAMPLE) 54 | { 55 | 56 | // first 617 elements (0-616) in each line are the attributes 57 | 58 | fscanf(f, "%f,", &tmp); 59 | data.at(line, attribute) = tmp; 60 | 61 | 62 | } 63 | else if (attribute == ATTRIBUTES_PER_SAMPLE) 64 | { 65 | 66 | // attribute 617 is the class label {1 ... 
26} == {A-Z} 67 | 68 | fscanf(f, "%f,", &tmp); 69 | classes.at(line, 0) = tmp; 70 | } 71 | } 72 | } 73 | 74 | fclose(f); 75 | 76 | return 1; // all OK 77 | } 78 | 79 | /******************************************************************************/ 80 | 81 | int main( int argc, char** argv ) 82 | { 83 | // lets just check the version first 84 | 85 | printf ("OpenCV version %s (%d.%d.%d)\n", 86 | CV_VERSION, 87 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 88 | 89 | // define training data storage matrices (one for attribute examples, one 90 | // for classifications) 91 | 92 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 93 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 94 | 95 | //define testing data storage matrices 96 | 97 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 98 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 99 | 100 | // define all the attributes as numerical 101 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 102 | // that can be assigned on a per attribute basis 103 | 104 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 105 | var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical 106 | 107 | // this is a classification problem (i.e. 
predict a discrete number of class 108 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 109 | 110 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 111 | 112 | CvDTreeNode* resultNode; // node returned from a prediction 113 | 114 | // load training and testing data sets 115 | 116 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 117 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 118 | { 119 | // define the parameters for training the decision tree 120 | 121 | float *priors = NULL; // weights of each classification for classes 122 | // (all equal as equal samples of each character) 123 | 124 | CvDTreeParams params = CvDTreeParams(25, // max depth 125 | 5, // min sample count 126 | 0, // regression accuracy: N/A here 127 | false, // compute surrogate split, no missing data 128 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 129 | 15, // the number of cross-validation folds 130 | false, // use 1SE rule => smaller tree 131 | false, // throw away the pruned tree branches 132 | priors // the array of priors 133 | ); 134 | 135 | 136 | // train decision tree classifier (using training data) 137 | 138 | printf( "\nUsing training database: %s\n\n", argv[1]); 139 | CvDTree* dtree = new CvDTree; 140 | 141 | dtree->train(training_data, CV_ROW_SAMPLE, training_classifications, 142 | Mat(), Mat(), var_type, Mat(), params); 143 | 144 | // perform classifier testing and report results 145 | 146 | Mat test_sample; 147 | int correct_class = 0; 148 | int wrong_class = 0; 149 | int false_positives [NUMBER_OF_CLASSES]; 150 | char class_labels[NUMBER_OF_CLASSES]; 151 | 152 | // zero the false positive counters in a simple loop 153 | 154 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 155 | { 156 | false_positives[i] = 0; 157 | class_labels[i] = (char) 65 + i; // ASCII 65 = A 158 | } 159 | 160 | printf( 
"\nUsing testing database: %s\n\n", argv[2]); 161 | 162 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 163 | { 164 | 165 | // extract a row from the testing matrix 166 | 167 | test_sample = testing_data.row(tsample); 168 | 169 | // run decision tree prediction 170 | 171 | resultNode = dtree->predict(test_sample, Mat(), false); 172 | 173 | printf("Testing Sample %i -> class result (character %c)\n", tsample, 174 | class_labels[((int) (resultNode->value)) - 1]); 175 | 176 | // if the prediction and the (true) testing classification are the same 177 | // (N.B. openCV uses a floating point decision tree implementation!) 178 | 179 | if (fabs(resultNode->value - testing_classifications.at(tsample, 0)) 180 | >= FLT_EPSILON) 181 | { 182 | // if they differ more than floating point error => wrong class 183 | 184 | wrong_class++; 185 | 186 | false_positives[((int) (resultNode->value)) - 1]++; 187 | 188 | } 189 | else 190 | { 191 | 192 | // otherwise correct 193 | 194 | correct_class++; 195 | } 196 | } 197 | 198 | printf( "\nResults on the testing database: %s\n" 199 | "\tCorrect classification: %d (%g%%)\n" 200 | "\tWrong classifications: %d (%g%%)\n", 201 | argv[2], 202 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 203 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 204 | 205 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 206 | { 207 | printf( "\tClass (character %c) false postives %d (%g%%)\n", class_labels[i], 208 | false_positives[i], 209 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 210 | } 211 | 212 | 213 | // all matrix memory free by destructors 214 | 215 | 216 | // all OK : main returns 0 217 | 218 | return 0; 219 | } 220 | 221 | // not OK : main returns -1 222 | 223 | return -1; 224 | } 225 | /******************************************************************************/ 226 | -------------------------------------------------------------------------------- /speech_ex/svm.cpp: 
-------------------------------------------------------------------------------- 1 | // Example : Support Vector Machine (SVM) learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : speech_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | // Version : 0.2 8 | 9 | // Copyright (c) 2011 School of Engineering, Cranfield University 10 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 11 | 12 | 13 | #include // opencv general include file 14 | #include // opencv machine learning include file 15 | 16 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 17 | 18 | #include 19 | 20 | /******************************************************************************/ 21 | 22 | // use SVM "grid search" for kernel parameters 23 | 24 | #define USE_OPENCV_GRID_SEARCH_AUTOTRAIN 1 // set to 0 to set SVM parameters manually 25 | 26 | /******************************************************************************/ 27 | 28 | #define NUMBER_OF_TRAINING_SAMPLES 6238 29 | #define ATTRIBUTES_PER_SAMPLE 617 30 | #define NUMBER_OF_TESTING_SAMPLES 1559 31 | 32 | #define NUMBER_OF_CLASSES 26 33 | 34 | // N.B. 
classes are spoken alphabetric letters A-Z labelled 1 -> 26 35 | 36 | /******************************************************************************/ 37 | 38 | // loads the sample database from file (which is a CSV text file) 39 | 40 | 41 | int read_data_from_csv(const char* filename, Mat data, Mat classes, int n_samples ) 42 | { 43 | float tmp; 44 | 45 | // if we can't read the input file then return 0 46 | FILE* f = fopen( filename, "r" ); 47 | if( !f ) 48 | { 49 | printf("ERROR: cannot read file %s\n", filename); 50 | return 0; // all not OK 51 | } 52 | 53 | // for each sample in the file 54 | 55 | for(int line = 0; line < n_samples; line++) 56 | { 57 | 58 | // for each attribute on the line in the file 59 | 60 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 61 | { 62 | if (attribute < ATTRIBUTES_PER_SAMPLE) 63 | { 64 | 65 | // first 617 elements (0-616) in each line are the attributes 66 | 67 | fscanf(f, "%f,", &tmp); 68 | data.at(line, attribute) = tmp; 69 | 70 | 71 | } 72 | else if (attribute == ATTRIBUTES_PER_SAMPLE) 73 | { 74 | 75 | // attribute 617 is the class label {1 ... 
26} == {A-Z} 76 | 77 | fscanf(f, "%f,", &tmp); 78 | classes.at(line, 0) = tmp; 79 | } 80 | } 81 | } 82 | 83 | fclose(f); 84 | 85 | return 1; // all OK 86 | } 87 | 88 | /******************************************************************************/ 89 | 90 | int main( int argc, char** argv ) 91 | { 92 | // lets just check the version first 93 | 94 | printf ("OpenCV version %s (%d.%d.%d)\n", 95 | CV_VERSION, 96 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 97 | 98 | // define training data storage matrices (one for attribute examples, one 99 | // for classifications) 100 | 101 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 102 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 103 | 104 | //define testing data storage matrices 105 | 106 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 107 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 108 | 109 | // load training and testing data sets 110 | 111 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 112 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 113 | { 114 | // define the parameters for training the SVM (kernel + SVMtype type used for auto-training, 115 | // other parameters for manual only) 116 | 117 | CvSVMParams params = CvSVMParams( 118 | CvSVM::C_SVC, // Type of SVM, here N classes (see manual) 119 | CvSVM::LINEAR, // kernel type (see manual) 120 | 0.0, // kernel parameter (degree) for poly kernel only 121 | 0.0, // kernel parameter (gamma) for poly/rbf kernel only 122 | 0.0, // kernel parameter (coef0) for poly/sigmoid kernel only 123 | 10, // SVM optimization parameter C 124 | 0, // SVM optimization parameter nu (not used for N classe SVM) 125 | 0, // SVM optimization parameter p (not used for N classe SVM) 126 | NULL, // class wieghts (or priors) 127 | // 
Optional weights, assigned to particular classes. 128 | // They are multiplied by C and thus affect the misclassification 129 | // penalty for different classes. The larger weight, the larger penalty 130 | // on misclassification of data from the corresponding class. 131 | 132 | // termination criteria for learning algorithm 133 | 134 | cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 0.000001) 135 | 136 | ); 137 | 138 | // train SVM classifier (using training data) 139 | 140 | printf( "\nUsing training database: %s\n\n", argv[1]); 141 | CvSVM* svm = new CvSVM; 142 | 143 | printf( "\nTraining the SVM (in progress) ..... "); 144 | fflush(NULL); 145 | 146 | #if (USE_OPENCV_GRID_SEARCH_AUTOTRAIN) 147 | 148 | printf( "(SVM 'grid search' => may take some time!)"); 149 | fflush(NULL); 150 | 151 | // train using auto training parameter grid search if it is available 152 | // (i.e. OpenCV 2.x) with 10 fold cross valdiation 153 | // N.B. this does not search kernel choice 154 | 155 | svm->train_auto(training_data, training_classifications, 156 | Mat(), Mat(), params, 10); 157 | params = svm->get_params(); 158 | printf( "\nUsing optimal parameters degree %f, gamma %f, ceof0 %f\n\t C %f, nu %f, p %f\n Training ..", 159 | params.degree, params.gamma, params.coef0, params.C, params.nu, params.p); 160 | #else 161 | // otherwise use regular training and use parameters manually specified above 162 | 163 | svm->train(training_data, training_classifications, Mat(), Mat(), params); 164 | 165 | #endif 166 | 167 | printf( ".... 
Done\n"); 168 | 169 | // get the number of support vectors used to define the SVM decision boundary 170 | 171 | printf("Number of support vectors for trained SVM = %i\n", svm->get_support_vector_count()); 172 | 173 | // perform classifier testing and report results 174 | 175 | Mat test_sample; 176 | int correct_class = 0; 177 | int wrong_class = 0; 178 | int false_positives [NUMBER_OF_CLASSES]; 179 | char class_labels[NUMBER_OF_CLASSES]; 180 | float result; 181 | 182 | // zero the false positive counters in a simple loop 183 | 184 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 185 | { 186 | false_positives[i] = 0; 187 | class_labels[i] = (char) 65 + i; // ASCII 65 = A 188 | } 189 | 190 | printf( "\nUsing testing database: %s\n\n", argv[2]); 191 | 192 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 193 | { 194 | 195 | // extract a row from the testing matrix 196 | 197 | test_sample = testing_data.row(tsample); 198 | 199 | // run SVM classifier 200 | 201 | result = svm->predict(test_sample); 202 | 203 | // printf("Testing Sample %i -> class result (character %c)\n", tsample, class_labels[((int) result) - 1]); 204 | 205 | // if the prediction and the (true) testing classification are the same 206 | // (N.B. openCV uses a floating point decision tree implementation!) 
207 | 208 | if (fabs(result - testing_classifications.at(tsample, 0)) 209 | >= FLT_EPSILON) 210 | { 211 | // if they differ more than floating point error => wrong class 212 | 213 | wrong_class++; 214 | 215 | false_positives[(int) (testing_classifications.at(tsample, 0) - 1)]++; 216 | 217 | } 218 | else 219 | { 220 | 221 | // otherwise correct 222 | 223 | correct_class++; 224 | } 225 | } 226 | 227 | printf( "\nResults on the testing database: %s\n" 228 | "\tCorrect classification: %d (%g%%)\n" 229 | "\tWrong classifications: %d (%g%%)\n", 230 | argv[2], 231 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 232 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 233 | 234 | for (unsigned char i = 0; i < NUMBER_OF_CLASSES; i++) 235 | { 236 | printf( "\tClass (character %c) false postives %d (%g%%)\n",class_labels[(int) i], 237 | false_positives[(int) i], 238 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 239 | } 240 | 241 | // all matrix memory free by destructors 242 | 243 | // all OK : main returns 0 244 | 245 | return 0; 246 | } 247 | 248 | // not OK : main returns -1 249 | 250 | return -1; 251 | } 252 | /******************************************************************************/ 253 | -------------------------------------------------------------------------------- /tools/dt_varimportance.cc: -------------------------------------------------------------------------------- 1 | // Example : decision tree variable importance 2 | // usage: prog tree.{yml|.xml} 3 | 4 | // For use with any test / training datasets 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2011 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | // Copyright (c) 2011 School of Engineering, Cranfield University 12 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 13 | 14 | #include // opencv general include file 15 | #include // 
opencv machine learning include file 16 | 17 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 18 | 19 | #include 20 | 21 | /*****************************************************************************/ 22 | 23 | // prints out the relative importance of the variables (i.e. attributes) used 24 | // for decision tree classification 25 | 26 | // Based on the mushroom.cpp example from OpenCV 1.0 27 | 28 | int print_variable_importance(CvDTree* dtree) 29 | { 30 | const Mat var_importance = dtree->get_var_importance(); 31 | 32 | if( var_importance.empty() ) 33 | { 34 | printf( "Error: Variable importance can not be retrieved\n" ); 35 | return -1; 36 | } 37 | 38 | for(int i = 0; i < var_importance.cols*var_importance.rows; i++ ) 39 | { 40 | double val = var_importance.at(0,i); 41 | printf( "var #%d", i ); 42 | printf( ": %g%%\n", val*100. ); 43 | } 44 | 45 | return 1; 46 | } 47 | 48 | /*****************************************************************************/ 49 | 50 | int main( int argc, char** argv ) 51 | { 52 | 53 | // check we have enough command line arguments 54 | 55 | if (argc == 2) 56 | { 57 | // define a decision tree object 58 | 59 | CvDTree* dtree = new CvDTree; 60 | 61 | // load tree structure from XML file 62 | 63 | dtree->load(argv[1]); 64 | 65 | // extract (and display) variable importance information 66 | 67 | if (print_variable_importance(dtree)){ 68 | return 0; // all OK 69 | } else { 70 | return -1; // all not OK 71 | } 72 | 73 | } else { 74 | 75 | // not OK : main returns -1 76 | 77 | printf("usage: %s decision_tree_filename.xml\n", argv[0]); 78 | return -1; 79 | 80 | } 81 | } 82 | /******************************************************************************/ 83 | -------------------------------------------------------------------------------- /tools/randomize.cc: -------------------------------------------------------------------------------- 1 | // Example : randomize the lines in a specified input file 2 | // (also removing any 
// Example : randomize the lines in a specified input file
// (also removing any empty lines in the file - i.e. no chars apart from "\n")

// usage: prog input_file output_file

// Author : Toby Breckon, toby.breckon@cranfield.ac.uk

// Copyright (c) 2009 School of Engineering, Cranfield University
// License : LGPL - http://www.gnu.org/licenses/lgpl.html

/******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <vector>

using namespace std;

#define LINELENGTHMAX 5000 // all file lines less than 5000 chars

/******************************************************************************/

int main( int argc, char** argv )
{

    vector<char *> inputlines;          // vector of input lines
    vector<char *>::iterator outline;   // iterator for above

    // check we have both filename arguments

    if (argc < 3){
        printf("usage: %s input_file output_file\n", argv[0]);
        return -1; // all not OK
    }

    // open input file

    FILE* fi = fopen( argv[1], "r" );
    if( !fi ){
        printf("ERROR: cannot read input file %s\n", argv[1]);
        return -1; // all not OK
    }

    // open output file

    FILE* fw = fopen( argv[2], "w" );
    if( !fw ){
        printf("ERROR: cannot write output file %s\n", argv[2]);
        fclose(fi);
        return -1; // all not OK
    }

    // read in all the lines of the file (allocating fresh memory for each);
    // "%[^\n]" fails to match on an empty line or at EOF, in which case we
    // must discard the buffer (otherwise an uninitialized buffer is stored)
    // and still consume the newline (otherwise the loop never advances)

    while (!feof(fi))
    {
        char * line = (char *) malloc(LINELENGTHMAX * sizeof(char));
        if (fscanf(fi, "%[^\n]", line) == 1)
        {
            inputlines.push_back(line);   // a real (non-empty) line
        }
        else
        {
            free(line);                   // empty line or EOF - skip it
        }
        fgetc(fi);                        // consume the terminating '\n'
    }

    // shuffle input file lines

    // "This algorithm is described in section 3.4.2 of Knuth (D. E. Knuth,
    // The Art of Computer Programming. Volume 2: Seminumerical Algorithms,
    // second edition. Addison-Wesley, 1981). Knuth credits Moses and
    // Oakford (1963) and Durstenfeld (1964)."
    // - SGI STL manual, http://www.sgi.com/tech/stl/random_shuffle.html

    random_shuffle(inputlines.begin(), inputlines.end());

    // output all of the lines to output file

    for(outline = inputlines.begin(); outline != inputlines.end(); outline++)
    {
        fprintf(fw, "%s\n", *outline);
        free((void *) *outline); // free memory also
    }

    // close files

    fclose(fi);
    fclose(fw);

    return 1; // all OK (N.B. historical convention in these tools)
}
/******************************************************************************/
// Example : select a subset of lines in a specified input file
// between a specified min and max line numbers INCLUSIVE
// (also removing any empty lines in the file - i.e. no chars apart from "\n")

// usage: prog min max input_file output_file
// where min and max are integer line numbers from the input file (range 1 to N)

// Author : Toby Breckon, toby.breckon@cranfield.ac.uk

// Copyright (c) 2009 School of Engineering, Cranfield University
// License : LGPL - http://www.gnu.org/licenses/lgpl.html

/******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <vector>

using namespace std;

#define LINELENGTHMAX 5000 // all file lines less than 5000 chars

/******************************************************************************/

int main( int argc, char** argv )
{

    vector<char *> inputlines;          // vector of input lines
    vector<char *>::iterator outline;   // iterator for above

    // check we have the correct number of arguments

    if (argc < 5){
        printf("usage: %s min max input_file output_file\n", argv[0]);
        exit(0);
    }

    // get min / max line numbers (tolerate them being passed in either order)

    int minL = min(atoi(argv[1]), atoi(argv[2]));
    int maxL = max(atoi(argv[1]), atoi(argv[2]));

    // usage documents line numbers in the range 1 to N, so the counter
    // starts at 1 (starting at 0 would shift the selection by one line)

    int lineN = 1;

    // open input file (argv[3], not argv[1] - the first two args are min/max)

    FILE* fi = fopen( argv[3], "r" );
    if( !fi ){
        printf("ERROR: cannot read input file %s\n", argv[3]);
        return -1; // all not OK
    }

    // open output file

    FILE* fw = fopen( argv[4], "w" );
    if( !fw ){
        printf("ERROR: cannot write output file %s\n", argv[4]);
        fclose(fi);
        return -1; // all not OK
    }

    // read in all the lines of the file (allocating fresh memory for each);
    // "%[^\n]" fails to match on an empty line or at EOF, in which case we
    // must discard the buffer (otherwise an uninitialized buffer is stored)
    // and still consume the newline (otherwise the loop never advances)

    while (!feof(fi))
    {
        char * line = (char *) malloc(LINELENGTHMAX * sizeof(char));
        if (fscanf(fi, "%[^\n]", line) == 1)
        {
            inputlines.push_back(line);   // a real (non-empty) line
        }
        else
        {
            free(line);                   // empty line or EOF - skip it
        }
        fgetc(fi);                        // consume the terminating '\n'
    }

    // output selected lines (min..max inclusive) to output file

    for(outline = inputlines.begin(); outline != inputlines.end(); outline++)
    {
        if ((lineN >= minL) && (lineN <= maxL))
        {
            fprintf(fw, "%s\n", *outline);
        }
        lineN++;

        free((void *) *outline); // free memory also
    }

    // close files

    fclose(fi);
    fclose(fw);

    return 1; // all OK (N.B. historical convention in these tools)
}
/******************************************************************************/
/*****************************************************************************/

#include <stdio.h>
#include <limits.h>

// volatile so the signedness probe below cannot be constant-folded away

volatile int char_min = CHAR_MIN;

// Prints the size (and where available the min/max range) of each of the
// fundamental integer and floating point types on this platform.

int main(void)
{
    printf("\n\n Character Types\n");
    printf("Number of bits in a character: %d\n",
           CHAR_BIT);
    printf("Size of character types is %d byte\n",
           (int)sizeof(char));
    printf("Signed char min: %d max: %d\n",
           SCHAR_MIN, SCHAR_MAX);
    printf("Unsigned char min: 0 max: %u\n",
           (unsigned int)UCHAR_MAX);

    // whether a plain 'char' is signed or unsigned is
    // implementation-defined - probe it at run time

    printf("Default char is ");
    if (char_min < 0)
        printf("signed\n");
    else if (char_min == 0)
        printf("unsigned\n");
    else
        printf("non-standard\n");
    printf("*** This is %d bit character representation\n",
           (int)sizeof(char) * 8);

    printf("\n\n Short Int Types\n");
    printf("Size of short int types is %d bytes\n",
           (int)sizeof(short));
    printf("Signed short min: %d max: %d\n",
           SHRT_MIN, SHRT_MAX);
    printf("Unsigned short min: 0 max: %u\n",
           (unsigned int)USHRT_MAX);

    printf("\n Int Types\n");
    printf("Size of int types is %d bytes\n",
           (int)sizeof(int));
    printf("Signed int min: %d max: %d\n",
           INT_MIN, INT_MAX);
    printf("Unsigned int min: 0 max: %u\n",
           (unsigned int)UINT_MAX);
    printf("*** This is %d bit representation\n",
           (int)sizeof(int) * 8);

    printf("\n Long Int Types\n");
    printf("Size of long int types is %d bytes\n",
           (int)sizeof(long));
    printf("Signed long min: %ld max: %ld\n",
           LONG_MIN, LONG_MAX);
    printf("Unsigned long min: 0 max: %lu\n",
           ULONG_MAX);

    // mild addition by Toby Breckon, toby.breckon@cranfield.ac.uk

    printf("\n\n Float Types\n");
    printf("Size of float types is %d bytes\n",
           (int)sizeof(float));
    printf("*** This is %d bit representation\n",
           (int)sizeof(float) * 8);
    printf("\n Double Types\n");
    printf("Size of double types is %d bytes\n\n",
           (int)sizeof(double));

    return 0;
}

/*****************************************************************************/