├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── dt_example1 ├── car.data ├── car.names ├── car.test ├── car.train └── decisiontree.cpp ├── dt_example2 ├── decisiontree.cpp ├── wdbc.data ├── wdbc.names ├── wdbc.test └── wdbc.train ├── handwritten_ex ├── decisiontree.cpp ├── neuralnetwork.cpp ├── semeion.names ├── semeion.test ├── semeion.train └── svm.cpp ├── opticaldigits_ex ├── boosttree.cpp ├── decisiontree.cpp ├── extremerandomforest.cpp ├── knn.cpp ├── knn_weighted.cpp ├── neuralnetwork.cpp ├── normalbayes.cpp ├── optdigits.names ├── optdigits.test ├── optdigits.train ├── randomforest.cpp └── svm.cpp ├── other_ex ├── normalbayes.cpp ├── wdbc.data ├── wdbc.names ├── wdbc.test └── wdbc.train ├── speech_ex ├── decisiontree.cpp ├── isolet1+2+3+4.train ├── isolet5.test └── svm.cpp └── tools ├── dt_varimportance.cc ├── ex_tree.xml ├── randomize.cc ├── selectlines.cc ├── tree.yml └── typechecker.cc /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # this file is setup to build everything for the ML examples 2 | 3 | cmake_minimum_required (VERSION 2.6) 4 | cmake_policy(SET CMP0037 OLD) 5 | set( CMAKE_CXX_FLAGS "-O3 -Wall ${CMAKE_CXX_FLAGS}" ) 6 | # linux specific stuff 7 | 8 | IF ( UNIX ) 9 | set( CMAKE_PREFIX_PATH "/opt/opencv-2.4" ) 10 | set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS TRUE) 11 | MESSAGE( 
"LINUX CONFIG" ) 12 | ENDIF ( UNIX ) 13 | 14 | # windows (inc. 64-bit specific stuff) 15 | 16 | IF ( WIN32 ) 17 | set( CMAKE_PREFIX_PATH "C:/OpenCV2.4/build" ) 18 | set( OpenCV_DIR "C:/OpenCV2.4/build" ) 19 | MESSAGE( "WINDOWS CONFIG" ) 20 | ENDIF ( WIN32 ) 21 | 22 | find_package( OpenCV 2.4.13 REQUIRED ) 23 | # MESSAGE ( "OPENCV CONFIG" ) 24 | # MESSAGE ( ${OpenCV_LIBS} ) 25 | 26 | project(decisiontree) 27 | add_executable(./handwritten_ex/decisiontree ./handwritten_ex/decisiontree.cpp) 28 | target_link_libraries( ./handwritten_ex/decisiontree ${OpenCV_LIBS} ) 29 | 30 | project(neuralnetwork) 31 | add_executable(./handwritten_ex/neuralnetwork ./handwritten_ex/neuralnetwork.cpp) 32 | target_link_libraries( ./handwritten_ex/neuralnetwork ${OpenCV_LIBS} ) 33 | 34 | project(svm) 35 | add_executable(./handwritten_ex/svm ./handwritten_ex/svm.cpp) 36 | target_link_libraries( ./handwritten_ex/svm ${OpenCV_LIBS} ) 37 | 38 | project(ga_interface) 39 | add_executable(./ga_ex/ga_interface ./ga_ex/ga_interface.cpp) 40 | target_link_libraries( ./ga_ex/ga_interface ${OpenCV_LIBS} ) 41 | 42 | project(decisiontree) 43 | add_executable(./dt_example1/decisiontree ./dt_example1/decisiontree.cpp) 44 | target_link_libraries( ./dt_example1/decisiontree ${OpenCV_LIBS} ) 45 | 46 | project(decisiontree2) 47 | add_executable(./dt_example2/decisiontree ./dt_example2/decisiontree.cpp) 48 | target_link_libraries( ./dt_example2/decisiontree ${OpenCV_LIBS} ) 49 | 50 | project(boosttree) 51 | add_executable(./opticaldigits_ex/boosttree ./opticaldigits_ex/boosttree.cpp) 52 | set_target_properties(./opticaldigits_ex/boosttree PROPERTIES COMPILE_FLAGS "-fpermissive") 53 | target_link_libraries( ./opticaldigits_ex/boosttree ${OpenCV_LIBS} ) 54 | 55 | project(decisiontree3) 56 | add_executable(./opticaldigits_ex/decisiontree ./opticaldigits_ex/decisiontree.cpp) 57 | target_link_libraries( ./opticaldigits_ex/decisiontree ${OpenCV_LIBS} ) 58 | 59 | project(extremerandomforest3) 60 | 
# -- opticaldigits_ex : one executable per classifier example ------------------
# N.B. the redundant per-target project() declarations have been removed: a
# CMake build needs only the single top-level project() (declared at the top of
# this file); re-declaring one per executable added nothing and reset
# per-project variables each time. Target names are prefixed with their
# sub-directory path so each binary builds in-place next to its data files
# (this relies on cmake_policy(SET CMP0037 OLD) set at the top of this file).

add_executable(./opticaldigits_ex/extremerandomforest ./opticaldigits_ex/extremerandomforest.cpp)
target_link_libraries( ./opticaldigits_ex/extremerandomforest ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/randomforest ./opticaldigits_ex/randomforest.cpp)
target_link_libraries( ./opticaldigits_ex/randomforest ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/svm ./opticaldigits_ex/svm.cpp)
target_link_libraries( ./opticaldigits_ex/svm ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/knn ./opticaldigits_ex/knn.cpp)
target_link_libraries( ./opticaldigits_ex/knn ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/knn_weighted ./opticaldigits_ex/knn_weighted.cpp)
target_link_libraries( ./opticaldigits_ex/knn_weighted ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/normalbayes ./opticaldigits_ex/normalbayes.cpp)
target_link_libraries( ./opticaldigits_ex/normalbayes ${OpenCV_LIBS} )

add_executable(./opticaldigits_ex/neuralnetwork ./opticaldigits_ex/neuralnetwork.cpp)
target_link_libraries( ./opticaldigits_ex/neuralnetwork ${OpenCV_LIBS} )

# -- other_ex ------------------------------------------------------------------
add_executable(./other_ex/normalbayes ./other_ex/normalbayes.cpp)
target_link_libraries( ./other_ex/normalbayes ${OpenCV_LIBS} )

# -- speech_ex -----------------------------------------------------------------
add_executable(./speech_ex/decisiontree ./speech_ex/decisiontree.cpp)
target_link_libraries( ./speech_ex/decisiontree ${OpenCV_LIBS} )

add_executable(./speech_ex/svm ./speech_ex/svm.cpp)
target_link_libraries( ./speech_ex/svm ${OpenCV_LIBS} )

# -- tools (dt_varimportance needs OpenCV; the remaining tools, whose
#    add_executable() calls follow below, are plain C++ with no libraries) -----
add_executable(./tools/dt_varimportance ./tools/dt_varimportance.cc)
target_link_libraries( ./tools/dt_varimportance ${OpenCV_LIBS} )
104 | add_executable(./tools/randomize tools/randomize.cc) 105 | 106 | project(selectlines) 107 | add_executable(./tools/selectlines tools/selectlines.cc) 108 | 109 | project(typechecker) 110 | add_executable(./tools/typechecker tools/typechecker.cc) 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # C++ Machine Learning OpenCV 2.4.x Teaching Examples 2 | 3 | OpenCV C/C++ Interface Machine Learning legacy 2.4.x interface examples used for teaching, instruction and reference over the years (2010-2013) - 4 | 5 | **Uses older C++ interface to OpenCV ML library, with additional code** - _as these examples pre-date the new C++ OpenCV 3.x ML interface._ 6 | 7 | All tested with OpenCV 2.4.x and GCC (Linux) and known to work with MS Visual Studio 200x on Win32 / Win64. 8 | 9 | N.B. due to changes in the OpenCV API _these do not generically work with OpenCV > 2.4.x_ by default (except for the genetic algorithm (GA) example). 10 | 11 | --- 12 | 13 | ### Background: 14 | 15 | If I taught you between 2010 and 2013 at [Cranfield University](http://www.cranfield.ac.uk) or [ESTIA](http://www.estia.fr) - these are the C++ examples from class. 16 | 17 | Demo source code is provided _"as is"_ to aid your learning and understanding. 
18 | 19 | _For a long time, in the absence of other fully worked examples for the OpenCV machine learning components, these became the de facto reference for the use of these OpenCV routines (any conceptual errors or bad choices of parameters made here have propagated widely)._ 20 | 21 | --- 22 | 23 | ### How to download and run: 24 | 25 | In each sub-directory: 26 | 27 | + .cpp file(s) - code for the example 28 | + .name file - an explanation of the data and its source 29 | + .data file - the original and complete set of data (CSV file format) 30 | + .train file - the data to be used for training (CSV file format) 31 | + .test file - the data to be used for testing (CSV file format) 32 | + .xml, .yml - example data files for testing some tools 33 | 34 | All dataset examples are taken and reproduced from the [UCI Machine Learning Repository](http://archive.ics.uci.edu/ml/). 35 | 36 | Download each file as needed, or to download the entire repository and run each example try: 37 | 38 | ``` 39 | git clone https://github.com/tobybreckon/cpp-examples-ml.git 40 | cd cpp-examples-ml 41 | cmake . 42 | make 43 | cd 44 | ./ 45 | ``` 46 | 47 | The genetic algorithm (GA; inside directory ga_ex/) example runs with a webcam connected or from a command line supplied video file of a format OpenCV supports on your system (otherwise edit the code to provide your own image source). _N.B._ you may need to change the line near the top that specifies the camera device to use on this example - change "0" if you have one webcam, I have it set to "1" to skip my built-in laptop webcam and use the connected USB camera. 48 | 49 | --- 50 | 51 | If referencing these examples in your own work please use: 52 | ``` 53 | @TechReport{breckon2010, 54 | author = {Breckon, T.P. 
and Barnes, S.E.}, 55 | title = {Machine Learning - MSc Course Notes}, 56 | institution = {Cranfield University}, 57 | year = {2010}, 58 | address = {Bedfordshire, UK}, 59 | } 60 | ``` 61 | 62 | --- 63 | 64 | If you find any bugs please raise an issue (or better still submit a pull request, please) - toby.breckon@durham.ac.uk 65 | 66 | _"may the source be with you"_ - anon. 67 | -------------------------------------------------------------------------------- /dt_example1/car.names: -------------------------------------------------------------------------------- 1 | 1. Title: Car Evaluation Database 2 | 3 | 2. Sources: 4 | (a) Creator: Marko Bohanec 5 | (b) Donors: Marko Bohanec (marko.bohanec@ijs.si) 6 | Blaz Zupan (blaz.zupan@ijs.si) 7 | (c) Date: June, 1997 8 | 9 | 3. Past Usage: 10 | 11 | The hierarchical decision model, from which this dataset is 12 | derived, was first presented in 13 | 14 | M. Bohanec and V. Rajkovic: Knowledge acquisition and explanation for 15 | multi-attribute decision making. In 8th Intl Workshop on Expert 16 | Systems and their Applications, Avignon, France. pages 59-78, 1988. 17 | 18 | Within machine-learning, this dataset was used for the evaluation 19 | of HINT (Hierarchy INduction Tool), which was proved to be able to 20 | completely reconstruct the original hierarchical model. This, 21 | together with a comparison with C4.5, is presented in 22 | 23 | B. Zupan, M. Bohanec, I. Bratko, J. Demsar: Machine learning by 24 | function decomposition. ICML-97, Nashville, TN. 1997 (to appear) 25 | 26 | 4. Relevant Information Paragraph: 27 | 28 | Car Evaluation Database was derived from a simple hierarchical 29 | decision model originally developed for the demonstration of DEX 30 | (M. Bohanec, V. Rajkovic: Expert system for decision 31 | making. Sistemica 1(1), pp. 145-157, 1990.). The model evaluates 32 | cars according to the following concept structure: 33 | 34 | CAR car acceptability 35 | . PRICE overall price 36 | . . 
buying buying price 37 | . . maint price of the maintenance 38 | . TECH technical characteristics 39 | . . COMFORT comfort 40 | . . . doors number of doors 41 | . . . persons capacity in terms of persons to carry 42 | . . . lug_boot the size of luggage boot 43 | . . safety estimated safety of the car 44 | 45 | Input attributes are printed in lowercase. Besides the target 46 | concept (CAR), the model includes three intermediate concepts: 47 | PRICE, TECH, COMFORT. Every concept is in the original model 48 | related to its lower level descendants by a set of examples (for 49 | these examples sets see http://www-ai.ijs.si/BlazZupan/car.html). 50 | 51 | The Car Evaluation Database contains examples with the structural 52 | information removed, i.e., directly relates CAR to the six input 53 | attributes: buying, maint, doors, persons, lug_boot, safety. 54 | 55 | Because of known underlying concept structure, this database may be 56 | particularly useful for testing constructive induction and 57 | structure discovery methods. 58 | 59 | 5. Number of Instances: 1728 60 | (instances completely cover the attribute space) 61 | 62 | 6. Number of Attributes: 6 63 | 64 | 7. Attribute Values: 65 | 66 | buying v-high, high, med, low 67 | maint v-high, high, med, low 68 | doors 2, 3, 4, 5-more 69 | persons 2, 4, more 70 | lug_boot small, med, big 71 | safety low, med, high 72 | 73 | 8. Missing Attribute Values: none 74 | 75 | 9. 
Class Distribution (number of instances per class) 76 | 77 | class N N[%] 78 | ----------------------------- 79 | unacc 1210 (70.023 %) 80 | acc 384 (22.222 %) 81 | good 69 ( 3.993 %) 82 | v-good 65 ( 3.762 %) 83 | -------------------------------------------------------------------------------- /dt_example1/car.test: -------------------------------------------------------------------------------- 1 | high,high,3,2,med,high,unacc 2 | vhigh,high,4,more,small,high,unacc 3 | high,low,5more,2,med,high,unacc 4 | med,med,5more,more,small,low,unacc 5 | low,low,3,4,big,med,good 6 | high,med,5more,more,small,med,unacc 7 | low,low,4,more,med,low,unacc 8 | high,med,5more,2,small,low,unacc 9 | vhigh,vhigh,2,2,small,high,unacc 10 | high,high,3,more,small,low,unacc 11 | high,low,3,4,small,low,unacc 12 | vhigh,high,2,4,med,high,unacc 13 | med,vhigh,5more,more,med,med,acc 14 | high,vhigh,2,2,med,med,unacc 15 | vhigh,high,2,more,small,med,unacc 16 | med,high,2,4,small,high,acc 17 | vhigh,med,5more,4,big,med,acc 18 | med,low,4,2,med,low,unacc 19 | high,low,5more,2,small,med,unacc 20 | low,low,5more,2,big,med,unacc 21 | low,med,5more,more,small,high,good 22 | med,low,4,more,small,low,unacc 23 | low,vhigh,3,4,med,low,unacc 24 | med,vhigh,3,4,med,med,unacc 25 | vhigh,vhigh,4,2,big,low,unacc 26 | med,low,2,4,big,med,good 27 | med,high,4,more,med,high,acc 28 | high,low,5more,4,small,med,unacc 29 | high,vhigh,5more,4,small,high,unacc 30 | med,vhigh,5more,4,big,low,unacc 31 | vhigh,vhigh,2,4,small,med,unacc 32 | vhigh,vhigh,4,2,big,med,unacc 33 | low,med,4,2,big,high,unacc 34 | med,low,5more,more,small,high,good 35 | low,vhigh,5more,2,big,med,unacc 36 | vhigh,vhigh,2,more,med,med,unacc 37 | med,vhigh,5more,2,big,med,unacc 38 | high,low,3,more,med,high,acc 39 | low,low,5more,more,small,high,good 40 | high,low,3,4,big,low,unacc 41 | high,med,2,4,small,med,unacc 42 | vhigh,vhigh,2,more,small,high,unacc 43 | low,vhigh,2,4,med,med,unacc 44 | vhigh,med,4,2,big,low,unacc 45 | 
high,high,2,more,big,high,acc 46 | high,vhigh,2,2,big,high,unacc 47 | high,low,5more,more,big,low,unacc 48 | low,med,3,more,small,med,acc 49 | vhigh,low,2,4,med,low,unacc 50 | med,med,5more,2,big,med,unacc 51 | med,med,5more,4,med,high,vgood 52 | vhigh,low,3,more,big,low,unacc 53 | low,vhigh,5more,4,small,high,acc 54 | low,vhigh,2,4,small,med,unacc 55 | vhigh,low,2,more,small,low,unacc 56 | low,low,4,more,med,med,good 57 | vhigh,high,2,more,big,med,unacc 58 | high,vhigh,4,4,med,med,unacc 59 | vhigh,low,2,2,med,med,unacc 60 | med,med,5more,2,big,low,unacc 61 | med,high,5more,more,small,high,acc 62 | low,low,5more,4,big,high,vgood 63 | high,high,5more,2,small,high,unacc 64 | high,vhigh,4,4,big,low,unacc 65 | med,med,3,more,med,high,vgood 66 | med,low,4,4,big,high,vgood 67 | low,high,4,more,med,high,vgood 68 | low,vhigh,4,4,small,high,acc 69 | med,high,4,more,big,low,unacc 70 | high,vhigh,2,4,big,low,unacc 71 | high,low,3,4,med,high,acc 72 | high,vhigh,5more,2,med,low,unacc 73 | vhigh,high,5more,4,small,high,unacc 74 | med,med,4,4,small,med,acc 75 | vhigh,vhigh,3,more,med,high,unacc 76 | med,high,4,2,small,low,unacc 77 | high,med,2,more,med,high,acc 78 | med,high,5more,4,big,low,unacc 79 | med,low,5more,more,small,low,unacc 80 | low,low,5more,2,small,high,unacc 81 | low,low,3,2,small,high,unacc 82 | low,high,2,4,big,high,vgood 83 | med,high,3,more,med,med,acc 84 | vhigh,low,5more,4,big,high,acc 85 | vhigh,vhigh,2,more,med,low,unacc 86 | low,high,5more,more,big,low,unacc 87 | med,med,2,more,big,high,vgood 88 | low,med,3,4,big,med,good 89 | med,low,5more,2,small,high,unacc 90 | high,low,5more,2,med,med,unacc 91 | vhigh,vhigh,4,more,small,low,unacc 92 | med,high,2,more,big,med,acc 93 | vhigh,vhigh,5more,2,med,high,unacc 94 | vhigh,vhigh,5more,4,med,low,unacc 95 | vhigh,high,3,4,med,low,unacc 96 | vhigh,vhigh,2,2,big,high,unacc 97 | med,vhigh,3,2,big,med,unacc 98 | high,med,5more,4,small,med,unacc 99 | low,med,3,more,med,high,vgood 100 | low,vhigh,4,more,big,low,unacc 101 
| low,vhigh,3,4,big,med,acc 102 | med,med,3,4,small,med,acc 103 | vhigh,med,3,4,big,med,acc 104 | med,med,5more,4,big,high,vgood 105 | low,vhigh,4,more,small,med,unacc 106 | vhigh,vhigh,5more,4,big,high,unacc 107 | high,high,5more,4,med,high,acc 108 | med,low,2,4,big,low,unacc 109 | vhigh,high,4,2,big,med,unacc 110 | med,low,3,more,big,low,unacc 111 | low,med,4,2,med,low,unacc 112 | vhigh,vhigh,2,2,small,med,unacc 113 | med,vhigh,5more,4,med,med,acc 114 | med,low,5more,2,big,med,unacc 115 | low,high,3,more,small,low,unacc 116 | high,vhigh,3,more,small,med,unacc 117 | med,med,4,2,big,med,unacc 118 | vhigh,low,4,more,med,low,unacc 119 | low,high,4,4,med,med,acc 120 | med,vhigh,2,4,big,high,acc 121 | high,high,3,2,big,low,unacc 122 | vhigh,vhigh,5more,2,med,low,unacc 123 | high,high,3,more,med,high,acc 124 | low,high,3,more,med,low,unacc 125 | med,med,2,more,med,high,acc 126 | med,med,5more,4,med,med,acc 127 | vhigh,med,2,more,big,high,acc 128 | med,med,2,more,small,high,unacc 129 | vhigh,med,3,2,big,low,unacc 130 | low,low,5more,2,big,low,unacc 131 | vhigh,med,5more,4,small,med,unacc 132 | high,high,4,more,med,med,acc 133 | high,low,2,2,small,med,unacc 134 | low,high,5more,2,med,med,unacc 135 | low,med,3,2,small,med,unacc 136 | med,low,2,4,med,low,unacc 137 | low,vhigh,2,more,med,med,unacc 138 | med,vhigh,2,more,big,low,unacc 139 | vhigh,med,3,2,small,med,unacc 140 | vhigh,vhigh,5more,2,big,med,unacc 141 | low,high,5more,4,big,med,acc 142 | vhigh,high,3,4,small,high,unacc 143 | high,vhigh,5more,4,big,high,unacc 144 | med,med,4,more,small,low,unacc 145 | med,high,2,more,med,med,unacc 146 | med,high,4,2,med,low,unacc 147 | med,high,5more,more,med,low,unacc 148 | med,low,2,2,small,high,unacc 149 | high,vhigh,3,4,big,med,unacc 150 | high,high,5more,more,small,low,unacc 151 | high,low,3,2,big,low,unacc 152 | vhigh,med,2,4,big,med,acc 153 | low,vhigh,5more,more,small,low,unacc 154 | vhigh,low,2,more,big,high,acc 155 | low,med,2,more,small,med,unacc 156 | 
low,med,5more,more,big,low,unacc 157 | med,low,5more,4,small,high,good 158 | vhigh,med,5more,more,small,med,unacc 159 | med,high,5more,2,small,low,unacc 160 | vhigh,high,5more,4,big,med,unacc 161 | low,low,5more,4,small,med,acc 162 | med,high,5more,4,big,high,acc 163 | med,high,2,2,small,low,unacc 164 | low,low,4,2,big,high,unacc 165 | high,high,2,4,small,high,acc 166 | high,low,5more,more,small,med,unacc 167 | vhigh,med,5more,more,med,low,unacc 168 | vhigh,low,4,2,small,med,unacc 169 | high,low,4,4,big,high,acc 170 | low,low,5more,4,med,high,vgood 171 | low,vhigh,5more,2,small,high,unacc 172 | high,high,4,4,med,low,unacc 173 | med,low,3,2,small,med,unacc 174 | vhigh,med,4,4,small,high,acc 175 | low,med,2,more,big,med,good 176 | vhigh,med,2,4,small,high,acc 177 | high,vhigh,5more,2,small,high,unacc 178 | med,med,2,2,small,high,unacc 179 | low,low,4,4,small,med,acc 180 | high,low,5more,more,med,high,acc 181 | med,high,2,more,small,med,unacc 182 | high,high,3,4,med,low,unacc 183 | vhigh,med,4,4,med,med,acc 184 | med,med,2,4,small,low,unacc 185 | high,low,5more,2,small,low,unacc 186 | vhigh,vhigh,5more,more,big,med,unacc 187 | high,high,5more,more,small,high,acc 188 | med,med,3,2,big,med,unacc 189 | high,med,4,more,med,high,acc 190 | low,vhigh,3,more,med,med,acc 191 | high,med,3,2,big,med,unacc 192 | high,high,4,2,small,high,unacc 193 | med,med,5more,more,med,low,unacc 194 | low,vhigh,4,2,small,high,unacc 195 | low,high,4,2,small,low,unacc 196 | med,vhigh,4,2,med,low,unacc 197 | low,med,3,2,small,low,unacc 198 | vhigh,high,5more,more,med,low,unacc 199 | low,med,2,more,med,high,good 200 | vhigh,vhigh,3,4,big,low,unacc 201 | vhigh,low,5more,2,med,low,unacc 202 | low,low,4,4,med,low,unacc 203 | vhigh,high,3,2,small,med,unacc 204 | high,low,4,4,med,high,acc 205 | high,low,4,2,small,high,unacc 206 | low,low,3,2,med,high,unacc 207 | vhigh,med,3,4,med,high,acc 208 | vhigh,low,3,2,big,high,unacc 209 | low,med,5more,2,med,med,unacc 210 | med,low,2,4,med,med,acc 211 | 
vhigh,high,5more,4,small,low,unacc 212 | low,med,2,4,big,med,good 213 | low,high,3,more,med,high,vgood 214 | low,high,4,more,big,low,unacc 215 | low,low,5more,2,med,med,unacc 216 | vhigh,med,3,more,big,low,unacc 217 | low,vhigh,4,more,big,med,acc 218 | vhigh,med,4,4,small,med,unacc 219 | vhigh,low,5more,2,small,low,unacc 220 | med,vhigh,3,2,big,low,unacc 221 | high,low,5more,2,small,high,unacc 222 | low,med,2,more,med,med,acc 223 | med,vhigh,2,4,med,med,unacc 224 | vhigh,high,4,more,big,med,unacc 225 | high,high,5more,4,big,low,unacc 226 | high,low,5more,more,med,low,unacc 227 | high,med,4,more,big,med,acc 228 | low,vhigh,3,2,big,med,unacc 229 | low,low,2,4,small,med,acc 230 | vhigh,vhigh,3,more,small,high,unacc 231 | vhigh,high,4,2,med,low,unacc 232 | high,low,2,2,big,med,unacc 233 | high,med,4,more,big,high,acc 234 | low,high,2,more,big,low,unacc 235 | med,vhigh,3,more,big,low,unacc 236 | vhigh,high,3,more,small,high,unacc 237 | high,med,3,more,med,med,acc 238 | high,high,4,2,big,low,unacc 239 | high,med,2,more,small,med,unacc 240 | low,vhigh,5more,2,med,high,unacc 241 | high,med,2,more,big,low,unacc 242 | low,vhigh,2,more,small,med,unacc 243 | low,high,5more,2,small,high,unacc 244 | med,vhigh,4,2,small,low,unacc 245 | low,med,4,2,med,med,unacc 246 | vhigh,low,5more,4,med,med,acc 247 | vhigh,high,5more,more,big,med,unacc 248 | high,low,5more,4,small,high,acc 249 | high,high,2,2,big,low,unacc 250 | high,med,5more,more,med,low,unacc 251 | low,high,3,more,big,low,unacc 252 | med,low,3,4,big,high,vgood 253 | med,med,2,4,med,med,acc 254 | high,med,5more,2,big,low,unacc 255 | high,low,4,more,med,med,acc 256 | high,med,3,2,med,high,unacc 257 | high,low,3,more,big,low,unacc 258 | high,med,2,4,big,low,unacc 259 | vhigh,high,3,more,med,high,unacc 260 | med,high,4,4,big,high,acc 261 | low,high,5more,more,med,med,acc 262 | vhigh,low,5more,more,med,low,unacc 263 | med,high,4,4,small,med,unacc 264 | med,high,5more,2,med,high,unacc 265 | vhigh,vhigh,4,more,med,med,unacc 266 | 
high,vhigh,3,4,small,low,unacc 267 | med,med,4,4,med,med,acc 268 | high,high,2,2,big,high,unacc 269 | low,med,5more,4,med,low,unacc 270 | med,high,3,more,big,low,unacc 271 | vhigh,vhigh,3,2,med,med,unacc 272 | low,high,2,2,med,med,unacc 273 | med,low,2,2,med,high,unacc 274 | med,high,5more,2,big,med,unacc 275 | low,vhigh,4,4,med,med,acc 276 | med,vhigh,3,more,small,low,unacc 277 | vhigh,high,2,4,big,med,unacc 278 | med,low,4,2,big,med,unacc 279 | vhigh,low,2,2,small,low,unacc 280 | vhigh,med,5more,4,big,low,unacc 281 | vhigh,high,2,2,big,low,unacc 282 | high,low,2,4,small,low,unacc 283 | med,high,3,more,big,med,acc 284 | med,med,3,more,big,low,unacc 285 | med,vhigh,3,4,med,high,acc 286 | vhigh,med,5more,2,big,low,unacc 287 | high,high,4,2,big,med,unacc 288 | high,vhigh,3,more,small,low,unacc 289 | low,low,5more,4,big,low,unacc 290 | vhigh,vhigh,2,4,big,low,unacc 291 | low,vhigh,4,2,big,med,unacc 292 | high,med,5more,4,small,high,acc 293 | low,med,4,more,small,high,good 294 | high,low,2,2,small,high,unacc 295 | high,high,5more,more,med,high,acc 296 | low,med,4,2,small,med,unacc 297 | med,vhigh,5more,more,big,low,unacc 298 | vhigh,low,5more,2,med,med,unacc 299 | high,low,5more,4,med,low,unacc 300 | med,low,3,4,med,high,good 301 | vhigh,vhigh,2,more,big,low,unacc 302 | vhigh,vhigh,5more,more,small,high,unacc 303 | low,med,3,2,med,low,unacc 304 | vhigh,med,2,more,med,low,unacc 305 | vhigh,med,3,2,small,low,unacc 306 | low,low,5more,4,small,low,unacc 307 | high,vhigh,5more,more,med,high,unacc 308 | vhigh,med,2,more,med,high,acc 309 | low,vhigh,2,4,med,low,unacc 310 | low,vhigh,4,4,med,low,unacc 311 | med,med,5more,more,big,low,unacc 312 | vhigh,low,3,4,small,med,unacc 313 | med,low,4,2,small,low,unacc 314 | low,low,3,4,big,high,vgood 315 | low,high,5more,more,small,med,acc 316 | vhigh,low,3,2,small,high,unacc 317 | vhigh,high,4,2,big,high,unacc 318 | med,low,2,more,big,low,unacc 319 | low,med,4,more,med,high,vgood 320 | med,vhigh,4,2,small,med,unacc 321 | 
high,med,3,more,big,low,unacc 322 | vhigh,vhigh,3,4,big,high,unacc 323 | med,vhigh,4,more,big,high,acc 324 | low,vhigh,2,2,big,low,unacc 325 | high,med,3,4,med,low,unacc 326 | low,vhigh,3,2,big,low,unacc 327 | low,vhigh,2,more,med,high,acc 328 | vhigh,med,5more,more,med,med,acc 329 | low,low,4,4,small,low,unacc 330 | med,med,2,4,big,high,vgood 331 | vhigh,vhigh,5more,4,med,med,unacc 332 | med,high,3,2,small,med,unacc 333 | high,high,2,4,small,low,unacc 334 | low,high,2,4,small,med,acc 335 | vhigh,low,3,more,med,low,unacc 336 | med,med,4,2,big,high,unacc 337 | med,high,5more,4,med,low,unacc 338 | vhigh,low,4,2,big,high,unacc 339 | med,vhigh,5more,more,small,high,acc 340 | med,vhigh,5more,more,small,med,unacc 341 | high,med,5more,2,small,high,unacc 342 | high,med,2,2,small,med,unacc 343 | low,high,5more,more,small,high,acc 344 | vhigh,high,4,4,med,low,unacc 345 | vhigh,med,3,2,med,high,unacc 346 | -------------------------------------------------------------------------------- /dt_example1/decisiontree.cpp: -------------------------------------------------------------------------------- 1 | // Example : decision tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : dt_example1 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2010 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 1383 22 | #define ATTRIBUTES_PER_SAMPLE 6 // not the last as this is the class 23 | #define NUMBER_OF_TESTING_SAMPLES 345 24 | 25 | #define 
NUMBER_OF_CLASSES 4 // classes 0->3 26 | static char* CLASSES[NUMBER_OF_CLASSES] = 27 | {(char *) "unacc", (char *) "acc", (char *) "good", (char *) "vgood"}; 28 | 29 | /******************************************************************************/ 30 | 31 | // a basic hash function from: http://www.cse.yorku.ca/~oz/hash.html 32 | 33 | int hash(char *str) 34 | { 35 | int hash = 5381; 36 | int c; 37 | 38 | while ((c = (*str++))) 39 | { 40 | hash = ((hash << 5) + hash) + c; 41 | } 42 | 43 | return hash; 44 | } 45 | 46 | /******************************************************************************/ 47 | 48 | // loads the sample database from file (which is a CSV text file) 49 | 50 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 51 | int n_samples ) 52 | { 53 | char tmp_buf[10]; 54 | int i = 0; 55 | char c; 56 | 57 | // if we can't read the input file then return 0 58 | FILE* f = fopen( filename, "r" ); 59 | if( !f ) 60 | { 61 | printf("ERROR: cannot read file %s\n", filename); 62 | return 0; // all not OK 63 | } 64 | 65 | // for each sample in the file 66 | 67 | for(int line = 0; line < n_samples; line++) 68 | { 69 | 70 | // for each attribute on the line in the file 71 | 72 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 73 | { 74 | // last attribute is the class 75 | 76 | if (attribute == 6) 77 | { 78 | c = '\0'; 79 | for(i=0; c != '\n'; i++) 80 | { 81 | c = fgetc(f); 82 | tmp_buf[i] = c; 83 | } 84 | tmp_buf[i - 1] = '\0'; 85 | //printf("%s\n", tmp_buf); 86 | 87 | // find the class number and record this 88 | 89 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 90 | { 91 | if (strcmp(CLASSES[i], tmp_buf) == 0) 92 | { 93 | classes.at(line, 0) = (float) i; 94 | } 95 | } 96 | } 97 | else 98 | { 99 | 100 | // for all other attributes just read in the string value 101 | // and use a hash function to convert to to a float 102 | // (N.B. openCV uses a floating point decision tree implementation!) 
103 | 104 | c = '\0'; 105 | for(i=0; c != ','; i++) 106 | { 107 | c = fgetc(f); 108 | tmp_buf[i] = c; 109 | } 110 | tmp_buf[i - 1] = '\0'; 111 | data.at(line, attribute) = (float) hash(tmp_buf); 112 | 113 | //printf("%s,", tmp_buf); 114 | } 115 | } 116 | } 117 | 118 | fclose(f); 119 | 120 | return 1; // all OK 121 | } 122 | 123 | /******************************************************************************/ 124 | 125 | int main( int argc, char** argv ) 126 | { 127 | // lets just check the version first 128 | 129 | printf ("OpenCV version %s (%d.%d.%d)\n", 130 | CV_VERSION, 131 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 132 | 133 | // define training data storage matrices (one for attribute examples, one 134 | // for classifications) 135 | 136 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 137 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 138 | 139 | //define testing data storage matrices 140 | 141 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 142 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 143 | 144 | // define all the attributes as categorical (i.e. categories) 145 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 146 | // that can be assigned on a per attribute basis 147 | 148 | // this is a classification problem (i.e. 
predict a discrete number of class 149 | // outputs) so also the last (+1) output var_type element to CV_VAR_CATEGORICAL 150 | 151 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 152 | var_type = Scalar(CV_VAR_CATEGORICAL); // all inputs are categorical 153 | 154 | CvDTreeNode* resultNode; // node returned from a prediction 155 | 156 | // load training and testing data sets 157 | 158 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 159 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 160 | { 161 | // define the parameters for training the decision tree 162 | 163 | float priors[] = { 1, 1, 1, 1 }; // weights of each classification for classes 164 | //float priors[] = { 70, 22, 4, 4 }; // weights of each classification for classes 165 | 166 | CvDTreeParams params = CvDTreeParams(25, // max depth 167 | 10, // min sample count 168 | 0, // regression accuracy: N/A here 169 | false, // compute surrogate split, no missing data 170 | 25, // max number of categories (use sub-optimal algorithm for larger numbers) 171 | 10, // the number of cross-validation folds 172 | true, // use 1SE rule => smaller tree 173 | false, // throw away the pruned tree branches 174 | priors // the array of priors, the bigger weight, the more attention 175 | // to the maligant cases 176 | // (i.e. 
a case will be judjed to be maligant with bigger chance) 177 | ); 178 | 179 | 180 | // train decision tree classifier (using training data) 181 | 182 | printf( "\nUsing training database: %s\n\n", argv[1]); 183 | CvDTree* dtree = new CvDTree; 184 | 185 | dtree->train(training_data, CV_ROW_SAMPLE, training_classifications, 186 | Mat(), Mat(), var_type, Mat(), params); 187 | 188 | // perform classifier testing and report results 189 | 190 | Mat test_sample; 191 | int correct_class = 0; 192 | int wrong_class = 0; 193 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0}; 194 | 195 | printf( "\nUsing testing database: %s\n\n", argv[2]); 196 | 197 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 198 | { 199 | 200 | // extract a row from the testing matrix 201 | 202 | test_sample = testing_data.row(tsample); 203 | 204 | // run decision tree prediction 205 | 206 | resultNode = dtree->predict(test_sample, Mat(), false); 207 | 208 | printf("Testing Sample %i -> class result %s\n", tsample, CLASSES[(int) (resultNode->value)]); 209 | 210 | // if the prediction and the (true) testing classification are the same 211 | // (N.B. openCV uses a floating point decision tree implementation!) 
212 | 213 | if (fabs(resultNode->value - testing_classifications.at(tsample, 0)) 214 | >= FLT_EPSILON) 215 | { 216 | // if they differ more than floating point error => wrong class 217 | 218 | wrong_class++; 219 | 220 | false_positives[(int) resultNode->value]++; 221 | 222 | } 223 | else 224 | { 225 | 226 | // otherwise correct 227 | 228 | correct_class++; 229 | } 230 | } 231 | 232 | printf( "\nResults on the testing database: %s\n" 233 | "\tCorrect classification: %d (%g%%)\n" 234 | "\tWrong classifications: %d (%g%%)\n", 235 | argv[2], 236 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 237 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 238 | 239 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 240 | { 241 | printf( "\tClass %s false postives %d (%g%%)\n", CLASSES[i], 242 | false_positives[i], 243 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 244 | } 245 | 246 | // all matrix memory free by destructors 247 | 248 | // all OK : main returns 0 249 | 250 | return 0; 251 | } 252 | 253 | // not OK : main returns -1 254 | 255 | printf("usage: %s training_data_file testing_data_file\n", argv[0]); 256 | return -1; 257 | } 258 | /******************************************************************************/ 259 | -------------------------------------------------------------------------------- /dt_example2/decisiontree.cpp: -------------------------------------------------------------------------------- 1 | // Example : decision tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : dt_example2 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2010 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ 
"cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 449 22 | #define ATTRIBUTES_PER_SAMPLE 30 // not the first two as patient ID and class 23 | #define NUMBER_OF_TESTING_SAMPLES 120 24 | 25 | static char CLASSES[2] = {'B', 'M'}; // class B = 0, class M = 1 26 | 27 | /******************************************************************************/ 28 | 29 | // loads the sample database from file (which is a CSV text file) 30 | 31 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 32 | int n_samples ) 33 | { 34 | char tmpc; 35 | float tmpf; 36 | 37 | // if we can't read the input file then return 0 38 | FILE* f = fopen( filename, "r" ); 39 | if( !f ) 40 | { 41 | printf("ERROR: cannot read file %s\n", filename); 42 | return 0; // all not OK 43 | } 44 | 45 | // for each sample in the file 46 | 47 | for(int line = 0; line < n_samples; line++) 48 | { 49 | 50 | // for each attribute on the line in the file 51 | 52 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 2); attribute++) 53 | { 54 | if (attribute == 0) 55 | { 56 | fscanf(f, "%f,", &tmpf); 57 | 58 | // ignore attribute 0 (as it's the patient ID) 59 | 60 | continue; 61 | } 62 | else if (attribute == 1) 63 | { 64 | 65 | // attribute 2 (in the database) is the classification 66 | // record 1 = M = malignant 67 | // record 0 = B = benign 68 | 69 | fscanf(f, "%c,", &tmpc); 70 | 71 | switch(tmpc) 72 | { 73 | case 'M': 74 | classes.at(line, 0) = 1.0; 75 | break; 76 | case 'B': 77 | classes.at(line, 0) = 0.0; 78 | break; 79 | default: 80 | printf("ERROR: unexpected class in file %s\n", filename); 81 | return 0; // all not OK 82 | } 83 | 84 | // printf("%c,", tmpc); 85 | } 86 | else 87 | { 88 | fscanf(f, "%f,", &tmpf); 89 | data.at(line, (attribute - 2)) = (float) tmpf; 90 | // printf("%f,", data.at(line, (attribute - 2))); 
91 | } 92 | } 93 | fscanf(f, "\n"); 94 | // printf("\n"); 95 | } 96 | 97 | fclose(f); 98 | 99 | return 1; // all OK 100 | } 101 | 102 | /******************************************************************************/ 103 | 104 | int main( int argc, char** argv ) 105 | { 106 | // lets just check the version first 107 | 108 | printf ("OpenCV version %s (%d.%d.%d)\n", 109 | CV_VERSION, 110 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 111 | 112 | // define training data storage matrices (one for attribute examples, one 113 | // for classifications) 114 | 115 | Mat training_data = 116 | Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 117 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 118 | 119 | //define testing data storage matrices 120 | 121 | Mat testing_data = 122 | Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 123 | Mat testing_classifications = 124 | Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 125 | 126 | // define all the attributes as numerical 127 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 128 | // that can be assigned on a per attribute basis 129 | 130 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 131 | var_type = Scalar(CV_VAR_NUMERICAL); // all inputs are numerical 132 | 133 | // this is a classification problem (i.e. 
predict a discrete number of class 134 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 135 | 136 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 137 | 138 | CvDTreeNode* resultNode; // node returned from a prediction 139 | 140 | // load training and testing data sets 141 | 142 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 143 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 144 | { 145 | // define the parameters for training the decision tree 146 | 147 | float priors[] = { 1, 1 }; // weights of each classification for classes 148 | // 0 = B = benign, 1 = M = malignant 149 | 150 | CvDTreeParams params = CvDTreeParams(8, // max depth 151 | 5, // min sample count 152 | 0, // regression accuracy: N/A here 153 | false, // compute surrogate split, no missing data 154 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 155 | 10, // the number of cross-validation folds 156 | true, // use 1SE rule => smaller tree 157 | false, // throw away the pruned tree branches 158 | priors // the array of priors, the bigger weight, the more attention 159 | // to the maligant cases 160 | // (i.e. 
a case will be judjed to be maligant with bigger chance) 161 | ); 162 | 163 | 164 | // train decision tree classifier (using training data) 165 | 166 | printf( "\nUsing training database: %s\n\n", argv[1]); 167 | CvDTree* dtree = new CvDTree; 168 | 169 | dtree->train(training_data, CV_ROW_SAMPLE, 170 | training_classifications, 171 | Mat(), Mat(), var_type, Mat(), params); 172 | 173 | // perform classifier testing and report results 174 | 175 | Mat test_sample; 176 | int correct_class = 0; 177 | int wrong_class = 0; 178 | int m_class_fp = 0; 179 | int b_class_fp = 0; 180 | 181 | printf( "\nUsing testing database: %s\n\n", argv[2]); 182 | 183 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 184 | { 185 | 186 | // extract a row from the testing matrix 187 | 188 | test_sample = testing_data.row(tsample); 189 | 190 | // run decision tree prediction 191 | 192 | resultNode = dtree->predict(test_sample, Mat(), false); 193 | 194 | printf("Testing Sample %i -> class result %c\n", tsample, CLASSES[(int) (resultNode->value)]); 195 | 196 | // if the prediction and the (true) testing classification are the same 197 | // (N.B. openCV uses a floating point decision tree implementation!) 
198 | 199 | if (fabs(resultNode->value - testing_classifications.at(tsample, 0)) 200 | >= FLT_EPSILON) 201 | { 202 | // if they differ more than floating point error => wrong class 203 | 204 | wrong_class++; 205 | 206 | // if the result class is different from 1.0 (M class label) by 207 | // more than floating point error => B class false +ve 208 | 209 | if (fabs(resultNode->value - 1.0) >= FLT_EPSILON) 210 | { 211 | b_class_fp++; 212 | } 213 | else 214 | { 215 | 216 | // otherwise it's an 217 | 218 | m_class_fp++; 219 | } 220 | 221 | } 222 | else 223 | { 224 | 225 | // otherwise correct 226 | 227 | correct_class++; 228 | } 229 | } 230 | 231 | printf( "\nResults on the testing database: %s\n" 232 | "\tCorrect classification: %d (%g%%)\n" 233 | "\tWrong classifications: %d (%g%%)\n" 234 | "\tM false +ve classifications: %d (%g%%)\n" 235 | "\tB false +ve classifications: %d (%g%%)\n", 236 | argv[2], 237 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 238 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES, 239 | m_class_fp, (double) m_class_fp*100/NUMBER_OF_TESTING_SAMPLES, 240 | b_class_fp, (double) b_class_fp*100/NUMBER_OF_TESTING_SAMPLES ); 241 | 242 | // all matrix memory free by destructors 243 | 244 | 245 | // all OK : main returns 0 246 | 247 | return 0; 248 | } 249 | 250 | // not OK : main returns -1 251 | 252 | return -1; 253 | } 254 | /******************************************************************************/ 255 | -------------------------------------------------------------------------------- /dt_example2/wdbc.names: -------------------------------------------------------------------------------- 1 | 1. Title: Wisconsin Diagnostic Breast Cancer (WDBC) 2 | 3 | 2. Source Information 4 | 5 | a) Creators: 6 | 7 | Dr. William H. Wolberg, General Surgery Dept., University of 8 | Wisconsin, Clinical Sciences Center, Madison, WI 53792 9 | wolberg@eagle.surgery.wisc.edu 10 | 11 | W. 
Nick Street, Computer Sciences Dept., University of 12 | Wisconsin, 1210 West Dayton St., Madison, WI 53706 13 | street@cs.wisc.edu 608-262-6619 14 | 15 | Olvi L. Mangasarian, Computer Sciences Dept., University of 16 | Wisconsin, 1210 West Dayton St., Madison, WI 53706 17 | olvi@cs.wisc.edu 18 | 19 | b) Donor: Nick Street 20 | 21 | c) Date: November 1995 22 | 23 | 3. Past Usage: 24 | 25 | first usage: 26 | 27 | W.N. Street, W.H. Wolberg and O.L. Mangasarian 28 | Nuclear feature extraction for breast tumor diagnosis. 29 | IS&T/SPIE 1993 International Symposium on Electronic Imaging: Science 30 | and Technology, volume 1905, pages 861-870, San Jose, CA, 1993. 31 | 32 | OR literature: 33 | 34 | O.L. Mangasarian, W.N. Street and W.H. Wolberg. 35 | Breast cancer diagnosis and prognosis via linear programming. 36 | Operations Research, 43(4), pages 570-577, July-August 1995. 37 | 38 | Medical literature: 39 | 40 | W.H. Wolberg, W.N. Street, and O.L. Mangasarian. 41 | Machine learning techniques to diagnose breast cancer from 42 | fine-needle aspirates. 43 | Cancer Letters 77 (1994) 163-171. 44 | 45 | W.H. Wolberg, W.N. Street, and O.L. Mangasarian. 46 | Image analysis and machine learning applied to breast cancer 47 | diagnosis and prognosis. 48 | Analytical and Quantitative Cytology and Histology, Vol. 17 49 | No. 2, pages 77-87, April 1995. 50 | 51 | W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. 52 | Computerized breast cancer diagnosis and prognosis from fine 53 | needle aspirates. 54 | Archives of Surgery 1995;130:511-516. 55 | 56 | W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. 57 | Computer-derived nuclear features distinguish malignant from 58 | benign breast cytology. 59 | Human Pathology, 26:792--796, 1995. 
60 | 61 | See also: 62 | http://www.cs.wisc.edu/~olvi/uwmp/mpml.html 63 | http://www.cs.wisc.edu/~olvi/uwmp/cancer.html 64 | 65 | Results: 66 | 67 | - predicting field 2, diagnosis: B = benign, M = malignant 68 | - sets are linearly separable using all 30 input features 69 | - best predictive accuracy obtained using one separating plane 70 | in the 3-D space of Worst Area, Worst Smoothness and 71 | Mean Texture. Estimated accuracy 97.5% using repeated 72 | 10-fold crossvalidations. Classifier has correctly 73 | diagnosed 176 consecutive new patients as of November 74 | 1995. 75 | 76 | 4. Relevant information 77 | 78 | Features are computed from a digitized image of a fine needle 79 | aspirate (FNA) of a breast mass. They describe 80 | characteristics of the cell nuclei present in the image. 81 | A few of the images can be found at 82 | http://www.cs.wisc.edu/~street/images/ 83 | 84 | Separating plane described above was obtained using 85 | Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree 86 | Construction Via Linear Programming." Proceedings of the 4th 87 | Midwest Artificial Intelligence and Cognitive Science Society, 88 | pp. 97-101, 1992], a classification method which uses linear 89 | programming to construct a decision tree. Relevant features 90 | were selected using an exhaustive search in the space of 1-4 91 | features and 1-3 separating planes. 92 | 93 | The actual linear program used to obtain the separating plane 94 | in the 3-dimensional space is that described in: 95 | [K. P. Bennett and O. L. Mangasarian: "Robust Linear 96 | Programming Discrimination of Two Linearly Inseparable Sets", 97 | Optimization Methods and Software 1, 1992, 23-34]. 98 | 99 | 100 | This database is also available through the UW CS ftp server: 101 | 102 | ftp ftp.cs.wisc.edu 103 | cd math-prog/cpo-dataset/machine-learn/WDBC/ 104 | 105 | 5. Number of instances: 569 106 | 107 | 6. Number of attributes: 32 (ID, diagnosis, 30 real-valued input features) 108 | 109 | 7. 
Attribute information 110 | 111 | 1) ID number 112 | 2) Diagnosis (M = malignant, B = benign) 113 | 3-32) 114 | 115 | Ten real-valued features are computed for each cell nucleus: 116 | 117 | a) radius (mean of distances from center to points on the perimeter) 118 | b) texture (standard deviation of gray-scale values) 119 | c) perimeter 120 | d) area 121 | e) smoothness (local variation in radius lengths) 122 | f) compactness (perimeter^2 / area - 1.0) 123 | g) concavity (severity of concave portions of the contour) 124 | h) concave points (number of concave portions of the contour) 125 | i) symmetry 126 | j) fractal dimension ("coastline approximation" - 1) 127 | 128 | Several of the papers listed above contain detailed descriptions of 129 | how these features are computed. 130 | 131 | The mean, standard error, and "worst" or largest (mean of the three 132 | largest values) of these features were computed for each image, 133 | resulting in 30 features. For instance, field 3 is Mean Radius, field 134 | 13 is Radius SE, field 23 is Worst Radius. 135 | 136 | All feature values are recorded with four significant digits. 137 | 138 | 8. Missing attribute values: none 139 | 140 | 9.
Class distribution: 357 benign, 212 malignant -------------------------------------------------------------------------------- /dt_example2/wdbc.test: -------------------------------------------------------------------------------- 1 | 86517,M,18.66,17.12,121.4,1077,0.1054,0.11,0.1457,0.08665,0.1966,0.06213,0.7128,1.581,4.895,90.47,0.008102,0.02101,0.03342,0.01601,0.02045,0.00457,22.25,24.9,145.4,1549,0.1503,0.2291,0.3272,0.1674,0.2894,0.08456 2 | 84501001,M,12.46,24.04,83.97,475.9,0.1186,0.2396,0.2273,0.08543,0.203,0.08243,0.2976,1.599,2.039,23.94,0.007149,0.07217,0.07743,0.01432,0.01789,0.01008,15.09,40.68,97.65,711.4,0.1853,1.058,1.105,0.221,0.4366,0.2075 3 | 889403,M,15.61,19.38,100,758.6,0.0784,0.05616,0.04209,0.02847,0.1547,0.05443,0.2298,0.9988,1.534,22.18,0.002826,0.009105,0.01311,0.005174,0.01013,0.001345,17.91,31.67,115.9,988.6,0.1084,0.1807,0.226,0.08568,0.2683,0.06829 4 | 8911163,M,17.93,24.48,115.2,998.9,0.08855,0.07027,0.05699,0.04744,0.1538,0.0551,0.4212,1.433,2.765,45.81,0.005444,0.01169,0.01622,0.008522,0.01419,0.002751,20.92,34.69,135.1,1320,0.1315,0.1806,0.208,0.1136,0.2504,0.07948 5 | 90944601,B,13.78,15.79,88.37,585.9,0.08817,0.06718,0.01055,0.009937,0.1405,0.05848,0.3563,0.4833,2.235,29.34,0.006432,0.01156,0.007741,0.005657,0.01227,0.002564,15.27,17.5,97.9,706.6,0.1072,0.1071,0.03517,0.03312,0.1859,0.0681 6 | 892604,B,12.46,19.89,80.43,471.3,0.08451,0.1014,0.0683,0.03099,0.1781,0.06249,0.3642,1.04,2.579,28.32,0.00653,0.03369,0.04712,0.01403,0.0274,0.004651,13.46,23.07,88.13,551.3,0.105,0.2158,0.1904,0.07625,0.2685,0.07764 7 | 898690,B,11.47,16.03,73.02,402.7,0.09076,0.05886,0.02587,0.02322,0.1634,0.06372,0.1707,0.7615,1.09,12.25,0.009191,0.008548,0.0094,0.006315,0.01755,0.003009,12.51,20.79,79.67,475.8,0.1531,0.112,0.09823,0.06548,0.2851,0.08763 8 | 
899147,B,11.95,14.96,77.23,426.7,0.1158,0.1206,0.01171,0.01787,0.2459,0.06581,0.361,1.05,2.455,26.65,0.0058,0.02417,0.007816,0.01052,0.02734,0.003114,12.81,17.72,83.09,496.2,0.1293,0.1885,0.03122,0.04766,0.3124,0.0759 9 | 895633,M,16.26,21.88,107.5,826.8,0.1165,0.1283,0.1799,0.07981,0.1869,0.06532,0.5706,1.457,2.961,57.72,0.01056,0.03756,0.05839,0.01186,0.04022,0.006187,17.73,25.21,113.7,975.2,0.1426,0.2116,0.3344,0.1047,0.2736,0.07953 10 | 926954,M,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,0.159,0.05648,0.4564,1.075,3.425,48.55,0.005903,0.03731,0.0473,0.01557,0.01318,0.003892,18.98,34.12,126.7,1124,0.1139,0.3094,0.3403,0.1418,0.2218,0.0782 11 | 9113239,B,13.24,20.13,86.87,542.9,0.08284,0.1223,0.101,0.02833,0.1601,0.06432,0.281,0.8135,3.369,23.81,0.004929,0.06657,0.07683,0.01368,0.01526,0.008133,15.44,25.5,115,733.5,0.1201,0.5646,0.6556,0.1357,0.2845,0.1249 12 | 894326,M,18.22,18.87,118.7,1027,0.09746,0.1117,0.113,0.0795,0.1807,0.05664,0.4041,0.5503,2.547,48.9,0.004821,0.01659,0.02408,0.01143,0.01275,0.002451,21.84,25,140.9,1485,0.1434,0.2763,0.3853,0.1776,0.2812,0.08198 13 | 894329,B,9.042,18.9,60.07,244.5,0.09968,0.1972,0.1975,0.04908,0.233,0.08743,0.4653,1.911,3.769,24.2,0.009845,0.0659,0.1027,0.02527,0.03491,0.007877,10.06,23.4,68.62,297.1,0.1221,0.3748,0.4609,0.1145,0.3135,0.1055 14 | 896864,B,12.98,19.35,84.52,514,0.09579,0.1125,0.07107,0.0295,0.1761,0.0654,0.2684,0.5664,2.465,20.65,0.005727,0.03255,0.04393,0.009811,0.02751,0.004572,14.42,21.95,99.21,634.3,0.1288,0.3253,0.3439,0.09858,0.3596,0.09166 15 | 9012000,M,22.01,21.9,147.2,1482,0.1063,0.1954,0.2448,0.1501,0.1824,0.0614,1.008,0.6999,7.561,130.2,0.003978,0.02821,0.03576,0.01471,0.01518,0.003796,27.66,25.8,195,2227,0.1294,0.3885,0.4756,0.2432,0.2741,0.08574 16 | 884948,M,20.94,23.56,138.9,1364,0.1007,0.1606,0.2712,0.131,0.2205,0.05898,1.004,0.8208,6.372,137.9,0.005283,0.03908,0.09518,0.01864,0.02401,0.005002,25.58,27,165.3,2010,0.1211,0.3172,0.6991,0.2105,0.3126,0.07849 17 | 
91550,B,11.74,14.69,76.31,426,0.08099,0.09661,0.06726,0.02639,0.1499,0.06758,0.1924,0.6417,1.345,13.04,0.006982,0.03916,0.04017,0.01528,0.0226,0.006822,12.45,17.6,81.25,473.8,0.1073,0.2793,0.269,0.1056,0.2604,0.09879 18 | 88466802,B,10.65,25.22,68.01,347,0.09657,0.07234,0.02379,0.01615,0.1897,0.06329,0.2497,1.493,1.497,16.64,0.007189,0.01035,0.01081,0.006245,0.02158,0.002619,12.25,35.19,77.98,455.7,0.1499,0.1398,0.1125,0.06136,0.3409,0.08147 19 | 89346,B,9,14.4,56.36,246.3,0.07005,0.03116,0.003681,0.003472,0.1788,0.06833,0.1746,1.305,1.144,9.789,0.007389,0.004883,0.003681,0.003472,0.02701,0.002153,9.699,20.07,60.9,285.5,0.09861,0.05232,0.01472,0.01389,0.2991,0.07804 20 | 86561,B,13.85,17.21,88.44,588.7,0.08785,0.06136,0.0142,0.01141,0.1614,0.0589,0.2185,0.8561,1.495,17.91,0.004599,0.009169,0.009127,0.004814,0.01247,0.001708,15.49,23.58,100.3,725.9,0.1157,0.135,0.08115,0.05104,0.2364,0.07182 21 | 9111805,M,19.59,25,127.7,1191,0.1032,0.09871,0.1655,0.09063,0.1663,0.05391,0.4674,1.375,2.916,56.18,0.0119,0.01929,0.04907,0.01499,0.01641,0.001807,21.44,30.96,139.8,1421,0.1528,0.1845,0.3977,0.1466,0.2293,0.06091 22 | 893526,B,13.5,12.71,85.69,566.2,0.07376,0.03614,0.002758,0.004419,0.1365,0.05335,0.2244,0.6864,1.509,20.39,0.003338,0.003746,0.00203,0.003242,0.0148,0.001566,14.97,16.94,95.48,698.7,0.09023,0.05836,0.01379,0.0221,0.2267,0.06192 23 | 90602302,M,15.5,21.08,102.9,803.1,0.112,0.1571,0.1522,0.08481,0.2085,0.06864,1.37,1.213,9.424,176.5,0.008198,0.03889,0.04493,0.02139,0.02018,0.005815,23.17,27.65,157.1,1748,0.1517,0.4002,0.4211,0.2134,0.3003,0.1048 24 | 89813,B,14.42,16.54,94.15,641.2,0.09751,0.1139,0.08007,0.04223,0.1912,0.06412,0.3491,0.7706,2.677,32.14,0.004577,0.03053,0.0384,0.01243,0.01873,0.003373,16.67,21.51,111.4,862.1,0.1294,0.3371,0.3755,0.1414,0.3053,0.08764 25 | 
917080,B,12.75,16.7,82.51,493.8,0.1125,0.1117,0.0388,0.02995,0.212,0.06623,0.3834,1.003,2.495,28.62,0.007509,0.01561,0.01977,0.009199,0.01805,0.003629,14.45,21.74,93.63,624.1,0.1475,0.1979,0.1423,0.08045,0.3071,0.08557 26 | 902976,B,13.88,16.16,88.37,596.6,0.07026,0.04831,0.02045,0.008507,0.1607,0.05474,0.2541,0.6218,1.709,23.12,0.003728,0.01415,0.01988,0.007016,0.01647,0.00197,15.51,19.97,99.66,745.3,0.08484,0.1233,0.1091,0.04537,0.2542,0.06623 27 | 911366,B,11.62,18.18,76.38,408.8,0.1175,0.1483,0.102,0.05564,0.1957,0.07255,0.4101,1.74,3.027,27.85,0.01459,0.03206,0.04961,0.01841,0.01807,0.005217,13.36,25.4,88.14,528.1,0.178,0.2878,0.3186,0.1416,0.266,0.0927 28 | 8953902,M,16.27,20.71,106.9,813.7,0.1169,0.1319,0.1478,0.08488,0.1948,0.06277,0.4375,1.232,3.27,44.41,0.006697,0.02083,0.03248,0.01392,0.01536,0.002789,19.28,30.38,129.8,1121,0.159,0.2947,0.3597,0.1583,0.3103,0.082 29 | 88199202,B,11.32,27.08,71.76,395.7,0.06883,0.03813,0.01633,0.003125,0.1869,0.05628,0.121,0.8927,1.059,8.605,0.003653,0.01647,0.01633,0.003125,0.01537,0.002052,12.08,33.75,79.82,452.3,0.09203,0.1432,0.1089,0.02083,0.2849,0.07087 30 | 859575,M,18.94,21.31,123.6,1130,0.09009,0.1029,0.108,0.07951,0.1582,0.05461,0.7888,0.7975,5.486,96.05,0.004444,0.01652,0.02269,0.0137,0.01386,0.001698,24.86,26.58,165.9,1866,0.1193,0.2336,0.2687,0.1789,0.2551,0.06589 31 | 898431,M,19.68,21.68,129.9,1194,0.09797,0.1339,0.1863,0.1103,0.2082,0.05715,0.6226,2.284,5.173,67.66,0.004756,0.03368,0.04345,0.01806,0.03756,0.003288,22.75,34.66,157.6,1540,0.1218,0.3458,0.4734,0.2255,0.4045,0.07918 32 | 913102,B,14.64,16.85,94.21,666,0.08641,0.06698,0.05192,0.02791,0.1409,0.05355,0.2204,1.006,1.471,19.98,0.003535,0.01393,0.018,0.006144,0.01254,0.001219,16.46,25.44,106,831,0.1142,0.207,0.2437,0.07828,0.2455,0.06596 33 | 
853612,M,11.84,18.7,77.93,440.6,0.1109,0.1516,0.1218,0.05182,0.2301,0.07799,0.4825,1.03,3.475,41,0.005551,0.03414,0.04205,0.01044,0.02273,0.005667,16.82,28.12,119.4,888.7,0.1637,0.5775,0.6956,0.1546,0.4761,0.1402 34 | 927241,M,20.6,29.33,140.1,1265,0.1178,0.277,0.3514,0.152,0.2397,0.07016,0.726,1.595,5.772,86.22,0.006522,0.06158,0.07117,0.01664,0.02324,0.006185,25.74,39.42,184.6,1821,0.165,0.8681,0.9387,0.265,0.4087,0.124 35 | 9010018,M,15.08,25.74,98,716.6,0.1024,0.09769,0.1235,0.06553,0.1647,0.06464,0.6534,1.506,4.174,63.37,0.01052,0.02431,0.04912,0.01746,0.0212,0.004867,18.51,33.22,121.2,1050,0.166,0.2356,0.4029,0.1526,0.2654,0.09438 36 | 913535,M,16.69,20.2,107.1,857.6,0.07497,0.07112,0.03649,0.02307,0.1846,0.05325,0.2473,0.5679,1.775,22.95,0.002667,0.01446,0.01423,0.005297,0.01961,0.0017,19.18,26.56,127.3,1084,0.1009,0.292,0.2477,0.08737,0.4677,0.07623 37 | 925292,B,14.05,27.15,91.38,600.4,0.09929,0.1126,0.04462,0.04304,0.1537,0.06171,0.3645,1.492,2.888,29.84,0.007256,0.02678,0.02071,0.01626,0.0208,0.005304,15.3,33.17,100.2,706.7,0.1241,0.2264,0.1326,0.1048,0.225,0.08321 38 | 87164,M,15.46,11.89,102.5,736.9,0.1257,0.1555,0.2032,0.1097,0.1966,0.07069,0.4209,0.6583,2.805,44.64,0.005393,0.02321,0.04303,0.0132,0.01792,0.004168,18.79,17.04,125,1102,0.1531,0.3583,0.583,0.1827,0.3216,0.101 39 | 9111843,B,12,28.23,76.77,442.5,0.08437,0.0645,0.04055,0.01945,0.1615,0.06104,0.1912,1.705,1.516,13.86,0.007334,0.02589,0.02941,0.009166,0.01745,0.004302,13.09,37.88,85.07,523.7,0.1208,0.1856,0.1811,0.07116,0.2447,0.08194 40 | 8911800,B,13.59,17.84,86.24,572.3,0.07948,0.04052,0.01997,0.01238,0.1573,0.0552,0.258,1.166,1.683,22.22,0.003741,0.005274,0.01065,0.005044,0.01344,0.001126,15.5,26.1,98.91,739.1,0.105,0.07622,0.106,0.05185,0.2335,0.06263 41 | 9012315,M,16.35,23.29,109,840.4,0.09742,0.1497,0.1811,0.08773,0.2175,0.06218,0.4312,1.022,2.972,45.5,0.005635,0.03917,0.06072,0.01656,0.03197,0.004085,19.38,31.03,129.3,1165,0.1415,0.4665,0.7087,0.2248,0.4824,0.09614 42 | 
90769601,B,11.13,16.62,70.47,381.1,0.08151,0.03834,0.01369,0.0137,0.1511,0.06148,0.1415,0.9671,0.968,9.704,0.005883,0.006263,0.009398,0.006189,0.02009,0.002377,11.68,20.29,74.35,421.1,0.103,0.06219,0.0458,0.04044,0.2383,0.07083 43 | 903483,B,8.734,16.84,55.27,234.3,0.1039,0.07428,0,0,0.1985,0.07098,0.5169,2.079,3.167,28.85,0.01582,0.01966,0,0,0.01865,0.006736,10.17,22.8,64.01,317,0.146,0.131,0,0,0.2445,0.08865 44 | 91903902,B,13.68,16.33,87.76,575.5,0.09277,0.07255,0.01752,0.0188,0.1631,0.06155,0.2047,0.4801,1.373,17.25,0.003828,0.007228,0.007078,0.005077,0.01054,0.001697,15.85,20.2,101.6,773.4,0.1264,0.1564,0.1206,0.08704,0.2806,0.07782 45 | 909220,B,14.04,15.98,89.78,611.2,0.08458,0.05895,0.03534,0.02944,0.1714,0.05898,0.3892,1.046,2.644,32.74,0.007976,0.01295,0.01608,0.009046,0.02005,0.00283,15.66,21.58,101.2,750,0.1195,0.1252,0.1117,0.07453,0.2725,0.07234 46 | 922296,B,13.21,28.06,84.88,538.4,0.08671,0.06877,0.02987,0.03275,0.1628,0.05781,0.2351,1.597,1.539,17.85,0.004973,0.01372,0.01498,0.009117,0.01724,0.001343,14.37,37.17,92.48,629.6,0.1072,0.1381,0.1062,0.07958,0.2473,0.06443 47 | 8511133,M,15.34,14.26,102.5,704.4,0.1073,0.2135,0.2077,0.09756,0.2521,0.07032,0.4388,0.7096,3.384,44.91,0.006789,0.05328,0.06446,0.02252,0.03672,0.004394,18.07,19.08,125.1,980.9,0.139,0.5954,0.6305,0.2393,0.4667,0.09946 48 | 864018,B,11.34,21.26,72.48,396.5,0.08759,0.06575,0.05133,0.01899,0.1487,0.06529,0.2344,0.9861,1.597,16.41,0.009113,0.01557,0.02443,0.006435,0.01568,0.002477,13.01,29.15,83.99,518.1,0.1699,0.2196,0.312,0.08278,0.2829,0.08832 49 | 885429,M,19.73,19.82,130.7,1206,0.1062,0.1849,0.2417,0.0974,0.1733,0.06697,0.7661,0.78,4.115,92.81,0.008482,0.05057,0.068,0.01971,0.01467,0.007259,25.28,25.59,159.8,1933,0.171,0.5955,0.8489,0.2507,0.2749,0.1297 50 | 
8911230,B,11.33,14.16,71.79,396.6,0.09379,0.03872,0.001487,0.003333,0.1954,0.05821,0.2375,1.28,1.565,17.09,0.008426,0.008998,0.001487,0.003333,0.02358,0.001627,12.2,18.99,77.37,458,0.1259,0.07348,0.004955,0.01111,0.2758,0.06386 51 | 8913049,B,11.26,19.96,73.72,394.1,0.0802,0.1181,0.09274,0.05588,0.2595,0.06233,0.4866,1.905,2.877,34.68,0.01574,0.08262,0.08099,0.03487,0.03418,0.006517,11.86,22.33,78.27,437.6,0.1028,0.1843,0.1546,0.09314,0.2955,0.07009 52 | 872608,B,9.904,18.06,64.6,302.4,0.09699,0.1294,0.1307,0.03716,0.1669,0.08116,0.4311,2.261,3.132,27.48,0.01286,0.08808,0.1197,0.0246,0.0388,0.01792,11.26,24.39,73.07,390.2,0.1301,0.295,0.3486,0.0991,0.2614,0.1162 53 | 869224,B,12.9,15.92,83.74,512.2,0.08677,0.09509,0.04894,0.03088,0.1778,0.06235,0.2143,0.7712,1.689,16.64,0.005324,0.01563,0.0151,0.007584,0.02104,0.001887,14.48,21.82,97.17,643.8,0.1312,0.2548,0.209,0.1012,0.3549,0.08118 54 | 88725602,M,15.53,33.56,103.7,744.9,0.1063,0.1639,0.1751,0.08399,0.2091,0.0665,0.2419,1.278,1.903,23.02,0.005345,0.02556,0.02889,0.01022,0.009947,0.003359,18.49,49.54,126.3,1035,0.1883,0.5564,0.5703,0.2014,0.3512,0.1204 55 | 9010258,B,12.56,19.07,81.92,485.8,0.0876,0.1038,0.103,0.04391,0.1533,0.06184,0.3602,1.478,3.212,27.49,0.009853,0.04235,0.06271,0.01966,0.02639,0.004205,13.37,22.43,89.02,547.4,0.1096,0.2002,0.2388,0.09265,0.2121,0.07188 56 | 864685,B,11.93,21.53,76.53,438.6,0.09768,0.07849,0.03328,0.02008,0.1688,0.06194,0.3118,0.9227,2,24.79,0.007803,0.02507,0.01835,0.007711,0.01278,0.003856,13.67,26.15,87.54,583,0.15,0.2399,0.1503,0.07247,0.2438,0.08541 57 | 884689,B,11.52,14.93,73.87,406.3,0.1013,0.07808,0.04328,0.02929,0.1883,0.06168,0.2562,1.038,1.686,18.62,0.006662,0.01228,0.02105,0.01006,0.01677,0.002784,12.65,21.19,80.88,491.8,0.1389,0.1582,0.1804,0.09608,0.2664,0.07809 58 | 
914333,B,14.87,20.21,96.12,680.9,0.09587,0.08345,0.06824,0.04951,0.1487,0.05748,0.2323,1.636,1.596,21.84,0.005415,0.01371,0.02153,0.01183,0.01959,0.001812,16.01,28.48,103.9,783.6,0.1216,0.1388,0.17,0.1017,0.2369,0.06599 59 | 893988,B,11.54,10.72,73.73,409.1,0.08597,0.05969,0.01367,0.008907,0.1833,0.061,0.1312,0.3602,1.107,9.438,0.004124,0.0134,0.01003,0.004667,0.02032,0.001952,12.34,12.87,81.23,467.8,0.1092,0.1626,0.08324,0.04715,0.339,0.07434 60 | 88299702,M,23.21,26.97,153.5,1670,0.09509,0.1682,0.195,0.1237,0.1909,0.06309,1.058,0.9635,7.247,155.8,0.006428,0.02863,0.04497,0.01716,0.0159,0.003053,31.01,34.51,206,2944,0.1481,0.4126,0.582,0.2593,0.3103,0.08677 61 | 911408,B,12.83,15.73,82.89,506.9,0.0904,0.08269,0.05835,0.03078,0.1705,0.05913,0.1499,0.4875,1.195,11.64,0.004873,0.01796,0.03318,0.00836,0.01601,0.002289,14.09,19.35,93.22,605.8,0.1326,0.261,0.3476,0.09783,0.3006,0.07802 62 | 90312,M,19.55,23.21,128.9,1174,0.101,0.1318,0.1856,0.1021,0.1989,0.05884,0.6107,2.836,5.383,70.1,0.01124,0.04097,0.07469,0.03441,0.02768,0.00624,20.82,30.44,142,1313,0.1251,0.2414,0.3829,0.1825,0.2576,0.07602 63 | 857374,B,11.94,18.24,75.71,437.6,0.08261,0.04751,0.01972,0.01349,0.1868,0.0611,0.2273,0.6329,1.52,17.47,0.00721,0.00838,0.01311,0.008,0.01996,0.002635,13.1,21.33,83.67,527.2,0.1144,0.08906,0.09203,0.06296,0.2785,0.07408 64 | 89742801,M,17.06,21,111.8,918.6,0.1119,0.1056,0.1508,0.09934,0.1727,0.06071,0.8161,2.129,6.076,87.17,0.006455,0.01797,0.04502,0.01744,0.01829,0.003733,20.99,33.15,143.2,1362,0.1449,0.2053,0.392,0.1827,0.2623,0.07599 65 | 905557,B,14.99,22.11,97.53,693.7,0.08515,0.1025,0.06859,0.03876,0.1944,0.05913,0.3186,1.336,2.31,28.51,0.004449,0.02808,0.03312,0.01196,0.01906,0.004015,16.76,31.55,110.2,867.1,0.1077,0.3345,0.3114,0.1308,0.3163,0.09251 66 | 
901028,B,13.87,16.21,88.52,593.7,0.08743,0.05492,0.01502,0.02088,0.1424,0.05883,0.2543,1.363,1.737,20.74,0.005638,0.007939,0.005254,0.006042,0.01544,0.002087,15.11,25.58,96.74,694.4,0.1153,0.1008,0.05285,0.05556,0.2362,0.07113 67 | 89864002,B,11.71,15.45,75.03,420.3,0.115,0.07281,0.04006,0.0325,0.2009,0.06506,0.3446,0.7395,2.355,24.53,0.009536,0.01097,0.01651,0.01121,0.01953,0.0031,13.06,18.16,84.16,516.4,0.146,0.1115,0.1087,0.07864,0.2765,0.07806 68 | 87127,B,10.8,9.71,68.77,357.6,0.09594,0.05736,0.02531,0.01698,0.1381,0.064,0.1728,0.4064,1.126,11.48,0.007809,0.009816,0.01099,0.005344,0.01254,0.00212,11.6,12.02,73.66,414,0.1436,0.1257,0.1047,0.04603,0.209,0.07699 69 | 844981,M,13,21.82,87.5,519.8,0.1273,0.1932,0.1859,0.09353,0.235,0.07389,0.3063,1.002,2.406,24.32,0.005731,0.03502,0.03553,0.01226,0.02143,0.003749,15.49,30.73,106.2,739.3,0.1703,0.5401,0.539,0.206,0.4378,0.1072 70 | 907145,B,9.742,19.12,61.93,289.7,0.1075,0.08333,0.008934,0.01967,0.2538,0.07029,0.6965,1.747,4.607,43.52,0.01307,0.01885,0.006021,0.01052,0.031,0.004225,11.21,23.17,71.79,380.9,0.1398,0.1352,0.02085,0.04589,0.3196,0.08009 71 | 854268,M,14.25,21.72,93.63,633,0.09823,0.1098,0.1319,0.05598,0.1885,0.06125,0.286,1.019,2.657,24.91,0.005878,0.02995,0.04815,0.01161,0.02028,0.004022,15.89,30.36,116.2,799.6,0.1446,0.4238,0.5186,0.1447,0.3591,0.1014 72 | 904647,B,11.93,10.91,76.14,442.7,0.08872,0.05242,0.02606,0.01796,0.1601,0.05541,0.2522,1.045,1.649,18.95,0.006175,0.01204,0.01376,0.005832,0.01096,0.001857,13.8,20.14,87.64,589.5,0.1374,0.1575,0.1514,0.06876,0.246,0.07262 73 | 921644,B,14.74,25.42,94.7,668.6,0.08275,0.07214,0.04105,0.03027,0.184,0.0568,0.3031,1.385,2.177,27.41,0.004775,0.01172,0.01947,0.01269,0.0187,0.002626,16.51,32.29,107.4,826.4,0.106,0.1376,0.1611,0.1095,0.2722,0.06956 74 | 
896839,M,16.03,15.51,105.8,793.2,0.09491,0.1371,0.1204,0.07041,0.1782,0.05976,0.3371,0.7476,2.629,33.27,0.005839,0.03245,0.03715,0.01459,0.01467,0.003121,18.76,21.98,124.3,1070,0.1435,0.4478,0.4956,0.1981,0.3019,0.09124 75 | 9113846,B,12.27,29.97,77.42,465.4,0.07699,0.03398,0,0,0.1701,0.0596,0.4455,3.647,2.884,35.13,0.007339,0.008243,0,0,0.03141,0.003136,13.45,38.05,85.08,558.9,0.09422,0.05213,0,0,0.2409,0.06743 76 | 8910720,B,10.71,20.39,69.5,344.9,0.1082,0.1289,0.08448,0.02867,0.1668,0.06862,0.3198,1.489,2.23,20.74,0.008902,0.04785,0.07339,0.01745,0.02728,0.00761,11.69,25.21,76.51,410.4,0.1335,0.255,0.2534,0.086,0.2605,0.08701 77 | 853401,M,18.63,25.11,124.8,1088,0.1064,0.1887,0.2319,0.1244,0.2183,0.06197,0.8307,1.466,5.574,105,0.006248,0.03374,0.05196,0.01158,0.02007,0.00456,23.15,34.01,160.5,1670,0.1491,0.4257,0.6133,0.1848,0.3444,0.09782 78 | 871001501,B,13,20.78,83.51,519.4,0.1135,0.07589,0.03136,0.02645,0.254,0.06087,0.4202,1.322,2.873,34.78,0.007017,0.01142,0.01949,0.01153,0.02951,0.001533,14.16,24.11,90.82,616.7,0.1297,0.1105,0.08112,0.06296,0.3196,0.06435 79 | 904689,B,12.96,18.29,84.18,525.2,0.07351,0.07899,0.04057,0.01883,0.1874,0.05899,0.2357,1.299,2.397,20.21,0.003629,0.03713,0.03452,0.01065,0.02632,0.003705,14.13,24.61,96.31,621.9,0.09329,0.2318,0.1604,0.06608,0.3207,0.07247 80 | 9112085,B,13.38,30.72,86.34,557.2,0.09245,0.07426,0.02819,0.03264,0.1375,0.06016,0.3408,1.924,2.287,28.93,0.005841,0.01246,0.007936,0.009128,0.01564,0.002985,15.05,41.61,96.69,705.6,0.1172,0.1421,0.07003,0.07763,0.2196,0.07675 81 | 8812816,B,13.65,13.16,87.88,568.9,0.09646,0.08711,0.03888,0.02563,0.136,0.06344,0.2102,0.4336,1.391,17.4,0.004133,0.01695,0.01652,0.006659,0.01371,0.002735,15.34,16.35,99.71,706.2,0.1311,0.2474,0.1759,0.08056,0.238,0.08718 82 | 875099,B,9.72,18.22,60.73,288.1,0.0695,0.02344,0,0,0.1653,0.06447,0.3539,4.885,2.23,21.69,0.001713,0.006736,0,0,0.03799,0.001688,9.968,20.83,62.25,303.8,0.07117,0.02729,0,0,0.1909,0.06559 83 | 
861598,B,14.64,15.24,95.77,651.9,0.1132,0.1339,0.09966,0.07064,0.2116,0.06346,0.5115,0.7372,3.814,42.76,0.005508,0.04412,0.04436,0.01623,0.02427,0.004841,16.34,18.24,109.4,803.6,0.1277,0.3089,0.2604,0.1397,0.3151,0.08473 84 | 868682,B,11.43,15.39,73.06,399.8,0.09639,0.06889,0.03503,0.02875,0.1734,0.05865,0.1759,0.9938,1.143,12.67,0.005133,0.01521,0.01434,0.008602,0.01501,0.001588,12.32,22.02,79.93,462,0.119,0.1648,0.1399,0.08476,0.2676,0.06765 85 | 897132,B,11.22,19.86,71.94,387.3,0.1054,0.06779,0.005006,0.007583,0.194,0.06028,0.2976,1.966,1.959,19.62,0.01289,0.01104,0.003297,0.004967,0.04243,0.001963,11.98,25.78,76.91,436.1,0.1424,0.09669,0.01335,0.02022,0.3292,0.06522 86 | 91805,B,8.571,13.1,54.53,221.3,0.1036,0.07632,0.02565,0.0151,0.1678,0.07126,0.1267,0.6793,1.069,7.254,0.007897,0.01762,0.01801,0.00732,0.01592,0.003925,9.473,18.45,63.3,275.6,0.1641,0.2235,0.1754,0.08512,0.2983,0.1049 87 | 867387,B,15.71,13.93,102,761.7,0.09462,0.09462,0.07135,0.05933,0.1816,0.05723,0.3117,0.8155,1.972,27.94,0.005217,0.01515,0.01678,0.01268,0.01669,0.00233,17.5,19.25,114.3,922.8,0.1223,0.1949,0.1709,0.1374,0.2723,0.07071 88 | 884448,B,13.2,17.43,84.13,541.6,0.07215,0.04524,0.04336,0.01105,0.1487,0.05635,0.163,1.601,0.873,13.56,0.006261,0.01569,0.03079,0.005383,0.01962,0.00225,13.94,27.82,88.28,602,0.1101,0.1508,0.2298,0.0497,0.2767,0.07198 89 | 91594602,M,15.05,19.07,97.26,701.9,0.09215,0.08597,0.07486,0.04335,0.1561,0.05915,0.386,1.198,2.63,38.49,0.004952,0.0163,0.02967,0.009423,0.01152,0.001718,17.58,28.06,113.8,967,0.1246,0.2101,0.2866,0.112,0.2282,0.06954 90 | 881046502,M,20.58,22.14,134.7,1290,0.0909,0.1348,0.164,0.09561,0.1765,0.05024,0.8601,1.48,7.029,111.7,0.008124,0.03611,0.05489,0.02765,0.03176,0.002365,23.24,27.84,158.3,1656,0.1178,0.292,0.3861,0.192,0.2909,0.05865 91 | 921092,B,7.729,25.49,47.98,178.8,0.08098,0.04878,0,0,0.187,0.07285,0.3777,1.462,2.492,19.14,0.01266,0.009692,0,0,0.02882,0.006872,9.077,30.92,57.17,248,0.1256,0.0834,0,0,0.3058,0.09938 92 | 
86730502,M,16.16,21.54,106.2,809.8,0.1008,0.1284,0.1043,0.05613,0.216,0.05891,0.4332,1.265,2.844,43.68,0.004877,0.01952,0.02219,0.009231,0.01535,0.002373,19.47,31.68,129.7,1175,0.1395,0.3055,0.2992,0.1312,0.348,0.07619 93 | 877500,M,14.45,20.22,94.49,642.7,0.09872,0.1206,0.118,0.0598,0.195,0.06466,0.2092,0.6509,1.446,19.42,0.004044,0.01597,0.02,0.007303,0.01522,0.001976,18.33,30.12,117.9,1044,0.1552,0.4056,0.4967,0.1838,0.4753,0.1013 94 | 873701,M,15.7,20.31,101.2,766.6,0.09597,0.08799,0.06593,0.05189,0.1618,0.05549,0.3699,1.15,2.406,40.98,0.004626,0.02263,0.01954,0.009767,0.01547,0.00243,20.11,32.82,129.3,1269,0.1414,0.3547,0.2902,0.1541,0.3437,0.08631 95 | 908489,M,13.98,19.62,91.12,599.5,0.106,0.1133,0.1126,0.06463,0.1669,0.06544,0.2208,0.9533,1.602,18.85,0.005314,0.01791,0.02185,0.009567,0.01223,0.002846,17.04,30.8,113.9,869.3,0.1613,0.3568,0.4069,0.1827,0.3179,0.1055 96 | 914769,M,18.49,17.52,121.3,1068,0.1012,0.1317,0.1491,0.09183,0.1832,0.06697,0.7923,1.045,4.851,95.77,0.007974,0.03214,0.04435,0.01573,0.01617,0.005255,22.75,22.88,146.4,1600,0.1412,0.3089,0.3533,0.1663,0.251,0.09445 97 | 9113455,B,13.14,20.74,85.98,536.9,0.08675,0.1089,0.1085,0.0351,0.1562,0.0602,0.3152,0.7884,2.312,27.4,0.007295,0.03179,0.04615,0.01254,0.01561,0.00323,14.8,25.46,100.9,689.1,0.1351,0.3549,0.4504,0.1181,0.2563,0.08174 98 | 887181,M,15.66,23.2,110.2,773.5,0.1109,0.3114,0.3176,0.1377,0.2495,0.08104,1.292,2.454,10.12,138.5,0.01236,0.05995,0.08232,0.03024,0.02337,0.006042,19.85,31.64,143.7,1226,0.1504,0.5172,0.6181,0.2462,0.3277,0.1019 99 | 9011971,M,21.71,17.25,140.9,1546,0.09384,0.08562,0.1168,0.08465,0.1717,0.05054,1.207,1.051,7.733,224.1,0.005568,0.01112,0.02096,0.01197,0.01263,0.001803,30.75,26.44,199.5,3143,0.1363,0.1628,0.2861,0.182,0.251,0.06494 100 | 91813702,B,12.34,12.27,78.94,468.5,0.09003,0.06307,0.02958,0.02647,0.1689,0.05808,0.1166,0.4957,0.7714,8.955,0.003681,0.009169,0.008732,0.00574,0.01129,0.001366,13.61,19.27,87.22,564.9,0.1292,0.2074,0.1791,0.107,0.311,0.07592 
101 | 914101,B,12.46,12.83,78.83,477.3,0.07372,0.04043,0.007173,0.01149,0.1613,0.06013,0.3276,1.486,2.108,24.6,0.01039,0.01003,0.006416,0.007895,0.02869,0.004821,13.19,16.36,83.24,534,0.09439,0.06477,0.01674,0.0268,0.228,0.07028 102 | 87106,B,11.15,13.08,70.87,381.9,0.09754,0.05113,0.01982,0.01786,0.183,0.06105,0.2251,0.7815,1.429,15.48,0.009019,0.008985,0.01196,0.008232,0.02388,0.001619,11.99,16.3,76.25,440.8,0.1341,0.08971,0.07116,0.05506,0.2859,0.06772 103 | 916799,M,18.31,20.58,120.8,1052,0.1068,0.1248,0.1569,0.09451,0.186,0.05941,0.5449,0.9225,3.218,67.36,0.006176,0.01877,0.02913,0.01046,0.01559,0.002725,21.86,26.2,142.2,1493,0.1492,0.2536,0.3759,0.151,0.3074,0.07863 104 | 8711002,B,13.15,15.34,85.31,538.9,0.09384,0.08498,0.09293,0.03483,0.1822,0.06207,0.271,0.7927,1.819,22.79,0.008584,0.02017,0.03047,0.009536,0.02769,0.003479,14.77,20.5,97.67,677.3,0.1478,0.2256,0.3009,0.09722,0.3849,0.08633 105 | 851509,M,21.16,23.04,137.2,1404,0.09428,0.1022,0.1097,0.08632,0.1769,0.05278,0.6917,1.127,4.303,93.99,0.004728,0.01259,0.01715,0.01038,0.01083,0.001987,29.17,35.59,188,2615,0.1401,0.26,0.3155,0.2009,0.2822,0.07526 106 | 86408,B,12.63,20.76,82.15,480.4,0.09933,0.1209,0.1065,0.06021,0.1735,0.0707,0.3424,1.803,2.711,20.48,0.01291,0.04042,0.05101,0.02295,0.02144,0.005891,13.33,25.47,89,527.4,0.1287,0.225,0.2216,0.1105,0.2226,0.08486 107 | 881094802,M,17.42,25.56,114.5,948,0.1006,0.1146,0.1682,0.06597,0.1308,0.05866,0.5296,1.667,3.767,58.53,0.03113,0.08555,0.1438,0.03927,0.02175,0.01256,18.07,28.07,120.4,1021,0.1243,0.1793,0.2803,0.1099,0.1603,0.06818 108 | 869254,B,10.75,14.97,68.26,355.3,0.07793,0.05139,0.02251,0.007875,0.1399,0.05688,0.2525,1.239,1.806,17.74,0.006547,0.01781,0.02018,0.005612,0.01671,0.00236,11.95,20.72,77.79,441.2,0.1076,0.1223,0.09755,0.03413,0.23,0.06769 109 | 
866714,B,12.19,13.29,79.08,455.8,0.1066,0.09509,0.02855,0.02882,0.188,0.06471,0.2005,0.8163,1.973,15.24,0.006773,0.02456,0.01018,0.008094,0.02662,0.004143,13.34,17.81,91.38,545.2,0.1427,0.2585,0.09915,0.08187,0.3469,0.09241 110 | 871641,B,11.08,14.71,70.21,372.7,0.1006,0.05743,0.02363,0.02583,0.1566,0.06669,0.2073,1.805,1.377,19.08,0.01496,0.02121,0.01453,0.01583,0.03082,0.004785,11.35,16.82,72.01,396.5,0.1216,0.0824,0.03938,0.04306,0.1902,0.07313 111 | 863031,B,11.64,18.33,75.17,412.5,0.1142,0.1017,0.0707,0.03485,0.1801,0.0652,0.306,1.657,2.155,20.62,0.00854,0.0231,0.02945,0.01398,0.01565,0.00384,13.14,29.26,85.51,521.7,0.1688,0.266,0.2873,0.1218,0.2806,0.09097 112 | 864877,M,15.78,22.91,105.7,782.6,0.1155,0.1752,0.2133,0.09479,0.2096,0.07331,0.552,1.072,3.598,58.63,0.008699,0.03976,0.0595,0.0139,0.01495,0.005984,20.19,30.5,130.3,1272,0.1855,0.4925,0.7356,0.2034,0.3274,0.1252 113 | 873593,M,21.09,26.57,142.7,1311,0.1141,0.2832,0.2487,0.1496,0.2395,0.07398,0.6298,0.7629,4.414,81.46,0.004253,0.04759,0.03872,0.01567,0.01798,0.005295,26.68,33.48,176.5,2089,0.1491,0.7584,0.678,0.2903,0.4098,0.1284 114 | 862485,B,11.6,12.84,74.34,412.6,0.08983,0.07525,0.04196,0.0335,0.162,0.06582,0.2315,0.5391,1.475,15.75,0.006153,0.0133,0.01693,0.006884,0.01651,0.002551,13.06,17.16,82.96,512.5,0.1431,0.1851,0.1922,0.08449,0.2772,0.08756 115 | 8913,B,12.89,13.12,81.89,515.9,0.06955,0.03729,0.0226,0.01171,0.1337,0.05581,0.1532,0.469,1.115,12.68,0.004731,0.01345,0.01652,0.005905,0.01619,0.002081,13.62,15.54,87.4,577,0.09616,0.1147,0.1186,0.05366,0.2309,0.06915 116 | 881861,M,12.83,22.33,85.26,503.2,0.1088,0.1799,0.1695,0.06861,0.2123,0.07254,0.3061,1.069,2.257,25.13,0.006983,0.03858,0.04683,0.01499,0.0168,0.005617,15.2,30.15,105.3,706,0.1777,0.5343,0.6282,0.1977,0.3407,0.1243 117 | 
88411702,B,13.75,23.77,88.54,590,0.08043,0.06807,0.04697,0.02344,0.1773,0.05429,0.4347,1.057,2.829,39.93,0.004351,0.02667,0.03371,0.01007,0.02598,0.003087,15.01,26.34,98,706,0.09368,0.1442,0.1359,0.06106,0.2663,0.06321 118 | 874158,B,10.08,15.11,63.76,317.5,0.09267,0.04695,0.001597,0.002404,0.1703,0.06048,0.4245,1.268,2.68,26.43,0.01439,0.012,0.001597,0.002404,0.02538,0.00347,11.87,21.18,75.39,437,0.1521,0.1019,0.00692,0.01042,0.2933,0.07697 119 | 92751,B,7.76,24.54,47.92,181,0.05263,0.04362,0,0,0.1587,0.05884,0.3857,1.428,2.548,19.15,0.007189,0.00466,0,0,0.02676,0.002783,9.456,30.37,59.16,268.6,0.08996,0.06444,0,0,0.2871,0.07039 120 | 859471,B,9.029,17.33,58.79,250.5,0.1066,0.1413,0.313,0.04375,0.2111,0.08046,0.3274,1.194,1.885,17.67,0.009549,0.08606,0.3038,0.03322,0.04197,0.009559,10.31,22.65,65.5,324.7,0.1482,0.4365,1.252,0.175,0.4228,0.1175 121 | -------------------------------------------------------------------------------- /handwritten_ex/decisiontree.cpp: -------------------------------------------------------------------------------- 1 | // Example : decision tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : handwritten_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2010 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | 20 | #define NUMBER_OF_TRAINING_SAMPLES 797 21 | #define ATTRIBUTES_PER_SAMPLE 256 22 | #define NUMBER_OF_TESTING_SAMPLES 796 23 | 24 | #define NUMBER_OF_CLASSES 10 25 | 26 | // N.B. 
classes are integer handwritten digits in range 0-9 27 | 28 | /******************************************************************************/ 29 | 30 | // loads the sample database from file (which is a CSV text file) 31 | 32 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 33 | int n_samples ) 34 | { 35 | float tmpf; 36 | 37 | // if we can't read the input file then return 0 38 | FILE* f = fopen( filename, "r" ); 39 | if( !f ) 40 | { 41 | printf("ERROR: cannot read file %s\n", filename); 42 | return 0; // all not OK 43 | } 44 | 45 | // for each sample in the file 46 | 47 | for(int line = 0; line < n_samples; line++) 48 | { 49 | 50 | // for each attribute on the line in the file 51 | 52 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 53 | { 54 | if (attribute < ATTRIBUTES_PER_SAMPLE) 55 | { 56 | 57 | // first 256 elements (0-255) in each line are the attributes 58 | 59 | fscanf(f, "%f,", &tmpf); 60 | data.at(line, attribute) = tmpf; 61 | 62 | } 63 | else if (attribute == ATTRIBUTES_PER_SAMPLE) 64 | { 65 | 66 | // attribute 256 is the class label {0 ... 
9} 67 | 68 | fscanf(f, "%f,", &tmpf); 69 | classes.at(line, 0) = tmpf; 70 | } 71 | } 72 | } 73 | 74 | fclose(f); 75 | 76 | return 1; // all OK 77 | } 78 | 79 | /******************************************************************************/ 80 | 81 | int main( int argc, char** argv ) 82 | { 83 | // lets just check the version first 84 | 85 | printf ("OpenCV version %s (%d.%d.%d)\n", 86 | CV_VERSION, 87 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 88 | 89 | // define training data storage matrices (one for attribute examples, one 90 | // for classifications) 91 | 92 | Mat training_data = 93 | Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 94 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 95 | 96 | //define testing data storage matrices 97 | 98 | Mat testing_data = 99 | Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 100 | Mat testing_classifications = 101 | Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 102 | 103 | // define all the attributes as numerical 104 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 105 | // that can be assigned on a per attribute basis 106 | 107 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 108 | var_type = Scalar(CV_VAR_NUMERICAL); // all inputs are numerical 109 | 110 | // this is a classification problem (i.e. 
predict a discrete number of class 111 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 112 | 113 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 114 | 115 | CvDTreeNode* resultNode; // node returned from a prediction 116 | 117 | // load training and testing data sets 118 | 119 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 120 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 121 | { 122 | // define the parameters for training the decision tree 123 | 124 | float priors[] = {1,1,1,1,1,1,1,1,1,1}; // weights of each classification for classes 125 | // (all equal as equal samples of each digit) 126 | 127 | CvDTreeParams params = CvDTreeParams(25, // max depth 128 | 5, // min sample count 129 | 0, // regression accuracy: N/A here 130 | false, // compute surrogate split, no missing data 131 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 132 | 15, // the number of cross-validation folds 133 | false, // use 1SE rule => smaller tree 134 | false, // throw away the pruned tree branches 135 | priors // the array of priors 136 | ); 137 | 138 | 139 | // train decision tree classifier (using training data) 140 | 141 | printf( "\nUsing training database: %s\n\n", argv[1]); 142 | CvDTree* dtree = new CvDTree; 143 | 144 | dtree->train(training_data, CV_ROW_SAMPLE, 145 | training_classifications, 146 | Mat(), Mat(), var_type, Mat(), params); 147 | 148 | // perform classifier testing and report results 149 | 150 | Mat test_sample; 151 | int correct_class = 0; 152 | int wrong_class = 0; 153 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 154 | 155 | printf( "\nUsing testing database: %s\n\n", argv[2]); 156 | 157 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 158 | { 159 | 160 | // extract a row from the testing matrix 161 | 162 | test_sample = 
testing_data.row(tsample); 163 | 164 | // run decision tree prediction 165 | 166 | resultNode = dtree->predict(test_sample, Mat(), false); 167 | 168 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) (resultNode->value)); 169 | 170 | // if the prediction and the (true) testing classification are the same 171 | // (N.B. openCV uses a floating point decision tree implementation!) 172 | 173 | if (fabs(resultNode->value - testing_classifications.at(tsample, 0)) 174 | >= FLT_EPSILON) 175 | { 176 | // if they differ more than floating point error => wrong class 177 | 178 | wrong_class++; 179 | 180 | false_positives[(int) resultNode->value]++; 181 | 182 | } 183 | else 184 | { 185 | 186 | // otherwise correct 187 | 188 | correct_class++; 189 | } 190 | } 191 | 192 | printf( "\nResults on the testing database: %s\n" 193 | "\tCorrect classification: %d (%g%%)\n" 194 | "\tWrong classifications: %d (%g%%)\n", 195 | argv[2], 196 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 197 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 198 | 199 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 200 | { 201 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 202 | false_positives[i], 203 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 204 | } 205 | 206 | 207 | // all matrix memory free by destructors 208 | 209 | 210 | // all OK : main returns 0 211 | 212 | return 0; 213 | } 214 | 215 | // not OK : main returns -1 216 | 217 | return -1; 218 | } 219 | /******************************************************************************/ 220 | -------------------------------------------------------------------------------- /handwritten_ex/neuralnetwork.cpp: -------------------------------------------------------------------------------- 1 | // Example : neural network learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : handwritten_ex 5 | 6 | // 
Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2010 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 797 22 | #define ATTRIBUTES_PER_SAMPLE 256 23 | #define NUMBER_OF_TESTING_SAMPLES 796 24 | 25 | #define NUMBER_OF_CLASSES 10 26 | 27 | // N.B. classes are integer handwritten digits in range 0-9 28 | 29 | /******************************************************************************/ 30 | 31 | // loads the sample database from file (which is a CSV text file) 32 | 33 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 34 | int n_samples ) 35 | { 36 | 37 | int classlabel; // the class label 38 | float tmpf; 39 | 40 | // if we can't read the input file then return 0 41 | FILE* f = fopen( filename, "r" ); 42 | if( !f ) 43 | { 44 | printf("ERROR: cannot read file %s\n", filename); 45 | return 0; // all not OK 46 | } 47 | 48 | // for each sample in the file 49 | 50 | for(int line = 0; line < n_samples; line++) 51 | { 52 | 53 | // for each attribute on the line in the file 54 | 55 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 56 | { 57 | if (attribute < ATTRIBUTES_PER_SAMPLE) 58 | { 59 | 60 | // first 256 elements (0-255) in each line are the attributes 61 | 62 | fscanf(f, "%f,", &tmpf); 63 | data.at(line, attribute) = tmpf; 64 | 65 | } 66 | else if (attribute == ATTRIBUTES_PER_SAMPLE) 67 | { 68 | 69 | // attribute 256 is the class label {0 ... 
9} 70 | 71 | fscanf(f, "%i,", &classlabel); 72 | classes.at(line, classlabel) = 1.0; 73 | } 74 | } 75 | } 76 | 77 | fclose(f); 78 | 79 | return 1; // all OK 80 | } 81 | 82 | /******************************************************************************/ 83 | 84 | int main( int argc, char** argv ) 85 | { 86 | // lets just check the version first 87 | 88 | printf ("OpenCV version %s (%d.%d.%d)\n", 89 | CV_VERSION, 90 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 91 | 92 | // define training data storage matrices (one for attribute examples, one 93 | // for classifications) 94 | 95 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 96 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, NUMBER_OF_CLASSES, CV_32FC1); 97 | 98 | // define testing data storage matrices 99 | 100 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 101 | Mat testing_classifications = Mat::zeros(NUMBER_OF_TESTING_SAMPLES, NUMBER_OF_CLASSES, CV_32FC1); 102 | 103 | // define classification output vector 104 | 105 | Mat classificationResult = Mat(1, NUMBER_OF_CLASSES, CV_32FC1); 106 | Point max_loc = Point(0,0); 107 | 108 | // load training and testing data sets 109 | 110 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 111 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 112 | { 113 | // define the parameters for the neural network (MLP) 114 | 115 | // set the network to be 3 layer 256->10->10 116 | // - one input node per attribute in a sample 117 | // - 10 hidden nodes 118 | // - one output node per class 119 | 120 | // note that the OpenCV neural network (MLP) implementation does not 121 | // support categorical variables explicitly. 122 | // So, instead of the output class label, we will use 123 | // a binary vector of {0,0 ... 
1,0,0} components (one element by class) 124 | // for training and therefore, MLP will give us a vector of "probabilities" 125 | // at the prediction stage - the highest probability can be accepted 126 | // as the "winning" class label output by the network 127 | 128 | int layers_d[] = { ATTRIBUTES_PER_SAMPLE, 10, NUMBER_OF_CLASSES}; 129 | Mat layers = Mat(1,3,CV_32SC1); 130 | layers.at(0,0) = layers_d[0]; 131 | layers.at(0,1) = layers_d[1]; 132 | layers.at(0,2) = layers_d[2]; 133 | 134 | // create the network using a sigmoid function with alpha and beta 135 | // parameters 0.6 and 1 specified respectively (refer to manual) 136 | 137 | CvANN_MLP* nnetwork = new CvANN_MLP; 138 | nnetwork->create(layers, CvANN_MLP::SIGMOID_SYM, 0.6, 1); 139 | 140 | // set the training parameters 141 | 142 | CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams( 143 | 144 | // terminate the training after either 1000 145 | // iterations or a very small change in the 146 | // network wieghts below the specified value 147 | 148 | cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 0.000001), 149 | 150 | // use backpropogation for training 151 | 152 | CvANN_MLP_TrainParams::BACKPROP, 153 | 154 | // co-efficents for backpropogation training 155 | // (refer to manual) 156 | 157 | 0.1, 158 | 0.1); 159 | 160 | // train the neural network (using training data) 161 | 162 | printf( "\nUsing training database: %s\n", argv[1]); 163 | 164 | int iterations = nnetwork->train(training_data, training_classifications, Mat(), Mat(), params); 165 | 166 | printf( "Training iterations: %i\n\n", iterations); 167 | 168 | // perform classifier testing and report results 169 | 170 | Mat test_sample; 171 | int correct_class = 0; 172 | int wrong_class = 0; 173 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 174 | 175 | printf( "\nUsing testing database: %s\n\n", argv[2]); 176 | 177 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 178 | { 179 | 180 | // extract a row 
from the testing matrix 181 | 182 | test_sample = testing_data.row(tsample); 183 | 184 | // run neural network prediction 185 | 186 | nnetwork->predict(test_sample, classificationResult); 187 | 188 | // The NN gives out a vector of probabilities for each class 189 | // We take the class with the highest "probability" 190 | // for simplicity (but we really should also check separation 191 | // of the different "probabilities" in this vector - what if 192 | // two classes have very similar values ?) 193 | 194 | minMaxLoc(classificationResult, 0, 0, 0, &max_loc); 195 | 196 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, max_loc.x); 197 | 198 | // if the corresponding location in the testing classifications 199 | // is not "1" (i.e. this is the correct class) then record this 200 | 201 | if (!(testing_classifications.at(tsample, max_loc.x))) 202 | { 203 | // if they differ more than floating point error => wrong class 204 | 205 | wrong_class++; 206 | 207 | false_positives[(int) max_loc.x]++; 208 | 209 | } 210 | else 211 | { 212 | 213 | // otherwise correct 214 | 215 | correct_class++; 216 | } 217 | } 218 | 219 | printf( "\nResults on the testing database: %s\n" 220 | "\tCorrect classification: %d (%g%%)\n" 221 | "\tWrong classifications: %d (%g%%)\n", 222 | argv[2], 223 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 224 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 225 | 226 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 227 | { 228 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 229 | false_positives[i], 230 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 231 | } 232 | 233 | // all OK : main returns 0 234 | 235 | return 0; 236 | } 237 | 238 | // not OK : main returns -1 239 | 240 | return -1; 241 | } 242 | /******************************************************************************/ 243 | -------------------------------------------------------------------------------- 
/handwritten_ex/semeion.names: -------------------------------------------------------------------------------- 1 | Title: Semeion Handwritten Digit 2 | 3 | Abstract: 1593 handwritten digits from around 80 persons were scanned, 4 | stretched in a rectangular box 16x16 in a gray scale of 256 values. 5 | 6 | ----------------------------------------------------- 7 | 8 | Data Set Characteristics: Multivariate 9 | Number of Instances: 1593 10 | Area: Computer 11 | Attribute Characteristics: Integer 12 | Number of Attributes: 256 13 | Date Donated: 2008-11-11 14 | Associated Tasks: Classification 15 | Missing Values? N/A 16 | 17 | ----------------------------------------------------- 18 | 19 | Source: 20 | 21 | The dataset was created by Tactile Srl, Brescia, Italy 22 | (http://www.tattile.it/) and donated in 1994 to Semeion Research Center 23 | of Sciences of Communication, Rome, Italy (http://www.semeion.it/), for 24 | machine learning research. 25 | 26 | For any questions, e-mail Massimo Buscema (m.buscema '@' semeion.it) or 27 | Stefano Terzi (s.terzi '@' semeion.it) 28 | 29 | 30 | ----------------------------------------------------- 31 | 32 | Data Set Information: 33 | 34 | 35 | 1593 handwritten digits from around 80 persons were scanned, stretched 36 | in a rectangular box 16x16 in a gray scale of 256 values.Then each pixel 37 | of each image was scaled into a bolean (1/0) value using a fixed 38 | threshold. 39 | 40 | Each person wrote on a paper all the digits from 0 to 9, twice. The 41 | commitment was to write the digit the first time in the normal way 42 | (trying to write each digit accurately) and the second time in a fast 43 | way (with no accuracy). 44 | 45 | The best validation protocol for this dataset seems to be a 5x2CV, 50% 46 | Tune (Train +Test) and completly blind 50% Validation. 
47 | 48 | ----------------------------------------------------- 49 | 50 | Attribute Information: 51 | 52 | This dataset consists of 1593 records (rows) and 256 attributes 53 | (columns). 54 | 55 | Each record represents a handwritten digit, orginally scanned with a 56 | resolution of 256 grays scale (28). 57 | 58 | Each pixel of the each original scanned image was first stretched, and 59 | after scaled between 0 and 1 (setting to 0 every pixel whose value was 60 | under tha value 127 of the grey scale (127 included) and setting to 1 61 | each pixel whose orinal value in the grey scale was over 127). 62 | 63 | Finally, each binary image was scaled again into a 16x16 square box (the 64 | final 256 binary attributes). 65 | 66 | ----------------------------------------------------- 67 | 68 | Relevant Papers: 69 | 70 | M Buscema, MetaNet: The Theory of Independent Judges, in Substance Use & 71 | Misuse 33(2)1998, pp 439-461. 72 | 73 | ----------------------------------------------------- 74 | 75 | Citation Request: 76 | 77 | Semeion Research Center of Sciences of Communication, via Sersale 117, 78 | 00128 Rome, Italy 79 | Tattile Via Gaetano Donizetti, 1-3-5,25030 Mairano (Brescia), Italy. 
80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /handwritten_ex/svm.cpp: -------------------------------------------------------------------------------- 1 | // Example : Support Vector Machine (SVM) learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : handwritten_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | // Version : 0.2 8 | 9 | // Copyright (c) 2011 School of Engineering, Cranfield University 10 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 11 | 12 | #include // opencv general include file 13 | #include // opencv machine learning include file 14 | 15 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 16 | 17 | #include 18 | 19 | /******************************************************************************/ 20 | 21 | // use SVM "grid search" for kernel parameters 22 | 23 | #define USE_OPENCV_GRID_SEARCH_AUTOTRAIN 1 // set to 0 to set SVM parameters manually 24 | 25 | /******************************************************************************/ 26 | 27 | #define NUMBER_OF_TRAINING_SAMPLES 797 28 | #define ATTRIBUTES_PER_SAMPLE 256 29 | #define NUMBER_OF_TESTING_SAMPLES 796 30 | 31 | #define NUMBER_OF_CLASSES 10 32 | 33 | // N.B. 
classes are integer handwritten digits in range 0-9 34 | 35 | /******************************************************************************/ 36 | 37 | // loads the sample database from file (which is a CSV text file) 38 | 39 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 40 | int n_samples ) 41 | { 42 | float tmpf; 43 | 44 | // if we can't read the input file then return 0 45 | FILE* f = fopen( filename, "r" ); 46 | if( !f ) 47 | { 48 | printf("ERROR: cannot read file %s\n", filename); 49 | return 0; // all not OK 50 | } 51 | 52 | // for each sample in the file 53 | 54 | for(int line = 0; line < n_samples; line++) 55 | { 56 | 57 | // for each attribute on the line in the file 58 | 59 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 60 | { 61 | if (attribute < ATTRIBUTES_PER_SAMPLE) 62 | { 63 | 64 | // first 256 elements (0-255) in each line are the attributes 65 | 66 | fscanf(f, "%f,", &tmpf); 67 | data.at(line, attribute) = tmpf; 68 | 69 | } 70 | else if (attribute == ATTRIBUTES_PER_SAMPLE) 71 | { 72 | 73 | // attribute 256 is the class label {0 ... 
9} 74 | 75 | fscanf(f, "%f,", &tmpf); 76 | classes.at(line, 0) = tmpf; 77 | } 78 | } 79 | } 80 | 81 | fclose(f); 82 | 83 | return 1; // all OK 84 | } 85 | 86 | /******************************************************************************/ 87 | 88 | int main( int argc, char** argv ) 89 | { 90 | // lets just check the version first 91 | 92 | printf ("OpenCV version %s (%d.%d.%d)\n", 93 | CV_VERSION, 94 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 95 | 96 | // define training data storage matrices (one for attribute examples, one 97 | // for classifications) 98 | 99 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 100 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 101 | 102 | //define testing data storage matrices 103 | 104 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 105 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 106 | 107 | // load training and testing data sets 108 | 109 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 110 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 111 | { 112 | // define the parameters for training the SVM (kernel + SVMtype type used for auto-training, 113 | // other parameters for manual only) 114 | 115 | CvSVMParams params = CvSVMParams( 116 | CvSVM::C_SVC, // Type of SVM, here N classes (see manual) 117 | CvSVM::LINEAR, // kernel type (see manual) 118 | 0.0, // kernel parameter (degree) for poly kernel only 119 | 0.0, // kernel parameter (gamma) for poly/rbf kernel only 120 | 0.0, // kernel parameter (coef0) for poly/sigmoid kernel only 121 | 10, // SVM optimization parameter C 122 | 0, // SVM optimization parameter nu (not used for N classe SVM) 123 | 0, // SVM optimization parameter p (not used for N classe SVM) 124 | NULL, // class wieghts (or priors) 125 | // Optional 
weights, assigned to particular classes. 126 | // They are multiplied by C and thus affect the misclassification 127 | // penalty for different classes. The larger weight, the larger penalty 128 | // on misclassification of data from the corresponding class. 129 | 130 | // termination criteria for learning algorithm 131 | 132 | cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 0.000001) 133 | 134 | ); 135 | 136 | // train SVM classifier (using training data) 137 | 138 | printf( "\nUsing training database: %s\n\n", argv[1]); 139 | CvSVM* svm = new CvSVM; 140 | 141 | #if (USE_OPENCV_GRID_SEARCH_AUTOTRAIN) 142 | 143 | // train using auto training parameter grid search if it is available 144 | // N.B. this does not search kernel choice 145 | 146 | svm->train_auto(training_data, training_classifications, Mat(), Mat(), params, 10); 147 | params = svm->get_params(); 148 | printf( "\nUsing optimal parameters degree %f, gamma %f, ceof0 %f\n\t C %f, nu %f, p %f\n", 149 | params.degree, params.gamma, params.coef0, params.C, params.nu, params.p); 150 | 151 | #else 152 | 153 | // otherwise use regular training and use parameters manually specified above 154 | 155 | svm->train(training_data, training_classifications, Mat(), Mat(), params); 156 | 157 | #endif 158 | 159 | // get the number of support vectors used to define the SVM decision boundary 160 | 161 | printf("Number of support vectors for trained SVM = %i\n", svm->get_support_vector_count()); 162 | 163 | // perform classifier testing and report results 164 | 165 | Mat test_sample; 166 | int correct_class = 0; 167 | int wrong_class = 0; 168 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 169 | float result; 170 | 171 | printf( "\nUsing testing database: %s\n\n", argv[2]); 172 | 173 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 174 | { 175 | 176 | // extract a row from the testing matrix 177 | 178 | test_sample = testing_data.row(tsample); 179 | 180 | // run SVM classifier 
181 | 182 | result = svm->predict(test_sample); 183 | 184 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result); 185 | 186 | // if the prediction and the (true) testing classification are the same 187 | // (N.B. openCV uses a floating point implementation!) 188 | 189 | if (fabs(result - testing_classifications.at(tsample, 0)) 190 | >= FLT_EPSILON) 191 | { 192 | // if they differ more than floating point error => wrong class 193 | 194 | wrong_class++; 195 | false_positives[(int) testing_classifications.at(tsample, 0)]++; 196 | 197 | } 198 | else 199 | { 200 | 201 | // otherwise correct 202 | 203 | correct_class++; 204 | } 205 | } 206 | 207 | printf( "\nResults on the testing database: %s\n" 208 | "\tCorrect classification: %d (%g%%)\n" 209 | "\tWrong classifications: %d (%g%%)\n", 210 | argv[2], 211 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 212 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 213 | 214 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 215 | { 216 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 217 | false_positives[i], 218 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 219 | } 220 | 221 | 222 | // all OK : main returns 0 223 | 224 | return 0; 225 | } 226 | 227 | // not OK : main returns -1 228 | 229 | return -1; 230 | } 231 | /******************************************************************************/ 232 | -------------------------------------------------------------------------------- /opticaldigits_ex/boosttree.cpp: -------------------------------------------------------------------------------- 1 | // Example : boosted tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2011 School of Engineering, Cranfield University 9 | // License : LGPL - 
http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 3823 22 | #define ATTRIBUTES_PER_SAMPLE 64 23 | #define NUMBER_OF_TESTING_SAMPLES 1797 24 | 25 | #define NUMBER_OF_CLASSES 10 26 | 27 | // N.B. classes are integer handwritten digits in range 0-9 28 | 29 | /******************************************************************************/ 30 | 31 | // loads the sample database from file (which is a CSV text file) 32 | 33 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 34 | int n_samples ) 35 | { 36 | float tmp; 37 | 38 | // if we can't read the input file then return 0 39 | FILE* f = fopen( filename, "r" ); 40 | if( !f ) 41 | { 42 | printf("ERROR: cannot read file %s\n", filename); 43 | return 0; // all not OK 44 | } 45 | 46 | // for each sample in the file 47 | 48 | for(int line = 0; line < n_samples; line++) 49 | { 50 | 51 | // for each attribute on the line in the file 52 | 53 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 54 | { 55 | if (attribute < 64) 56 | { 57 | 58 | // first 64 elements (0-63) in each line are the attributes 59 | 60 | fscanf(f, "%f,", &tmp); 61 | data.at(line, attribute) = tmp; 62 | // printf("%f,", data.at(line, attribute)); 63 | 64 | } 65 | else if (attribute == 64) 66 | { 67 | 68 | // attribute 65 is the class label {0 ... 
9} 69 | 70 | fscanf(f, "%f,", &tmp); 71 | classes.at(line, 0) = tmp; 72 | // printf("%f\n", classes.at(line, 0)); 73 | 74 | } 75 | } 76 | } 77 | 78 | fclose(f); 79 | 80 | return 1; // all OK 81 | } 82 | 83 | /******************************************************************************/ 84 | 85 | int main( int argc, char** argv ) 86 | { 87 | // lets just check the version first 88 | 89 | printf ("OpenCV version %s (%d.%d.%d)\n", 90 | CV_VERSION, 91 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 92 | 93 | // define training data storage matrices (one for attribute examples, one 94 | // for classifications) 95 | 96 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 97 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 98 | 99 | //define testing data storage matrices 100 | 101 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 102 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 103 | 104 | // load training and testing data sets 105 | 106 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 107 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 108 | { 109 | // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 110 | // 111 | // As currently boosted tree classifier in OpenCV can only be trained 112 | // for 2-class problems, we transform the training database by 113 | // "unrolling" each training sample as many times as the number of 114 | // classes (10) that we have. 
115 | // 116 | // In "unrolling" we add an additional attribute to each training 117 | // sample that contains the classification - here 10 new samples 118 | // are added for every original sample, one for each possible class 119 | // but only one with the correct class as an additional attribute 120 | // value has a new binary class of 1, all the rest of the new samples 121 | // have a new binary class of 0. 122 | // 123 | // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 124 | 125 | Mat new_data = Mat(NUMBER_OF_TRAINING_SAMPLES*NUMBER_OF_CLASSES, ATTRIBUTES_PER_SAMPLE + 1, CV_32F ); 126 | Mat new_responses = Mat(NUMBER_OF_TRAINING_SAMPLES*NUMBER_OF_CLASSES, 1, CV_32S ); 127 | 128 | // 1. unroll the training samples 129 | 130 | printf( "\nUnrolling the database..."); 131 | fflush(NULL); 132 | for(int i = 0; i < NUMBER_OF_TRAINING_SAMPLES; i++ ) 133 | { 134 | for(int j = 0; j < NUMBER_OF_CLASSES; j++ ) 135 | { 136 | for(int k = 0; k < ATTRIBUTES_PER_SAMPLE; k++ ) 137 | { 138 | 139 | // copy over the attribute data 140 | 141 | new_data.at((i * NUMBER_OF_CLASSES) + j, k) = training_data.at(i, k); 142 | 143 | } 144 | 145 | // set the new attribute to the original class 146 | 147 | new_data.at((i * NUMBER_OF_CLASSES) + j, ATTRIBUTES_PER_SAMPLE) = (float) j; 148 | 149 | // set the new binary class 150 | 151 | if ( ( (int) training_classifications.at( i, 0)) == j) 152 | { 153 | new_responses.at((i * NUMBER_OF_CLASSES) + j, 0) = 1; 154 | } 155 | else 156 | { 157 | new_responses.at((i * NUMBER_OF_CLASSES) + j, 0) = 0; 158 | } 159 | } 160 | } 161 | printf( "Done\n"); 162 | 163 | // 2. 
Unroll the type mask 164 | 165 | // define all the attributes as numerical 166 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 167 | // that can be assigned on a per attribute basis 168 | 169 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 2, 1, CV_8U ); 170 | var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical 171 | 172 | // this is a classification problem (i.e. predict a discrete number of class 173 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 174 | // *** the last (new) class indicator attribute, as well 175 | // *** as the new (binary) response (class) are categorical 176 | 177 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 178 | var_type.at(ATTRIBUTES_PER_SAMPLE + 1, 0) = CV_VAR_CATEGORICAL; 179 | 180 | // define the parameters for training the boosted trees 181 | 182 | // weights of each classification for classes 183 | // N.B. in the "unrolled" data we have an imbalance in the training examples 184 | 185 | float priors[] = {( NUMBER_OF_CLASSES - 1),1}; 186 | //float priors[] = {1,1}; 187 | 188 | // set the boost parameters 189 | 190 | CvBoostParams params = CvBoostParams(CvBoost::REAL, // boosting type 191 | 100, // number of weak classifiers 192 | 0.95, // trim rate 193 | 194 | // trim rate is a threshold (0->1) 195 | // used to eliminate samples with 196 | // boosting weight < 1.0 - (trim rate) 197 | // from the next round of boosting 198 | // Used for computational saving only. 
199 | 200 | 25, // max depth of trees 201 | false, // compute surrogate split, no missing data 202 | priors ); 203 | 204 | // as CvBoostParams inherits from CvDTreeParams we can also set generic 205 | // parameters of decision trees too (otherwise they use the defaults) 206 | 207 | params.max_categories = 15; // max number of categories (use sub-optimal algorithm for larger numbers) 208 | params.min_sample_count = 5; // min sample count 209 | params.cv_folds = 1; // cross validation folds 210 | params.use_1se_rule = false; // use 1SE rule => smaller tree 211 | params.truncate_pruned_tree = false; // throw away the pruned tree branches 212 | params.regression_accuracy = 0.0; // regression accuracy: N/A here 213 | 214 | 215 | // train boosted tree classifier (using training data) 216 | 217 | printf( "\nUsing training database: %s\n\n", argv[1]); 218 | printf( "Training .... (this may take several minutes) .... "); 219 | fflush(NULL); 220 | 221 | CvBoost* boostTree = new CvBoost; 222 | 223 | boostTree->train( new_data, CV_ROW_SAMPLE, new_responses, Mat(), Mat(), var_type, 224 | Mat(), params, false); 225 | printf( "Done."); 226 | 227 | // perform classifier testing and report results 228 | 229 | Mat test_sample; 230 | int correct_class = 0; 231 | int wrong_class = 0; 232 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 233 | Mat weak_responses = Mat( 1, boostTree->get_weak_predictors()->total, CV_32F ); 234 | Mat new_sample = Mat( 1, ATTRIBUTES_PER_SAMPLE + 1, CV_32F ); 235 | int best_class = 0; // best class returned by weak classifier 236 | double max_sum; // highest score for a given class 237 | 238 | printf( "\nUsing testing database: %s\n\n", argv[2]); 239 | 240 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 241 | { 242 | 243 | // extract a row from the testing matrix 244 | 245 | test_sample = testing_data.row(tsample); 246 | 247 | // convert it to the new "un-rolled" format of input 248 | 249 | for(int k = 0; k < 
ATTRIBUTES_PER_SAMPLE; k++ ) 250 | { 251 | new_sample.at( 0, k) = test_sample.at(0, k); 252 | } 253 | 254 | // run boosted tree prediction (for N classes and take the 255 | // maximal response of all the weak classifiers) 256 | 257 | max_sum = INT_MIN; // maximum starts off as Min. Int. 258 | 259 | for(int c = 0; c < NUMBER_OF_CLASSES; c++ ) 260 | { 261 | // set the additional attribute to original class 262 | 263 | new_sample.at(0, ATTRIBUTES_PER_SAMPLE) = (float) c; 264 | 265 | // run prediction (getting also the responses of the weak classifiers) 266 | // - N.B. here we have to use CvMat() casts and take the address of temporary 267 | // in order to use the available call that gives us the weak responses 268 | // For this reason we also have to pass a NULL pointer for the missing data 269 | 270 | boostTree->predict(&CvMat((new_sample)), NULL, &CvMat(weak_responses)); 271 | 272 | // obtain the sum of the responses from the weak classifiers 273 | 274 | Scalar responseSum = sum( weak_responses ); 275 | 276 | // record the "best class" - i.e. one with maximal response 277 | // from weak classifiers 278 | 279 | if( responseSum.val[0] > max_sum) 280 | { 281 | max_sum = (double) responseSum.val[0]; 282 | best_class = c; 283 | } 284 | } 285 | 286 | 287 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, best_class); 288 | 289 | // if the prediction and the (true) testing classification are the same 290 | // (N.B. openCV uses a floating point decision tree implementation!) 
291 | 292 | if (fabs(((float) (best_class)) - testing_classifications.at( tsample, 0)) 293 | >= FLT_EPSILON) 294 | { 295 | // if they differ more than floating point error => wrong class 296 | 297 | wrong_class++; 298 | 299 | false_positives[best_class]++; 300 | 301 | } 302 | else 303 | { 304 | 305 | // otherwise correct 306 | 307 | correct_class++; 308 | } 309 | } 310 | 311 | printf( "\nResults on the testing database: %s\n" 312 | "\tCorrect classification: %d (%g%%)\n" 313 | "\tWrong classifications: %d (%g%%)\n", 314 | argv[2], 315 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 316 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 317 | 318 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 319 | { 320 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 321 | false_positives[i], 322 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 323 | } 324 | 325 | // all matrix memory free by destructors 326 | 327 | // all OK : main returns 0 328 | 329 | return 0; 330 | } 331 | 332 | // not OK : main returns -1 333 | 334 | return -1; 335 | } 336 | /******************************************************************************/ 337 | -------------------------------------------------------------------------------- /opticaldigits_ex/decisiontree.cpp: -------------------------------------------------------------------------------- 1 | // Example : decision tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2011 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | 
/******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 3823 22 | #define ATTRIBUTES_PER_SAMPLE 64 23 | #define NUMBER_OF_TESTING_SAMPLES 1797 24 | 25 | #define NUMBER_OF_CLASSES 10 26 | 27 | // N.B. classes are integer handwritten digits in range 0-9 28 | 29 | /******************************************************************************/ 30 | 31 | // loads the sample database from file (which is a CSV text file) 32 | 33 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 34 | int n_samples ) 35 | { 36 | float tmp; 37 | 38 | // if we can't read the input file then return 0 39 | FILE* f = fopen( filename, "r" ); 40 | if( !f ) 41 | { 42 | printf("ERROR: cannot read file %s\n", filename); 43 | return 0; // all not OK 44 | } 45 | 46 | // for each sample in the file 47 | 48 | for(int line = 0; line < n_samples; line++) 49 | { 50 | 51 | // for each attribute on the line in the file 52 | 53 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 54 | { 55 | if (attribute < 64) 56 | { 57 | 58 | // first 64 elements (0-63) in each line are the attributes 59 | 60 | fscanf(f, "%f,", &tmp); 61 | data.at(line, attribute) = tmp; 62 | // printf("%f,", data.at(line, attribute)); 63 | 64 | } 65 | else if (attribute == 64) 66 | { 67 | 68 | // attribute 65 is the class label {0 ... 
9} 69 | 70 | fscanf(f, "%f,", &tmp); 71 | classes.at(line, 0) = tmp; 72 | // printf("%f\n", classes.at(line, 0)); 73 | 74 | } 75 | } 76 | } 77 | 78 | fclose(f); 79 | 80 | return 1; // all OK 81 | } 82 | 83 | /******************************************************************************/ 84 | 85 | int main( int argc, char** argv ) 86 | { 87 | // lets just check the version first 88 | 89 | printf ("OpenCV version %s (%d.%d.%d)\n", 90 | CV_VERSION, 91 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 92 | 93 | // define training data storage matrices (one for attribute examples, one 94 | // for classifications) 95 | 96 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 97 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 98 | 99 | //define testing data storage matrices 100 | 101 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 102 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 103 | 104 | // define all the attributes as numerical 105 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 106 | // that can be assigned on a per attribute basis 107 | 108 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 109 | var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical 110 | 111 | // this is a classification problem (i.e. 
predict a discrete number of class 112 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 113 | 114 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 115 | 116 | CvDTreeNode* resultNode; // node returned from a prediction 117 | 118 | // load training and testing data sets 119 | 120 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 121 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 122 | { 123 | // define the parameters for training the decision tree 124 | 125 | float priors[] = {1,1,1,1,1,1,1,1,1,1}; // weights of each classification for classes 126 | // (all equal as equal samples of each digit) 127 | 128 | CvDTreeParams params = CvDTreeParams(25, // max depth 129 | 5, // min sample count 130 | 0, // regression accuracy: N/A here 131 | false, // compute surrogate split, no missing data 132 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 133 | 15, // the number of cross-validation folds 134 | false, // use 1SE rule => smaller tree 135 | false, // throw away the pruned tree branches 136 | priors // the array of priors 137 | ); 138 | 139 | 140 | // train decision tree classifier (using training data) 141 | 142 | printf( "\nUsing training database: %s\n\n", argv[1]); 143 | CvDTree* dtree = new CvDTree; 144 | 145 | dtree->train(training_data, CV_ROW_SAMPLE, training_classifications, 146 | Mat(), Mat(), var_type, Mat(), params); 147 | 148 | // perform classifier testing and report results 149 | 150 | Mat test_sample; 151 | int correct_class = 0; 152 | int wrong_class = 0; 153 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 154 | 155 | printf( "\nUsing testing database: %s\n\n", argv[2]); 156 | 157 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 158 | { 159 | 160 | // extract a row from the testing matrix 161 | 162 | test_sample = 
testing_data.row(tsample); 163 | 164 | // run decision tree prediction 165 | 166 | resultNode = dtree->predict(test_sample, Mat(), false); 167 | 168 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) (resultNode->value)); 169 | 170 | // if the prediction and the (true) testing classification are the same 171 | // (N.B. openCV uses a floating point decision tree implementation!) 172 | 173 | if (fabs(resultNode->value - testing_classifications.at(tsample, 0)) 174 | >= FLT_EPSILON) 175 | 176 | { 177 | // if they differ more than floating point error => wrong class 178 | 179 | wrong_class++; 180 | 181 | false_positives[(int) resultNode->value]++; 182 | 183 | } 184 | else 185 | { 186 | 187 | // otherwise correct 188 | 189 | correct_class++; 190 | } 191 | } 192 | 193 | printf( "\nResults on the testing database: %s\n" 194 | "\tCorrect classification: %d (%g%%)\n" 195 | "\tWrong classifications: %d (%g%%)\n", 196 | argv[2], 197 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 198 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 199 | 200 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 201 | { 202 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 203 | false_positives[i], 204 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 205 | } 206 | 207 | // all matrix memory free by destructors 208 | 209 | // all OK : main returns 0 210 | 211 | return 0; 212 | } 213 | 214 | // not OK : main returns -1 215 | 216 | return -1; 217 | } 218 | /******************************************************************************/ 219 | -------------------------------------------------------------------------------- /opticaldigits_ex/extremerandomforest.cpp: -------------------------------------------------------------------------------- 1 | // Example : extremely random forest (tree) learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : 
opticaldigits_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2012 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 3823 22 | #define ATTRIBUTES_PER_SAMPLE 64 23 | #define NUMBER_OF_TESTING_SAMPLES 1797 24 | 25 | #define NUMBER_OF_CLASSES 10 26 | 27 | // N.B. classes are integer handwritten digits in range 0-9 28 | 29 | /******************************************************************************/ 30 | 31 | // loads the sample database from file (which is a CSV text file) 32 | 33 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 34 | int n_samples ) 35 | { 36 | float tmp; 37 | 38 | // if we can't read the input file then return 0 39 | FILE* f = fopen( filename, "r" ); 40 | if( !f ) 41 | { 42 | printf("ERROR: cannot read file %s\n", filename); 43 | return 0; // all not OK 44 | } 45 | 46 | // for each sample in the file 47 | 48 | for(int line = 0; line < n_samples; line++) 49 | { 50 | 51 | // for each attribute on the line in the file 52 | 53 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 54 | { 55 | if (attribute < 64) 56 | { 57 | 58 | // first 64 elements (0-63) in each line are the attributes 59 | 60 | fscanf(f, "%f,", &tmp); 61 | data.at(line, attribute) = tmp; 62 | // printf("%f,", data.at(line, attribute)); 63 | 64 | } 65 | else if (attribute == 64) 66 | { 67 | 68 | // attribute 65 is the class label {0 ... 
9} 69 | 70 | fscanf(f, "%f,", &tmp); 71 | classes.at(line, 0) = tmp; 72 | // printf("%f\n", classes.at(line, 0)); 73 | 74 | } 75 | } 76 | } 77 | 78 | fclose(f); 79 | 80 | return 1; // all OK 81 | } 82 | 83 | /******************************************************************************/ 84 | 85 | int main( int argc, char** argv ) 86 | { 87 | // lets just check the version first 88 | 89 | printf ("OpenCV version %s (%d.%d.%d)\n", 90 | CV_VERSION, 91 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 92 | 93 | // define training data storage matrices (one for attribute examples, one 94 | // for classifications) 95 | 96 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 97 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 98 | 99 | //define testing data storage matrices 100 | 101 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 102 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 103 | 104 | // define all the attributes as numerical 105 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 106 | // that can be assigned on a per attribute basis 107 | 108 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 109 | var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical 110 | 111 | // this is a classification problem (i.e. 
predict a discrete number of class 112 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 113 | 114 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 115 | 116 | double result; // value returned from a prediction 117 | 118 | // load training and testing data sets 119 | 120 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 121 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 122 | { 123 | // define the parameters for training the random forest (trees) 124 | 125 | float priors[] = {1,1,1,1,1,1,1,1,1,1}; // weights of each classification for classes 126 | // (all equal as equal samples of each digit) 127 | 128 | CvRTParams params = CvRTParams(25, // max depth 129 | 5, // min sample count 130 | 0, // regression accuracy: N/A here 131 | false, // compute surrogate split, no missing data 132 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 133 | priors, // the array of priors 134 | false, // calculate variable importance 135 | 4, // number of variables randomly selected at node and used to find the best split(s). 
136 | 100, // max number of trees in the forest 137 | 0.01f, // forrest accuracy 138 | CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination cirteria 139 | ); 140 | 141 | // train extreme random forest classifier (using training data) 142 | 143 | printf( "\nUsing training database: %s\n\n", argv[1]); 144 | CvERTrees* rtree = new CvERTrees; 145 | 146 | rtree->train(training_data, CV_ROW_SAMPLE, training_classifications, 147 | Mat(), Mat(), var_type, Mat(), params); 148 | 149 | // perform classifier testing and report results 150 | 151 | Mat test_sample; 152 | int correct_class = 0; 153 | int wrong_class = 0; 154 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 155 | 156 | printf( "\nUsing testing database: %s\n\n", argv[2]); 157 | 158 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 159 | { 160 | 161 | // extract a row from the testing matrix 162 | 163 | test_sample = testing_data.row(tsample); 164 | 165 | // run random forest prediction 166 | 167 | result = rtree->predict(test_sample, Mat()); 168 | 169 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result); 170 | 171 | // if the prediction and the (true) testing classification are the same 172 | // (N.B. openCV uses a floating point decision tree implementation!) 
173 | 174 | if (fabs(result - testing_classifications.at(tsample, 0)) 175 | >= FLT_EPSILON) 176 | { 177 | // if they differ more than floating point error => wrong class 178 | 179 | wrong_class++; 180 | 181 | false_positives[(int) result]++; 182 | 183 | } 184 | else 185 | { 186 | 187 | // otherwise correct 188 | 189 | correct_class++; 190 | } 191 | } 192 | 193 | printf( "\nResults on the testing database: %s\n" 194 | "\tCorrect classification: %d (%g%%)\n" 195 | "\tWrong classifications: %d (%g%%)\n", 196 | argv[2], 197 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 198 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 199 | 200 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 201 | { 202 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 203 | false_positives[i], 204 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 205 | } 206 | 207 | 208 | // all matrix memory free by destructors 209 | 210 | 211 | // all OK : main returns 0 212 | 213 | return 0; 214 | } 215 | 216 | // not OK : main returns -1 217 | 218 | return -1; 219 | } 220 | /******************************************************************************/ 221 | -------------------------------------------------------------------------------- /opticaldigits_ex/knn.cpp: -------------------------------------------------------------------------------- 1 | // Example : weighted knn digit classification 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Copyright (c) 2013 Toby Breckon, toby.breckon@durham.ac.uk 7 | // School of Engineering and Computing Sciences, Durham University 8 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 9 | 10 | #include "opencv2/core/core_c.h" 11 | #include "opencv2/ml/ml.hpp" 12 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 13 | 14 | #include 15 | using namespace std; 16 | 17 | 
/******************************************************************************/ 18 | // global definitions 19 | 20 | #define NUMBER_OF_TRAINING_SAMPLES 3823 21 | #define ATTRIBUTES_PER_SAMPLE 64 22 | #define NUMBER_OF_TESTING_SAMPLES 1797 23 | 24 | #define NUMBER_OF_CLASSES 10 // digits 0->9 25 | 26 | // "self load" data from CSV file in Mat() objects 27 | // filename = file to load 28 | // data = training or testing attributes (1 sample per row) 29 | // responses = training or testing classes (1 sample per row) 30 | // n_samples = number of samples in the set 31 | 32 | int read_data_from_csv(const char* filename, Mat &data, Mat &responses, int n_samples ); 33 | 34 | /******************************************************************************/ 35 | 36 | int main( int argc, char** argv ) 37 | { 38 | // define data set objects 39 | 40 | Mat training_data; 41 | Mat training_responses; 42 | 43 | Mat testing_data; 44 | Mat testing_responses; 45 | 46 | // load training and testing data sets (either from command line or *.{test|train} files 47 | 48 | if (((argc > 1) && (!(read_data_from_csv(argv[1], 49 | training_data, training_responses, NUMBER_OF_TRAINING_SAMPLES)) 50 | && !(read_data_from_csv(argv[2], 51 | testing_data, testing_responses, NUMBER_OF_TESTING_SAMPLES)))) 52 | || (!(read_data_from_csv("optdigits.train", 53 | training_data, training_responses, NUMBER_OF_TRAINING_SAMPLES)) 54 | && !(read_data_from_csv("optdigits.test", 55 | testing_data, testing_responses, NUMBER_OF_TESTING_SAMPLES))) 56 | ) 57 | { 58 | 59 | CvKNearest knn; // knn classifier object 60 | 61 | // train kNN classifier (using training data) 62 | 63 | knn.train(training_data, training_responses, Mat(), false, 32, false); 64 | 65 | // perform classifier testing and report results 66 | 67 | Mat test_sample; 68 | int correct_class = 0; 69 | int wrong_class = 0; 70 | Mat false_positives = Mat::zeros(NUMBER_OF_CLASSES, 1, CV_32S); 71 | float result; 72 | 73 | // for each test example i the test 
set 74 | 75 | for (int tsample = 0; tsample < testing_data.rows; tsample++) 76 | { 77 | 78 | // extract a row from the testing matrix 79 | 80 | test_sample = testing_data.row(tsample); 81 | 82 | // run kNN classificaation (for k = 7) 83 | 84 | result = knn.find_nearest(test_sample, 7); 85 | 86 | printf("Test Example %i -> class result (digit %i)\n", 87 | tsample, ((int) result)); 88 | 89 | // if the prediction and the (true) testing classification are the same 90 | // (within the bounds of floating point error for cross-platfom safety) 91 | 92 | if (fabs(result - testing_responses.at(tsample, 0)) 93 | >= FLT_EPSILON) 94 | { 95 | // if they differ more than floating point error => wrong class 96 | 97 | wrong_class++; 98 | false_positives.at((int) result, 0)++; 99 | 100 | } else { 101 | 102 | // otherwise correct 103 | 104 | correct_class++; 105 | } 106 | } 107 | 108 | printf( "\nResults on the testing database: %s\n" 109 | "\tCorrect classification: %d (%g%%)\n" 110 | "\tWrong classification: %d (%g%%)\n", 111 | (argc > 1) ? 
argv[2] : "optdigits.test", 112 | correct_class, (double) correct_class*100/testing_data.rows, 113 | wrong_class, (double) wrong_class*100/testing_data.rows); 114 | 115 | for (unsigned int c = 0; c < NUMBER_OF_CLASSES; c++) 116 | { 117 | printf( "\tClass (digit %i) false positives %d (%g%%)\n", c, 118 | false_positives.at(c,0), 119 | (((double) false_positives.at(c,0))*100) 120 | /testing_data.rows); 121 | } 122 | 123 | // on MS Windows wait to exit prompt 124 | #ifdef WIN32 125 | getchar(); 126 | #endif // WIN32 127 | 128 | // all OK : main returns 0 129 | 130 | return 0; 131 | } 132 | 133 | // not OK : main returns -1 134 | 135 | printf("usage: %s filename.train filename.test\n", argv[0]); 136 | printf("Failed to load training and testing data from specified files\n"); 137 | return -1; 138 | } 139 | /******************************************************************************/ 140 | 141 | // loads the sample database from file (which is a CSV text file) 142 | 143 | int read_data_from_csv(const char* filename, Mat &data, Mat &responses, int n_samples ) 144 | { 145 | data = Mat(n_samples, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 146 | responses = Mat(n_samples, 1, CV_32FC1); 147 | 148 | float tmp; 149 | 150 | // if we can't read the input file then return 0 151 | FILE* f = fopen( filename, "r" ); 152 | if( !f ) 153 | { 154 | printf("ERROR: cannot read file %s\n", filename); 155 | return 1; // all not OK 156 | } 157 | 158 | // for each sample in the file 159 | 160 | for(int line = 0; line < n_samples; line++) 161 | { 162 | 163 | // for each attribute on the line in the file 164 | 165 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 166 | { 167 | if (attribute < ATTRIBUTES_PER_SAMPLE) 168 | { 169 | 170 | // first 64 elements (0-63) in each line are the attributes 171 | 172 | fscanf(f, "%f,", &tmp); 173 | data.at(line, attribute) = tmp; 174 | // printf("%f,", data.at(line, attribute)); 175 | 176 | } 177 | else if (attribute == 
ATTRIBUTES_PER_SAMPLE) 178 | { 179 | 180 | // attribute 65 is the class label {0 ... 9} 181 | 182 | fscanf(f, "%f,", &tmp); 183 | responses.at(line, 0) = tmp; 184 | // printf("%f\n", classes.at(line, 0)); 185 | 186 | } 187 | } 188 | } 189 | 190 | fclose(f); 191 | 192 | return 0; // all OK 193 | } 194 | 195 | /******************************************************************************/ 196 | -------------------------------------------------------------------------------- /opticaldigits_ex/knn_weighted.cpp: -------------------------------------------------------------------------------- 1 | // Example : weighted knn digit classification 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Copyright (c) 2013 Toby Breckon, toby.breckon@durham.ac.uk 7 | // School of Engineering and Computing Sciences, Durham University 8 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 9 | 10 | #include "opencv2/core/core_c.h" 11 | #include "opencv2/ml/ml.hpp" 12 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 13 | 14 | #include 15 | using namespace std; 16 | 17 | /******************************************************************************/ 18 | // global definitions 19 | 20 | #define NUMBER_OF_CLASSES 10 // digits 0->9 21 | 22 | /******************************************************************************/ 23 | 24 | int main( int argc, char** argv ) 25 | { 26 | // define data loading objects 27 | 28 | CvMLData training_loader; 29 | CvMLData testing_loader; 30 | 31 | // load training and testing data sets (either from command line or *.{test|train} files 32 | 33 | if (((argc > 1) && (!(training_loader.read_csv(argv[1])) 34 | && !(testing_loader.read_csv(argv[2])))) 35 | || (!(training_loader.read_csv("optdigits.train")) 36 | && !(testing_loader.read_csv("optdigits.test"))) 37 | ) 38 | { 39 | 40 | CvKNearest knn; // knn classifier object 41 | 42 | // retrieve data from 
data loaders 43 | 44 | Mat training_data = 45 | (Mat(training_loader.get_values())).colRange(0,64); // 0->63 = attributes 46 | 47 | training_loader.set_response_idx(64); // 65th value is the classification 48 | Mat training_responses = training_loader.get_responses(); 49 | 50 | Mat testing_data = 51 | (Mat((testing_loader.get_values())).colRange(0,64)); // 0->63 = attributes 52 | 53 | testing_loader.set_response_idx(64); // 65th value is the classification 54 | Mat testing_responses = testing_loader.get_responses(); 55 | 56 | // train kNN classifier (using training data) 57 | 58 | knn.train(training_data, training_responses, Mat(), false, 32, false); 59 | 60 | // perform classifier testing and report results 61 | 62 | Mat test_sample; 63 | int correct_class = 0; 64 | int wrong_class = 0; 65 | Mat false_positives = Mat::zeros(NUMBER_OF_CLASSES, 1, CV_32S); 66 | Mat neighbourResponses, dists, results, weighted_results; 67 | double minVal, maxVal; // dummy variables for using minMaxLoc() 68 | Point result_class_location; 69 | int result_class; // resulting class with highest weighted knn score 70 | 71 | // for each test example i the test set 72 | 73 | for (int tsample = 0; tsample < testing_data.rows; tsample++) 74 | { 75 | 76 | // extract a row from the testing matrix 77 | 78 | test_sample = testing_data.row(tsample); 79 | 80 | // zero weighted results on each test iteration 81 | 82 | weighted_results = Mat::zeros(NUMBER_OF_CLASSES, 1, CV_32F); 83 | 84 | // run kNN classification (for k = 7) 85 | 86 | knn.find_nearest(test_sample, 7, results, neighbourResponses, dists); 87 | 88 | // perform weighted sum for all the classes that occur in the responses 89 | // from the k nearest neighbours based on distance from query sample 90 | 91 | for(int i=0; i < neighbourResponses.cols; i++) 92 | { 93 | weighted_results.at((int) neighbourResponses.at(0,i), 0) += 1.0 / pow((dists.at(0,i)),2.0); 94 | } 95 | 96 | // find the class with the maximum weighted sum (as the maximal y 
co-ordinate 97 | // of the resulting weighted_results matrix 98 | 99 | minMaxLoc(weighted_results, &minVal, &maxVal, 0, &result_class_location); 100 | result_class = result_class_location.y; // resulting class is in col location 101 | 102 | printf("Test Example %i -> class result (digit %i)\n", 103 | tsample, ((int) result_class)); 104 | 105 | // if the prediction and the (true) testing classification are the same 106 | // (within the bounds of floating point error for cross-platfom safety) 107 | 108 | if (fabs(((float) result_class) - testing_responses.at(tsample, 0)) 109 | >= FLT_EPSILON) 110 | { 111 | // if they differ more than floating point error => wrong class 112 | 113 | wrong_class++; 114 | false_positives.at(result_class, 0)++; 115 | 116 | } else { 117 | 118 | // otherwise correct 119 | 120 | correct_class++; 121 | } 122 | } 123 | 124 | printf( "\nResults on the testing database: %s\n" 125 | "\tCorrect classification: %d (%g%%)\n" 126 | "\tWrong classifications: %d (%g%%)\n", 127 | (argc > 1) ? 
argv[2] : "optdigits.test", 128 | correct_class, (double) correct_class*100/testing_data.rows, 129 | wrong_class, (double) wrong_class*100/testing_data.rows); 130 | 131 | for (unsigned int c = 0; c < NUMBER_OF_CLASSES; c++) 132 | { 133 | printf( "\tClass (digit %i) false positives %d (%g%%)\n", c, 134 | false_positives.at(c,0), 135 | (((double) false_positives.at(c,0))*100) 136 | /testing_data.rows); 137 | } 138 | 139 | // on MS Windows wait to exit prompt 140 | #ifdef WIN32 141 | getchar(); 142 | #endif // WIN32 143 | 144 | // all OK : main returns 0 145 | 146 | return 0; 147 | } 148 | 149 | // not OK : main returns -1 150 | 151 | printf("usage: %s filename.train filename.test\n", argv[0]); 152 | printf("Failed to load training and testing data from specified files\n"); 153 | return -1; 154 | } 155 | /******************************************************************************/ 156 | -------------------------------------------------------------------------------- /opticaldigits_ex/neuralnetwork.cpp: -------------------------------------------------------------------------------- 1 | // Example : neural network learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : optical_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2010 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | 20 | // global definitions (for speed and ease of use) 21 | 22 | #define NUMBER_OF_TRAINING_SAMPLES 3823 23 | #define ATTRIBUTES_PER_SAMPLE 64 24 | #define NUMBER_OF_TESTING_SAMPLES 1797 25 | 26 | #define NUMBER_OF_CLASSES 10 27 | 28 | // N.B. 
classes are integer handwritten digits in range 0-9 29 | 30 | /******************************************************************************/ 31 | 32 | // loads the sample database from file (which is a CSV text file) 33 | 34 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 35 | int n_samples ) 36 | { 37 | float tmp; 38 | 39 | // if we can't read the input file then return 0 40 | FILE* f = fopen( filename, "r" ); 41 | if( !f ) 42 | { 43 | printf("ERROR: cannot read file %s\n", filename); 44 | return 0; // all not OK 45 | } 46 | 47 | // for each sample in the file 48 | 49 | for(int line = 0; line < n_samples; line++) 50 | { 51 | 52 | // for each attribute on the line in the file 53 | 54 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 55 | { 56 | if (attribute < 64) 57 | { 58 | 59 | // first 64 elements (0-63) in each line are the attributes 60 | 61 | fscanf(f, "%f,", &tmp); 62 | data.at(line, attribute) = tmp; 63 | // printf("%f,", data.at(line, attribute)); 64 | 65 | } 66 | else if (attribute == 64) 67 | { 68 | 69 | // attribute 65 is the class label {0 ... 
9} 70 | 71 | fscanf(f, "%f,", &tmp); 72 | classes.at(line, (int) tmp) = 1.0; 73 | // printf("%f\n", classes.at(line, 0)); 74 | 75 | } 76 | } 77 | } 78 | 79 | fclose(f); 80 | 81 | return 1; // all OK 82 | } 83 | 84 | /******************************************************************************/ 85 | 86 | int main( int argc, char** argv ) 87 | { 88 | // lets just check the version first 89 | 90 | printf ("OpenCV version %s (%d.%d.%d)\n", 91 | CV_VERSION, 92 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 93 | 94 | // define training data storage matrices (one for attribute examples, one 95 | // for classifications) 96 | 97 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 98 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, NUMBER_OF_CLASSES, CV_32FC1); 99 | 100 | // define testing data storage matrices 101 | 102 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 103 | Mat testing_classifications = Mat::zeros(NUMBER_OF_TESTING_SAMPLES, NUMBER_OF_CLASSES, CV_32FC1); 104 | 105 | // define classification output vector 106 | 107 | Mat classificationResult = Mat(1, NUMBER_OF_CLASSES, CV_32FC1); 108 | Point max_loc = Point(0,0); 109 | 110 | // load training and testing data sets 111 | 112 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 113 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 114 | { 115 | // define the parameters for the neural network (MLP) 116 | 117 | // set the network to be 3 layer 64->10->10 118 | // - one input node per attribute in a sample 119 | // - 10 hidden nodes 120 | // - one output node per class 121 | 122 | // note that the OpenCV neural network (MLP) implementation does not 123 | // support categorical variables explicitly. 124 | // So, instead of the output class label, we will use 125 | // a binary vector of {0,0 ... 
1,0,0} components (one element by class) 126 | // for training and therefore, MLP will give us a vector of "probabilities" 127 | // at the prediction stage - the highest probability can be accepted 128 | // as the "winning" class label output by the network 129 | 130 | int layers_d[] = { ATTRIBUTES_PER_SAMPLE, 10, NUMBER_OF_CLASSES}; 131 | Mat layers = Mat(1,3,CV_32SC1); 132 | layers.at(0,0) = layers_d[0]; 133 | layers.at(0,1) = layers_d[1]; 134 | layers.at(0,2) = layers_d[2]; 135 | 136 | // create the network using a sigmoid function with alpha and beta 137 | // parameters 0.6 and 1 specified respectively (refer to manual) 138 | 139 | CvANN_MLP* nnetwork = new CvANN_MLP; 140 | nnetwork->create(layers, CvANN_MLP::SIGMOID_SYM, 0.6, 1); 141 | 142 | // set the training parameters 143 | 144 | CvANN_MLP_TrainParams params = CvANN_MLP_TrainParams( 145 | 146 | // terminate the training after either 1000 147 | // iterations or a very small change in the 148 | // network wieghts below the specified value 149 | 150 | cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 10000, 0.000001), 151 | 152 | // use backpropogation for training 153 | 154 | CvANN_MLP_TrainParams::BACKPROP, 155 | 156 | // co-efficents for backpropogation training 157 | // (refer to manual) 158 | 159 | 0.1, 160 | 0.1); 161 | 162 | // train the neural network (using training data) 163 | 164 | printf( "\nUsing training database: %s\n", argv[1]); 165 | 166 | int iterations = nnetwork->train(training_data, training_classifications, Mat(), Mat(), params); 167 | 168 | printf( "Training iterations: %i\n\n", iterations); 169 | 170 | // perform classifier testing and report results 171 | 172 | Mat test_sample; 173 | int correct_class = 0; 174 | int wrong_class = 0; 175 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 176 | 177 | printf( "\nUsing testing database: %s\n\n", argv[2]); 178 | 179 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 180 | { 181 | 182 | // extract a row 
from the testing matrix 183 | 184 | test_sample = testing_data.row(tsample); 185 | 186 | // run neural network prediction 187 | 188 | nnetwork->predict(test_sample, classificationResult); 189 | 190 | // The NN gives out a vector of probabilities for each class 191 | // We take the class with the highest "probability" 192 | // for simplicity (but we really should also check separation 193 | // of the different "probabilities" in this vector - what if 194 | // two classes have very similar values ?) 195 | 196 | minMaxLoc(classificationResult, 0, 0, 0, &max_loc); 197 | 198 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, max_loc.x); 199 | 200 | // if the corresponding location in the testing classifications 201 | // is not "1" (i.e. this is the correct class) then record this 202 | 203 | if (!(testing_classifications.at(tsample, max_loc.x))) 204 | { 205 | // if they differ more than floating point error => wrong class 206 | 207 | wrong_class++; 208 | 209 | false_positives[(int) max_loc.x]++; 210 | 211 | } 212 | else 213 | { 214 | 215 | // otherwise correct 216 | 217 | correct_class++; 218 | } 219 | } 220 | 221 | printf( "\nResults on the testing database: %s\n" 222 | "\tCorrect classification: %d (%g%%)\n" 223 | "\tWrong classifications: %d (%g%%)\n", 224 | argv[2], 225 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 226 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 227 | 228 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 229 | { 230 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 231 | false_positives[i], 232 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 233 | } 234 | 235 | // all OK : main returns 0 236 | 237 | return 0; 238 | } 239 | 240 | // not OK : main returns -1 241 | 242 | return -1; 243 | } 244 | /******************************************************************************/ 245 | -------------------------------------------------------------------------------- 
/opticaldigits_ex/normalbayes.cpp: -------------------------------------------------------------------------------- 1 | // Example : normal / naive bayesian learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets opticaldigits_ex 5 | 6 | // N.B. *** This bayesian Fifier assumes that the attribute (or feature) 7 | // vectors for each class are normally distributed and independent *** 8 | // - see OpenCV manual 9 | 10 | // "It’s "naïve" because it assumes that all the features (attributes) are 11 | // independent from one another even though this is seldom the case 12 | // (e.g., finding one eye usually implies that another eye is lurking nearby). 13 | // Zhang discusses possible reasons for the sometimes surprisingly good 14 | // performance of this classifier [Zhang04]." - Learning OpenCV [Bradski 2009]. 15 | 16 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 17 | 18 | // Copyright (c) 2013 School of Engineering, Cranfield University 19 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 20 | 21 | #include // opencv general include file 22 | #include // opencv machine learning include file 23 | 24 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 25 | 26 | #include 27 | 28 | /******************************************************************************/ 29 | 30 | // global definitions (for speed and ease of use) 31 | 32 | #define NUMBER_OF_TRAINING_SAMPLES 3823 33 | #define ATTRIBUTES_PER_SAMPLE 64 34 | #define NUMBER_OF_TESTING_SAMPLES 1797 35 | 36 | #define NUMBER_OF_CLASSES 10 37 | 38 | // N.B. 
classes are integer handwritten digits in range 0-9 39 | 40 | /******************************************************************************/ 41 | 42 | // loads the sample database from file (which is a CSV text file) 43 | 44 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 45 | int n_samples ) 46 | { 47 | float tmp; 48 | 49 | // if we can't read the input file then return 0 50 | FILE* f = fopen( filename, "r" ); 51 | if( !f ) 52 | { 53 | printf("ERROR: cannot read file %s\n", filename); 54 | return 0; // all not OK 55 | } 56 | 57 | // for each sample in the file 58 | 59 | for(int line = 0; line < n_samples; line++) 60 | { 61 | 62 | // for each attribute on the line in the file 63 | 64 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 65 | { 66 | if (attribute < 64) 67 | { 68 | 69 | // first 64 elements (0-63) in each line are the attributes 70 | 71 | fscanf(f, "%f,", &tmp); 72 | data.at(line, attribute) = tmp; 73 | // printf("%f,", data.at(line, attribute)); 74 | 75 | } 76 | else if (attribute == 64) 77 | { 78 | 79 | // attribute 65 is the class label {0 ... 
9} 80 | 81 | fscanf(f, "%f,", &tmp); 82 | classes.at(line, 0) = tmp; 83 | // printf("%f\n", classes.at(line, 0)); 84 | 85 | } 86 | } 87 | } 88 | 89 | fclose(f); 90 | 91 | return 1; // all OK 92 | } 93 | 94 | /******************************************************************************/ 95 | 96 | int main( int argc, char** argv ) 97 | { 98 | // lets just check the version first 99 | 100 | printf ("OpenCV version %s (%d.%d.%d)\n", 101 | CV_VERSION, 102 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 103 | 104 | // define training data storage matrices (one for attribute examples, one 105 | // for classifications) 106 | 107 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 108 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 109 | 110 | //define testing data storage matrices 111 | 112 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 113 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 114 | 115 | 116 | // load training and testing data sets 117 | 118 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 119 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 120 | { 121 | 122 | // train bayesian classifier (using training data) 123 | 124 | printf( "\nUsing training database: %s\n\n", argv[1]); 125 | CvNormalBayesClassifier *bayes = new CvNormalBayesClassifier; 126 | 127 | bayes->train(training_data, training_classifications, Mat(), Mat(), false); 128 | 129 | // perform classifier testing and report results 130 | 131 | Mat test_sample; 132 | int correct_class = 0; 133 | int wrong_class = 0; 134 | int false_positives [NUMBER_OF_CLASSES]; 135 | float result; 136 | 137 | // zero the false positive counters in a simple loop 138 | 139 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 140 | { 141 | false_positives[i] = 0; 142 | } 143 | 144 
| printf( "\nUsing testing database: %s\n\n", argv[2]); 145 | 146 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 147 | { 148 | 149 | // extract a row from the testing matrix 150 | 151 | test_sample = testing_data.row(tsample); 152 | 153 | // run decision tree prediction 154 | 155 | result = bayes->predict(test_sample); 156 | 157 | printf("Testing Sample %i -> class result (character %i)\n", tsample, 158 | (int) result); 159 | 160 | // if the prediction and the (true) testing classification are the same 161 | // (N.B. openCV uses a floating point decision tree implementation!) 162 | 163 | if (fabs(result - testing_classifications.at(tsample, 0)) 164 | >= FLT_EPSILON) 165 | { 166 | // if they differ more than floating point error => wrong class 167 | 168 | wrong_class++; 169 | 170 | false_positives[((int) result)]++; 171 | 172 | } 173 | else 174 | { 175 | 176 | // otherwise correct 177 | 178 | correct_class++; 179 | } 180 | } 181 | printf( "\nResults on the testing database: %s\n" 182 | "\tCorrect classification: %d (%g%%)\n" 183 | "\tWrong classifications: %d (%g%%)\n", 184 | argv[2], 185 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 186 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 187 | 188 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 189 | { 190 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 191 | false_positives[i], 192 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 193 | } 194 | 195 | 196 | // all matrix memory free by destructors 197 | 198 | 199 | // all OK : main returns 0 200 | 201 | return 0; 202 | } 203 | 204 | // not OK : main returns -1 205 | 206 | return -1; 207 | } 208 | /******************************************************************************/ 209 | -------------------------------------------------------------------------------- /opticaldigits_ex/optdigits.names: -------------------------------------------------------------------------------- 1 
| 2 | 1. Title of Database: Optical Recognition of Handwritten Digits 3 | 4 | 2. Source: 5 | E. Alpaydin, C. Kaynak 6 | Department of Computer Engineering 7 | Bogazici University, 80815 Istanbul Turkey 8 | alpaydin@boun.edu.tr 9 | July 1998 10 | 11 | 3. Past Usage: 12 | C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their 13 | Applications to Handwritten Digit Recognition, 14 | MSc Thesis, Institute of Graduate Studies in Science and 15 | Engineering, Bogazici University. 16 | 17 | E. Alpaydin, C. Kaynak (1998) Cascading Classifiers, Kybernetika, 18 | to appear. ftp://ftp.icsi.berkeley.edu/pub/ai/ethem/kyb.ps.Z 19 | 20 | 4. Relevant Information: 21 | We used preprocessing programs made available by NIST to extract 22 | normalized bitmaps of handwritten digits from a preprinted form. From 23 | a total of 43 people, 30 contributed to the training set and different 24 | 13 to the test set. 32x32 bitmaps are divided into nonoverlapping 25 | blocks of 4x4 and the number of on pixels are counted in each block. 26 | This generates an input matrix of 8x8 where each element is an 27 | integer in the range 0..16. This reduces dimensionality and gives 28 | invariance to small distortions. 29 | 30 | For info on NIST preprocessing routines, see 31 | M. D. Garris, J. L. Blue, G. T. Candela, D. L. Dimmick, J. Geist, 32 | P. J. Grother, S. A. Janet, and C. L. Wilson, NIST Form-Based 33 | Handprint Recognition System, NISTIR 5469, 1994. 34 | 35 | 5. Number of Instances 36 | optdigits.tra Training 3823 37 | optdigits.tes Testing 1797 38 | 39 | The way we used the dataset was to use half of training for 40 | actual training, one-fourth for validation and one-fourth 41 | for writer-dependent testing. The test set was used for 42 | writer-independent testing and is the actual quality measure. 43 | 44 | 6. Number of Attributes 45 | 64 input+1 class attribute 46 | 47 | 7. For Each Attribute: 48 | All input attributes are integers in the range 0..16. 
49 | The last attribute is the class code 0..9 50 | 51 | 8. Missing Attribute Values 52 | None 53 | 54 | 9. Class Distribution 55 | Class: No of examples in training set 56 | 0: 376 57 | 1: 389 58 | 2: 380 59 | 3: 389 60 | 4: 387 61 | 5: 376 62 | 6: 377 63 | 7: 387 64 | 8: 380 65 | 9: 382 66 | 67 | Class: No of examples in testing set 68 | 0: 178 69 | 1: 182 70 | 2: 177 71 | 3: 183 72 | 4: 181 73 | 5: 182 74 | 6: 181 75 | 7: 179 76 | 8: 174 77 | 9: 180 78 | 79 | Accuracy on the testing set with k-nn 80 | using Euclidean distance as the metric 81 | 82 | k = 1 : 98.00 83 | k = 2 : 97.38 84 | k = 3 : 97.83 85 | k = 4 : 97.61 86 | k = 5 : 97.89 87 | k = 6 : 97.77 88 | k = 7 : 97.66 89 | k = 8 : 97.66 90 | k = 9 : 97.72 91 | k = 10 : 97.55 92 | k = 11 : 97.89 93 | 94 | -------------------------------------------------------------------------------- /opticaldigits_ex/randomforest.cpp: -------------------------------------------------------------------------------- 1 | // Example : random forest (tree) learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2011 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | // global definitions (for speed and ease of use) 20 | 21 | #define NUMBER_OF_TRAINING_SAMPLES 3823 22 | #define ATTRIBUTES_PER_SAMPLE 64 23 | #define NUMBER_OF_TESTING_SAMPLES 1797 24 | 25 | #define NUMBER_OF_CLASSES 10 26 | 27 | // N.B. 
classes are integer handwritten digits in range 0-9 28 | 29 | /******************************************************************************/ 30 | 31 | // loads the sample database from file (which is a CSV text file) 32 | 33 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 34 | int n_samples ) 35 | { 36 | float tmp; 37 | 38 | // if we can't read the input file then return 0 39 | FILE* f = fopen( filename, "r" ); 40 | if( !f ) 41 | { 42 | printf("ERROR: cannot read file %s\n", filename); 43 | return 0; // all not OK 44 | } 45 | 46 | // for each sample in the file 47 | 48 | for(int line = 0; line < n_samples; line++) 49 | { 50 | 51 | // for each attribute on the line in the file 52 | 53 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 54 | { 55 | if (attribute < 64) 56 | { 57 | 58 | // first 64 elements (0-63) in each line are the attributes 59 | 60 | fscanf(f, "%f,", &tmp); 61 | data.at(line, attribute) = tmp; 62 | // printf("%f,", data.at(line, attribute)); 63 | 64 | } 65 | else if (attribute == 64) 66 | { 67 | 68 | // attribute 65 is the class label {0 ... 
9} 69 | 70 | fscanf(f, "%f,", &tmp); 71 | classes.at(line, 0) = tmp; 72 | // printf("%f\n", classes.at(line, 0)); 73 | 74 | } 75 | } 76 | } 77 | 78 | fclose(f); 79 | 80 | return 1; // all OK 81 | } 82 | 83 | /******************************************************************************/ 84 | 85 | int main( int argc, char** argv ) 86 | { 87 | // lets just check the version first 88 | 89 | printf ("OpenCV version %s (%d.%d.%d)\n", 90 | CV_VERSION, 91 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 92 | 93 | // define training data storage matrices (one for attribute examples, one 94 | // for classifications) 95 | 96 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 97 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 98 | 99 | //define testing data storage matrices 100 | 101 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 102 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 103 | 104 | // define all the attributes as numerical 105 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 106 | // that can be assigned on a per attribute basis 107 | 108 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 109 | var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical 110 | 111 | // this is a classification problem (i.e. 
predict a discrete number of class 112 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 113 | 114 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 115 | 116 | double result; // value returned from a prediction 117 | 118 | // load training and testing data sets 119 | 120 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 121 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 122 | { 123 | // define the parameters for training the random forest (trees) 124 | 125 | float priors[] = {1,1,1,1,1,1,1,1,1,1}; // weights of each classification for classes 126 | // (all equal as equal samples of each digit) 127 | 128 | CvRTParams params = CvRTParams(25, // max depth 129 | 5, // min sample count 130 | 0, // regression accuracy: N/A here 131 | false, // compute surrogate split, no missing data 132 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 133 | priors, // the array of priors 134 | false, // calculate variable importance 135 | 4, // number of variables randomly selected at node and used to find the best split(s). 
136 | 100, // max number of trees in the forest 137 | 0.01f, // forrest accuracy 138 | CV_TERMCRIT_ITER | CV_TERMCRIT_EPS // termination cirteria 139 | ); 140 | 141 | // train random forest classifier (using training data) 142 | 143 | printf( "\nUsing training database: %s\n\n", argv[1]); 144 | CvRTrees* rtree = new CvRTrees; 145 | 146 | rtree->train(training_data, CV_ROW_SAMPLE, training_classifications, 147 | Mat(), Mat(), var_type, Mat(), params); 148 | 149 | // perform classifier testing and report results 150 | 151 | Mat test_sample; 152 | int correct_class = 0; 153 | int wrong_class = 0; 154 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 155 | 156 | printf( "\nUsing testing database: %s\n\n", argv[2]); 157 | 158 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 159 | { 160 | 161 | // extract a row from the testing matrix 162 | 163 | test_sample = testing_data.row(tsample); 164 | 165 | // run random forest prediction 166 | 167 | result = rtree->predict(test_sample, Mat()); 168 | 169 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result); 170 | 171 | // if the prediction and the (true) testing classification are the same 172 | // (N.B. openCV uses a floating point decision tree implementation!) 
173 | 174 | if (fabs(result - testing_classifications.at(tsample, 0)) 175 | >= FLT_EPSILON) 176 | { 177 | // if they differ more than floating point error => wrong class 178 | 179 | wrong_class++; 180 | 181 | false_positives[(int) result]++; 182 | 183 | } 184 | else 185 | { 186 | 187 | // otherwise correct 188 | 189 | correct_class++; 190 | } 191 | } 192 | 193 | printf( "\nResults on the testing database: %s\n" 194 | "\tCorrect classification: %d (%g%%)\n" 195 | "\tWrong classifications: %d (%g%%)\n", 196 | argv[2], 197 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 198 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 199 | 200 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 201 | { 202 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 203 | false_positives[i], 204 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 205 | } 206 | 207 | 208 | // all matrix memory free by destructors 209 | 210 | 211 | // all OK : main returns 0 212 | 213 | return 0; 214 | } 215 | 216 | // not OK : main returns -1 217 | 218 | return -1; 219 | } 220 | /******************************************************************************/ 221 | -------------------------------------------------------------------------------- /opticaldigits_ex/svm.cpp: -------------------------------------------------------------------------------- 1 | // Example : Support Vector Machine (SVM) learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : opticaldigits_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | // Version : 0.2 8 | 9 | // Copyright (c) 2013 School of Engineering, Cranfield University 10 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 11 | 12 | #include // opencv general include file 13 | #include // opencv machine learning include file 14 | 15 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 16 | 17 | #include 18 | 19 | 
/******************************************************************************/ 20 | 21 | // use SVM "grid search" for kernel parameters 22 | 23 | #define USE_OPENCV_GRID_SEARCH_AUTOTRAIN 1 // set to 0 to set SVM parameters manually 24 | 25 | /******************************************************************************/ 26 | // global definitions (for speed and ease of use) 27 | 28 | #define NUMBER_OF_TRAINING_SAMPLES 3823 29 | #define ATTRIBUTES_PER_SAMPLE 64 30 | #define NUMBER_OF_TESTING_SAMPLES 1797 31 | 32 | #define NUMBER_OF_CLASSES 10 33 | 34 | // N.B. classes are integer handwritten digits in range 0-9 35 | 36 | /******************************************************************************/ 37 | 38 | // loads the sample database from file (which is a CSV text file) 39 | 40 | int read_data_from_csv(const char* filename, Mat data, Mat classes, 41 | int n_samples ) 42 | { 43 | float tmp; 44 | 45 | // if we can't read the input file then return 0 46 | FILE* f = fopen( filename, "r" ); 47 | if( !f ) 48 | { 49 | printf("ERROR: cannot read file %s\n", filename); 50 | return 0; // all not OK 51 | } 52 | 53 | // for each sample in the file 54 | 55 | for(int line = 0; line < n_samples; line++) 56 | { 57 | 58 | // for each attribute on the line in the file 59 | 60 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 61 | { 62 | if (attribute < 64) 63 | { 64 | 65 | // first 64 elements (0-63) in each line are the attributes 66 | 67 | fscanf(f, "%f,", &tmp); 68 | data.at(line, attribute) = tmp; 69 | // printf("%f,", data.at(line, attribute)); 70 | 71 | } 72 | else if (attribute == 64) 73 | { 74 | 75 | // attribute 65 is the class label {0 ... 
9} 76 | 77 | fscanf(f, "%f,", &tmp); 78 | classes.at(line, 0) = tmp; 79 | // printf("%f\n", classes.at(line, 0)); 80 | 81 | } 82 | } 83 | } 84 | 85 | fclose(f); 86 | 87 | return 1; // all OK 88 | } 89 | 90 | /******************************************************************************/ 91 | 92 | int main( int argc, char** argv ) 93 | { 94 | // lets just check the version first 95 | 96 | printf ("OpenCV version %s (%d.%d.%d)\n", 97 | CV_VERSION, 98 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 99 | 100 | // define training data storage matrices (one for attribute examples, one 101 | // for classifications) 102 | 103 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 104 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 105 | 106 | //define testing data storage matrices 107 | 108 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 109 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 110 | 111 | // load training and testing data sets 112 | 113 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 114 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 115 | { 116 | // define the parameters for training the SVM (kernel + SVMtype type used for auto-training, 117 | // other parameters for manual only) 118 | 119 | CvSVMParams params = CvSVMParams( 120 | CvSVM::C_SVC, // Type of SVM, here N classes (see manual) 121 | CvSVM::LINEAR, // kernel type (see manual) 122 | 0.0, // kernel parameter (degree) for poly kernel only 123 | 0.0, // kernel parameter (gamma) for poly/rbf kernel only 124 | 0.0, // kernel parameter (coef0) for poly/sigmoid kernel only 125 | 10, // SVM optimization parameter C 126 | 0, // SVM optimization parameter nu (not used for N classe SVM) 127 | 0, // SVM optimization parameter p (not used for N classe SVM) 128 | 
NULL, // class wieghts (or priors) 129 | // Optional weights, assigned to particular classes. 130 | // They are multiplied by C and thus affect the misclassification 131 | // penalty for different classes. The larger weight, the larger penalty 132 | // on misclassification of data from the corresponding class. 133 | 134 | // termination criteria for learning algorithm 135 | 136 | cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 0.000001) 137 | 138 | ); 139 | 140 | // train SVM classifier (using training data) 141 | 142 | printf( "\nUsing training database: %s\n\n", argv[1]); 143 | CvSVM* svm = new CvSVM; 144 | 145 | #if (USE_OPENCV_GRID_SEARCH_AUTOTRAIN) 146 | 147 | // train using auto training parameter grid search if it is available 148 | // N.B. this does not search kernel choice 149 | 150 | svm->train_auto(training_data, training_classifications, Mat(), Mat(), params, 10); 151 | params = svm->get_params(); 152 | printf( "\nUsing optimal parameters degree %f, gamma %f, ceof0 %f\n\t C %f, nu %f, p %f\n", 153 | params.degree, params.gamma, params.coef0, params.C, params.nu, params.p); 154 | 155 | #else 156 | 157 | // otherwise use regular training and use parameters manually specified above 158 | 159 | svm->train(training_data, training_classifications, Mat(), Mat(), params); 160 | 161 | #endif 162 | 163 | // get the number of support vectors used to define the SVM decision boundary 164 | 165 | printf("Number of support vectors for trained SVM = %i\n", svm->get_support_vector_count()); 166 | 167 | // perform classifier testing and report results 168 | 169 | Mat test_sample; 170 | int correct_class = 0; 171 | int wrong_class = 0; 172 | int false_positives [NUMBER_OF_CLASSES] = {0,0,0,0,0,0,0,0,0,0}; 173 | float result; 174 | 175 | printf( "\nUsing testing database: %s\n\n", argv[2]); 176 | 177 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 178 | { 179 | 180 | // extract a row from the testing matrix 181 | 182 | test_sample = 
testing_data.row(tsample); 183 | 184 | // run SVM classifier 185 | 186 | result = svm->predict(test_sample); 187 | 188 | printf("Testing Sample %i -> class result (digit %d)\n", tsample, (int) result); 189 | 190 | // if the prediction and the (true) testing classification are the same 191 | // (N.B. openCV uses a floating point implementation!) 192 | 193 | if (fabs(result - testing_classifications.at(tsample, 0)) 194 | >= FLT_EPSILON) 195 | { 196 | // if they differ more than floating point error => wrong class 197 | 198 | wrong_class++; 199 | false_positives[(int) testing_classifications.at(tsample, 0)]++; 200 | 201 | } 202 | else 203 | { 204 | 205 | // otherwise correct 206 | 207 | correct_class++; 208 | } 209 | } 210 | 211 | printf( "\nResults on the testing database: %s\n" 212 | "\tCorrect classification: %d (%g%%)\n" 213 | "\tWrong classifications: %d (%g%%)\n", 214 | argv[2], 215 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 216 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 217 | 218 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 219 | { 220 | printf( "\tClass (digit %d) false postives %d (%g%%)\n", i, 221 | false_positives[i], 222 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 223 | } 224 | 225 | 226 | // all OK : main returns 0 227 | 228 | return 0; 229 | } 230 | 231 | // not OK : main returns -1 232 | 233 | return -1; 234 | } 235 | /******************************************************************************/ 236 | -------------------------------------------------------------------------------- /other_ex/normalbayes.cpp: -------------------------------------------------------------------------------- 1 | // Example : normal / naive bayesian learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : other_ex/wdbc.{train|test} 5 | 6 | // N.B. 
*** This bayesian classifier assumes that the attribute (or feature) 7 | // vectors for each class are normally distributed and independent *** 8 | // - see OpenCV manual 9 | 10 | // "It’s "naïve" because it assumes that all the features (attributes) are 11 | // independent from one another even though this is seldom the case 12 | // (e.g., finding one eye usually implies that another eye is lurking nearby). 13 | // Zhang discusses possible reasons for the sometimes surprisingly good 14 | // performance of this classifier [Zhang04]." - Learning OpenCV [Bradski 2009]. 15 | 16 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 17 | 18 | // Copyright (c) 2011 School of Engineering, Cranfield University 19 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 20 | 21 | #include // opencv general include file 22 | #include // opencv machine learning include file 23 | 24 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 25 | 26 | #include 27 | 28 | /******************************************************************************/ 29 | // global definitions (for speed and ease of use) 30 | 31 | #define NUMBER_OF_TRAINING_SAMPLES 449 32 | #define ATTRIBUTES_PER_SAMPLE 30 // not the first two as patient ID and class 33 | #define NUMBER_OF_TESTING_SAMPLES 120 34 | 35 | #define NUMBER_OF_CLASSES 2 36 | 37 | static char CLASSES[2] = {'B', 'M'}; // class B = 0, class M = 1 38 | 39 | /******************************************************************************/ 40 | 41 | // loads the sample database from file (which is a CSV text file) 42 | 43 | int read_data_from_csv(const char* filename, Mat data, Mat classes, int n_samples ) 44 | { 45 | char tmpc; 46 | float tmpf; 47 | 48 | // if we can't read the input file then return 0 49 | FILE* f = fopen( filename, "r" ); 50 | if( !f ) 51 | { 52 | printf("ERROR: cannot read file %s\n", filename); 53 | return 0; // all not OK 54 | } 55 | 56 | // for each sample in the file 57 | 58 | for(int line = 0; line < 
n_samples; line++) 59 | { 60 | 61 | // for each attribute on the line in the file 62 | 63 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 2); attribute++) 64 | { 65 | if (attribute == 0) 66 | { 67 | fscanf(f, "%f,", &tmpf); 68 | 69 | // ignore attribute 0 (as it's the patient ID) 70 | 71 | continue; 72 | } 73 | else if (attribute == 1) 74 | { 75 | 76 | // attribute 2 (in the database) is the classification 77 | // record 1 = M = malignant 78 | // record 0 = B = benign 79 | 80 | fscanf(f, "%c,", &tmpc); 81 | 82 | switch(tmpc) 83 | { 84 | case 'M': 85 | classes.at(line, 0) = 1.0; 86 | break; 87 | case 'B': 88 | classes.at(line, 0) = 0.0; 89 | break; 90 | default: 91 | printf("ERROR: unexpected class in file %s\n", filename); 92 | return 0; // all not OK 93 | } 94 | 95 | // printf("%c,", tmpc); 96 | } 97 | else 98 | { 99 | fscanf(f, "%f,", &tmpf); 100 | data.at(line, (attribute - 2)) = tmpf; 101 | //printf("%f,", tmpf); 102 | } 103 | } 104 | fscanf(f, "\n"); 105 | //printf("\n"); 106 | } 107 | 108 | fclose(f); 109 | 110 | return 1; // all OK 111 | } 112 | 113 | /******************************************************************************/ 114 | 115 | int main( int argc, char** argv ) 116 | { 117 | // lets just check the version first 118 | 119 | printf ("OpenCV version %s (%d.%d.%d)\n", 120 | CV_VERSION, 121 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 122 | 123 | // define training data storage matrices (one for attribute examples, one 124 | // for classifications) 125 | 126 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 127 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 128 | 129 | //define testing data storage matrices 130 | 131 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 132 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 133 | 134 | 135 | // load training and testing data sets 136 | 137 | if 
(read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 138 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 139 | { 140 | 141 | // train bayesian classifier (using training data) 142 | 143 | printf( "\nUsing training database: %s\n\n", argv[1]); 144 | CvNormalBayesClassifier *bayes = new CvNormalBayesClassifier; 145 | 146 | bayes->train(training_data, training_classifications, Mat(), Mat(), false); 147 | 148 | // perform classifier testing and report results 149 | 150 | Mat test_sample; 151 | int correct_class = 0; 152 | int wrong_class = 0; 153 | int false_positives [NUMBER_OF_CLASSES]; 154 | float result; 155 | 156 | // zero the false positive counters in a simple loop 157 | 158 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 159 | { 160 | false_positives[i] = 0; 161 | } 162 | 163 | printf( "\nUsing testing database: %s\n\n", argv[2]); 164 | 165 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 166 | { 167 | 168 | // extract a row from the testing matrix 169 | 170 | test_sample = testing_data.row(tsample); 171 | 172 | // run decision tree prediction 173 | 174 | result = bayes->predict(test_sample); 175 | 176 | printf("Testing Sample %i -> class result (character %c)\n", tsample, 177 | CLASSES[((int) result)]); 178 | 179 | // if the prediction and the (true) testing classification are the same 180 | // (N.B. openCV uses a floating point decision tree implementation!) 
181 | 182 | if (fabs(result - testing_classifications.at(tsample, 0)) 183 | >= FLT_EPSILON) 184 | { 185 | // if they differ more than floating point error => wrong class 186 | 187 | wrong_class++; 188 | 189 | false_positives[((int) result)]++; 190 | 191 | } 192 | else 193 | { 194 | 195 | // otherwise correct 196 | 197 | correct_class++; 198 | } 199 | } 200 | 201 | printf( "\nResults on the testing database: %s\n" 202 | "\tCorrect classification: %d (%g%%)\n" 203 | "\tWrong classifications: %d (%g%%)\n", 204 | argv[2], 205 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 206 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 207 | 208 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 209 | { 210 | printf( "\tClass (character %c) false postives %d (%g%%)\n", CLASSES[i], 211 | false_positives[i], 212 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 213 | } 214 | 215 | // all matrix memory free by destructors 216 | 217 | 218 | // all OK : main returns 0 219 | 220 | return 0; 221 | } 222 | 223 | // not OK : main returns -1 224 | 225 | return -1; 226 | } 227 | /******************************************************************************/ 228 | -------------------------------------------------------------------------------- /other_ex/wdbc.names: -------------------------------------------------------------------------------- 1 | 1. Title: Wisconsin Diagnostic Breast Cancer (WDBC) 2 | 3 | 2. Source Information 4 | 5 | a) Creators: 6 | 7 | Dr. William H. Wolberg, General Surgery Dept., University of 8 | Wisconsin, Clinical Sciences Center, Madison, WI 53792 9 | wolberg@eagle.surgery.wisc.edu 10 | 11 | W. Nick Street, Computer Sciences Dept., University of 12 | Wisconsin, 1210 West Dayton St., Madison, WI 53706 13 | street@cs.wisc.edu 608-262-6619 14 | 15 | Olvi L. 
Mangasarian, Computer Sciences Dept., University of 16 | Wisconsin, 1210 West Dayton St., Madison, WI 53706 17 | olvi@cs.wisc.edu 18 | 19 | b) Donor: Nick Street 20 | 21 | c) Date: November 1995 22 | 23 | 3. Past Usage: 24 | 25 | first usage: 26 | 27 | W.N. Street, W.H. Wolberg and O.L. Mangasarian 28 | Nuclear feature extraction for breast tumor diagnosis. 29 | IS&T/SPIE 1993 International Symposium on Electronic Imaging: Science 30 | and Technology, volume 1905, pages 861-870, San Jose, CA, 1993. 31 | 32 | OR literature: 33 | 34 | O.L. Mangasarian, W.N. Street and W.H. Wolberg. 35 | Breast cancer diagnosis and prognosis via linear programming. 36 | Operations Research, 43(4), pages 570-577, July-August 1995. 37 | 38 | Medical literature: 39 | 40 | W.H. Wolberg, W.N. Street, and O.L. Mangasarian. 41 | Machine learning techniques to diagnose breast cancer from 42 | fine-needle aspirates. 43 | Cancer Letters 77 (1994) 163-171. 44 | 45 | W.H. Wolberg, W.N. Street, and O.L. Mangasarian. 46 | Image analysis and machine learning applied to breast cancer 47 | diagnosis and prognosis. 48 | Analytical and Quantitative Cytology and Histology, Vol. 17 49 | No. 2, pages 77-87, April 1995. 50 | 51 | W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. 52 | Computerized breast cancer diagnosis and prognosis from fine 53 | needle aspirates. 54 | Archives of Surgery 1995;130:511-516. 55 | 56 | W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian. 57 | Computer-derived nuclear features distinguish malignant from 58 | benign breast cytology. 59 | Human Pathology, 26:792--796, 1995. 
60 | 61 | See also: 62 | http://www.cs.wisc.edu/~olvi/uwmp/mpml.html 63 | http://www.cs.wisc.edu/~olvi/uwmp/cancer.html 64 | 65 | Results: 66 | 67 | - predicting field 2, diagnosis: B = benign, M = malignant 68 | - sets are linearly separable using all 30 input features 69 | - best predictive accuracy obtained using one separating plane 70 | in the 3-D space of Worst Area, Worst Smoothness and 71 | Mean Texture. Estimated accuracy 97.5% using repeated 72 | 10-fold crossvalidations. Classifier has correctly 73 | diagnosed 176 consecutive new patients as of November 74 | 1995. 75 | 76 | 4. Relevant information 77 | 78 | Features are computed from a digitized image of a fine needle 79 | aspirate (FNA) of a breast mass. They describe 80 | characteristics of the cell nuclei present in the image. 81 | A few of the images can be found at 82 | http://www.cs.wisc.edu/~street/images/ 83 | 84 | Separating plane described above was obtained using 85 | Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree 86 | Construction Via Linear Programming." Proceedings of the 4th 87 | Midwest Artificial Intelligence and Cognitive Science Society, 88 | pp. 97-101, 1992], a classification method which uses linear 89 | programming to construct a decision tree. Relevant features 90 | were selected using an exhaustive search in the space of 1-4 91 | features and 1-3 separating planes. 92 | 93 | The actual linear program used to obtain the separating plane 94 | in the 3-dimensional space is that described in: 95 | [K. P. Bennett and O. L. Mangasarian: "Robust Linear 96 | Programming Discrimination of Two Linearly Inseparable Sets", 97 | Optimization Methods and Software 1, 1992, 23-34]. 98 | 99 | 100 | This database is also available through the UW CS ftp server: 101 | 102 | ftp ftp.cs.wisc.edu 103 | cd math-prog/cpo-dataset/machine-learn/WDBC/ 104 | 105 | 5. Number of instances: 569 106 | 107 | 6. Number of attributes: 32 (ID, diagnosis, 30 real-valued input features) 108 | 109 | 7. 
Attribute information 110 | 111 | 1) ID number 112 | 2) Diagnosis (M = malignant, B = benign) 113 | 3-32) 114 | 115 | Ten real-valued features are computed for each cell nucleus: 116 | 117 | a) radius (mean of distances from center to points on the perimeter) 118 | b) texture (standard deviation of gray-scale values) 119 | c) perimeter 120 | d) area 121 | e) smoothness (local variation in radius lengths) 122 | f) compactness (perimeter^2 / area - 1.0) 123 | g) concavity (severity of concave portions of the contour) 124 | h) concave points (number of concave portions of the contour) 125 | i) symmetry 126 | j) fractal dimension ("coastline approximation" - 1) 127 | 128 | Several of the papers listed above contain detailed descriptions of 129 | how these features are computed. 130 | 131 | The mean, standard error, and "worst" or largest (mean of the three 132 | largest values) of these features were computed for each image, 133 | resulting in 30 features. For instance, field 3 is Mean Radius, field 134 | 13 is Radius SE, field 23 is Worst Radius. 135 | 136 | All feature values are recoded with four significant digits. 137 | 138 | 8. Missing attribute values: none 139 | 140 | 9. 
Class distribution: 357 benign, 212 malignant -------------------------------------------------------------------------------- /speech_ex/decisiontree.cpp: -------------------------------------------------------------------------------- 1 | // Example : decision tree learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : speech_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2011 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | #include // opencv general include file 12 | #include // opencv machine learning include file 13 | 14 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 15 | 16 | #include 17 | 18 | /******************************************************************************/ 19 | 20 | #define NUMBER_OF_TRAINING_SAMPLES 6238 21 | #define ATTRIBUTES_PER_SAMPLE 617 22 | #define NUMBER_OF_TESTING_SAMPLES 1559 23 | 24 | #define NUMBER_OF_CLASSES 26 25 | 26 | // N.B. 
classes are spoken alphabetric letters A-Z labelled 1 -> 26 27 | 28 | /******************************************************************************/ 29 | 30 | // loads the sample database from file (which is a CSV text file) 31 | 32 | int read_data_from_csv(const char* filename, Mat data, Mat classes, int n_samples ) 33 | { 34 | float tmp; 35 | 36 | // if we can't read the input file then return 0 37 | FILE* f = fopen( filename, "r" ); 38 | if( !f ) 39 | { 40 | printf("ERROR: cannot read file %s\n", filename); 41 | return 0; // all not OK 42 | } 43 | 44 | // for each sample in the file 45 | 46 | for(int line = 0; line < n_samples; line++) 47 | { 48 | 49 | // for each attribute on the line in the file 50 | 51 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 52 | { 53 | if (attribute < ATTRIBUTES_PER_SAMPLE) 54 | { 55 | 56 | // first 617 elements (0-616) in each line are the attributes 57 | 58 | fscanf(f, "%f,", &tmp); 59 | data.at(line, attribute) = tmp; 60 | 61 | 62 | } 63 | else if (attribute == ATTRIBUTES_PER_SAMPLE) 64 | { 65 | 66 | // attribute 617 is the class label {1 ... 
26} == {A-Z} 67 | 68 | fscanf(f, "%f,", &tmp); 69 | classes.at(line, 0) = tmp; 70 | } 71 | } 72 | } 73 | 74 | fclose(f); 75 | 76 | return 1; // all OK 77 | } 78 | 79 | /******************************************************************************/ 80 | 81 | int main( int argc, char** argv ) 82 | { 83 | // lets just check the version first 84 | 85 | printf ("OpenCV version %s (%d.%d.%d)\n", 86 | CV_VERSION, 87 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 88 | 89 | // define training data storage matrices (one for attribute examples, one 90 | // for classifications) 91 | 92 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 93 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 94 | 95 | //define testing data storage matrices 96 | 97 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 98 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 99 | 100 | // define all the attributes as numerical 101 | // alternatives are CV_VAR_CATEGORICAL or CV_VAR_ORDERED(=CV_VAR_NUMERICAL) 102 | // that can be assigned on a per attribute basis 103 | 104 | Mat var_type = Mat(ATTRIBUTES_PER_SAMPLE + 1, 1, CV_8U ); 105 | var_type.setTo(Scalar(CV_VAR_NUMERICAL) ); // all inputs are numerical 106 | 107 | // this is a classification problem (i.e. 
predict a discrete number of class 108 | // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL 109 | 110 | var_type.at(ATTRIBUTES_PER_SAMPLE, 0) = CV_VAR_CATEGORICAL; 111 | 112 | CvDTreeNode* resultNode; // node returned from a prediction 113 | 114 | // load training and testing data sets 115 | 116 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 117 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 118 | { 119 | // define the parameters for training the decision tree 120 | 121 | float *priors = NULL; // weights of each classification for classes 122 | // (all equal as equal samples of each character) 123 | 124 | CvDTreeParams params = CvDTreeParams(25, // max depth 125 | 5, // min sample count 126 | 0, // regression accuracy: N/A here 127 | false, // compute surrogate split, no missing data 128 | 15, // max number of categories (use sub-optimal algorithm for larger numbers) 129 | 15, // the number of cross-validation folds 130 | false, // use 1SE rule => smaller tree 131 | false, // throw away the pruned tree branches 132 | priors // the array of priors 133 | ); 134 | 135 | 136 | // train decision tree classifier (using training data) 137 | 138 | printf( "\nUsing training database: %s\n\n", argv[1]); 139 | CvDTree* dtree = new CvDTree; 140 | 141 | dtree->train(training_data, CV_ROW_SAMPLE, training_classifications, 142 | Mat(), Mat(), var_type, Mat(), params); 143 | 144 | // perform classifier testing and report results 145 | 146 | Mat test_sample; 147 | int correct_class = 0; 148 | int wrong_class = 0; 149 | int false_positives [NUMBER_OF_CLASSES]; 150 | char class_labels[NUMBER_OF_CLASSES]; 151 | 152 | // zero the false positive counters in a simple loop 153 | 154 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 155 | { 156 | false_positives[i] = 0; 157 | class_labels[i] = (char) 65 + i; // ASCII 65 = A 158 | } 159 | 160 | printf( 
"\nUsing testing database: %s\n\n", argv[2]); 161 | 162 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 163 | { 164 | 165 | // extract a row from the testing matrix 166 | 167 | test_sample = testing_data.row(tsample); 168 | 169 | // run decision tree prediction 170 | 171 | resultNode = dtree->predict(test_sample, Mat(), false); 172 | 173 | printf("Testing Sample %i -> class result (character %c)\n", tsample, 174 | class_labels[((int) (resultNode->value)) - 1]); 175 | 176 | // if the prediction and the (true) testing classification are the same 177 | // (N.B. openCV uses a floating point decision tree implementation!) 178 | 179 | if (fabs(resultNode->value - testing_classifications.at(tsample, 0)) 180 | >= FLT_EPSILON) 181 | { 182 | // if they differ more than floating point error => wrong class 183 | 184 | wrong_class++; 185 | 186 | false_positives[((int) (resultNode->value)) - 1]++; 187 | 188 | } 189 | else 190 | { 191 | 192 | // otherwise correct 193 | 194 | correct_class++; 195 | } 196 | } 197 | 198 | printf( "\nResults on the testing database: %s\n" 199 | "\tCorrect classification: %d (%g%%)\n" 200 | "\tWrong classifications: %d (%g%%)\n", 201 | argv[2], 202 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 203 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 204 | 205 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 206 | { 207 | printf( "\tClass (character %c) false postives %d (%g%%)\n", class_labels[i], 208 | false_positives[i], 209 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 210 | } 211 | 212 | 213 | // all matrix memory free by destructors 214 | 215 | 216 | // all OK : main returns 0 217 | 218 | return 0; 219 | } 220 | 221 | // not OK : main returns -1 222 | 223 | return -1; 224 | } 225 | /******************************************************************************/ 226 | -------------------------------------------------------------------------------- /speech_ex/svm.cpp: 
-------------------------------------------------------------------------------- 1 | // Example : Support Vector Machine (SVM) learning 2 | // usage: prog training_data_file testing_data_file 3 | 4 | // For use with test / training datasets : speech_ex 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | // Version : 0.2 8 | 9 | // Copyright (c) 2011 School of Engineering, Cranfield University 10 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 11 | 12 | 13 | #include // opencv general include file 14 | #include // opencv machine learning include file 15 | 16 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 17 | 18 | #include 19 | 20 | /******************************************************************************/ 21 | 22 | // use SVM "grid search" for kernel parameters 23 | 24 | #define USE_OPENCV_GRID_SEARCH_AUTOTRAIN 1 // set to 0 to set SVM parameters manually 25 | 26 | /******************************************************************************/ 27 | 28 | #define NUMBER_OF_TRAINING_SAMPLES 6238 29 | #define ATTRIBUTES_PER_SAMPLE 617 30 | #define NUMBER_OF_TESTING_SAMPLES 1559 31 | 32 | #define NUMBER_OF_CLASSES 26 33 | 34 | // N.B. 
classes are spoken alphabetric letters A-Z labelled 1 -> 26 35 | 36 | /******************************************************************************/ 37 | 38 | // loads the sample database from file (which is a CSV text file) 39 | 40 | 41 | int read_data_from_csv(const char* filename, Mat data, Mat classes, int n_samples ) 42 | { 43 | float tmp; 44 | 45 | // if we can't read the input file then return 0 46 | FILE* f = fopen( filename, "r" ); 47 | if( !f ) 48 | { 49 | printf("ERROR: cannot read file %s\n", filename); 50 | return 0; // all not OK 51 | } 52 | 53 | // for each sample in the file 54 | 55 | for(int line = 0; line < n_samples; line++) 56 | { 57 | 58 | // for each attribute on the line in the file 59 | 60 | for(int attribute = 0; attribute < (ATTRIBUTES_PER_SAMPLE + 1); attribute++) 61 | { 62 | if (attribute < ATTRIBUTES_PER_SAMPLE) 63 | { 64 | 65 | // first 617 elements (0-616) in each line are the attributes 66 | 67 | fscanf(f, "%f,", &tmp); 68 | data.at(line, attribute) = tmp; 69 | 70 | 71 | } 72 | else if (attribute == ATTRIBUTES_PER_SAMPLE) 73 | { 74 | 75 | // attribute 617 is the class label {1 ... 
26} == {A-Z} 76 | 77 | fscanf(f, "%f,", &tmp); 78 | classes.at(line, 0) = tmp; 79 | } 80 | } 81 | } 82 | 83 | fclose(f); 84 | 85 | return 1; // all OK 86 | } 87 | 88 | /******************************************************************************/ 89 | 90 | int main( int argc, char** argv ) 91 | { 92 | // lets just check the version first 93 | 94 | printf ("OpenCV version %s (%d.%d.%d)\n", 95 | CV_VERSION, 96 | CV_MAJOR_VERSION, CV_MINOR_VERSION, CV_SUBMINOR_VERSION); 97 | 98 | // define training data storage matrices (one for attribute examples, one 99 | // for classifications) 100 | 101 | Mat training_data = Mat(NUMBER_OF_TRAINING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 102 | Mat training_classifications = Mat(NUMBER_OF_TRAINING_SAMPLES, 1, CV_32FC1); 103 | 104 | //define testing data storage matrices 105 | 106 | Mat testing_data = Mat(NUMBER_OF_TESTING_SAMPLES, ATTRIBUTES_PER_SAMPLE, CV_32FC1); 107 | Mat testing_classifications = Mat(NUMBER_OF_TESTING_SAMPLES, 1, CV_32FC1); 108 | 109 | // load training and testing data sets 110 | 111 | if (read_data_from_csv(argv[1], training_data, training_classifications, NUMBER_OF_TRAINING_SAMPLES) && 112 | read_data_from_csv(argv[2], testing_data, testing_classifications, NUMBER_OF_TESTING_SAMPLES)) 113 | { 114 | // define the parameters for training the SVM (kernel + SVMtype type used for auto-training, 115 | // other parameters for manual only) 116 | 117 | CvSVMParams params = CvSVMParams( 118 | CvSVM::C_SVC, // Type of SVM, here N classes (see manual) 119 | CvSVM::LINEAR, // kernel type (see manual) 120 | 0.0, // kernel parameter (degree) for poly kernel only 121 | 0.0, // kernel parameter (gamma) for poly/rbf kernel only 122 | 0.0, // kernel parameter (coef0) for poly/sigmoid kernel only 123 | 10, // SVM optimization parameter C 124 | 0, // SVM optimization parameter nu (not used for N classe SVM) 125 | 0, // SVM optimization parameter p (not used for N classe SVM) 126 | NULL, // class wieghts (or priors) 127 | // 
Optional weights, assigned to particular classes. 128 | // They are multiplied by C and thus affect the misclassification 129 | // penalty for different classes. The larger weight, the larger penalty 130 | // on misclassification of data from the corresponding class. 131 | 132 | // termination criteria for learning algorithm 133 | 134 | cvTermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000, 0.000001) 135 | 136 | ); 137 | 138 | // train SVM classifier (using training data) 139 | 140 | printf( "\nUsing training database: %s\n\n", argv[1]); 141 | CvSVM* svm = new CvSVM; 142 | 143 | printf( "\nTraining the SVM (in progress) ..... "); 144 | fflush(NULL); 145 | 146 | #if (USE_OPENCV_GRID_SEARCH_AUTOTRAIN) 147 | 148 | printf( "(SVM 'grid search' => may take some time!)"); 149 | fflush(NULL); 150 | 151 | // train using auto training parameter grid search if it is available 152 | // (i.e. OpenCV 2.x) with 10 fold cross valdiation 153 | // N.B. this does not search kernel choice 154 | 155 | svm->train_auto(training_data, training_classifications, 156 | Mat(), Mat(), params, 10); 157 | params = svm->get_params(); 158 | printf( "\nUsing optimal parameters degree %f, gamma %f, ceof0 %f\n\t C %f, nu %f, p %f\n Training ..", 159 | params.degree, params.gamma, params.coef0, params.C, params.nu, params.p); 160 | #else 161 | // otherwise use regular training and use parameters manually specified above 162 | 163 | svm->train(training_data, training_classifications, Mat(), Mat(), params); 164 | 165 | #endif 166 | 167 | printf( ".... 
Done\n"); 168 | 169 | // get the number of support vectors used to define the SVM decision boundary 170 | 171 | printf("Number of support vectors for trained SVM = %i\n", svm->get_support_vector_count()); 172 | 173 | // perform classifier testing and report results 174 | 175 | Mat test_sample; 176 | int correct_class = 0; 177 | int wrong_class = 0; 178 | int false_positives [NUMBER_OF_CLASSES]; 179 | char class_labels[NUMBER_OF_CLASSES]; 180 | float result; 181 | 182 | // zero the false positive counters in a simple loop 183 | 184 | for (int i = 0; i < NUMBER_OF_CLASSES; i++) 185 | { 186 | false_positives[i] = 0; 187 | class_labels[i] = (char) 65 + i; // ASCII 65 = A 188 | } 189 | 190 | printf( "\nUsing testing database: %s\n\n", argv[2]); 191 | 192 | for (int tsample = 0; tsample < NUMBER_OF_TESTING_SAMPLES; tsample++) 193 | { 194 | 195 | // extract a row from the testing matrix 196 | 197 | test_sample = testing_data.row(tsample); 198 | 199 | // run SVM classifier 200 | 201 | result = svm->predict(test_sample); 202 | 203 | // printf("Testing Sample %i -> class result (character %c)\n", tsample, class_labels[((int) result) - 1]); 204 | 205 | // if the prediction and the (true) testing classification are the same 206 | // (N.B. openCV uses a floating point decision tree implementation!) 
207 | 208 | if (fabs(result - testing_classifications.at(tsample, 0)) 209 | >= FLT_EPSILON) 210 | { 211 | // if they differ more than floating point error => wrong class 212 | 213 | wrong_class++; 214 | 215 | false_positives[(int) (testing_classifications.at(tsample, 0) - 1)]++; 216 | 217 | } 218 | else 219 | { 220 | 221 | // otherwise correct 222 | 223 | correct_class++; 224 | } 225 | } 226 | 227 | printf( "\nResults on the testing database: %s\n" 228 | "\tCorrect classification: %d (%g%%)\n" 229 | "\tWrong classifications: %d (%g%%)\n", 230 | argv[2], 231 | correct_class, (double) correct_class*100/NUMBER_OF_TESTING_SAMPLES, 232 | wrong_class, (double) wrong_class*100/NUMBER_OF_TESTING_SAMPLES); 233 | 234 | for (unsigned char i = 0; i < NUMBER_OF_CLASSES; i++) 235 | { 236 | printf( "\tClass (character %c) false postives %d (%g%%)\n",class_labels[(int) i], 237 | false_positives[(int) i], 238 | (double) false_positives[i]*100/NUMBER_OF_TESTING_SAMPLES); 239 | } 240 | 241 | // all matrix memory free by destructors 242 | 243 | // all OK : main returns 0 244 | 245 | return 0; 246 | } 247 | 248 | // not OK : main returns -1 249 | 250 | return -1; 251 | } 252 | /******************************************************************************/ 253 | -------------------------------------------------------------------------------- /tools/dt_varimportance.cc: -------------------------------------------------------------------------------- 1 | // Example : decision tree variable importance 2 | // usage: prog tree.{yml|.xml} 3 | 4 | // For use with any test / training datasets 5 | 6 | // Author : Toby Breckon, toby.breckon@cranfield.ac.uk 7 | 8 | // Copyright (c) 2011 School of Engineering, Cranfield University 9 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 10 | 11 | // Copyright (c) 2011 School of Engineering, Cranfield University 12 | // License : LGPL - http://www.gnu.org/licenses/lgpl.html 13 | 14 | #include // opencv general include file 15 | #include // 
opencv machine learning include file 16 | 17 | using namespace cv; // OpenCV API is in the C++ "cv" namespace 18 | 19 | #include 20 | 21 | /*****************************************************************************/ 22 | 23 | // prints out the relative importance of the variables (i.e. attributes) used 24 | // for decision tree classification 25 | 26 | // Based on the mushroom.cpp example from OpenCV 1.0 27 | 28 | int print_variable_importance(CvDTree* dtree) 29 | { 30 | const Mat var_importance = dtree->get_var_importance(); 31 | 32 | if( var_importance.empty() ) 33 | { 34 | printf( "Error: Variable importance can not be retrieved\n" ); 35 | return -1; 36 | } 37 | 38 | for(int i = 0; i < var_importance.cols*var_importance.rows; i++ ) 39 | { 40 | double val = var_importance.at(0,i); 41 | printf( "var #%d", i ); 42 | printf( ": %g%%\n", val*100. ); 43 | } 44 | 45 | return 1; 46 | } 47 | 48 | /*****************************************************************************/ 49 | 50 | int main( int argc, char** argv ) 51 | { 52 | 53 | // check we have enough command line arguments 54 | 55 | if (argc == 2) 56 | { 57 | // define a decision tree object 58 | 59 | CvDTree* dtree = new CvDTree; 60 | 61 | // load tree structure from XML file 62 | 63 | dtree->load(argv[1]); 64 | 65 | // extract (and display) variable importance information 66 | 67 | if (print_variable_importance(dtree)){ 68 | return 0; // all OK 69 | } else { 70 | return -1; // all not OK 71 | } 72 | 73 | } else { 74 | 75 | // not OK : main returns -1 76 | 77 | printf("usage: %s decision_tree_filename.xml\n", argv[0]); 78 | return -1; 79 | 80 | } 81 | } 82 | /******************************************************************************/ 83 | -------------------------------------------------------------------------------- /tools/randomize.cc: -------------------------------------------------------------------------------- 1 | // Example : randomize the lines in a specified input file 2 | // (also removing any 
// Example : randomize the lines in a specified input file
// (also removing any empty lines in the file - i.e. no chars apart from "\n")

// usage: prog input_file output_file

// Author : Toby Breckon, toby.breckon@cranfield.ac.uk

// Copyright (c) 2009 School of Engineering, Cranfield University
// License : LGPL - http://www.gnu.org/licenses/lgpl.html

/******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <vector>

using namespace std;

#define LINELENGTHMAX 5000 // all file lines less than 5000 chars

/******************************************************************************/

int main( int argc, char** argv )
{

    vector<char *> inputlines;          // vector of input lines
    vector<char *>::iterator outline;   // iterator for above

    // check we have both filename arguments

    if (argc < 3){
        printf("usage: %s input_file output_file\n", argv[0]);
        return -1; // all not OK
    }

    // open input file

    FILE* fi = fopen( argv[1], "r" );
    if( !fi ){
        printf("ERROR: cannot read input file %s\n", argv[1]);
        return -1; // all not OK
    }

    // open output file

    FILE* fw = fopen( argv[2], "w" );
    if( !fw ){
        printf("ERROR: cannot write output file %s\n", argv[2]);
        fclose(fi);
        return -1; // all not OK
    }

    // read in all the lines of the file (allocating fresh memory for each);
    // "%[^\n]" fails to match on an empty line or at EOF, in which case we
    // must discard the buffer (otherwise an uninitialized buffer is stored)
    // and still consume the newline (otherwise the loop never advances)

    while (!feof(fi))
    {
        char * line = (char *) malloc(LINELENGTHMAX * sizeof(char));
        if (fscanf(fi, "%[^\n]", line) == 1)
        {
            inputlines.push_back(line);   // a real (non-empty) line
        }
        else
        {
            free(line);                   // empty line or EOF - skip it
        }
        fgetc(fi);                        // consume the terminating '\n'
    }

    // shuffle input file lines

    // "This algorithm is described in section 3.4.2 of Knuth (D. E. Knuth,
    // The Art of Computer Programming. Volume 2: Seminumerical Algorithms,
    // second edition. Addison-Wesley, 1981). Knuth credits Moses and
    // Oakford (1963) and Durstenfeld (1964)."
    // - SGI STL manual, http://www.sgi.com/tech/stl/random_shuffle.html

    random_shuffle(inputlines.begin(), inputlines.end());

    // output all of the lines to output file

    for(outline = inputlines.begin(); outline != inputlines.end(); outline++)
    {
        fprintf(fw, "%s\n", *outline);
        free((void *) *outline); // free memory also
    }

    // close files

    fclose(fi);
    fclose(fw);

    return 1; // all OK (N.B. historical convention in these tools)
}
/******************************************************************************/
// Example : select a subset of lines in a specified input file
// between a specified min and max line numbers INCLUSIVE
// (also removing any empty lines in the file - i.e. no chars apart from "\n")

// usage: prog min max input_file output_file
// where min and max are integer line numbers from the input file (range 1 to N)

// Author : Toby Breckon, toby.breckon@cranfield.ac.uk

// Copyright (c) 2009 School of Engineering, Cranfield University
// License : LGPL - http://www.gnu.org/licenses/lgpl.html

/******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <vector>

using namespace std;

#define LINELENGTHMAX 5000 // all file lines less than 5000 chars

/******************************************************************************/

int main( int argc, char** argv )
{

    vector<char *> inputlines;          // vector of input lines
    vector<char *>::iterator outline;   // iterator for above

    // check we have the correct number of arguments

    if (argc < 5){
        printf("usage: %s min max input_file output_file\n", argv[0]);
        exit(0);
    }

    // get min / max line numbers (tolerate them being passed in either order)

    int minL = min(atoi(argv[1]), atoi(argv[2]));
    int maxL = max(atoi(argv[1]), atoi(argv[2]));

    // usage documents line numbers in the range 1 to N, so the counter
    // starts at 1 (starting at 0 would shift the selection by one line)

    int lineN = 1;

    // open input file (argv[3], not argv[1] - the first two args are min/max)

    FILE* fi = fopen( argv[3], "r" );
    if( !fi ){
        printf("ERROR: cannot read input file %s\n", argv[3]);
        return -1; // all not OK
    }

    // open output file

    FILE* fw = fopen( argv[4], "w" );
    if( !fw ){
        printf("ERROR: cannot write output file %s\n", argv[4]);
        fclose(fi);
        return -1; // all not OK
    }

    // read in all the lines of the file (allocating fresh memory for each);
    // "%[^\n]" fails to match on an empty line or at EOF, in which case we
    // must discard the buffer (otherwise an uninitialized buffer is stored)
    // and still consume the newline (otherwise the loop never advances)

    while (!feof(fi))
    {
        char * line = (char *) malloc(LINELENGTHMAX * sizeof(char));
        if (fscanf(fi, "%[^\n]", line) == 1)
        {
            inputlines.push_back(line);   // a real (non-empty) line
        }
        else
        {
            free(line);                   // empty line or EOF - skip it
        }
        fgetc(fi);                        // consume the terminating '\n'
    }

    // output selected lines (min..max inclusive) to output file

    for(outline = inputlines.begin(); outline != inputlines.end(); outline++)
    {
        if ((lineN >= minL) && (lineN <= maxL))
        {
            fprintf(fw, "%s\n", *outline);
        }
        lineN++;

        free((void *) *outline); // free memory also
    }

    // close files

    fclose(fi);
    fclose(fw);

    return 1; // all OK (N.B. historical convention in these tools)
}
/******************************************************************************/
/*****************************************************************************/

#include <stdio.h>
#include <limits.h>

// volatile so the signedness probe below cannot be constant-folded away

volatile int char_min = CHAR_MIN;

// Prints the size (and where available the min/max range) of each of the
// fundamental integer and floating point types on this platform.

int main(void)
{
    printf("\n\n Character Types\n");
    printf("Number of bits in a character: %d\n",
           CHAR_BIT);
    printf("Size of character types is %d byte\n",
           (int)sizeof(char));
    printf("Signed char min: %d max: %d\n",
           SCHAR_MIN, SCHAR_MAX);
    printf("Unsigned char min: 0 max: %u\n",
           (unsigned int)UCHAR_MAX);

    // whether a plain 'char' is signed or unsigned is
    // implementation-defined - probe it at run time

    printf("Default char is ");
    if (char_min < 0)
        printf("signed\n");
    else if (char_min == 0)
        printf("unsigned\n");
    else
        printf("non-standard\n");
    printf("*** This is %d bit character representation\n",
           (int)sizeof(char) * 8);

    printf("\n\n Short Int Types\n");
    printf("Size of short int types is %d bytes\n",
           (int)sizeof(short));
    printf("Signed short min: %d max: %d\n",
           SHRT_MIN, SHRT_MAX);
    printf("Unsigned short min: 0 max: %u\n",
           (unsigned int)USHRT_MAX);

    printf("\n Int Types\n");
    printf("Size of int types is %d bytes\n",
           (int)sizeof(int));
    printf("Signed int min: %d max: %d\n",
           INT_MIN, INT_MAX);
    printf("Unsigned int min: 0 max: %u\n",
           (unsigned int)UINT_MAX);
    printf("*** This is %d bit representation\n",
           (int)sizeof(int) * 8);

    printf("\n Long Int Types\n");
    printf("Size of long int types is %d bytes\n",
           (int)sizeof(long));
    printf("Signed long min: %ld max: %ld\n",
           LONG_MIN, LONG_MAX);
    printf("Unsigned long min: 0 max: %lu\n",
           ULONG_MAX);

    // mild addition by Toby Breckon, toby.breckon@cranfield.ac.uk

    printf("\n\n Float Types\n");
    printf("Size of float types is %d bytes\n",
           (int)sizeof(float));
    printf("*** This is %d bit representation\n",
           (int)sizeof(float) * 8);
    printf("\n Double Types\n");
    printf("Size of double types is %d bytes\n\n",
           (int)sizeof(double));

    return 0;
}

/*****************************************************************************/