├── .gitignore ├── README.md ├── data ├── census.names ├── census.test ├── census.train ├── dna.names ├── dna.test ├── dna.train ├── ocr17.names ├── ocr17.test ├── ocr17.train ├── ocr49.names ├── ocr49.test └── ocr49.train ├── results ├── forest_census.txt ├── forest_census.txt-plot.png ├── forest_dna.txt ├── forest_dna.txt-plot.png ├── forest_ocr17.txt ├── forest_ocr17.txt-plot.png ├── forest_ocr49.txt └── forest_ocr49.txt-plot.png └── src ├── Adaboost.java ├── BaselineClassifier.java ├── BinaryDataSet.java ├── Classifier.java ├── DataSet.java ├── DecisionForest.java ├── DecisionTree.java ├── DiscreteDataSet.java ├── Makefile ├── MultiLayerNeuralNet.java ├── NumericDataSet.java ├── SingleLayerNeuralNet.java ├── TestForestSize.java ├── TestHarness.java ├── TestkNN.java ├── forestGraph.py └── kNN.java /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | bin/* 3 | data/*.testout 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A series of standard Machine Learning (classification) algorithms implemented in Java. The repository includes: 2 | 3 | - AdaBoost 4 | - Decision Trees 5 | - Decision Forests 6 | - kNN 7 | - Perceptron Learners 8 | - Neural Networks 9 | 10 | Each of these implementations satisfies the Classifier interface. 11 | 12 | Additionally, BaselineClassifier.java provides a classifier that simply predicts every example to be the most frequently occurring classification in the data set, as a point of comparison for the other implementations. 13 | 14 | ## Data Sets 15 | 16 | DataSet.java allows users to read data from a data set as specified by the 'example' files in the data/ directory. Once parsed, users can easily extract information about the target data set using the DataSet class. 
17 | 18 | TestHarness.java can be used to test any Classifier on any DataSet through the use of a holdout set. 19 | 20 | ## Contact 21 | 22 | For any questions, please reach out to David Dohan (ddohan@princeton.edu) or Charlie Marsh (crmarsh@princeton.edu). 23 | -------------------------------------------------------------------------------- /data/census.names: -------------------------------------------------------------------------------- 1 | >50K <=50K 2 | 3 | age numeric 4 | workclass Private Self-emp-not-inc Self-emp-inc Federal-gov Local-gov State-gov Without-pay Never-worked 5 | education Bachelors Some-college 11th HS-grad Prof-school Assoc-acdm Assoc-voc 9th 7th-8th 12th Masters 1st-4th 10th Doctorate 5th-6th Preschool 6 | education-num numeric 7 | marital-status Married-civ-spouse Divorced Never-married Separated Widowed Married-spouse-absent Married-AF-spouse 8 | occupation Tech-support Craft-repair Other-service Sales Exec-managerial Prof-specialty Handlers-cleaners Machine-op-inspct Adm-clerical Farming-fishing Transport-moving Priv-house-serv Protective-serv Armed-Forces 9 | relationship Wife Own-child Husband Not-in-family Other-relative Unmarried 10 | race White Asian-Pac-Islander Amer-Indian-Eskimo Other Black 11 | sex Female Male 12 | capital-gain numeric 13 | capital-loss numeric 14 | hours-per-week numeric 15 | native-country United-States Cambodia England Puerto-Rico Canada Germany Outlying-US(Guam-USVI-etc) India Japan Greece South China Cuba Iran Honduras Philippines Italy Poland Jamaica Vietnam Mexico Portugal Ireland France Dominican-Republic Laos Ecuador Taiwan Haiti Columbia Hungary Guatemala Nicaragua Scotland Thailand Yugoslavia El-Salvador Trinadad&Tobago Peru Hong Holand-Netherlands 16 | -------------------------------------------------------------------------------- /data/dna.names: -------------------------------------------------------------------------------- 1 | S N 2 | 3 | -30 A C G T 4 | -29 A C G T 5 | -28 A C G T 6 | -27 A C 
G T 7 | -26 A C G T 8 | -25 A C G T 9 | -24 A C G T 10 | -23 A C G T 11 | -22 A C G T 12 | -21 A C G T 13 | -20 A C G T 14 | -19 A C G T 15 | -18 A C G T 16 | -17 A C G T 17 | -16 A C G T 18 | -15 A C G T 19 | -14 A C G T 20 | -13 A C G T 21 | -12 A C G T 22 | -11 A C G T 23 | -10 A C G T 24 | -9 A C G T 25 | -8 A C G T 26 | -7 A C G T 27 | -6 A C G T 28 | -5 A C G T 29 | -4 A C G T 30 | -3 A C G T 31 | -2 A C G T 32 | -1 A C G T 33 | +1 A C G T 34 | +2 A C G T 35 | +3 A C G T 36 | +4 A C G T 37 | +5 A C G T 38 | +6 A C G T 39 | +7 A C G T 40 | +8 A C G T 41 | +9 A C G T 42 | +10 A C G T 43 | +11 A C G T 44 | +12 A C G T 45 | +13 A C G T 46 | +14 A C G T 47 | +15 A C G T 48 | +16 A C G T 49 | +17 A C G T 50 | +18 A C G T 51 | +19 A C G T 52 | +20 A C G T 53 | +21 A C G T 54 | +22 A C G T 55 | +23 A C G T 56 | +24 A C G T 57 | +25 A C G T 58 | +26 A C G T 59 | +27 A C G T 60 | +28 A C G T 61 | +29 A C G T 62 | +30 A C G T 63 | -------------------------------------------------------------------------------- /data/dna.train: -------------------------------------------------------------------------------- 1 | A G G G G G A A G C G T G G G T G G G G A A T G C C A C C T T A A G C T G G T G C C A G C A T A C A C C A T A C T T T A N 2 | G C A G G A T C C T A C A C C T T A C A C A T C A T A A A G G G A G A T G A T G G G A C T A G A G G A G T A A C T G G A N 3 | C C T G A C A C T A A G G A A A T G C T G A A G C T T T T G G T A A G T G T T T G C T G G A T T C C T A A A G T G G T A S 4 | T C A G C T T T G T T T G T T T T C T C T T T C T G A T A G A G T G C C C A C G A C C C T C C G G C T G T C C A C C T C S 5 | C T C C C T G A T G C C C T C A G A A T C T C C C C A C A G G C C G C C T G A T C T T T G A C A A C T T G A A G A A A T S 6 | G T A T A T G C A T T T G G A A T T G T T C T G T A T G A A T T G A T G A C T G G A C A G T T A C C T T A T T C A A A C N 7 | G T T T T A C C C A A T A A G G C A C C A C C C A C C T A T G A T A C T G T G C T A C A G A T G G A G T A T C T T G A C 
N 8 | T A C T G A T T T T C A T T T C T C T T T T T C T T C T A G A A T G T C T T G A T T G T G G A A G T A A G T T C A C A T S 9 | A G G C C C T G G C C C T T A C C C A G A C C T G G G C G G G T G A G T G C G G G G T C G G G A T G G A A A C G G C C T S 10 | G A C A A C T T C C C C A G A T A C C C C G T G G G C A A G T T C T T C C A A T A T G A C A C C T G G A A G C A G T C C N 11 | G G G C C G A T C T C A G C C C C T C C T C G C C C C C A G G C T C C C A C T C C T T G A A G T A T T T C C A C A C T T S 12 | C A G G A C C T C A C C A C G G A A A G C A A C C T G C T G G T A C G T G G G C C A T G A C T G C C A T C T T G G C T T S 13 | A A T A T T A C C T G A G G T A A G G T A A G G C A A A G A G T G G G A G G C A G G G A G T C C A G T T C A G G G A C G N 14 | C A A A T T G T G G A C G T G A T T C C C T T T C T C A G G G T G A G G A C C T G G A G C C T A G A C A C C C C T G G G S 15 | C T C T G C C T G T G C A C G G C C A G C T G C A T C T A C T C A G G C C C C A A G G G G T T T C T G T T T C T A T T C N 16 | T G T G G A G C A G T C T T C G T T T C G C C C A G C C A G G T T G G T G T G C A G G A T C C C T G T G T C C C G C C C S 17 | C T G A A A T C C G G C A T G T T C T T G T C A C A C T G G G T A A G G T T C T G T G T C C T T G T C C T T G A G C T G S 18 | C T C T G G C T T C T T C C T C C T C A A T C T T G A C A G A A A A A G G G T G C A G A C G T C T G G T T C A A A G A G S 19 | A G G A T G G C G A T C A G A A G C T G G A G A A G A T C A G T G A G T G C C A G G C T G G C C C C T G G G G C T G G G S 20 | C A T T G T A C T C C A G C C T G G G C A A C A G G A G T G A A A C T C T G T C T C A A A A A A A A C A A A A A A C A A N 21 | C C G G G C C T C C T T C G T C A C T G G C G T C A A C T G C G A G A C C A G C T G C G T T C A G C A G C C G C C C T G N 22 | G A T T C T C C T G T G C T A G A T G T G C A A A T G C A A G C T A G T G G C T T C A A A A T A G A G A A T C C C A C T N 23 | T C T C A G C T A A T T T T T A A T T T A T T T T T T T A G A G A T A C G G T C C C A C T 
C T G T T T C C C A G G C T G N 24 | A C T C C C T T A C A A A T G A A T G G C A T C G A A G A G G A T T C T G A T G A G C C T T T A G A G A G A A G G C T G N 25 | G C C G C C C T C C C A T T G A T T G G C C A T G A G G G A A G G A A G T C G C C T G G G T G C C C C T T G G C C C T T N 26 | A G G T A T T T A G T A T G C T G T A A A T A T T T T T A G G T A T T G G T A C T G T T C C T G T T G G C C G A G T G G S 27 | A A A T C C A A C A C C T C A G C G A T G T G G G C A C C A C A T T C A A C C T C A T T C T G A T G C C T G A A A A G C N 28 | C A C G G C C C A T C T C C C C A C A G G T C A A C G C T G A G G G C T C C G T G G A C A G T G T C T T C T C C C A G G S 29 | T C T C A C T T C G G G G G A A A A A T A A C T G A G G T A A G G G C C A T G G C A G G G T G G G A G A G G C G G T G T N 30 | A A T T A T C A A T G T T C T A G T T C T G T G G C C A T C T G C T T A G T A G A G C T T T T T G C A T G T A T C T T C N 31 | G G G G C C T G A T G G A C T G T C C C C A T C C T G C A G A T T G A G G A G A G C A C C A C A G T G G T C A C C A C A S 32 | G A G T T A A A G A A A C C C T T T T C C A T T G A G G A G G T G G A G G T T G C A C C T C C T A A G G C C C A T G A A N 33 | G T C C C C T C A C A G G G C A T T T T C T T C C C A C A G G T G G A A A A G G A G G G A G C T G C T C T C A G G C T G S 34 | A G A G A C C C T T T G A A G T C A A G G A C A C C G A G G A A G A G G A C T T C C A C G T G G A C C A G G T G A C C A N 35 | C C T G G G G T G C C T T G G G T C A G G A C A T C A A C T T G G A C A T T C C T A G T T T T C A A A T G A G T G A T G N 36 | G C T G G A A A C C A A A G C A A T C A T C T C T T T A G T G G A A A C T A T T C T T A A A G A A G A T C T T G A T G G N 37 | C C T T C A G C T C A G C T C A G G A T T T A A A T G A A G G T A A G A T G A A T T G G G G G A A G A T A T T G T G A C S 38 | G G C T G C C G G A G C C C C T C A C C C T G A G A T G G G G T A A G G A G G G G G A T G A G G G G T A C T G T G T C T S 39 | C T C T C C C T C C C T G A G C T G G A G C A A C A G C A 
G G A A C A G C A T C A G G A G C A G C A G C A G G A G C A G N 40 | G T A T A C G T G G C A A T G C G T T G C T G G G T T A T T T T A A T C A T T C T A G G C A T C G T T T T C C T C C T T N 41 | A T C C A G A A C T G C C C C C T G G G A G G C A A G A G G G C C G C G C C G G A C C T C G A C G T G C G C A A G G T G N 42 | C C C C T G G C T G A T G C C A C T C C T T G C C A G C A G A G C G T T T C T T G A A T C C A G C G G G G A C C C A G C S 43 | T G C T C C A G G G A G C T G A G G C C C T T C A A A G C C C A G T A C A T G C T G G T G G T T C A C A T G C G C A G A N 44 | G A A T T C T C A G T A G C T T C T T T G T G T G T G T G T A C T C A A C T C A C A G A G T T G A A C C T T C C T T T A N 45 | T G G T G A C C G G G A G C G T G G G A G G A T T G A T G G G T G A G T G G T A G G G A G T G G C C T C G G C A G C T C S 46 | G C G T C C G A C T G C G G G G C A G T G A A G C A C T G C C T G C A G A C C G T T T G G A A C A A G C C A A C A G T G N 47 | G C C T G C C T C T G C C G G G A A C A C C C T G A C T T C C C T G C C T G C C C A C T C C T G G T T A T C T A A G G C N 48 | C G C A C A C G G G G G G C A C A G C G C C T C C C T C G C G A G C C G G T G C G A A A G G G G C C G C C T G C G G T G N 49 | A C A C A T T C T A A A T A T G T G G C C T A G A A G A T T T T G G T C T A C T T T T C T G T G A A C A A A A T T T A A N 50 | G T T G C C C G T G G G G T G C A A A A G A T C C T G C A G G T G A G T A T A T T A C T A T G T G G G A T C A G T G T C S 51 | G A C A G A G C C G A G A C T C C A T C T C A A A A A A T A C G A A A A C A A A A A T C A G C C G G G T G G T G G C G G N 52 | A G C T G A G C C C G G C A G A T A C T A T G T T G C A T C A G C T T T C A C G C T T G C A G T T A A T A T C A T T G C N 53 | T A G A T A C A T C A A C A T G C T G A C C A G G C C T A G G T G T G T G C C A C A G T T G G G G A G A G A G A T C C C S 54 | C G G G G T C A C G T G C T C G C C A A G G A G C T C G A G G C G T T C A G G G A G G C C A A A C G T C A C C G T C C C N 55 | T G A A T T T G T T C A T 
G A A T A T T T T T T C T A T A G T G T G A G A C A G C T G C C T T G T G T G G G A C T G A G S 56 | G G C T T T G A G A A G G G T G G A C A G A G G C T C C T A G C C C C A C C A G G G A T G C T T A G C C A A G C A G T C N 57 | A C C G T T C A C T G G C T T T T T C T C T T C T A G C A G G T G T C T C C T G C A C C T G C G C C A G C T C C T G C A S 58 | C G G A C C A G C T C A T G G C C T T C G G C G G C T C C A G C G A G C C G T G C G C G C T C T G C A G C C T G C A C A N 59 | T T T C C A T C C T T C T T T C T G T G G A C C G A G T A G A C A G T C T C C C A C A G G C T G C G G C T G T A C G A G S 60 | G C G C T G G C A C C C A G C A C C A T G A A G A T C A A G G T G G G T G G T G G C C T G C G C G G G C T G T C G G C G S 61 | C C A C T C T A T T T G C A T T T T G T T G C A T T T C A G A G G A A C A T C A A G A A A T C A T G A A C A A C T T T G S 62 | C C C T C G T G C G G T C C A C G A C C A A G A C C A G C G G T G A G C C A C G G G C A G G C C G G G G T C G T G G G G S 63 | C T G G T G T T T T T T C C T C A A G C C T T C T G T T T A C T T A T G G G A A G C T A C T A T G G T G G C T G C T T A N 64 | T T C A G C A C A T C T G G A C T C T T T A A C T T C T T A A A G A T C A G G T T C T G A A G G G T G A T G G A A A T T N 65 | C T A T C G C T C T A T C T C A C A A A G A G A C T A A A A A A G T G A G G A C G A G A G T A A G A A C T C C C A C T A N 66 | G C C G C C T A A C A C T T T G A G C A G A T A T A A G C C T T A C A C A G G A T T A T G A A G T C T G A A A G G A T T N 67 | A C C T G G T C C T C C T G G C C C C C C T G G T C T C G G T G G G A A C T T T G C T G C T C A G T A T G A T G G A A A N 68 | A G G A G G A A C C C A T C G A T C T G T C T T T G G C A G A T G G G A A A C G A C T C T C G G A T T A T A G C A T C G S 69 | A G G T T T T C T T C A G G C A G A G G G T C T C T T C A G G T A A C T G A T G G A A A C C C C T G G C C A T G G G G T S 70 | C T C A A T T T T G T A G A A A A A T T T G T A T T T C T T T G T G A T A A T G A C T T T A G T G T T G T A T T T T T T 
N 71 | C T C T T A A A A C A T C T C C T A C A G T T T T A A A T T T C A C A T T T A C T A C T T T A A T T C A T C T G G G A T N 72 | G T A T C C C C A C T G C C T T G T A T G T G G T G A A A C C A A A G G G T T A C T T T T A C A C T T A A T C T A T A G N 73 | G G A G T T G G C T G G A G T C A T C C T C T C C C A C A A T G T G A A A T T G T C A A G T G T A A G C C T C C T C C A N 74 | G G C T G A G G C A G G A G A C T C A C T T G A A C C T G G G A G G C G G A G G T T G C A A T G A G C T G A G A T C G C N 75 | T T G A T T T C A G T G C T G G A T T A T T C T T T G C A G A A A A T T T G A G A A G C A A T G G G C A T C C T G A A G S 76 | G C C A T C G A C A G T T T T C T C A G A A T A C C T T T G G T A A G T G G G G C T G G G G T G G G C G T T A T T T C A S 77 | C T T T T C A G T C A T T C C T C A T T C T G T C C T C A G G A A T G T C C C A A G C C T T T G A G T A G G G T A A G C S 78 | A T T A T T A T T A T T A T T A T T A T T A T T A T G T A G G G G A C C T G G A G C C T T G A G G G A A A T A A A C T T S 79 | T G A G C A A T C T G C T T C T T T C A G A G G G T G T G T G C G T T C T C T C A G C T T T C T T G A A T T T A T T T C N 80 | G A G A A A G G A T T C C A G A G G C T A G C T C C A A A A C C A T C C C A G G T C A T T C T T C A T C C T C A C C C A N 81 | A A G A G T G C A A A T G C A C C T C C T G C A A G A A G A G T G A G T G C G G G G C C A T C T C C A G G A A T C T G G S 82 | T G A C C T G A T C T T T G C T C T C C C C C T G G C C A G T T G A G G A G G A G A A C C C G G A C T T C T G G A A C C S 83 | C C A T G G G A G A G T G G C C A T C G T G G T G G G C G C C C C G C G G A C C C T G G G C C C C A G C C A G G A G G A N 84 | A C T G A A A G T A C A G A G A A A T G T T C A G A A A A T G A A A A C C A T G T G T T T C C T A T T A A A A G C C A T N 85 | T C C A T C T T C T G C A G G G T T A G T G A T A C A G A T G C T A G C T T T T T C A C T A A A G A G G T C T T T T A G N 86 | C C A C C C T C C A G C C C C C A C C T C C T C C T G C A G A C A A G C T G G T G T C T 
A A G A A C T A C C C G G A C C S 87 | T G T T T C A G T T T G A G A A T T C T T T T C T T C T A G G A A A C A G T G A C T G C T A C T T T G G G A A T G G G T S 88 | A C T G T C T G T C T T T C T G T C T G T C C C A C C C A G G C T G C A G G A G G A G A T T C A G T T G A A G G A A G A S 89 | C C T G A C G T C A C A G A A G C C C T C T G T C C T C C T C A C T T T G C G T G G C G T C A A C A C G G A C A G C G G N 90 | G T G G A G C A G C T G A C C C A G G A G T T C T G T G A G G T A A G G C T G G G C T C C T G A G G C C A C C T C G G G S 91 | C C T G G G T G C T T A G C C C G T A A C C C A A C T C C A T C C T A G A C C C C T G T C C T C T G T G A G G C C T C C N 92 | A G C A T T G G C A G G A G G G G C A A G G T G G A A C A G G T G A G G A A C T C T A G C G T A C T C T T C C T G G G A S 93 | A T T C C A A A A A T G C T T T C T T T C C C C C T G C A G G T G A G A A G A T G A C A G A G G A A G A A G T A G A G A S 94 | G A G G A T G A A G C T A A G G A G C T G G C T G C G G A G G T G T G G G G T C T G G G A T G C C T G G G A C C C A G G S 95 | A A A A G G A A A T A T C C T C A G A T G A A A T C T G G A A A G A A G C T T T C T G A G A A A C T G C T T A G T G T T N 96 | G A C C G C T T C C C G T A C G T G G C T C T G T C C A A G G T A A G T G C T G G G C T A C C T T A G A G T C C T C C A S 97 | A G C C T C C T G T T G G G C A A T T T C C T C T T C C A G A A T C A A C T C C A C T A C C C A T C C T G G G G C C G A S 98 | G T G G A G A A G A T T G G G G G C T G G C T G A G G A A A A A T G T G C T G G T G G T A G C T G C A G C A G C C C T T N 99 | G T C A T C G A A G G G A A G G G T G G G G G G C A G C T G C G G T G G G G A G C T A T A A A A A T G A C A A T T A A A N 100 | G G T A A C A A G G G G A G G G G C C A G G A C A C A G T T T T T C C T G A T T T A A A C C C A G G C A G C C T G G A T N 101 | C A T C T T T G T T T T C T C C T T T C A T C T C A A C A G C T C C T G G G A A A T G T G C T G G T G A C C G T T T T G S 102 | G A G A T T T G A T G C C A A G C T G G C C A T C G 
T G G G C A T T G C C T T C T C C T G G A T C T G G G T A A G G G T N 103 | A C T T T G G G T A C T G C G A C C T C A A C T A T T G T G G T G A G C T G C C T G G G T A G G G G G C C T G A G T T G S 104 | G A G C A G T T C A C G G C C A T G T T C C G G C G C A A G G C C T T C T T G C A C T G G T A C A C G G G C G A G G G C N 105 | A C T C G C T G T A T A C A A T A C C A A A G G G C T T T T T T G G G T C C C A C C T C C T G C C T T T T G C T T T T C N 106 | A T C C A G T C T A A T A A G A A A A A G A T C A A C T C G T A G G A G T G T C C G T G G A T C A C A A G C C C A A G A N 107 | C G A G T G A C A A G C C T G T A G C C C A T G T T G T A G G T A A G A G C T C T G A G G A T G T G T C T T G G A A C T S 108 | G T T C T A A T C A T T T C A C C A T T T T T G T T A T T C G T T T T A A A A C A T C T A T C T G G A G G C A G G A C A N 109 | A G C T G C T C T C A G G C T G C G T G T A A G T G A T G G C G G T G G G C G T G T G G A G G A G C T C A C C C A C C C N 110 | T C C A G C A T G G G C T C G C C T G T C A A C G C G C A G G T A A G G C T G G C T T C C C G T C G C C G C G G G G C C S 111 | T C C C C G A G C C C T C A G C A C A G C A G C T G C G A G A G A T C T T C A A C A T G G C G A G G G A T C A G C G C A N 112 | A T T C T G T C A C G T C T G T C A T G T G T C C C C C A G T A C C T C C A G A G G T A A C T G T G C T C A C G A A C A S 113 | G A C C T C T C C T T C T C T T C C T T C A C T T T G C A G A G G C T G G A A G A C G G C A G C C G C C G G A C T G G G S 114 | T G C G C G G C T A C T A C A A C C A G A G C G A G G C C G G G T C T C A C A C C C T C C A G A G G A T G T A T G G C T N 115 | A G C A A G C C T C A C A C A C T T C T G C T C C T T C A G G G A T G G G G G T G T C T A C G G T G A C A G C T G C C A S 116 | G A C C A G G T C T T T T T T T T T G T T C T A C C C C A G C C A G C A A C A G T G C C C A G G G C T C T G A T G A G T S 117 | C C C A T C T T A A T C C C G A G A G T C C G G A G G G C T G T G C T A C C C C A A G A A G A G G A A G G A T C A G G G N 118 | G T 
G G C T T T A T T C T C T T T G C T C C A G A T G G T C C A G A C C T C C C C A G A A T T T A C C C T T C A T T C A N 119 | T G G C A C C A A C C C G G C C C T C C T T G T C C A C A G G G C A C C A G C C C C A G A A G G T G G C C C G G C G C G S 120 | T T C T A T G A G G C A T T C T C T A A A A A T C T C A A G G T A A A A A G G C A A A T A A T G C T T A T T C C C T T T S 121 | T A A T A T A C A A A A T C A T A T A T A T A T A T A T G T T C T T G T T T T T T G A G A C A G A G T C T C A C A C T G N 122 | G C T A C C C C C T G C G A G G G A G C T C C A T C T T C G G T A G G C C T G G G G A T G A G T G G C A G G T G C T G C S 123 | T C T T G G C T C A G T C T C C A C C T C C A A G A T C A A G T A A G G G A C A G T G G G C A T T G C C T G T A T T C A S 124 | C C T T T G A G G A C A G C A C C A A G A A G T G T G C A G G T A C G T T C C C A C C T G C C C T G G T G G C C G C C A S 125 | T C A A T C T A A G G C T T T T G T G A T C G T C C A C A G G T G A C A A T C T C C A A T A G A C T T G T G T C T T C A S 126 | A T G A G G C T G A C A C C A C C G A A G A T C A G T C T G G T G A T T G G G T G C T C C A G A G G G G G T G G A T A G S 127 | C T G A G G A C C A C A C A C C A C T T C C C A C T C C A G G C T G A G C T G G A G A T T C A G A A A G A C G C C C T G S 128 | A G G G C C C C T C A C C T T C C C C T C C T T T C C C A G A G C C A T C T T C C C A G C C C A C C A T C C C C A T C G S 129 | A A G G T G A A A A A G G A G G A G G A A G A A G A A G A A A A C T T C C C A G G T C A G G C A T C C A G C C A A C A A N 130 | G C A A A A T T T C C A A T C A A A T G G A C A G C T C C T G A A G C T G C A C T G T A T G G T C G G T T T A C A A T A N 131 | T T A G A C A T C A A A T C C T G C A G A C A A G G G G A G G T A A G G G G A C C C C C T G G G C T C A C G G G G T A G S 132 | T T T T C G T T A A C A A A A C C T T T G T G G A C T C A G G T G G A G A G A C T T A T C C A A G A A C G T G G C T G G S 133 | A G G C A G C C T T C G A C G T C A A T A A C A A G G A T G T G T C G G T G A T G A 
T G A G C G A G A T G G A C G T G A N 134 | A T G A C C T T T G G G C T T T G T G G A C A G C T A C T G A G G T A A G G G T C T C T C C C C C T C A A A A G T G G T N 135 | A C C A G T T G T C T C T G T T T G G G C A T T C C C T C T G T A A G T A T A G T G A A A T A A C A T A A T G T T G A C S 136 | C T C A C A T T T A A G T T T T A C A T G C C C A A G A A G G T A A G T A C A A T A T T T T A T G T T C A A T T T C T G S 137 | T C C A A C A G G G A G G A A A C A C A A C A G A A A T C C G T G A G T G G A T G C C T T C T C C C C A G G C G G G G A S 138 | T G C G C G G C T A C T A C A A C C A G A G C G A G G C C G G T G A G T G A C C C C G G C C C G G G C G C A G G T C A C S 139 | A G C C A C C T T T G C C T T T G C T G G A C T T C A A C A A C C T C A A T G G G G A A G A C C A A G A C A T T C T G A N 140 | C T C T T G C C T G G A C A T G T C C A G G C C T A C C A G G T G G G T C C T G T G A G A A G G A A T G G A G A G G C T S 141 | T G G T G A T G G C A C T A T A A C A A C A A A G G A A C T T G G G A C T G T A A T G A G A T C T C T T G G G C A G A A N 142 | G A T G G A A T G A A C C T G T G T A T G G C A G A A A T A C A G G A C A C T T C T C A G G A G T A A T G A C A A T T T N 143 | T T C A C C C C A C A G G T G C A G G C T G C C T A T C A G A A G G T G G T G G C T G G T G T G G C T A A T G C C C T G N 144 | T G T G C A C A G A C A G G A A G G T G T G T A A A C C G C A G A C T T C T T T G C A A T G G G G A C A A T G A C T G T N 145 | T G T T T C T T G T C C T C C T G G T A T T G G G A T T T G G T G A G T G T G G G C T T C C G G G G A G G G A A G C C T S 146 | T T A A C T T T T G G A A A A T T T T G T T C T T A A T A G G A A C A G G A G T A C A G C T G T G T A G T A A A G A T G S 147 | C C C C A T C A C C A G C T C C C C C C T T C T C C C C A G A T C A C T G G C A A G T G G T T T T A T A T C G C A T C G S 148 | T G T T A C T T T C C T C T T C T G T G A T T T C T C T A C T C A T T T T T T T T T T C A G G A C A C A G T G G T G G C N 149 | T T G A G G A G A A G A T C A C A C 
A C A G T C C C C T G A C C A T C T G C T T C C C T G A G T A C A C A G G G G C C A N 150 | G C T G A T T C T C T C C T C C T C C C T C T T C T G C A G G C T C A A A A A T G A C C A G G C T A A C T A C T C G C T S 151 | G A T T C T T A C A G A A A A C A A G T G G T T A T A G A T G G T G A A A C C T G T T T G T T G G A C A T A C T G G A T N 152 | G C A C C T T C T T T C C C T T C A T C T T T G A A G A A G G T A G T T A G C C A A G A G C A G G C A G T A G A T C T C S 153 | C C T T G C T C C T G C C G C T G G C C T T G C T G C T C C G T G A G T T T G A G A C G C C C G G G A A G G C A G G G G S 154 | C T C C A T G C T C C C A C A T C T T C C A T T T T T C A G A T C A C C T T C T A C G A G G A C A G G G C C T T C C A G S 155 | T C T C C A T C G C C T T G T C T G T G G G G T G C A C T G G T G A G A T T G G G G G G A T A A A G G A A G G G G G G C S 156 | A C A T G T T G C C C T C T G G A T A A A T T C T C A A G G A G G C A G A A A G G T G A A A G C T G G A G A T A C T G T N 157 | C A G G C T G G A G T G C A G T G G C A C G A T C T C T G C T C A T T G C A A G C T C C G C C T C C T G G G T T C A C G N 158 | T T C T T T A T G T G T A A A A T C T T C T A C A A C A T T T C T G T T T A A A C A T C T C C A T C T T C T G G G G A G N 159 | T G A G A A G G A G A G A C A A A T C A A G A A A C A A A C G T G A G G A G T A T T T C A T T A C T G C A T G T G T T T S 160 | C A A A T T A C C T G T G C C T T T T C C A T C C T G C A G A T T C T C A C A G C C A C A G T G G A C A A T G C C A A T S 161 | A A T T G A C T G G G G A C G C A G T C T T G T A C T A T G C A C T T T C T T T G C C A A A G G C A A A C G C A G A A C N 162 | G G G A T C C A C T C A A G G C T C C C T T G C C C A C A G G T C C T C A T G C C T C T C C T C C T C T T G C T G C T C S 163 | G A A T T T T T G C C A A G C A G G A A A A G A A C T C A G G T G A G C A G A A A C A C C T T T G C T T T T C A A T C A S 164 | G A G G A G C T T C C A A A T T A T C C A T T A G C A C A A G C C C G T C A G T G G C C C C A T G C A T A A A T G T 
A C N 165 | G A T G T T A A C C A T T C T C C T T C T C C C C A A C A G T T C C C C A G G G A C C T C T C T C T A A T C A G C C C T S 166 | C A C C G C C T A C C T G C A G T G G A G C A G C C T G A A G G C C T C G G A C A C C G C C A T G T A T T A C T G T G C N 167 | C C G G C G A G G C T G A C G G A T C G T C C C C T G C A G G G C G T C A T G G T G G G C A T G G G C C A G A A G G A C S 168 | A G G G G C T G C C G C A G C C G C C G C G A G C C T C C G G A C A G A C G C C A G A G C G A G G A G G C G C T A C G C N 169 | A A G G G A G G A A C A G A A A T G A G A C C C C T A T C T T T C C C T A T A A A A A C A A C A T T T T T A C T G T C T N 170 | C C C T C C T T C T G T G T G G G G C A C T C T C C A C A G G G C T T C G A G C T C A T C T A G A T G A G G A G C T C C S 171 | G G G A C G G G C T G A C C A C G G G G G C G G G G C C A G G G T C T C A C A C C C T C C A G A G G A T G T A C G G C T S 172 | T T A T G A G C C A A T C A G A G G T G T T G A A T A A A C A C C T C C C T A C T A G G T C A A G G T A G A A A G G G G N 173 | C C G C T C C A G G T G G C G G G C G G C T G G A G C G A G G T G A G G C T G C G G G T G G C C A G G G C A C G G G C G S 174 | C C C G G G A G C C T C G G G C C C G G C G C C C T C A C A C C C G G G G G C G T C T G G G A G G A G G C G G C C G C G N 175 | T G A C C T G A T C T T T G C T C T C C C C C T G G C C A G T T G A G G A G G A G A A C C C G G A C T T C T G G A A C C S 176 | T G T T T T T A T T C C C C A C T A C T C T T C T C T C T A T C A G A T A C C A T T T A T G A G A C A T T C T T G C T A N 177 | T G A A G G C T C C G C C T T G G G A A A A C A G C T A A A G T A A G G A C C C A G C C T G G G G T T G A G G G C A G G S 178 | T C C C C C A G C A A G A C C C G G G G G C A G A T C C A G G T G C G G G G G C C A G C C C T G C G C G T G G C T G G G S 179 | G C T G A C T G C C A G A T G A C T T G T G A G A A C C A A A A C C C A T G T T T T C C C A T A C A A C T C C C G G A G N 180 | G G T C T C T G C T G G T T C T A G C T T C C T C T T C C C A T T T 
C T G A C T C C T G G C T T T A G C T C T C T G G A N 181 | A T G T T T A A A C C T C G C G T T T C C T C C C C G C A G C T C T T G G G C A A T G T G C T G G T G T G T G T G C T G S 182 | A C A C C A C A C T C G C A T A G T C G G A C C C C A G A A T A C T A C C C A A A T G C T G G C T T G A T C A T G A A C N 183 | G G A C T A C C T T A C C C G T A C C C T C C C T C C C T C C C A T C A T G G C T G A G T C C A G G G T T T A G G C T T N 184 | G C T C T C A C A G C C T T C T C T C C C C A C C C G C A G A A G G A G A A T A A G A A T G A A A A G G T C A T A G A A S 185 | G C T T T G T G A G A T A A A A C T C T C C T T T T C C T T A C C A T A C C A C T T T G A C A C G C T T C A A G G A T A N 186 | T T T T T C A A T G A T G A A T C T T T T G T T T T G T A G G T T A C C A T A G A G A T G A A T G A A C C A G T T C A A S 187 | C A G C C G G G T T G G C C T A A G G G A C T T A G T G C C G G G C G G A A A G G G G A C T T T G G G T T G G G G A T T N 188 | G A A G G C T T A T G T G G A T G A T A C T C C T G C T G A A C A G A T G A A A G C A G A A C G T G A G C A G A G G A T N 189 | A T T C T A G C T A A T G T A A T A A C T G T G A A G T T T A C A T T G T A A A T A G T A T T T G A G A G T T C T A A A N 190 | G G C C C A T C C A G G C T A A T C A C A C G G G C A C A G G T A A C C A T T A C A C C C C T C A C C C C C T G G G C C S 191 | C C C A C C C T C A C T C T T T G C T T G T C C T G G C A G G A T G T T C C T G T C C T T C C C C A C C A C C A A G A C S 192 | G C A C C T T T A A G A A A G A A T G T G T G G A G T G T A A G A A G T T T G A C C G G G A G C C C T A C A T G A C C G N 193 | T A A C T C T T T C C A C C A A T T T C A A A T A G T G C G G C T T A A A A G A T G A A G G A G A G C T A A A C A A A C N 194 | C C C T G G C C C T T C C C T T G G C T G A T T T T T C A G A A A C C C A G A T C G A G A C T C A A A G C C C T G G T G S 195 | T C A G G A T T A T T G G G A C T T T G C A G A A C T C T G C A G A G T T T T C A G A A G C C T T T C A C T G C C G C A N 196 | T C C A G C T T T T 
C T C A G T C A C T C A G C A T C C A C A C A G G C C A G G A C C A G A A A T C C C T T T T C A C C N 197 | A C G C C C C A G C C C T C T G A A G G T G C T G C T C C A G C T G T C C T G G G G G A G G T G G A C A C C T C G T T G N 198 | A G T G G C A C C A G C C G G A T C A G T T C A T G C C T G G T G A G T C T G T C C T G T C C T G C G C C C T G G G C C S 199 | A G C T C G G C C C A G C T T G G C T C A G C C C G A C A C A G C C T G C T C A G C C C A G T T C A G C T C G G C T C A N 200 | A G A A C G G G A A G G A G A C G C T G C A G C G C G C G G G T A C C A G G G G C C A C A G G G C G C C T C C C G G A T S 201 | C C C C A T C C T G A A G G G C C T T A T G T C T T C C A G G T G A A C G C T A T G G C T C C A A G A A G A G C A T G G S 202 | G C A G T A A G A G T C T T A A T T T T G T T T T C A C A G G T T G T T T T G A A T G G T A A A G T T G A T G C A T T C S 203 | G G T G A C A G T G G G G G A C C C T T T G T C A T G A A G G T A A G C T T C T C T A A A G C C C A G G G C C T G G T G S 204 | T T G G A G G C C A A G G A G G C C G A G A A T A T C A C G G T G A G A C C C C T T C C C C A G C A C A T T C C A C A G S 205 | T G A T A G T C A A T A C C A G G G A C C A G G A A G G T C G T G A C C A G T C C T G G A G G C C C C A G G C T G T A C N 206 | G C C G C T T C C T C A T C C T G G C A C A C T C T C T T C A C A G C C G A A G A A G G C C A G T T G T A T G G A C C G N 207 | G T T G G G G A A A T G C A T T G T C A G A A A A T T G C C T G T G T T C T A C C T G T A C T G A T G G A T G G C A T A N 208 | G A A C G A C A A T A G C T T T A C C C T C A G G C A T A G G C C T G G G T G C T G G C T G C C C A G A C C C C T C T G N 209 | A T G A A A G C C T A C A A C G C A C G T T A C T T G C T T T A T A G A T T T T T T T T A A T T T T A A G T T T T A T G N 210 | A C T C A T C G A A C T C T G C T G A T A G C C A A T G A G G T A A T T T T C T T T A T G A T T C C T A C A G T C T G T S 211 | C T G A G A T T C C A G C A T C C T G C A A C C T C C A G T T C T G A A A T A T T T T C A G T T G T 
A G C T A A G G G C N 212 | C A G A A T G A G C A A G T G A A G A A C T T T G T G G C T T C C C A T A T T G C C A A T A T C T T G A A C T C A G A A N 213 | T G C T A C T T A A A T C G T A C T T C T C T G A A G A A G G T G A G G A G G A A G G G G A C A A G A T G A C A T A G A S 214 | G G G G C A G G A C T C C A C C C G A T C A T T C C C C A G A T T C A G C A G C G A C T G C A G G A G G A G C T A G A C S 215 | A C G G G A A G G A G A C G C T G C A G C G C G C G G G T A C C A G G G G C C A C G G G G C G C C T A C C T G A T C G C N 216 | T T A G T T C A C T C A A C T T A T G G A G T A G A A T G A A A A A A A C T G T A A A G C C A A T T T T C A T T A C T A N 217 | C T T C C G C A C C C A G C G C G T C G A C C T T T A C C A G C A G G C C T C C C C A C C A G A T G C C C T G C G C T G N 218 | T A C C A A C T G C A G A G C C A G G A A A A C T T T G A A G C C T T C A T G A A G G C A A T C G G T C T G C C G G A A N 219 | A G A C A G G A C A A C A T T G C G G C T T A T C C T C C T C T G T T G C C A T C A T G G A C C T C T C G C A C C C C G N 220 | T T G G A T T T G A A G G A A A G A A C T G T G A A T T A G G T A A G T A A C T A T T T T T T G A A T A C T C A T G G T S 221 | A T T T T T G C T G G C C T G G A C C T T C C T T C C A C C T T C A C T G A A A C C A C C C T G T A C C G G A T A C T G N 222 | A T A T C A C A C G G G C C T T T G A A G G G C A G G C A C A C G G T G C A G A T A G A T C T G G A A A G G A C G G C G N 223 | T G A C G C C C T C A A G G G C A C T G T G A G T C C C T G C C C A C C T G G G C C A G G C C C T G C C C C T T C T C T N 224 | A A G G A A A T T T T C C A A A A T G T G G A T G A C A C A G T A A G G C C A C C A T G G G T C C A G A G G A T G A G G S 225 | T T G T C C A C C G C C T C T A C C A C G C A G G A G G G T G A G C T G G C C T C C A C A C A A A G C G A G C T C A C C N 226 | G G C C T C T C C T T C T C T T C C T T C A C T T T G C A G A G G C T G G A A G A T G G C A G C C C C C G G A C T G G G S 227 | T T G C C C C T C A C T G C T T G G C T T G C T C T 
G C A G A C T G T G G G C T G C G A C C T C T G T T C G A G A A G A S 228 | A A A A C T T A T A C T T T A T T T T C C C T G T T G C A G G A A A A A A T C A T G T C C T A C A T A T G T T C T C A A S 229 | G T C A T T A A G A T T G T T G A T C T G C C T T T C T A G A C G A A G A T G A G T C G A G T G A G C A G A C C T T T A S 230 | A C T C C C C A G A G T T C A C A C C T T A C C T G G A G A G T A A G T G G C T T G G G C T G T A A T A C C G T T C A T S 231 | G A G G T G G G C G G C A G C G T C G C C G G C T T C C A G A C A C C A A T G G G A A T C C C A A T G G G G A A G T C G N 232 | G C C G T G G T T T T T T T G C T T C A C C A C C C T G A G G T G C G T C C T G G G G A C A A G C A A A A G G C T C C T S 233 | C T G A G G C A C T C T T C C A G C C T T C C T T C C T G G G T G A G T G G A G A C T G T C T C C C G G C T C T G C C T S 234 | C T G G T G T G A A T G G C A T T C T C T T T T T T G C A G A C A G A G G A G C T G A A C C G C G A G G T G G C C A C C S 235 | A G A G G T T G G T T C T T A T C C C T T G G A G G A T T T A A A A C C T T A G T A C A A A T A G T C C T A G C C A T A N 236 | T G G C C G A C A C A C G C T T C G A C A T G G T C C A G C T C A T C G A C G T G G C C C G G C A G A C T G C C C A G G N 237 | C A T C A T C C G T G T C A A C C A G C C A T G G G G A A G G T G A G C A G A A C A C A A A T T A A A T A A A A T G A A S 238 | C T G T C A A A C T G C T C T T G T T C A A T C T C A C A G G C T C C T G G T T G T C T A C C C A T G G A C C C A G A G S 239 | A T T C A G G A T T A T G A A G T T T T T C G A T G C G A A G A T T C A C T G G A C G A A A G A A A G A T A A A A G G G N 240 | C C A G C A G G C T G A G G G C C A G A G C G G C C A G C C C T G G G A G C T G G C A C T G G G T C G C T T T T G G G A N 241 | A T T T T G A G A A A A A T A C A T G T G A G T C A T T T T T T C T G T T T C T C T T T T C T C T T A A C G A T T A T C N 242 | T G T T T T C T A G G A A G A T G T G G A C A A G G C A G T G A A G G C C G C C C G G G C C G C C T T C C A G C T G G G N 243 | C G 
C C T T G A G G A G C T A G A C T T G G C C A T C T A C A A T A G C A T C A A T G G T G C C A T C A C C C A G T T C N 244 | A A G C T G C A T G T G G A T C C T G A G A A C T T C A G G G T G A G T C C A G G A G T T T C A G C A G T T T C A G A G S 245 | G T C T T C T G C T C C T C T T C G T C T G G T C C C T T A C T T C C A A G A C C C C A G A G A G G A A G G C A T G C T N 246 | A G G C C C A T C C C T A C T C C T C T C C T C A C A C A G A G G G G A C C T C A C C C C A G A C G A G G T G G T G G C S 247 | T A C T T A A C T C A G T C T T C T T T T T T C T C A T A G G A C T A C A A A T C C C T C C A G G A T A T C A T T G C C S 248 | C A A G G T G A G C T C C C C T C C C T C C A A A A C C A G A C T C A G T G T T C T C C A G C A G C G A G C G T G C C C S 249 | A A G G A C A C G G G C A G C A G A C A G T G G T C A G T C C T T T C T T G G C T C T G C T G A C A C T C G A G C C C A N 250 | A C A C A G C G A G G C C T C A A G A C A G T G T T T G A C G A A G C G A T C C G A G C A G T C C T C T G C C C G C C T N 251 | A A G T T G C A C G T G G A T C C T C A G A A C T T C A C T G T G A G T C T A T G G G A C C T T C A A T G T T T C T C T S 252 | G G C A G A T A A T G C T A T G A T A C A T T T T T T G T A G G T G G T G C C A T T T G A C T G C A T T T A G C C A A C N 253 | T G C T C T A T G G C C C T T C C T C A T C A G G G G A C C G T T T C C C C C C T C T T C C T T C A C A G T A T T T A A N 254 | G G A G C A G G G C A G A G T G A G C A G G A A G A A A T G A T G C T G G G G A A T T T G T G T G C T C C T T G G G T G N 255 | G C C A G G A A G A T G T T C C C C T C G T A A T A T C A G G T A A A T C C C A A T A A A T T C T C A G T A A A C T C T S 256 | A A A G A G G T C A T A T T A A T G G G A T G A A A A C C C A A G T G A G T T A T T A T A T G A C C G A G A A A G T C T N 257 | G A C A G A A A C A A A G A G A C A T T T C T C T G C A A A A C C C C C C A A A T G C C T T G C A G T C A C T T G G T C N 258 | T G C G T C C T G C G G A C A A G C A A A A G G C T C C T T C C C A G C A A C C T G 
G C C A G G G C G G T G G C C A C C N 259 | C C T G G C A A G C G C T G A G C G G A G A T C T T G C A G G C A G T G G C C T T G T C G T C G A A G T C T G G C C T T S 260 | G C C T G C T G A A C T C A C A C T G T T T C T C C A C A G C G C A T G A G A G C C C A G C C C G G C A C C C C T G T G S 261 | C A G C T G G C C A T T G A C A C C T A C C A G G A G T T T G T A A G T T C T T G G G G A A T G G G T G C G G G T C A G S 262 | C C T A C T G T T T C C C T C T A T C A A A A A G C C T C C T T G G C G C A G G T T C C C T G A G C T G T G G G A T T C N 263 | C C A A C T A G T T T T G A T T T T A C A T G C T C A T A G G C T G A A G G G A C T T G C C T T G T C T C A G A T G A G N 264 | T C C T G C T T T A T T G T T G G T T A A T T T G C T C T C T G G G T T T T G G G G G G C T G G G G G T T G C T T T G C N 265 | T G C T C C G C T A C T A C A A C C A G A G C G A G G C C G G T G A G T G A C C C C G G C C C G G G C G C A G G T C A C S 266 | G C T G A C C C C C T A C C C C G C C T T G T G T C G C A G A C G G T G A C C A G T G C T T G G T C T T G C C C T T G G S 267 | A C C T T T G T T G G T T G T A A A T C T G T C T T A C C A A T G G T G G T T T G T T C C C T C C T G A A C A G T T T T N 268 | C T G G A C A G G T T G G A G G A G G T G A C C A A T G A A T C G G G C T A T G T G G A C G T G T T G A C C C T T C T G N 269 | C A A C A A A G C A A A A G C C C A G G A T G G T G C A G C C A T G G A A A T G C A G C C A T T G A A G A G T G A A G A N 270 | G A G C C T A G A C A C C C C T G G G T T G T A G G G G A G A G G C T G G G G T G G A G G G A G A G G C T C C T T C C C N 271 | T C C T G A C C C T G G C C C T G G T G G C T G T C G C C G G T G A G T A G A A G C T G T C T T T G G A T G G C A C T C S 272 | T T G G A A T C C A C C A G C T A C A T C C A G C T C C C T G A G G C A G G T A A T C C A T G A T G T T T T A C A T C C N 273 | A C G A C C T G C A C G C C A C C A A G C T G G C G C C C G G T G A G A G C A C C C C C C G C T C C G G C C G G G G A T S 274 | G C C A A G A A G A C G T T C C C T 
T G G T A A T A T C A G G T A A A T C C C A A T A A A T T C T C A G T A A A C T C T S 275 | C T T C A G C C T C C C A A G T A G C T G G T A C T A C A G G G C A C A C G T G T T T C A C G T T G A C A G G T T T G C S 276 | G A G A G A G C C C C A G A C T G A G G G A A G C A T G G A T G G A T G G A G A A G G A T G C C T C G C T G G G G A C T N 277 | A T G T C C C A G C A A C A C A C A C T G C C A G T G A C C C T C T C C C C T G C C C T C A G T C A G G A G C T C C T C N 278 | G G G C C G G G G G A C G G C G G C T C C C C G C G C G G C T C C A G C G G C T C G G G G A T C C C G G C C G G G C C C N 279 | A T G G G C T A T T A A G T G C A G A G G C A A A A A A G T C C C C A A C A G G T G A G G A A A C A A T G G C A T A G A N 280 | G A A C G T C T G G C T A A A T A C A A T C A G C T C A T G A G A A T T G A G G A A G A G C T G G G G G A T G A A G C T N 281 | T G A A G G C T C C G C C T T G G G A A A A C A G C T A A A G T A A G G A C C C A G C C T G G G G T T G A G G G C A G G S 282 | C C T T T A C T C A G T T T A C A A T C T A G G C A G T A G A A T G T A A T C A C T G C T T T A A A C T T G A T A C T G N 283 | T G G C C T G A C A C T T C A C C T C C T T T A T G G C A G A G A T G A A T G T G A A G G T G C T G G A C T T T G A G C S 284 | C C A G T C A C C A C A G G A C C C C T T G T C C C A C A G G T C T C T T T G A G C C T G G A G A C A T G A A A T A C G S 285 | A G T A A C G C C T C C C C C C A G G C T G G T G A A A A A G A A T G A G G A A G C T G C T G A T G A G G T C T T C A A N 286 | T G C T G G C A T G G G T T G C T C T G G C T G A C C A A G G T A C A G G G G A T G T T G G T G G C C A T C T G G G T C S 287 | T G A T T C T C A T G A A A T A C C C T G A G A T C G A A G G T A G G C A A G T G A C T G A A G G G A C A C C G T G C G S 288 | A G A T G C T G A G C G A C A G C T A T T G T C C T T T G G A A A C C C A A G A G G T A C C T T T C T T A T C C G C G A N 289 | C T C C C C T A A A C C A G G C C C T T G G A G A G C A G G C C C C A G G G G A G C A G T G C A A C T C A C C T T C 
A C N 290 | A T G G C C G T C A T G G C G C C C C G A A C C C T C G T C C T G C T A C T C T C G G G G G C T C T G G C C C T G A C C N 291 | C T A C A A G C A C T G A A G C T G T G C C G C C A G C T C T C A G T C C C A C A G C T C T C A G G C C C C T C T C T G N 292 | G C G G A A A A C G G G G A A A C G A A G A C T G A G G A G G T C A G A A G T G T G T G T G T G T G T G T G T G T G T G S 293 | A G G C C T C A G G A T A A T C A T T T C T C C C C A C A G A C A T T C C C C A G C C T C A T G C A G A G C C C T G G G S 294 | T C A A C A T A T G C G A G G T T T G T G C A G A C G C A C A G A C C C T C T G G G G A G T A T A T G T T T G A A T T T N 295 | A A G G T C C C G G A G A G C T G A G C A G T C A A G A T G G T G G G G C C C A G G T C T T G G G A G A C G G G C A G G S 296 | T G C C T T G C T G A C G C C C T C C C G T C C C C G C A G G C G G C A A G T A C G T G C C C C G C G C C G T G C T C G S 297 | G A G T G A A A A T A A T A T T C A G A C A A T T T T T G C A G T T A C T G A A G A A T T T C A G C C T G T T T A C A A N 298 | G C A G C C C A G A C C T A C C T C T T G C T T T T G C A G C A A T A T A A A T G T C A C C C T G G G C G C C C A C A A S 299 | A T G C C C C C A T G G A G G A C T G T G T G A T C T C C A T C C T G G G A A G G G G G C T C A T T C A C A G A G A G A N 300 | G A A A G A A A G A G A A A G A A A G A A A G A A A G A A A G A A A G A A A G A A A G A A A G A A A G A A A G A A A G A N 301 | C A C A G C C T T T G T G T C C A A G C A G G A G G G C A G C G A G G T A G T G A A G A G A C C C A G G C G C T A C C T N 302 | A A A A A A A A A A T A G C T G G G C A T G G T G G C A G G C G C C T G T A G T T T C A G C T G C T T G G T G T C T G A N 303 | G G C A A G G A C T T C T C T G G G C G G C C T C A A A T G G T A A G T G G T G C C C A T C T C C T C C C T G C C C C C S 304 | G C C A T T G C A C T T T A T A G T C T G A T T A C A A A A C G T G C A A T T C A G G A G C C C A G C A G T G A C A C A N 305 | G G C T A C A G C T C T C C C T G G G C A T C A T C C C A G G T A A 
T G A G G C T C C C C G A G C T G C C C C T A C A C S 306 | T G G G G G T G C C T G G G C T G C A G A A G T G A T C A G G T A A C T G A G C T C C T G G G A C G T T A G G G C T G G S 307 | G T C C G G G C T G G C G G A G G C A T T G G G G A C G A G A T C G A G G A C C C C G C G G G T G A C G A A T A C G A G N 308 | A T G T T A T A G T A A A T T T A T T T T A T T T T A G A T A T T A A A T G A T G T T T T A T T A G A T A A A T T T C A N 309 | G C C T G C G G G G A G C A C C T G G A G G C C T T T G C T T T G C T G G A G C G C T T C A G C G G C T A C C G G G A A N 310 | C C T G T G A G G A A G G C T T C A T G T T G C A G G G A C C A G C C C A G G T T G A A T G C A C C A C T C A A G G G C N 311 | G G G C C C C T C T G A T C A C C T C C A C T C C T A T A G G T A T G A G A C A G A G C T G G C C A T G C G C C A G T C S 312 | A C T T T T G A T C C A C A G T C T G C C T G T G T C A C A C A A T T G A A A T G C A T C A C A A C A T T G A C A C T G N 313 | C T T T T G T G G A C T T G T C T G A C C A C C T T C T A T T T G C C C A G A G T T T G C T C A A T T C C A A G A C A G N 314 | T A A G A A G C T G T C C A G C T G G G T G C T G C T G A T G A A A T A C C T G G G C A A T G C C A C C G C C A T C T T N 315 | C A A T G G A C C C C A A C T G C T C C T G T G C C G C T G G T A A G G G A C G C C C G G G T T C T G T G C C T T G G A S 316 | G A A T G C T T A G T G C C C T C A C T T C T T C T C T C T C T C T C T A T A C C A T C T G A G C A C C C A T T G C T C N 317 | C T G A C A T G C T T T C A T C T A G T T T C C T C G C T T C C T T C C T T T T C T G C A G T T T T C G C T T C A C A G N 318 | C C T T C A G C T C A G C T C A G G A C T T A G A T G A A G G T A A G C C G A A T T G G G G G A A G A T A T T G T G A C S 319 | G A G A T C G A C C T G G A C T C C A T G A G A A A T C T G G T G A G T G C C T T C A C A T C A C C T G C C C A G C T C S 320 | T T T T C C C A T C A T C C T G T A C T T C T T T T C T A G A T G T C A G C C A G G A A G A T G T T C C C C T C G T A A S 321 | T C C T C A T G C A 
T C C A C C C C C T T C C T C C C C A G G A A T A G C C A G G T C T G G C T G G G T C G G C A C A A S 322 | G G G G G A C C C G G C G C G G C C G C G C G C T G C C G G G C G G G A G G C T G G G G G G C C G G G G C G G G G C C G N 323 | C A A C C T C T T C T G T T C T A T C C C T C T T C A G C T C T C C C A G C T G A A C C T C A G C T T T C C A A T C C C N 324 | G G C T G A C T T T A T G G C T A A G A A G T T T T C A C T G G A T G C A T T A A T A A C A A A T A T T T T A C C T T T N 325 | T T G A C A T A C A T C C A A C A T T A A A A G C C A C C C C C A A A T G C C C A A G A A A A A A A G A A A G A C T T A N 326 | G C C C A T T C C T G C T C A C C T G C T G G G G A A C A T G T G G G C G C A G A C C T G G T C C A A C A T C T A T G A N 327 | A A G G C T C C A T A T A T C C A T T C A G A A T G T C T C A A C A C A A G A A G T T G C T T G T A G T A A A A T G T A N 328 | G C T A T T G G A G G A T C T T G A A A G G C T G T T G T T A T C C T T C T G T G G A C A A C A A C A G C A A A A T G T N 329 | T T G C T C T G C A G G T G C G C C T C C T G G G A G T G G T G T C C A A G G G C C A G C C C A C G C T G G T G G T G A N 330 | C T G T C C C T T G G G C T G T T T T C C T A C C C T C A G A T T A C T G G T G G T C T A C C C T T G G A C C C A G A G S 331 | T T G G A G A T A A A C T T C C T G A A T G T G A A G C A G G T G G G T G C T G A G C A C T T A A G A G A G C A G G C A S 332 | A G A G C A C T A T T T G C T A G G T G C T G G G A G A G A G A A G C A G G T G G C T T T C A C A C C C C A C A T T A C N 333 | G A C A G G G A T A T G T C T T T G C T T T C T T T A A A G G C T G A C T G T G C T G T C C T G A T T G T T G C T G C T S 334 | C C G T G G T C A G C A G C A G C A C G A C T T C C G C C T C C G C A G G T G G C T A T G G A G G A G G T T A C G G C G N 335 | A A A G C C A C A A G G A C C A C C C C C A C A A G G A G G C A A C C A G C C T C A A G G T C C C C C A C C T C C T C C N 336 | G C A C A C G G A T G C C A T G A T C A T C G A C A T G A G G T C A G T G G C C A G G G G T C A G T 
G C T T C C T A G C S 337 | C T G G T G A T G C C C A C C T T C C C C T C T C T C C A G G C A A A T G G G A G A G A C C C T T T G A A G T C A A G G S 338 | G A G C T C T G C A G G C T G A A G A T G T C G C A G C T G T C A G C G G G A T G G G G G C A C G C T T G C T G A C G C N 339 | A G A G T G A T T C C T T T C C T A A A A G T G T A A G A A A G C A T A G A G A T T T G T T C G T A T T T A G A A T G G N 340 | G G T A A G A G A A A T A G T G T T G A T T T T A G G G A G A A T A A C T C A G C A A T T G G A T C T G G T A T G T G T N 341 | A A G G A A A C C T T A A A C T C T C T T A T C A T T T A G G A A T C C T G A T G G G G A T G C C A A G C C C T G G T G S 342 | T T T T C C C A T C A T C C T G T A C T T C T T T T C T A G A T G T C A G C C A A G A A G A C G T T C C C T T G G T A A S 343 | C C C C C A T T C T C G C C T C T C T C A C C T C C T C A G A C C C G T C C A A G A A G C A G G G A C C A T G G C T G G S 344 | G T G G G G A G A A T A T G T G G A C A A C A C A A A C C T G G T C C C C A G G C T C T G G C C C A G A G C A G G G G C N 345 | C A C T C A G T A T G T C T G C A G A T G T A C C C C T T G G T A A G A T A A T A A A T T T G A A C C T T G T T T T G A S 346 | C G G A G C C C T C A C C A G C A T T T G T T T C C A C A G C C G A T A T T A T G T A C A C G G G G A C A G T T G A C T S 347 | T T T A C T A A T T T A A A G A C C T G T T T C C C A T A G G A G C T C A G T C T G A A T A T T C T T G G A G A A A G A S 348 | T C T C C T C C C A G A A C G T C A C A G T G C T C A G A G G T G A G A C A A G C C C C T A A C A A G G T C A A G T G A S 349 | G T T C A C G T T T G A A A C A C T C T T T T T G C A G G A T C T A C A A G T G G A T A T T T G G A C C A C T C T G T G N 350 | A C A G C T G C A T T C T C A T G C T T C C T G C C G C A G T T C T T C C C C A A T C C A G G T C T C C G G A G G C T G S 351 | A T G A G C A G C A A G G G C A A G C T C T A T G G C T C G G T G A G T A C C G C A G G G G T C T G G C T A G G C A C C S 352 | A A A C G C T A A T T T T A A C A C T T A T T T T C 
T T A G T T T C C C A G A A G A G G T C G C C A T T G T T G A A G A S 353 | G A A T G A T G A T G C A G T C C G G C C G G A A A G G G G G T A A G C T T G T G C T C T A C T C A T C T A A A C T T G S 354 | A T T T G G A G G G C T T C T T T G C C A C C C T G G G C G G T A T G A G C C G G G T G T G G G T G G G G T G T G C A G S 355 | A G T C A C T G A G T T C A T T T A A T T A C C C T A C A G G T G A G A G C A A A G A G C A G G T G G C C A A C T C A G S 356 | C T T G C C C C A C C G C A C C A C A C G G C C C A G C A G G T G A C T C C C G A G G G T T G G G G A T G A G T G A G G S 357 | G T C A C G C A T G A A G G G A G C A C C G T G G A G A A G A C A G T G G C C C C T A C A G A A T G T T C A T A G G T T N 358 | C T G T A C C A A C A A C G A G G A G C A C G G G G G C C T C T G G A A G T C A T G A G A G T A G A A A A A C C A G T C N 359 | C T C A C A T T T G C T T G T T T T T C T G G C T C A C A G A C A T G T C G A G G A A G G C T T T T G T G T T T C C C A S 360 | T A T G A A A A G T T G T T C T T G T G G A T G G T C A C T C G C A T T A A C C A G C A A C T G G A T A C G A A G C T T N 361 | G A C C A G A A A A T T A A T G A A G T T T C T T C T T C T G T A A G T A T A T G A G G C C C A T G C T G G C A G T G C S 362 | T G G C A G C G G G A T G G G G A G G A C C A G A C C C A G G A C A C G G A G C T T G T G G A G A C C A G G C C T G C A N 363 | T G A C T G C G G G G A C G G C T C T G A C G A G G A G G A C T G C A G C A T C G A C C C C A A G C T G A C C A G C T G N 364 | G A C C G G C G G G A A C G G A A G G C T G C C T T C A A G G T A A G G C A T G G G C A T T G G C C A A C A C A C C C C S 365 | C T C A G T G G A G C C A T G A A T A C T A G C T C C T G C A G T C T G A T G A A G A T A G C A A A T G A T A T T C G A N 366 | G T G G C C A C A C A G C C T G A G T T G C A A G T T C C A G G G C C A G T A G A A A A C C T G C A A G C T G T A T C T N 367 | A G C C T A C A T G G A G C T G A G C A G C C T G A G A T C T G A G G A C A C G G C C G T G T A T T A C T G T G C G A G N 368 | T G 
A G C G G G A A C G C C T A C A T T C T T C C T C C A G C T G A G C A G A A G A G T C A G A C G G A A T C G A A A C S 369 | T T A T T T T C T C C A T T G T C T T A C C T T T T A C A G G T G T T A A T A T A G T G A A A A G G A A G C T T G C A G S 370 | A C T T T C A C A G T G T A C G A A T A C C A C A G A C C A G A T A A A C A G T G T A C C A T G T T T T A T A G C A C T N 371 | G A C C T C T C A T T C A C C C C T T C C C T G C C A G A G C T G G T A C C A G A C A G C C C C A G T T C A T T A G C C S 372 | A C C T G A G G T C G G G A G T T C G A G A C C A G C C T G A C C A A C A T G G T G A A A C C C C A T C T T T A C T A A N 373 | G A A G G C T T G A G G C C T G G A G T T T G A G A C C A G C C T G G C C A A C A T A A C A A G A C C T C A T C T C T T N 374 | G C A C C C T C A A A C C T G C T G C T C A C C T C A T T G G T A A A C A T C C A C C T G A C C T C C C A G A C A T G T S 375 | G T G C C T A G C C A T G G G A A T T C T C C A T C T G T T T T G C T A C A T T G A A C C C A G A T G C C A T T C T A A N 376 | C T G G T G T G C T C T C T G G T G A T C A A G A T A C A G G T A G G T C A T C A T C G C A G C A T C T T T C T T A G T S 377 | T C A C G C A G C C G C T A G C G C C C A G G C G C C T C T C G C C T T C T C C T T C A G G T G G C G C A A A A C T T T N 378 | T C A C T T T T T C C C T T C T T T T T C C T T A T C C A G G G A A C C G A A G A C G T G T T T G C A A A T G T C C C C S 379 | C C A T G T T G T C T G C C A T T C T G G C C T T T C C A G A A C A T C A A T G C G G C C A A A T C T A G T T T C C T C S 380 | T A A T T T A A A A C T C A G A A G A A A G C C T A C A T C A A A G G G A A G T A G A A G T G G G C C C G G C T C A C T N 381 | T T A A C T G T G T C T C G T G C A C G G A A A A T C C A G C G T T T C T T G T C T C A G C C A T T C C A G G T G G C T N 382 | T T T C T T C T C T T G A T C T C T T T T C T C G G A C A G A G A A T G A A G A A A T T G A T G T T G T G A C A G T A G S 383 | C T T A A C T G A T G A C A T T C C A C C A C A A A A G A A G T G A A A A T G G C C 
G G T C C T T G C C T T A A G T G A N 384 | G C C T C T C C G G C T T C C G T C T G C C G G A C C A A G C A C C T G C A C C T G C G C T G C A G C G T C G A C T T T N 385 | T T G A A C T G G G G A A G C G A A G G T G G C A G T G A G C T G A G A T T G C G C C A T T A C A C T C C A G C C T G G N 386 | G C G A A G C C C T C C T C C T T C T C C C C T G C C G G T G C A C A G G C C T C T G C C T G C T G G G G A T T A C T C N 387 | C A T C T C T G T T G T C T C T T T C T A C C T T C A C A G C T C C T G G G A A A C G A G C T G G T G A T T G T C T T G S 388 | A A A A A T A A T A A T A A A T A A A T A A A T T T T A A A A A C C C T G A A T G C C A C G G A A G C C G C C A T G G C N 389 | T C T C C T C A G T A A T C T A G G A T T C A A T G A C A G G T A C G A T T C C A G T T T A T T A T C C A T C A A A G G S 390 | G C A G A G G A G C T G A A G A A G G A G C A G G A C A C C A G C G C C C A C C T G G A G C G C A T G A A G A A G A A C N 391 | A C C C C C G G T C C A A G C C T C C C C T C C A C C A C T G C G C C C T T C T C C C T G A G G A C C T C A G C T T T C N 392 | C C C G C C C G G G C C C G C C C G C G T C C C T T G T A G T T T T G G G A G G T G A T C A G C G A T G A G C A C G G C S 393 | T G T C T A A T A G C G G A G T A A C A T G T C C T C A C T G C G G A G T G C C A G G C C A G C A T C T G A A T G T C A N 394 | T C A T T T T T G G G C C C C T T T C T C T G C C C T T A G G T G T C A G A C C T G A C C C A G G C A G C C A A C A A G S 395 | G G A C A G C T C A C C T A G C G G C A A T G G C T A C A G G T A A G C G C C C C T A A A A T C C C T T T G G G C A C A S 396 | C T A A G T T G T C C T T T T C T G G T T T C G T G T T C A C C A T G G A A C A T T T T G A T T A T A G T T A A T C C T N 397 | C A A A A A A A G G C A G G G G T T G C A A T C C T A G T C T C T G A T A A A A C A G A C T T T A A A C C A A C A A A G N 398 | G A G C T G G T G A G G A C A G C C T G C C A G A G T C T G G T A A G A A A G G G A C T C A G G G T G C G G G G A C A G S 399 | T T A A T A A A G A A A T A C A T T 
G A C G G C C A A A A A G T A A G T T A C A C A C A T T C A A T G G A A G C T A T A S 400 | G T C A A G A T T G A C C A G A C C G T G G A G G A G C T G C G C C G C A G C C T G G C T C C C T A T G C T C A G G A C N 401 | T G G G C G C T T C A C G G A A C T C G C A T T T C C C A G T C T T C G T A A C C C A G G A G G A A G C C C A C G G C G S 402 | A A T G T A T A T G T A A T A A T T C T T C A T T T T C A G G A A G A A G A A T T A C A G A A A T A C A T C C A G G A G S 403 | T T G G A G A T A A A C T T C C T G A A T G T G A A G C A G G T G G G T G C T G A G C A C T G A G C A C T T A A G A G A S 404 | C A G C G C A G A C G T C A G G G A T A T T T A T A A C A A A C C C C C T T T C A A G C A A G T G A T G C T G A A G G G N 405 | G G A A C T A A G G A G C C A T G G A T C T G G T C C T A G G C A T G G T G G T G A G G A C T A C G T C T T C T C C C T S 406 | A G C T G G C T G C T T A G A G A C T G C G A G A A G G A G G T G C G T C C T G C T G C C T G C C C C G G T C A C T C T S 407 | T A A T C T T C C T T G T T T G C T T G T C T T T T T C A G G A A G T G A A T A A G A A G A T A A C A G T G T T T C A A S 408 | T T A C C T A A T T A G A A A A A A A A T C T A G T C A A A C A A T T A T A A T A A T G G G G A A G T C A T A T A C A A N 409 | A G C G C C T G G T G G T C C T A C A C C T G G A G G A A G G T A T G T G G G G C C C A G C C C C A A G C T T G G C A C S 410 | C T G C C A T A G T C A G A C T T C A G A C T T T C A A G A T T A T T C T A A A T C A C C A G A A A A T T A A T T T C A N 411 | A T C G T G G A G A T G C A G C T G A G G C A G C A C A A G G T G G G G A C T C T A C G T G G A C G G C C T C C C C T C S 412 | A T T G G T G G C A G A A G A G G A A G A T T T C T G A A G A G T G C A G C T G C C T G A A C C G A G C C C T G C C G A N 413 | G A A A A G G A G G G A G C T G C T C T C A G G C T G C G T G T A A G T G A T G G C G G T G G G C G T G T G G A G G A G S 414 | G T G G C C T G A C C C G G A C T C C T C T G C T C T C A G C C C T C A G T C T G C A G G G C T C C A T A A T G A C 
A G S 415 | G A A A T G C A A G A A A C G G G T C A C C A T C C T G G T G G A G G G A G G A G A G A T T G A G C T G T T T G A C G G N 416 | G T T C T A G A A G C C T G G G C G A A G T C C G G C T A A T T G T G G A C T T G G G G A A A A T A A G G C C C A A C C N 417 | C T G C T A C T T C T A G T T T C A G C T G G C A T G C G G A C T G A A G A T C T C C C A A A G G C T G T G G T G T T C N 418 | A G G G C A T G G T G A A A A A G G A A A T A T C T T C C G T T C A A A A C T G G A A A T A A G C T T T C T G A G A A A N 419 | G C A T T T T T T T G A G G C A A T G A T G T C A A G A A A T A A A G A A T T T A A A T T T T A G G A C T T T A C A G C N 420 | A G A A A T A A T C A G A A A A G C A A C A T C A C A G T C C T T G G T T A T C T T G G A T G A A C T A G G A A G A G G N 421 | A C C C T G G C C T T C C T G C T G C A C A A T C C T C A G G T G T G C T T C C C C C T C A T T G A T C C T A G A C C C S 422 | G G A G C C T G A C G C T G C C C G C T C T C T C C G C A G C T G G C C T T C T G G T C C A A G C A C G T C G G T G A G S 423 | C T A A T T T A A A A A T T T T A A A A T T T A A T T C C A T T A G A A A A C A A A A C T G A C T T T T A A G A A C A A N 424 | C C C C T C T T T C A A T G T C A G C C T T C A G G C A A G T G G G G A A A G A G C A T T G C T T G G C T C C A T T G C N 425 | C A G T T C A T A G G C T A T C C C A T C A C C C T T T A T G T G A G T A T G G A C T T T T A A A T C T T T T A C A C T S 426 | C A G C T T T C T C T A A T A A G A A A G C T C T T T C C T G C C A C C G T A T A G G T C A C C T T C T T A T T G G T A N 427 | G G A G A C A A A G T C A G T C T C A C T C C T T C G C C C A G G G T G G A G T G C A G T G G C G C G A T C T C G G C T N 428 | C G G G A G A T C T T C A A G G A C G C G G A G A G G A C G G T G A G C C C A G C C T C G G G G C G C C C C G C G C G G S 429 | A A A T A T T T T C A T C C C T A T T T A T T T C T A C A G T G C T A C A A T G A A A A A G A A G G G T G A G A A G A G S 430 | C C A A G A A C C T C A T C C T C T T C C T G G G C G A T G G T G A 
G T G A G C A A G G C C T G T C C A G C C C C G T A S 431 | C A C G A T C T T T C T C A G A G A G T A C C A G A C C C G G T G A G A G C C C C C A T T C C A A T G C A C C C C C G A S 432 | A T T A C A G G C G T G A G C C A C T G C G C C T G G C C T C C A T C C T C A T C C T G A A G A T G C A A G A A C T T C N 433 | C T G G T G C G G T A C C A G T G C A C A G A G G G G T T T G T C C A G C G C C A C A T G C C C A C C A T C C G G T G C N 434 | C G T C A A T C A A G T C T A C A C T G T T C A A A T A A G G T A A G C T G G G T A C A G A A A A A G A A A A T T A A G S 435 | C C A C A C C C C T T C C T C A A G T G C A G A G C C C A G C C T T G C C A T G G A C C C A C A G C G G C C C C T G G T N 436 | A C G C A G G G G G C C T C T G T A A G G C A C A T G G A G G T G A G T T A G G T G T G G T C A G A G G A A G A C A T A S 437 | C T G T C T C T A C T A A A A G T A C A A A A A T T A G C C G G G C A T G G T G G T G G G C G C C T G T A A T C C C A G N 438 | C C A G C T C C A C T C C G C T G T C G C C C A C C C G C A T C A C C C G G C T G C A G G A G A A G G A G G A C C T G C N 439 | T C C C T C T G T T G C C C T C T G G T T T C T C C C C A G G C T C C C G G A C G T C C C T G C T C C T G G C T T T T G S 440 | G A T C A A A A C T T T G C T T C T A T A A A G G A A A A G A C G G T A A C T G G A A T T T C G C G A A C T T T A G A G N 441 | A G G G T C C C T C A C C T T C C C C C C T T T T C C C A G A G C C A T C T T C C C A G C C C A C C A T C C C C A T C G S 442 | C C T C C C G C T T T G T G T G C C C C G C T C C A G C A G C C T C C C G C G A C G A T G C C C C T C A A C G T T A C C S 443 | T G A A A T A T T A T A T A T A A T A T A G A A T C A A G A G G C C T G T C C A A A A G T C C T C C C A A A G T A T T A N 444 | T C T A C A C A G A T G T C T T A G A G T T T G A C G A T G G T A A G A G G C C T C C A G T C T C C T A C C C C C A G G S 445 | C T C T A A T C A G C C C T C T G G C C C A G G C A G T C A G T A A G T G T C T C C A A A C C T C T T T C C T A A T T C S 446 | A A T G A C T G G G 
A A G A C C A C T T G G C A G T C A A G G T G T G A G A A G C C T T T G C A T G T T G G C T C A A C S 447 | C G C T C A G C C C G C T C C T T T C A C C C T C T G C A G G A G A G C C T C G T G G C A G G C C A G T G G A G G G A C S 448 | A C T T C A T C T C T G A G A T C A G C A G T A C C A G G A T C C C A C C A C A G A A G A G G A A G A G G A T T T C A G N 449 | A G C G G G A G A A T G G G A C C G T C T C C A G A T A C G G T G A G G G C C A G C C C T C A G G C A G G A G G G T T C S 450 | T C C C T C C A T T G C C C T C C G G T T T C T C C C C A G G C T C C C G G A C G T C C C T G C T C C T G G C T T T T G S 451 | C C A T C T A C A G G C T C C A C C T T G G G C T G C A A G G T G A G A G G C T G A T C T C G C T C T G G C C C T C A C S 452 | G T A T C G G G G A A G T C C T G G C C A A G T G G G A G A T C T T C C T C T T C C T G G C C A T C C T G C T A C A G C N 453 | T G T G T C T G T G T G T G T G T G G G T C T G T G T G T G T G T G T G T G T G T G G T A G C T G T C T G T T T G A A A N 454 | C A C T G A G G G G C C T T C T G A C A T G A G T C T G C C T G G C C C C A C C T C C T A G T T C C T C A T A A T A A A N 455 | G C G C A C A C C C T C C C G C T C T T T C G G C T G C A G G A C G C T G A T G G A C G A G A C C A T G A A G G A G T T S 456 | T G A C C A G G T C T T G T T T T T G T T C T A C C C C A G G G A G C G A C A G T G C C C A G G G G T C T G A G T C T C S 457 | G T G C G A A A A G A T C T G C A A A A T T T T C T C A A G G T A G G G C T G G A C T C T G G C A G G T C T G A C C C A S 458 | A A G C T G C A T G T G G A T C C T G A G A A C T T C A A G G T G A G T C C A G G A G A T G T T T C A G C C C T G T T G S 459 | G C T G A C T C C A C A A C C A G G A G T C T T T T C A C T A T A T A A T T T C A A G A A T T C T A T A G A A G T A G A N 460 | G C C A T G G A G G A T G A A C C A T C C A A C G A C A A T G T G A G C C C A C A C G C C T G A C C C G G G A A C A G C S 461 | G G A G A T G A T C T C T C A A C T T T A A C T G G A A A G G T A T G T A T C T T G A A A G G G A A 
G A A A A A A A A G S 462 | T C C C C G C G T T C C A T G C G C A C A G G G A C A G G G G T G A G T C C G C G T C C C T G G C A C G G A G C G G G G S 463 | A G G G G G C T G A A T C C T C A G T T T A A G G G C C T G G A C C T G G G A G C T T A T G G A A G A G C A A G G G G C N 464 | G T G T G T A T G T G G G T A G A T G G A T G T T T G A G G G A G T A T G C A T G T G G G T T C A T G C A T A G A T C T S 465 | T G C T G A C C T T G G C C G T G C T C T T C C T G A C G G G T A G G T G T C C C C T A A C C T A G G A G C C A A C C A S 466 | C T T C T C T C C G G T C T G T C T C C G G T A T C A C A G G A A A A G C A C A G T G C A G A G C G C T T G T A C A C A S 467 | A C T T A G C A T T T C A T A A C A G G C G C A G G T T C A C T G A G A A G A T A T T T A A A C T C T G A A A C G A G G N 468 | A A C C T A G A G G C C A A G G T T C A A G T T T A C C C A T C T C C A G T A G C C T A G C A A T A T T T G C A A C A T N 469 | A A T C C T T T C T T T C A G C T G G A G T G T C C T C A G G A G C C A G C C C C A C C C T T A G A A A A G A T G T T T N 470 | T T C C A A C C A A G C T C A T T T C C T T T G T T T C A G C A A A C C T C G G A C A G C C A A C A A T T C A G A G T T S 471 | T A A T A G A A G A A C A T C C A A G G A G A A A C A G A G A C A G G C C C A A G A G A T G A A G A G T G A G A G G G C N 472 | C A G G C T G G G G C A G C A T C G A A C C A G A G G A G T G T A C G C C T G G G C C A G A T G G T G T A G C T G G G A S 473 | T T A C T T G C A C T T G T A A A C C A G G T T G G C A A G G A G A A A A G T G T G A A T T T G A C A T A A A T G A A T N 474 | G C C G C A G C G G C G C A G C T G A T A G C A G G A A C A G G A T C A A G T C T T C C A T A C C A G G G A C C A G G C N 475 | G C G A C C T C T A C C T C A A T G A C T A C T G C C A C C G C G G C T T G C G G C T G C C C A C C A G C A A C A A C G N 476 | A C C A G C G C C A A A T G T T C A T C C T C A T T G C C T C C T G T T C T G C C C A C G A T C C C C T C C C C C A A G N 477 | T C A T C C T C C T C C C T G A G T C C T C T C T G 
C A G G G G C C A C A C C A G G A G C C T A G C T C C T T G T C C T S 478 | C A G C T C A G C A G C C G C C G C C A G A G C A G G A C G A A C C G C C A A T C G C A A G G C A C C T C T G A G A A C N 479 | C T T A A G G A T T C A C A C G T A T T T T T G T T T C A G G G C T A C C A T A T T T T T T G C C C A G T T T G T T C A S 480 | C A T G G C A C T G A C T A G G C C C T C T G C T G C C A G C T C C A A G C C C A G C C C T C A G C C A T G G C A T G C S 481 | A T G G T C T C A A A G G A G A C C T G G G C C C T C C A G G T A C T G T G C T G C A G A C C C C A C C C T C A G C T G S 482 | T A A A T A A T T T T A A G A A A G C T G G T T C A C A A G G T G C C A C A T T T G A T G A A A G C A A A A T A C A G T N 483 | T C A A G G C C A A C A A C A A T G A C A G C G G G G A G T A C A C G T G C C A G A C T G G C C A G A C C A G C C T C A N 484 | C C A T C T A C A G G C T C C A C C T T G G G C T G C A A G G T G A G A G G C T G A T C T C G C T C T G G C C C T C A C S 485 | C C T C C T C A C T T T T G C C C C T G T C A C C T T T A G G A A G T A A A A G G C C T A C A A G C C C A G A T T G C C S 486 | G T T C A G A G G T A A G A G G G A A G G C T T G A G A G G A C C T G G T T C A T C T G G C C T T T C T T C G G A T G A N 487 | G C G G G C C C T G C C G G A C T T T A G T G C T T T A G G G G T T A A T T T C G G G C T G A C A G G G A C G G A G C C N 488 | C T G G C T C C C C T C C T G C C T C G A G A A G G G C A G G G C T T C T C A G A G G C T T G G C G G G A A A A A G A A N 489 | G G G A A G A A A T G A A A A C A A G A T G G G C T A T T A A G T G C A G A G A C A A A A A C T C C C C A A C A G G T G N 490 | A A C T G T C A G G G G A A G A C C T A C C T C T T C A A G G T G C C A G G G G C T G T G G G C C A G G G T A G A A A G S 491 | A T T T A T G G T A A A A C A T T A T T C A C C A T C T T C T G T A T T T C T T T C T A A G G G T G C T C G T G G T T T N 492 | A G C C G T T C G G C T T C T G G G C T C T G T C C A C A G G G A T G C T G C C T G A C C C C A A G A A C G T G C A C A S 493 | A T 
T T T G T T T T G C T T T T T C T G A C T C C A G T G G G G C A A G A T T T T C C T T T T T T A T A C A C A T A A T N 494 | A C C G G G A C A G G A T T T G A C T G T G C T A G C A A G G T A A G C G A T A G C A G C A G G C C T C A A A A G C G T S 495 | G C C C T G G C A C C C A G C A C A A T G A A G A T C A A G G T G G G T G T C T T T C C T G C C T G A G C T G A C C T G S 496 | G A C C C C A G G G T C C C C C A C A C C T C G T G G C A G G T A G G A G C T G C T G A C T G C C C T G C T T G C C T C S 497 | G G T G T C A G G T G G G A G T A C T G C A A C C T G A C G C A A T G C T C A G A C G C A G A A G G G A C T G C C G T C N 498 | G C C A G A G A C C A G G A T T T G G C T A C G G A G G C A G A G C G T C C G A C T A T A A A T C G G C T C A C A A G G N 499 | A G C C T G G G C T G A C C C C A C G T C T G G C C A C A G G C C C G C G T G C T G C C C C G G A A G T C T A T G C G T S 500 | G G C A A T G A C G C T T T C T C T T C C T C C C C A C A G A G G A T G T C A C C C C C A T C C C C T C T G A C A G C A S 501 | G A T G G A C C T T G G A A A A A G G C A C C A T G G A T G G G C C T T G G T T C T G T T C T A A A G C T T C T C T T C N 502 | C T G T C C C T T G G G C T G T T T T C C T A C C C T C A G G C T G C T G G T G G T C T A C C C T T G G A C C C A G A G S 503 | A G C C C C G G C T G G G T A C G C A C C C G C T G G G C A C T G C T G C T G C T C T T C T G G C T C G G C T G G C T C N 504 | G C T C T C A A C T C C T C C A A T T G C G G G T T C C A G G C C A T C C G C G G A A C T C G A G G A G T C G C C A C C S 505 | C C A A G A A C C T C A T C A T C T T C C T G G G T G A C G G T G A G T G A G C C A G G C C T T C C A G C C C C G C A G S 506 | C T G A G G A T T C C T G T T C C T G T A C A T A A A A A T G T A A G T T A A A T T A T G A T T C A G T A A A A T G A T S 507 | T A A A A C C A T A T G A T G G T T C T C C C C T T T C A G T G T C T C C T T T C T A T T C G G A G C G G C T G A A G G S 508 | T G T T T C T T G T C C T C C T G G T A T T G G G A T T T G G T G A G T G T G G G C 
T T C C G G G G A G G G A A G C C T S 509 | A T G G C C C T C A C G G C C T T T G T T C T C A T C T C G C T G C A G G A G G C T A A A G A T A T T T G C G A G G A G N 510 | G G C T C T C A G C C C T T C T G T T T T C A A G G C C A T C A T G G A G A A A C T G G A G A T G T C C A A G T T C C A N 511 | G A A C A G T C T T C A G G G T G T T T A G A A A A C C A G G T G A G T G A A T A A T T T T A A A A A A G C A T T G T G S 512 | T G T G T G C C A A C C C A G A G A A G A A A T G G G T T C G G G A G T A C A T C A A C T C T T T G G A G A T G A G C T N 513 | A G C A G A A C G C T A A G T A T G T A A C G A A C C C T T C A T G A A G C A G T T G A T A G G T A G G C T T A A A C A N 514 | G C C A A G T G G A G C A C C C A A G C C T G A C G A G C C C T C T C A C A G T G G A A T G G A G A G C A C G G T C T G N 515 | G G C A C C G A G C A G T T C G C G T C C A T G C G G G A C C T G T A C A T C A A G A A C G G C C A G G G C T T C A T C N 516 | A A A C T G A G C T G A T G A T A A T T A T T A T T C T A G G C C A C A G A A C T G A A A C A T C T T C A G T G T C T A S 517 | G T G G T A A C T A T T G T A C T G A C A A G A A A C C T G C T G C T A T A A A T T G G A T A G A G G G A A G A G G A A N 518 | A T C C T G T G T T G A C A T T C A T C T G A A T C C T A G A G T T C A A G G A G G C C T T C C A G C T G T T T G A C C S 519 | G A G G G G G A T G G G G G G G C A G A T C T T G G A C T C A T G A G G A G G G G C C C C C C T G C C C A G A G G G G T N 520 | C A G C T G T C T C T G T C C C C A C C C A C C C T C C A G A G A A G C C A A G G C A T G C G T G G T G C A C G G C T C S 521 | C C A G T C A C C A C A G G A C C C C T T G T C C C A C A G G T C T C T T T G A G C C T G G A G A C A T G A A A T A C G S 522 | T C A G A C C T G G G C G G G A T G C C C A A T A C G A G C C A G G T G C C A G G G T T C C T G T C T G G C A C A G T C N 523 | C A T G C C T T G A A T T T C T T T T C T G C A C G A C A G G T C T G C C A G C T T A C A T T T A C C C A A A C T G T C S 524 | A A A A G C A T T C T A A G G C T G 
T T T C T C C A C C A G G T T T C C G C C C C A C C A C C T G A C G G T G A T C T T S 525 | G C G G G A G G C C T C T C T A G G A A A T C T G T G G A C T C A C A C G T T T A C T A A T G T T G C T G C A G C C C C N 526 | G A G A A A A G C C C G T C T G T T T G C A G C C C T C T G A A C A T G A C A T C T T C G G T T T G C A G C C C T G C T N 527 | A C T C T C C T C T C C C T C T C T C C C T C C C T C C A G C A A A C C C T C A A G C T G A G G G G C A G C T C C A G T S 528 | C T G C A A A A G C T G G A A G A A G C A G A A A A A G C T G C T G G T G A G A G T G A G A G A G G T A T G A A G G T T N 529 | A A T A T C T T T C G T T G G C T T C C A G G T T A C A G A A A A A T A A T T T G T A A C A A A G T T T A A A G G T C A N 530 | A C C A T C C A G A A T G A C A T C A T G T T A T T G C A G G T A C C A C C T A C C T G G C C C T C T G G C T C C T T C S 531 | G A T G A C C A T T C A G A A A T A A A G C A A A A A G C A G G C C A C A T A C C T T A A C C A A C A C C A A A G A A A N 532 | G A A G C A A A T C C C C G G A G A G G C A A G A T G A G G T T C A G A A G G A A T G C G G C T T C C T T C C C T G G G N 533 | A C C T T T G G C T C G G G A G A G G C A G A C T G T G G G C T G C G A C C T C T G T T C G A G A A G A A G T C G C T G N 534 | A A T T A A A A G T T A A A T A A C T T A T C C T T A C A G G T G A C C T G G T A C T G C T T C A T G T A T G T C C C C S 535 | A G G G G A C C C A G G A C G T C C C C A G T G C C G T T A G C G G C T T T C A G G G G G C C C G G A G C G C C T C G G N 536 | C A T A T A T A G A C T A C A T G C T A G T T A A G T A C A T A G A G G A T G T G T G T G T A T A G A T A T A T G T T A N 537 | G A A T A A A A G T G C A T G C C T T A T A A A A A A A A C A C T T A T G T T T C C A C T A T A C T G T A G T C T A T T N 538 | T C G C T G T G G A T C G G C T A C T C T T T T G T G A T G C A C A C C A G C G C T G G T G C A G A A G G C T C T G G C N 539 | C T C C A C T T A A G G A C C A T G G C C T G T G T C T C T T T T C C A G G G A T G G A G T C T G C T G G C A T C C 
A T N 540 | G C C C G G A C C C C A G A G G C C G G T C C T G T A T T T G T T C C T G G A T G G A A G G A G A A T A A G A A C G G G N 541 | C C C T C A C C C C A A T T C C C C G C T G C C T T C T A G G A T A A G T G T G A G C C A C T G G A G A A G C A G C A C S 542 | A T G G T T A T G G A G G C A T C G C T T T G C C T G A A T G T G A G T T C C C T G C C T C T G T G T T T C A T C C A T S 543 | G C C C A T A T T C T T A T G A A A A T G T G G A C T T T T G G C A A C T T C T G G T G C G A G T T T T G G A C T T C C N 544 | G G G G A A A A A G T A A A A C A A A A T G T A A A T A A G G A G T A A A G C A T G T A C T T A A A A T A T A T T C T T N 545 | C C T G G G C G G G A C G C G C C A G G C C G A C T C C C G G C G A G A G G A T G G G G C C A G A C T T G C G G T C T G N 546 | G G T C T A G T G C C C A T T T A C T C T G G A C T C C G G G T A A G T G G G C T C A C G C C T G C C T G G G G C T C T S 547 | C T A G A G G C C T G A G G A T G A G C T G G A A G G A G T G A G A G G G G A C A A A A C C C A C C T T G T T G G A G C N 548 | A A G C T T C G G G T G G A C C C G G T C A A C T T C A A G G T G A G C G G C G A G C C G G G A G C G A T C T G G G G T S 549 | G C A A A A A G A A T G C C C T G T G C A G A A G A C T A T G T G A G T C T T T A A A A A A A T A T A A T A A A T T A A S 550 | C G T G G G A G T A C T G T G A T G T G C C C T C C T G C T G T A A G G G C T G G G C C C C G G C T G C C T C C C T G C S 551 | A A G G G A T A C T T T G A T A A T T A A G G C C A G A G G C C C A T T A G T T G A G A A A G T C A C A G A T A T A T T N 552 | G C C C A G A C G C C C G C C T T C G A C A A G C C C A A A G T G A G C G C G C G C G G G G G C T C C G G G G A C G G G S 553 | A G A T C T T C C G G C C G G A C A A C T T C G T T T T C G G T G A G C C G T G G C T G G G G A C T G G G C G G C G G C S 554 | G T G T T G C T C C A G T A C G T T G A T G A C C T T T T G C T G G G A C A C C C C A C G G C A G T C G G G T G G C C A N 555 | A G G G A A A A G C C C A T G A T C T T C A A G C A G G G A A G C C 
C C A G T G A G T A G C T G C A T T C C T A G A A A N 556 | G G C T T A A A C T C C A A C T G G G A A C C C A A A A C A T T C A T T T G C T A A G A G T C T G G T G T T C T A C C A N 557 | G G A G A T C T G C T T G A A T G T G C T G A T G A C A G G G T A A A G A G T C G T C G A T A T G C T T T T T G G T A G S 558 | G T C C T T C T T G G T G G G G G G T C C T T C T C C T A G G A A G A A A C C T A T A T C C C A A A G G A C C A G A A G S 559 | T G C T G A T G T C C C T C T G T C G G G T T T T T G C A G C G T C G G C G T G T T C A A G A A C G G C C G C G T G G A S 560 | A A C C A T G A C C A G A A G C T A T G G G T C A A C C C A T C T G A G T T C C T A C C T G A A C G G T T T C T C A C C N 561 | C T A G A G G A A G G C A T C C A A A C G C T G A T G T G G G T G A G G G T G G C A C C A G G G A T C C C A A T C C T G S 562 | A A C T T G G C T T G G G A G T C T G C A T C C A A C A C T T T C T G G C T G T G T G A C C T T G G G C A A G T T A C T N 563 | T G G C C T C C T C T C T T C C T T G G A C C A C A C C A G G T G T G C A G C T G G C T G G C T G G C T G G C A G C G G S 564 | T G G C G A C T A C G G C G C G G A G G C C C T G G A G A G G T G A G G A C C C T C C T G T C C C T G C T C C A G T C C S 565 | T G T T T G A T T T T C T A G G T A A T C A A A T G C A A A G C A G C T G T G C T A T G G G A G G T A A A G A A A C C C N 566 | G T A T G G G C C A G G A G A A A G G G G A A T T C A G A G G T G A G T G G C T C T G C C A G C C A T T T G C C T G G G S 567 | A A T G T A C A G A G C T A A A T A G T A A G A G C C T A A C T C A T C T T T C C T C C C T T C T A T C C T T C T C A A N 568 | G G G G A C A T G G C C C T C T C A G A A C A A G G A A G C T T C A G C T T T G G C A A G G C T C T C C C T C C T T C A N 569 | C C C C A A G T G T G G T T A T G C C T C C T C G A T T G C T C C G T A C T C T A A C A T C T A G C T G G C T T C C C T N 570 | A A C T C T G G G A A C A A G G T T A A A G T G A G C C C G C C A A A A T A G A A G C T T T C C G A G C T T C A C T T T N 571 | C T G T C A A A C T 
G C T C T T G T T C A A T C T C A C A G G C T C C T G G T T G T C T A C C C A T G G A C C C A G A G S 572 | C T G C T G T G A T A T G G A G G A A G A A G A G C T C A G G T G G G G A A G G G A G A A G G G T G G G G T C T G A G T S 573 | G C G G C C T G A C G C T G A G T A C T G G A A C A G C C A G A A G G A C C T C C T G G A G C G G A G G C G G G C C G A N 574 | G G A C G A A G T T G G T G G T G A G G C C C T G G G C A G G T T G G T A T C A A G G T T A C A A G G C A G G T T T A A S 575 | C C C C T C T A A C C A T C T G T G C T T T C T C C C C A G G G A C T T G T A C A G C A A A A G C A C A G C A G C C A T S 576 | C T A C A G C A G C A G C T T C T G C A C A C A T G A C C G G T C A G T C C C T G C C C C C T G C A G T C C T G T C C A S 577 | G G T G G G C C A A G C A A G G C T G T G C A C A T T T T G T C T G C T C C T C G G C G G G C A A C G C A G G C A T G G N 578 | T G T C T T G G G G C A T G T G C A G A G G G G T G G G A C G C C A T C A G C C T T T G A C A G A A T T C T G G G C A G N 579 | T C T G A G G A T G T G A A A C T T A A A T T T G A A G A G G T A A T T T A G A A C A A A A C T G T A A T A C T C A G T S 580 | C A G G A C A A A C C C T A C A A A T G T A C T G A C T G T G G G A A G T C G T T T A A C C A T A A C G C A C A C C T C N 581 | T C A A T T C T A T T C C A T T C G A T T T A G T T C G A T T C T A T T C A C T T C C A T T C C A T T C G A T T C C A G N 582 | C C A A A G G C G G G A A G A T G G T G G G C A G C C C A G A C A C C G T T G G G A T G A A C T A C G G C A G C T A C A N 583 | T A G A T G G T C C T G A G G A A A A G T T T A T A G C T T G T C T A T T T C T C T C T C T A A C A T A G T T G T C A G N 584 | G A C T C C A A T G A A T T T T C T G T A A T T G C T G A C C C A A G A G G A A A C A C T C T A G G A C G G G G A A C G N 585 | T T C G A G G C T C T G G A G T C C T C C A C G G C T A C G G T G A G C C T G G G C T G C T C G G A C G G T G C C G G G S 586 | G G C A T G G A G A A G A T C T G G C A C C A C A C C C T A C A A C A A G C T G C A T G T G T T C C 
T G G A G G A G C A N 587 | A C C A G T C T T G T C A C A G A G G A A A T A G T G A A T G T C C A T A A A G A G A T T C A A A T G T C G G T T G A A N 588 | T G G G T T G A T A T T G C T C T T G A A T G T G A G C G A T A T T T A G C T C C G A A G G G A T T T G G A G G G G T T N 589 | C A C G T C C C C G A G G C T G C C T G T C T T C T A G T T G A C T T T G C A C C T G T C T T C A G G C T G C C A G G G N 590 | T G C T G A C C T T G G C C G T G C T C T T C C T G A C G G G T A G G T G T C C C C T A A C C T A G G G A G C C A A C C S 591 | A C C T T T G G C T T C T C A G C A G G C A G C T G A C T G A G G A G A C T T G G G G T C T T C C T G G C T C A C T A T N 592 | A G A A A C T A T A A A A T C T C T G T A G A T A T T T T C T C A T A A A T A A T C T T A T G A A C C A T T A T G G T G N 593 | G A A G A T G A A A T T A A C C G C C G C A C A G C T G C T G A G A A T G A G T T T G T G G T C C T G A A G A A G G A T N 594 | G A G A A A A G A C A G C T C C G A T G C C G G C A G C A G C A G T T G C A G A A A A G A A T T G C A T A C C T C A G T N 595 | T T G C T C T G G T G A A T T A C A T C T T C T T T A A A G G T A A G G T T G C T C A A C C A G C C T G A G C T G T T T S 596 | G A T C G T A T C T T C C T T C T C A C C A T G C A T T T G T T G A C A G T A T T T T T G A G G C A G T G G C T C C A A N 597 | A A G C A A A G T G A T A T A T A T T T G T T T A T G A A A T G T T A C A T G T A G A A A A A T A C T G A T T T T A A A N 598 | A C A C T G C C A T G T T C T G C C C T G T C C T C A C A G G G A G C T C A G C C C G C T G G A C T T A T A A T G C C A S 599 | C C A G C C T C T C T C A T G C T T T T G G C C A G A C A G G T A A G G G C C A C C C C A G G C T A T G G G A G A G T T S 600 | T A A A A G A C G G A A G A T A C A A T A A T A C T T T C C T T A C A G G G T T C T G A G A C T A C T A A G A G A A C T N 601 | C C C A G G A G G G G T G G A C C C A C A G C C C A G G G A G G C C G A A A G C G C G G G C G G G C A G G C A G A G G C N 602 | A C C T C G G G C C T G C G G C A T G T G C A G C T 
G G C C T T C T T C C C T C C C G G C A C C G T G C A T C C G C T G N 603 | A T T G T T T A A T A G A A G C A G T T C T T C A G T T G C A A A G G T T C T T T G C A G T A G A A T T T T C T C A G C N 604 | G T C A A A A T T A A G C A T A A C T G G C G G A A A C C C A G A G G T C T T A A C A G T A G G G T T C G T A G A A G G N 605 | C A C T A C T G C T A C C C T C A T T T C A C C T G C G C T G T G G A C A C T G A G A A C A T C C G C C G T G T G T T C N 606 | A C T C A C C T T G T C C C C C T T C T C C A T T A T C A G G T T G G C C C A T A A A G C C C T C T G C A C T G A G A A S 607 | C T T C G G G A G T T G A T C T C T A A T G C T T C T G A T G T A G G T G C T C T G G T T T C C A C A T T T G G C A T G S 608 | A A T G T C G G T C T T T T T T T G T G A T T A T T C T A G T T A T C T C C A G A A G A A G A A G A G A A A A G G A G A S 609 | C C G T C T T C C C A G C C C A C C A T C C C C A T C G T G G G C A T C A A T G C T G G C C T G G T T C T C T T T G C A N 610 | T G A G C C C T G C T C C C C T T T T C T C T C C C G C A G A G A A G C T G A A G A A A A C C A A G A T C A T C T T T G S 611 | T A A G G G A A C T C A G A G G C T G G C G G G A T C C T C C A T A T G C T G A A C G C T G G T T C C C C G G G T C C C N 612 | C C T G G T C C C C C T G G C C C T G C T G G C G A G A A A G G A T C C C C T G G T G C T G A T G G T C C T G C T G G T N 613 | C T T T A A A A A A T T A A C A T T T T T C T T T T A T A G G G A T C T G A A A C A A C A T T C A T G T G T G A A T A T S 614 | G C C T C T A T T C G T G C G T C T C C G T C T C C G C A G G T C A G C T C C G C C G A A G G C G C C G C C A A G G A A S 615 | G T G G G C T G A A G C C T G G C T C T G T C C C T G C A G G G T G C C T G G T A T G T G T G G A A C C G C A C T G A G S 616 | T C C T C T C A C C A C T T T T C T T G G T C T C A C C A G G G T G C T G C C G C C C C A G G T A C A G A C C G A G A T S 617 | A T T T T T C A G A T A C A A T G T G A A G A C A T T G A A G A T A T G T G G T C C T C C T G A A A G T C A C T G G C T N 618 | T C 
A A C C A T T A A T G T C T C T T C C T T C C T G C A G C G T T T G T G A G G C A T A T C C T G T C G G G G T G A C S 619 | C G G A G G C G C T G T T C C A G C C T T C C T T C C T G G G T A G G T G T T G T G A G C T A A A G G T T T C T A C T C S 620 | G C G G G C C G G A G C A C G G G C A C C C A G C A T G G G G G T A C T G C T C A C A C A G A G G A C G C T G C T C A G N 621 | C G G C A T C G A C T T G G G G A C C A C C T A C T C C T G G T A A G T G G G G T T G C G G A T G A G G G G G A C G G G S 622 | G A C A A C A G C C T C A A G A T C A T C A G C A A T G C C T C C T G C A C C A C C A A C T G C T T A G C A C C C C T G N 623 | G C C C A G C T A A T T T T T G A A A A C A G G G T C T T T C T A T G T T G C C C A G G C T G G T C T C A A A C T C C T N 624 | G G A G C T G G A G G A C A C A G C A C A T T G G A A A T C A A G A A G A A A A T A A A A G T A A A A A C A G G A A T T N 625 | C T G T C T C A C C T G T G C C T T C T C C C T A C T G A A C A C A C G C A C G G G A T G G G C C T G G G G G G A C C C N 626 | T T C T T T C A A T T C C A T C T C T T A G T T T T C C A T G T A A G T A T T C A G T T T A C A T T T A T G T T G C A G S 627 | C T A A G G A C T T G G G C A G T G G T G G C T G T G G C C A T T T T T T C T C T C C C T C A G A G G T G G T T T T G A N 628 | T C C C G C C T T C T G G G C A T C T G C C T G A C A T C C A C G G T G C A G C T G G T G A C A C A G C T T A T G C C C N 629 | A A C A T T C C T T A T T T T T T G C C T C T A G A T A C C G A C T C A T T G G T T C C T C G T C T G C C A C A T G C A N 630 | C T A G G C T C C A G A T A G C C A T A G A A G A A C C A A A C A C T T T C T G C G T G T G T G A G A A T A A T C A G A N 631 | T G G C T C G C T T T T G G T G T C C A T T T G T G T G G A A T G T C A T T T T C C A C C C C T T T A C C T T A A G T T N 632 | A G A G A A A G T T G G T G G C G A G G C C T T G G G C A G G T T G G T A T C G G G G T T A T A G G G C A G G C T T A A S 633 | T T T T C T G G T A T C T T T G A G G A C A G A A G C T A C A T C T G C A A A A C C 
A C C A T T G G G G A C A G G G A G N 634 | G A T C C C C C T G A C C C A G C A C C C C C T C C G C A G G T G C C G T G C C C C T C A T C C A G T C T C G G A T T G S 635 | T C T C C G T G A T G A A C C G G G C C A A G C A A G C A G G T G A G C T G G G G C C C G C T G T G G G G T C A G G A C S 636 | A A G T A G T A A A A T A T T T T T T T C C C T C T C T A G C T T T A C A A T A A T T T T C C C A G C T T T C T A C A C S 637 | C C G G G T G A A C G T G G A G A G A C T G G G C C C C C C G G A C C A G C G G G A T T T G C T G G G C C T C C T G G T N 638 | A A G G C G A C A C T A A G G A T T G G C C A A G A T G G A A T A T C T A C C A G T G C A A C G A C C A A C T T G A A G N 639 | C C C T C C T A C C G G A A C T G A A C C C T C C A A C A G G G A G C C C C G A G T A T C G G C A G C A G T C A G C A G S 640 | T G C T G T G T T G C T G T C T T T T T C C C T C T C T C T C A C G A A A G G T T T C C C A G G G C A G C C A G G G G C N 641 | C A A A A A G A G C T T T C C T T C T C C A G G A A T A C T G A A C A T G G G A G C T C T T G A A A T A T G T A G T A T N 642 | C C T G C T C T C A C A C C A T C T C C T C C A C T C T A G G G C A G T A C T G C T A T G A A C T G G A C G A A A A G G S 643 | T T G C A G C A A A G A T C T T G G A A A T G C T T G A A G A C C A C A A G G C T G A A A A G T G C T T T G C T G G A G N 644 | T G C T T G C C C C T G C T G C C T C T T C C C T C C C A G G T A T G G G A A A A G A C A C A A A G A G G A C A C G C T S 645 | G G G T C C C A C C A A T C T T G T T T G C T T C T G C A G A G C C T C A G C C T G C C T G G A A G A T G C C G A G A T S 646 | C A C C C C C G G T A T T A A A A C G A A C G G G G C G G A A A G A A G C C C T C A G T C G C C G G C C G G G A G G C G N 647 | T T A T T T T A T A T A T T T A T T T T A T T T T T T T A G A G A G T C T C A C T C T G T C G C C C A A G C T G G A G T N 648 | C T G G A T G A G C T G A A G A A G G A G G T G G C A A T G G T G A G G G A A C T G C T G G G C C A T G G A G G A G G G S 649 | G G A G G G G G T C C T G C A G G G 
C G G G G G G C T G G G A A G G T G G G G A G A G G C T G C C G A G A G C C A C C C N 650 | A C T T C T G C T C A G G T G G A A T T G A A C A A G A A G C G G G A G G C T G A G T T T C A G A A A C T G C G C A G G N 651 | C G C A A C T T G G G C C T G G G C A A G A A G T C G C T G G A G C A G T G G G T G A C C G A G G A G G C C G C C T G C N 652 | A A G G A C C A G T G A A G A C G T C A G G G G C A A G G T C T C G G G G G T C C G G A A G G G T G A T C A T C G A C C N 653 | A T G C C T T G A T A T A G G T A T A G G C A T A T T T A A G T T T A T T A T G A A T T T T G C T G A T A T A G G A T G N 654 | A C C C T T G C C T C C C A C C C T C C G C G C G G C C A G G T G C C A G A G G A C G A G C T G G T G T C C A C G C T G S 655 | C C C C A G C T T A C C G C C C A G T A C G C A G A C T C T G A A G C T T A T T G A G A C T C A C C T G A G A A C T A T N 656 | T A C A A T G C T C A T A A C A T A A A A A A T C A A A C A T A A C A T C A A A T A A A A T A T A T G A T T C T G A T A N 657 | A T C G C A T A T C C T G A C C T C T A T C A C C C T C A G G A A A G T C A G T G G G A G T G G T A A C C A C C A C A C S 658 | C G C T T G G G A C T G C A G C G T G G A G A A C G G C G G C T G C G A G C A C G C G T G C A A T G C G A T C C C T G G N 659 | C A C G C T C T T T A C T C C A T G T G T G G G A C A T T C A T T G C G G A A T A A C A T C G G A G G A G A A G T T T C N 660 | G G T C A G C A C G T C C C C C C T T C C C T C C C G C A G G G A G C G G A C A T G G A C T A C G A C T C G T A C C A G S 661 | C T T C A C T G C T A C A G A G C C A T A G T A C C A G C C A G G A G A A A A C T T C T A A T T C A A G T A G C C T A T N 662 | T G G A C G A C A C G C A G T T C G T G C G G T T C G A C A G C G A C G C C G C G A G T C C A A G A G G G G A G C C G C N 663 | C G C T C C A A C T C T A C C G C T G C T A C C A A T G A G G T T C C T G A G G T C A C A G T G T T T T C C A A G T C T N 664 | G C T A C C C C C T G C G A G G G A G C T C C A T C T T C G G T A G G C C T G G G G A G A G T G G C A G G T G C T G 
C T S 665 | G A C A T C A C A G T A A C C A T G T C T T T T T T C T A G G A T C T G T T C C G G T C T A C T A T G A A G C C C G T C S 666 | G A T G T T A A C C A T T C T C C T T C T C C C C A A C A G T T C C C C A G G G A C C T C T C T C T A A T C A G C C C T S 667 | T A T T T T A T T T A A T T G T T C T A T T T T A T C A T T A G T T A T T G T T T G T T A A T C T C T T G C T G T A C C N 668 | A C T G A G C T T T C T C A C C C T G G T T G A T G G C A G A T T T T A T C C G T G G T G T G G T T G A C T C T G A G G S 669 | A G C C A G G G C A C T C A C C A G G C T G C A A G A A C A G T G C T G G G G T A A G A G G G G A G C G G G G G A T C C S 670 | G G G A G G A G G A G G T T G C A G T G A G C C G A G A T G G T G C C A C T G C A C T C C A G C C T G G C A A C A G A G N 671 | C C A T T C T C C T G C C T C A G C C T C C C G A G T A G C T G G G A T T A C G G G C G C C C G C C A C C A C G C C C G N 672 | A G C C C A G C T G T T T G T C T C C C A T T T T C T C T A C T T C T A A A A T A C A T T T C T T C A C T A A G T G A G N 673 | G G A G C T C C A G A A A C A G C A G T C T T A G G C G C T G A G C T C A G C G C G G T G G G T G A G A A C G G C G G G N 674 | T G C C C T G A A G T G C A C T C A C C C T C C T A C C A G G G A G C C C C G A T T A C C A G C A G C A G G C G G C G G S 675 | G C T G C C T C T C T G T G T C C C T T A C C T C C T C A G G G A C A G C C C T G C C A T C A A G G A A G A C A G C A C S 676 | C A A T T T G C C C C A G A A T C T T A C C G G C T A C A T C T G G T A C A A A G G G C A A A T G A G G G A C C T C T A N 677 | T C C A A T G C C A A T C C A C A T G C C A A T G C A C C T G T G T G A A A T A G G C A C T G A A A G G T G A A C T C T N 678 | T C T T A A T A A C A A T G C A T T A T A C T T T C T T A G A A T T A C A A G A A T C C C A A A C T C A C C A G G A T G S 679 | A G G T G T G C A T C A C C T T T G A C C A G G C T G A C C T G A C C A T C A A G C T G C C A G A C G G A C A T G A A T N 680 | C A G C A T C T C C C C C C T C T G G C C T T C C T G C A G G T G G 
T C G C A T C G A C C A T G G T C A T C A T G A A A S 681 | G G G C T C T C C T G A C A C A C T C T C C C C C T G C A G A G G T C C A G G G G A C C C A A C A G C C C C A G C A A G S 682 | C C C T C C A T C G T G G G G C G C C C C A G G C A C C A G G T A G G G G A G C T G G C T G G G T G G G G C A G C C C C S 683 | T T T T T T G A C A G G G C C T G G C T C T G T T A C T C A G G C T G G A G T C A G T G G C A T G A T C T C T G C T C A N 684 | C G C C G C C G C C G G G G C G G C C C C C G C G C T C T A C C C T G C A C T C G G C C T C A A C G G G C T C C C G C A N 685 | A G G A T T T T A G T T C T G A G C A G G G C A G G A C T G C G C C C C A G G A C C A G A A A G C C A G T A T C C A G A N 686 | G T G A G T C A C C T G T G A C C A C T C T T G T T T C A G G A T G A T A A G G A T G C C T T C T A T G T G G C A G A C S 687 | A G G A A G G G G T G A C A G T G C T T A T A A A C G A A G A C A A A G A G T T G G C T G A G C T G C G A G G T C T G G N 688 | C G T G A G T G A T C C T G T C C T C T C C A C C C G C A G G G C C G C G T T C G C A C C A A A A C C G T G A A G A A G S 689 | C C A A G A A C C T C A T C A T C T T C C T G G G C G A T G G T G A G T G A G C C A G G C C T T C C A G C C C T G C A G S 690 | T C A T G A C A A T G A A G A G A C A T T T T T G A A A A A G T A A G T A A T C A G A T G T T T A T A G T T C A A A A T S 691 | T A C C A T G T A C T C T G C C T T A T C T C C C C C C A G A G T T T G A T G C T C C A A G C C C T C T C C C A G A G A S 692 | A G G G A G G T G T C T G A T T G G T C C A G C T T A G T C C A T G T C C C T A C C C T G A A C A G G G G C A T G G G G N 693 | T T C A G A A G C T G T T C T G G A T T T C T T A C A G T G G T G A G T G G A T G A T C A C C A C C A G T C C T G C C T S 694 | A A A A G A G C T G A T G A A A C A A T G G C A A G A C C T C C A A G G T G A A A T T G A A G C T C A C A C A G A T G T N 695 | C A G G A C T T A G T T A A A A A A T G C T T C T T T C A G G T T T C T C G T T G C T T A C A C A A A G A A A G C C C C S 696 | C A C A T A C C T G 
C A T C A T T T T C A T C C T T T C T T T C A G C A A C T G A C C C T C T C A A C T G G A T G T A G N 697 | C A G G G A C T C A G G C A G A A T T C A A T C T C C G A G G T A G A T T T C C T C G G A G T C T A T T T T T C C C A C S 698 | G A T T T A C A G A T G A T T T T G A A T G G A A T T A A T G T A A G T A T A T T T C C T T T C T T A C T A A A A T T A S 699 | C T T T A A A A A A T T A A C A T T T T T C T T T T A T A G G G A T C T G A A A C A A C A T T C A T G T G T G A A T A T S 700 | C T A A C A A G T T T T T C T G T A T G T A A C T T C T A G G T G A A A G A C C C C T G A C A A A A G A C A A T C A T C S 701 | C T C A C A T T T G C T T G T T T T T C T G G C T C A C A G A C A T G T C G A G G A A G G C T T T T G T G T T T C C C A S 702 | C A G C A G G A C A T C A A G T T C T T G C C G T T C A A G G T T C G A C C G G T T T T C C T C A T C C A G T T A G A G S 703 | C C A C G G C T G G A T G A T G G G A G A T G G C A C C A G G T A A G C T A G C T C T G G T C C T C A G G G G A G G G A S 704 | C T C G C T G A C T C T C C T T T T T C T T T C T C C A A G C C C A A G A G G A G A T C G G C G C G G T T G T C A G C T S 705 | T C T G A T G C C C G C T G T G T T T T T G A C A T G G G G G T G A G T A T A C G T G A C C C T G T T A G G G A A G G G S 706 | T T G G T C C A C C A C T C A C T G T T T T G T T T C C A G G A G G A G A G G G A T C G C C C T C C C A G C T A A C A C S 707 | T A G A G C A A G G G T T C A C T G T T C C T G A A A T C A A G A C C A T C C T T G G G A C C A T G C C T G C C T T T G N 708 | G C G C T G C A G G G C C G C G A G T G C A G C C T G C A G G T A G G G T C C C C G G G C C G G G C C G C C G G G T C C S 709 | A T T C A T T A C T C A G T T G G T G C T G G T A T C A C T G A C C A A G G A G A A G T C C C C A A T G G C T A C A A T N 710 | G T A G A T T T T A T C A G A C T G A A G A G C T A T T G T G T G A G T A T A T T T A A T A T A T G A T T C T T T T T A S 711 | T A T T T C A T C A G G A A G A A T G G A G G T C T G A C C T A A A G G T A G A A A T A T G T C A A 
A T G T A C A G C A N 712 | C C T C T A T C C G T A T T A G G T C T T T G A G A G C T G G A T G C A C C A T T G G C T C C T G T T T G A A A T G A G N 713 | T C A G G C A T C C G C G T C T C C G A C A A C T T G C C G G T G A G T G G G C G C C C C G C G G T G G G G A G G G C G S 714 | G G A C A G C T C A C C T A G C G G C A A T G G C T G C A G G T A A G C G C C C C T A A A A T C C T T T G G G C A C A A S 715 | A A G G C T C A G G A G G A G G G A G A T C A A C A T C A A C C T G C C C C G C C C C C T C C C C A G C C T G A T A A A N 716 | A G A C G A T G A C G A C G A T G G C T C T G A G G G G A C C T C A G G G G C T G C C G A G C T G G G G G G G C G C T C N 717 | A C A G G C C C G G T G G G T C C T G C A G G C C C A G C A G G G A A A C C T G G C C C T G A T G G T C T G A G G G G G N 718 | T G G G C A T G G T G G C T G G A G A T G A G G A G A C C T A T G A G G T A G G G G G T C C C C A G A G T C T C C C T G N 719 | G C C C C C A A C C A C G C T C T G T T G C T C T T G C A G C G A A G C C C A C C A C G A C G C C A G C G C C G C G A C S 720 | A C A G G A A C C A T G A T G C T G G C A T C T G C T C A G C T T C T G A G G A A G C C T C A A G A A A C T T T C A A T N 721 | G C C C G C C T G A G T C C A T C C T T T T C C G G G C A G G T C G C A A T G G A A G A A G A G A T C G C C G C G C T G S 722 | T G G A G G A A A T T C T C G A G G T G A C C T G G A C A G G G C G G G C T G G G C G A G G A C G C T C C C G G G G C C N 723 | G G A C A G C T C A C C T A G C G G C A A T G G C T G C A G G T A A G C G C C C C T A A A A T C C C T T T G G G C A C A S 724 | C A C C C A C C G G C A C A C A G A C C C C A A C A A C G A C A C C C A T C A G C A C C A C C A C C A C G G T G A C C C N 725 | C A T C T T T A T T G T C T C C T T T C A T C T C A A C A G C T C C T G G G A A A T G T G C T G G T G A C C G T T T T G S 726 | C T G G A T T T T T T T C G G G T A G T G G A A A A C C A G G T A A G C A C C G A A G T C C A C T T G C C T T T T A A T S 727 | T G C T G A G G C A A A G G A T G T C T T C C T G G 
G C A T G T A A G T A G A T A A G A A A T T A T T C T T T T A T A G S 728 | A C A G C T G C A T T C T C A T G C T T C C T G C C G C A G T T C T T C C C C A A T C C A G G T C T C C G G A G G C T G S 729 | C C T T G C T C A G A G C G G A G A A A G C A T T T G T T T G T A C A A G A T C C G C A G A C G T G T A A A T G T T C C N 730 | C C T G T C A T C T A T A T C A T G A T G A A C A A G C A G G T G C C T A C T G C G G G T G G G A G G G C C C C A G T G S 731 | T G C G C G G C T A C T A C A A C C A G A G C G A G G C C G G T G A G T G A C C C C G G C C C G G G G C G C A G G T C A S 732 | A C A T G C C G T G G A A G C C G G G G C C T C A C A G A G G T G A G C A G G G A C T G C C A C T G G T T T T G T C C T S 733 | T C T T T T C G T T C C T T G G C G A G G C T T T T G A T G G T A A G G C T T C A G A A G G T T T G C A G G A T T T C T S 734 | G C T C T C C C T G C T T C A G G A G C G G T T G G G G G C C T G T G G C C T G G A G G A G G A G G C A C C A G C T T G N 735 | T T C T G G A G G C A G T A C C A G C A T G G A T G C A G C A T C T G C T T C C G G T G C G G G C C T C A G G A A C C T N 736 | C T T T T A C A A A T C A G G G A A T C A A G G A T C A T A G A A G C C A C G T G C A C T T G T C C A A G T C A A C A T N 737 | A T G T T A T A T G T C A C A T T T T G T A A T T A A C A G C T T G C T G G T G A A A A G G A C C C C A C G A A G T G T S 738 | A G A G A A A G G A G A G A C A A T T A T G T T C C T G A G G T A A A C T T T C T G G A T A T T T G G G C T T C T G G C S 739 | C C C A C G G G G G C C G C A T C G A C T A C A T C G C A G G C G A G T G C C A G T G G C C G C A T C T A G G G C G C T S 740 | C T T G C A C A T A C C T A T G C T T C A A G A A A T C C C A A C A T G A A G A A A G G A G A C G A G T G T A A A A A C N 741 | A T C C T G T G T T G A C A T T C A T C T G A A T C C T A G A G T T C A A G G A G G C C T T C C A G C T G T T T G A C C S 742 | G A T A A A A G G A T T T G G G C T G A A C A G G G T G G A G G G A G C A T T G G A A T G G C A C T C A G G G C A A A G N 743 | T T 
A T G C A T A G A A A G T A G C T G A A G A A A T C A A A A A A T C A G G A A A A G G A C A T A G A T T T G T C T G N 744 | A C T A T A C C C A A A A T C C C C A C C C T T C C C T G G G G A C A C C T G G T C C A C C C T A A G C T G C C T T T C N 745 | A T T C A C G A T A T T T A A G A C A T A A T A T G T G T G T G T G T A T T T A T G A T G C T G T C A C T G T C T C T G N 746 | G A A A A G G A G G G A G C T A C T C T C A G G C T G C G T G T A A G T G A T G G G G G T G G G A G T G T G G A G G A G S 747 | C C A T C A T C A C C C T C T C T C T T T C C C T G A C A G T G C T G A G G C G G C A T T A A G A G G A A G T C C T G G S 748 | G A C A A A T T T G T G C T A C C C T G G T C T T A C C T G G G A C A C C T G G G G A C A C T G A A C T G G T G C T G A N 749 | T C C C G T G C C C T G G G A G C G G G T T C A G G A C C G C G G A T C G G A A G T G A G A A T C C C A G C T G T G T G N 750 | G G T A A T T T T A T T T T G A A A C A G A A G A G A G G A A A A T G C A T G C T T T A A T A A T G T C A G T T T C C A N 751 | C T C G G G G A C G C C C C C A C C A G C T C A A A T T C C G A G T G C A G A A A A G A A A C C G G A C C C C C C A G T N 752 | T G G C T C T G C T G T T G T G G T C G T G C T A A A C C G G T G A G G G C A A T G G T G A G G T C T G G G A A G T G G S 753 | A A T C A A T T A T C T T T A T T T T T G C T G G C T A T G A A A C C A C G A G C A G T G T T C T C T C C T T C A T T A N 754 | T C A A G G A C A C C G T C T A C A C G G A C T T C G A T G G T G A G C C A G G C C C G G G A G G G A G C T G C C C A G S 755 | A C C T T T T T T A T T C T C C T C T T T C T C T A C C A G G T G T G C C T G A C C C T G A C A G C C A A G C G C A T G S 756 | T C T A C C A G G A G T G G C G G T G G G G A C C T G A C A C T A G G G C T G G A G C C C T C T G A A G A G G A G G C C N 757 | G A T T C T C G G G C C G A T G T T C T C A G G A A A A A G G T A A T G G C T T C G C G G G G C T G G G G T G G A G C T S 758 | G A A G T C A A A C A T T T C A A A G T T T G G A T T G C A T C A A G T G G C A T G 
T G C T G T G A C C A T T T A T A A N 759 | G G C T T G A T C A G G A A C T A C T G C A G G A A T C C A G A T C C T G T G G C A G C C C C T T A T T G T T A T A C G N 760 | A T C T C G G C T C A C A G C A A C C T C C G C C T C C C A G G T T C A A G C C A T T C T C C T G C C T C A G G C T C C N 761 | C T G C A A A T G C A A A G A G T G C A A A T G C A C C T C C T G C A A G A A A A G C T G C T G C T C C T G C T G C C C N 762 | A A A G A A G A G A G A T A A G T C C A G C T G A G G A G T C T G T G T T A T G G G A T A A T C G G A A T T T G T A C A N 763 | C A T T C T C A C A T C T G T T C T C T A A C T T T A A C T G A A G T T T T C T A A T T C T C T C C A G C T T G A A A G N 764 | G C A C C C T C T A A A A T A G C T G G A T T T G C T G T C A T C A T T C T T G T T T T T A A C A C A G C A A T A C A C N 765 | T G G G A A C C T C A C C A G C C T G T A T A C C A G T C T T G C T T T A G G G T G A C C T T C T G G C C C T G G A G C N 766 | G A A A A G G A G G G A G C T A C T C T C A G G C T G C A A G T A A G T A T G A A G G A G G C T G A T C C C T G A A A T S 767 | A G C C C T C A A C C C T T C T G T C T C A C C C T C C A G C C T A A A G C T C C T T G A C A A C T G G G A C A G C G T S 768 | T A T A T A T A T A G T T T T T T T T T T T T T A A C T A G A A T G A C C A G T C A A C A G G G G A C A T A A A A G T A S 769 | T G G G C A A A G G T G G A A A T G A A G A A A G T A C A A A G A C A G G A A A C G C T G G A A G T C G T T T G G C T T N 770 | C C C A T C C C A C C C A C G T G T C G C T A T C T C T A G G T G A A G C T A G A G G A A C C A G A C C T C A T C A G C S 771 | G C C C T G C C A C C C C A A G G T G C C T G A G C C C T G C C A C C C C A A A G T G C C T G A G C C C T G C C A G C C N 772 | T G G G A C A C T T A A C A G A T G C A A T G T G C T A C T G A T T G T T T C A T T G C G A A T C T T T T T T A G C A T N 773 | T G T C T C T A C A G A G A G C T T C A G G A G A C T T C C A G A C G A C A A A G C T G A A T G G A T T T G A G G T C T N 774 | T T T A C A G A A T C A G G T T T A 
T T T G T T T C A T A A T T T G T A G A A T T A T C A A G C A T C A T A T T T T A G N 775 | T C A G C T T A C T G A C A C C A G C C C A C T C C A C A G A T G G G G A C C A G T G T G C C T C A A G T C C A T G C C S 776 | A A G C T G C A T G T G G A T C C T G A G A A C T T C A G G G T G A G T C C A G G A G T T T C A G C A G T T T C A G A G S 777 | A G T A T T G T C C C T A G C T G T A C T T A G T A T G C A A A T G A A G T T T G G C C T T G A G T T T C C C T T T T C N 778 | C A G G A G A T T T T A T G C C T C A A A A T A T A G A T G C T T T T T G G A A G G A C A G C A C A T T A A T T A T T T N 779 | C C C T G A T G A A G G C A C C T T G G G G C T T C C C T G C C G C A T C C T C T C C C C T C A G G A A G G G G A C T G N 780 | C C T G G T G A A G A C C A A G G A G G A G C T G C A C C T G G T G A T G A C A G C A C C C C C G C C C C C A C C A C C N 781 | T C C G C T G A C G C T G C T T T G G C T G T C T C C C A G A T G T G G T G G T G C T G A A C T C C A A G A G G A C C A S 782 | T G C C A T T T T A C A A A T A A G A C T T A T A T T T G T C C T T T T G T T T T T C A G C C T A C C A T G A G A A T A N 783 | G A T T T A C A G A T G A T T T T G A A T G G A A T T A A T G T A A G T A T A T T T C C T T T C T T A C T A A A A T T A S 784 | C G C T C A G C C C G C T C C T T T C A C C C T C T G C A G G A G A G C C T C G T G G C A G G C C A G T G G A G G G A C S 785 | C T C T C T T T T A T G C T T C T C T T T T C A C T C T G G A G A C A G A G C T G T A G G A G A A G G A A G C T C C C T N 786 | T T T A T T A C T T A T T T G G G G T T G C A T C A G A A A T G T C T G G A G A A T A A T T C T T T G A T T A T G A C T N 787 | C C C A C C C C C A T C C C C T A T G G C T C T T C C T A G G A G A C C C C A G C A A G C A G A A C T C A C T G C T C T S 788 | C T G G A G G T G G G C A G G A A G G A C C G A G G T C T A A A G C T G G A G G T G G C T G C T C A G A G T C C C A G C N 789 | C A T C T G C T C A C T T C C T T C A A A A T G G G A A A G G T A A G T C C T G G G T A C C G G A T G C T C A G C C 
T T S 790 | T T T C C T G T G T G T T C C A T G C A G T A A T G T A T C C T C T G T T C C T T T C A G C C T C C T T G G A A G G G C N 791 | T A A A T T C T T C T G T T T G T T A A C A C C T T T C A G A C T T A T G T G T A T G A A G G A G T A G A A G C C A A A S 792 | G T C T T A A T C C A T C C A C C A G A G T C T A G A A G G C C A G A C G G G C C C C G C A T C T G T G A T G A G A A T N 793 | A T T C C C T A C G G A A A C T A G C A A T G G G T C T C C T G G C C A T C T G G A T C T C G T G G A A G G G A G C C T N 794 | T C T C T T C C C T T C C C C T C T C T C T T T C T T T C T T T T C T C T C C T C T T C T C T T C T T T C C T C T C T T N 795 | A A G A A G G C C A G T T G T A T G G A C C G T G T G G T G G T G G T G G G G G T G G T G G C G G C G G C G G C G G C G N 796 | G G G C G G G T C T C A G C C C C T C C T C G C C C C C A G G C T C C C A C T C C A T G A A G T A T T T C T T C A C A T S 797 | A A A C T A C C C C T A A A A G C C A A A A T G G G A A A G G A A A A G A C T C A T A T C A A C A T T G T C G T C A T T N 798 | G C A C A G C C A C T G C C G G T C C T T C C C C T G C A G A A T C T A G A G C T G C T C C G C A T C T C C C T G C T G S 799 | G A A A G A A A C G A G G A A G A C G G G T T T T C G T C T A T A G T T G A A G C T T T T A C T A G G A T C T T G C C T N 800 | C C C T C T C C T G C A G G G C C A G T C A G A G T G T T A G C A G C A G C T A C T T A A C C T G G T A T C A G C A G A N 801 | C C C C C T G C T C C T C C C C C A T C A T G T C T C C A G A T G A A G T T C G C A C T G G C A C C T A C C G C C A G C S 802 | C C T C A A T G A A A A A T C T A T T A C A A T A G T G A G G A T T A T T T T C G T T A A A C T T A T T A T T A A C A A N 803 | A T A T G A G G A A G A A A T C A A G A T T C T T A C T G A T A A A C T C A A G G A G G C A G A G A C C C G T G C T G A N 804 | C C A A G G C T A A C T T C T G T T T T T G T T A C T T A G T T G G A G A A G G A A C G A G A G A A G G A A A T T A G T S 805 | G T C T C T C T C T C T C T C T C T C T T T C T C T G C A G G T T C 
T C C C C A T G A C A C C A C C T G A A C G T C T C S 806 | A A A G T G T A A T T T A A A C T A G G G T C T A A A T G A T A A G T C C G G A G T C C T T C C A G T G A A A T A A G G N 807 | A A G A C T T T T T A T A G G C T G G T C A C A C C C G G A G C A G G A G T C A G C C C C A G T C A G G A C A C A G C A N 808 | T A G G A T T T G G G G A T T G A A G C C C G G C T G A T G G T A G G C A G A A C T T G G A G A C A A T G T G A G A A G N 809 | C A G A T T G T G T G G A A T G G T C C T G T G G G G G T A T T T G A A T G G G A A G C T T T T G C C C G G G G A A C C N 810 | G A G T A T G G A T T T C A C A T T C T A A C A C A T T A G A A G C T G C A G G A T G C T G A A A T T G C A A G G C T G S 811 | A G A T A G A G G A T G A C A T T G G A A G G A G G G C A G A A A T G A A T G A A C T T A T G G A G C A G A C C T C G G N 812 | G G T G A A G A C A T T G T G G C T G A C C A C G T T G C C T C T T G T G G T G T A A A C T T G T A C C A G T T T T A C N 813 | G A T G G G A G A A C C T G C G T A G A C T C C G G G A G G C T C C C T C G G G C G A C T T G G A T C T C C A T G T C G N 814 | G A G G T G A A G G A C G T C C T T C C C C A G G A G C C G G T G A G A A G C G C A G T C G G G G G C A C G G G G A T G S 815 | T G A C A T G C T G G A G T C T A G A A C C T C C C A A A C T C A T T T G G T G T T G A G A A T T G C C T G A G G T G C N 816 | T T G C T C A G C C T C G C T G C T T G C C T C C T G C A G A C G C C G C C A G G C C G A G C C A G T T C C G G G T G T S 817 | T T T T A G A A C A T T T T C A T C A C C C C T A A A A G A A A C C C T G C A C C C A T T A G C A G T C C C T C C A C A N 818 | T C C C T C T G T T G C C C T C T G G T T T C T C C C C A G G C T C C C G G A C G T C C C T G C T C C T G G C T T T T G S 819 | C G G T T G G A G A A A G G G T T C C A A G T G G T G G T G A C C T T G C G G A G A G A A G A T T T T G A T G T G G A A N 820 | T G C T G T C A A T C A A C C A A A C T T A C T G A G A A G G G A G G C C C C G A T C T G C T G T C A A T C A T C A A A N 821 | T A C T A T T A A C 
A C A A T T C T T T T A T G T T T C A G T T C G A T G A A T T T A A A C C T C T T G T G G A A G A G S 822 | T A A T A C A A A C A G A A A T C A G A C C T T T A A G A G T T G A G A G T A A G T A G G A G A G T T A G G A C T T G T N 823 | T C T C C C C A T A T G C A A T T T G C T T A A T G T A A C C T C T T C T T T T G C C A T G T T T C C A T T C T G C C A N 824 | A G A A G G G G A A G G A G A C G C T G C T T C A C C T G G G T A A G A G G G T C C A C A G G G C T A C T C T C C C A T S 825 | A C C A A A G C T T T T C A T G G A T T A G G A A A A A A T C A T T T T G T C T C T A T G T C A A A C A T C T T G G A G N 826 | C A G G G C T T C G A G C T C A T C T A G A T G A G G A G C T C C A A G C C A C A C T C C A C G A C T T T A G A C A T C N 827 | T A G G C T C C A G C A G G C T G G C T C A G G C T T C A T T C C A T G G T C C T C T G T T G G T T C C T A G T A G C A N 828 | G G G C A C A G T G C C A C T C A G T G C C T G T C A A A A G T A T G T G C T G A G G C T G G A A G G T G G T G C A T G S 829 | C G C T C C T G G T T C C C C C T C A T T T C C T C C C A G G G G C C A C T T T T G A C A A A C G A T C C C C T A C G T S 830 | C T G C C C T C A T G C C C T C G C G T C T T C C C C C A G G A G T G C A T C C G G G C C T G C A A G C C C G A C C T C S 831 | T G A A G A A G A C A G G G A G A C A T T C A T G A A C A A G T A A G G A T C C A G T T T A A A G G T A G A T G C A A A S 832 | C T G C T T G A G C C C A G G A G T T T G A G A C C A G C C T G G G C A A C A A A G T G A G A C T C C A T C T C T A C A N 833 | C C A G C T G A C T G T C T A C T G T C A C C T T A T T T C T A C C T C A G A T C A C C T T C T A C G A G G A C C G A G N 834 | G A T A C T G C T C A A T G G C A C A G A C C G G A A G A T T G T G T A T G A A G G T C C T G A G C T G A A C C A C G C N 835 | G A C A G T G G G T T C T T C T G T G A A G A G A A T T C C A G T G A T G A T G A T G T G A T T C T G A A A G A A G A A N 836 | C C T C C C C C T A C C C C A G G T G G C C A T C C A G C T C A A T G A C A C C C A C C C C T C C C 
T G G C C A T C C C N 837 | C G C T C A G C C C G C T C C T T T C A C C C T C T G C A G G A G A G C C T C G T G G C A G G C C A G T G G A G G G A C S 838 | A A A T T T T G G G A C C C G A A C T T T C C A A G C C A T G T A A G T T C A A G T T C T A T C T A G G G A A G A G G G S 839 | A A C C T C C A C T C C C A A C C C T A C C C C A G G T C T C C T A A T T T C A A T G G G A A G A C C A T A A T T C A C N 840 | C G C C C T A A C G C G G C C C C C T C G C C C C T G C A G C C T A A T G G C A C T C G A G T G C C C A T G G A A G T G S 841 | G G G C T G C G T T G C T G G T C A C A T T C C T G G C A G G T A T G G G G C G G G G C T T G C T C G G T T T T C C C C S 842 | T G G T A A T T G T G A A T A T G A C A T C A T T T T C A G G T T T G G C C T C A C A A G G A C T A C C C T C T C A T C S 843 | C C T G A G C A C A G A C G G C T G T T C T C T T T C A A G G T T A C A A G C C T G A T G A A G G G A A A C G A G G G G S 844 | C C C C C T T C T T T A G T G C C C A G C A G T C C A A C T C A T T G A A C T A T G G G G G C A T C G G C A T G G T C A N 845 | A C C A C T C T G C T T C G G G C T C T G G G A G C C C A G G T G A G T A G G A G C G G A C A C T T C T G C T T G C C C S 846 | A T T T G C A T T T T A A A A A T T T T C C T C A T T T A G C A C C A A C T G T G C A C T G A A G A A A T C T T T C A G S 847 | G C G G T T G A T T G A C A G T T T C T C C T T C C C C A G A C T G G C C A A T C A C A G G C A G G A A G A T G A A G G S 848 | A G G T T C A A C A T C G G T G G C C C C A C A T C C T C C A T T C C C A T C T T G T G C T C C T A C T T C T T T G A T N 849 | G G G C T T C T C C A G T T G C T A G C T T T C A G T T T C T T A G C C C T G T G C A G A G C C C G A G T G C G C G C T N 850 | G A T C C G C C G C C C G T C C A C A C C C G C C G C C A G G T A A G C C C G G C C A G C C G A C C G G G G C A T G C G S 851 | C C C C A C C C C T C A C T C T G C T T C T C C C C G C A G G A T G T T C C T G T C C T T C C C C A C C A C C A A G A C S 852 | C C G C T G C G G G T A C T A A G A G C C A G G C A 
A C A G G T A G G T G C T C C C T C C A C C C C A G G G G T C C T G S 853 | C G C C C T C C T C C C T T G T T T A G A A T T G T C T T C G T C G T C A T G A T A A A C C C G T T C T G T G T C C C C N 854 | T T T T T T T T T T C T T T T T C C A T T C A A A C T C A G T G C A C T T G T T G A G C T C G T G A A A C A C A A G C C S 855 | T C T C A C T G C C C C G A C C T C T G T C T T C T A C A G A A C A C C T T A G G C T G G T G G G G C T G C G G C A A G S 856 | C A G C T G C T A C T G G T T C T C T C G C T C C G G G A A G G C C T G G G C T G A C G C C G A C A A C T A C T G C C G N 857 | G A G A G A A A G G T G C T C C T G G A G A A A A A G G T C C C C A A G G G C C T G C A G G G A G A G A T G G A G T T C N 858 | A G G G C C C C T T A C G T T C C C C T C T T T T C C C A G A G C C G G C T T C C C A G C C C A C C A T C C C C A T C G S 859 | A C T A C T A C A A A C T G C G C A C A G A A G G A G A T G G T A A G A T G T G G A C A A C T G T C T C C A T G C C C T S 860 | A T G A C T C C C A G A C C T C C T T C T G C T T C T C A G A C T C T A T T C C G A C A T C C T C C A A C A T G G A G G S 861 | C C C A G G G A G C A C T A A G C G A G C A C T G C C C A A C A A C A C C A G C T C C T C T C C C C A G C C A A A G A A N 862 | A G T G T C A C A A G A A A T G A A A C A G G A C C C T A T C A A T G T G A A A T A C G G G A C C G A T A T G G T G G C N 863 | T C C G C T G A C G C T G C T T T G G C T G T C T C C C A G A T G T G G T G G T G C T G A A C T C C A A G A G G A C C A S 864 | A C A A A A A T G G C A A C A T G G C A G G C A T C T C A G A C C A A A G G A T C G C C A T C G A C C T G T T C A A G C N 865 | C A T C T C T A T T G T C T C T T T C C A C C T T C A C A G C T C C T A G G A A A T G A G C T G G T G A T T G T C T T G S 866 | T A A C T T T C A G G C T G T A C A A A A T G G C A G T T G G A T T T A T G C T T G C T C A T C C T T A T G G A T T T A N 867 | T C T C C G T G G T G A A T C G G G C C A A G A A A G C A G G T G G A G C T G G G G C C C G G C T G T G G G G T C A G G S 868 | G C 
C C T G G C G C C C A G C A C C A T G A A G A T C A A G G T G A G T C G A G G G G T T G G T G G C C C T C T G C C T S 869 | T A T A G A C T G A A T T T T G G A A G C A G T A T G T T G G T A A G C A A T T C A T T T T A T C C T C T A G C T A A T S 870 | T C C C C T C A C A G G G A A A T T T T C T T C C C A C A G G T G G A A A A G G A G G G A G C T A C T C T C A G G C T G S 871 | A A T G G A C A A G C C T C A T C C C A A A C T C C A T C A C C T T T C A T A T T A A C A C A A A A C T G G G A G T G A N 872 | G T T T T T A A T A T G C C A C T T T T T C T T T C T C A G G C A A C T C A T G C A G C A A T T C C A G A A C C C C G A S 873 | A A C T C C A C A G T C C C C C G C T G C G G G A A C C A T G G A C A A A A C T G C T T C A C T C T A C T A A A A A G T N 874 | C A A C C C G C C C T G A T C C T T T T A A G A A G T T G G C G A T T T G G C T T T T T G A A A A G C A A T A A T A C A N 875 | C C A G G G G T C T G A G T C T C A C A G C T T G T A A A G G T G A G A T T C T G G G G G T C T G A A G T G G G T G G A S 876 | G C A C A G C C A C T G C C G G T C C T T C C C C T G C A G A A T C T A G A G C T G C T C C G C A T C T C C C T G C T G S 877 | G A C A T C G A G A T C G C C A C C T A C C G C A A A C T G C T G G A G G G C G A G G A G T A C A G A A T G T C T G G A N 878 | C T G G C C T T T G C T T G G G C T T C T A G A C A C C T T T C A C T G C A C T C A C T G T C T T C T C C C C C T C C T N 879 | A T C C T T G G T T G T G T T T C C T T G G C C C T G C A G C A G G A T G A A G C T C T C C A C T G G C A T C A T T T T S 880 | G T T T T T C T T C C G C C T T C T G G T T G A C T T T A G C C A G A A C C A G T G C T T C T A T A A C T C C A G T T A S 881 | G G T T C A A G G T A A T G T T A C A G A T C G C T G A T T T A T T C T T G T A A A T T C A A A G G T A T G T C T T T T N 882 | A G C C G G G C A G A G A C A C A A C A A A A A A A A G A A T T T T A G A C C A A T A T C C T T G A T G A A C A T T G A N 883 | A G G T G G C C A C C A A C A G C G A G C T G G T G C A G A G C G G C A A G A G C G 
A G A T T T C G G A G C T C T G G C N 884 | A C T G G C C T C G T G G T G T A T G A C T A C C A G C A G G T G G G T A T G C C A G A C C T C C T G A C C T G G A C C S 885 | G A G A G A G A G A C C A G A A A T A A T C T T G C T T A T G C T T T C C C T C A G C C A G T G T T T A C C A T T G C A N 886 | A C A G C T G C A T T C T C A T G C T T C C T G C C G C A G T T C T T C C C C A A T C C A G G T C T C C G G A G G C T G S 887 | C G G A G A C G C T C T T C C A G C C C T C C T T C A T C G G T G A G C C C C G C T C G C C C T C G C C C C G G C C C C S 888 | A A C C T T T C C T C T G T C C T C T C T T T T G T A T A G G C C G A C C A G T G C A C G C G T C T C C A G G G C T T C S 889 | C G C A G C G G A G G T G A A G G A C G T C C T T C C C C A G G A G C C G A C T G G C C A A T C A C A G G C A G G A A G N 890 | G G G C T C T C C T G A C A C A C T C T C C C C C T G C A G A G G T C C A G G G G A C C C A A C A G C C C C A G C A A G S 891 | T G T G C T G A C A G C A G C T C A T T G C T G G G G A A G G T G A G G A G C T A A G G A A C T T C C T G G C C A G C C S 892 | C G A G T G A C A A G C C T G T A G C C C A T G T T G T A G G T A A G A G C T C T G A G G A T G T G T C T T G G A A C T S 893 | T C T G T G C C A G C T T C T T A T C C T A T A G A G G A G A A A G C T C A A A G A T G A A A T G A A T C T C C T T C T N 894 | A G C T A A G T C C T G C C C T C A T T T C C C T C T C A G G C A T G G A G T C C T G T G G C A T C C A C G A A A C T A S 895 | C T G G C C T G A G G A C C C G G C G C T G A G T G G C G G G G C C C C T G C T C C G A G G G G C T C A T G T T C A G G N 896 | T C C G G T G T G G A G T C T G G A G A C G A C G T G C A G G T A G G A G C C C G G G C G C G A C A A T C G G G G G G C S 897 | T T T T T T G A G A T G G A G T T T T G C T C T T G T T G C C C A G G C T G G A G T G C A A T G G T G C A T C T T G G C N 898 | T G C G A C A C C C T C C C G C C C T C T C G G C C G C A G G G C G C T G A T G G A C G A G A C C A T G A A G G A G T T S 899 | C C C T C G T G C G C T C C A C G A 
C C A A G A C C A G C G G T G A G C C A C G G G C A G G C T G G G G T C G T G G G G S 900 | A C A G T G C A T G C C A T C C T G A A C T A A G T G T C C T C T G G G G C T G G G G A C A G A G C T T G G G C C A G G N 901 | T G A T G G G G G C A G G G G A T G G A A C A T C A C A C A T G G G C A T A A A G G A A T C T C A G A G C C A G A G C A N 902 | C C A G C C T C T C T C A T G C T T T T G G C C A G A C A G G T A A G G G C C A C C C C A G G C T A T G G G A G A G T T S 903 | G C A G T G G T T G A T G A A T A C C A A G A G G A A C A G G T A A G A G T C T A A G C C T G G C T C A A A A C T T G C S 904 | T C G G C A G G T G G G C A A C A A G A C C G A G T G T G C T C T G C T A G G C T T T G T C A C A G A T C T G A A G C A N 905 | A G G G T G G T T T T T C T T T T A A C C A G T T G A G A C G G G C A A A A G C T T G A T T G C C C T T T A A C C G C C N 906 | C T G C C C C T G C C T T T G G C C C T C T A C C C A C A G G G A T G A T C C A G G C A C T G G G T G G C T T C T T C A S 907 | A G A C A C C G C C C A C C A C C A G C A G C G A C T C T G G T A A G C G A A G C C C G C C C A G G C C T G T C A A A A S 908 | C A G A C A T G A T T T C G G A T T C C C C G A G G A G G A G T T T G A T G G C C A C C A G T T C C A G A A G A C T C A N 909 | G G G A T G G A G T T T T C A A G T C C T T C C A G A G A G G T A A G A G A G A G A G C T C C C A A T C A G C A T T G T S 910 | T C C C C A G G G C A G G G A A G G C C T A A T T G G T C C A T G G C C G T G T C C T C G G G A C C C A G G A C A G C A N 911 | G G T G T G G T G G C A C G C G T C T A T A A T C C C A G C T A C T C A G G A G G C T G A G G C A G G A G A A T C A C T N 912 | C A A G A A G C C A A A G C A G T G A A G G T T G C A G A G A A T T T T C T T A C T A A A A T G C C A A G G A T T C A T N 913 | C A T T G C C T G T A T T C A G T G G A G C C T G G A G C A A T G A G G A A G A G G G A G T C C A A C A T G T C A A T A N 914 | C A G C T G A C T C C C A G A G T C C A C T C C C T G T A G G T C G G G C A G C A G G C C G T A G A A G T C T G G C 
A G S 915 | A G C C C T C A A C C C T T C T G T C T C A C C C T C C A G C C T A A A G C T C C T T G A C A A C T G G G A C A G C G T S 916 | T C C A T C C G C T C C T G T T G T C T C A T C C C C C A A G T G A G T T T T C T A G A T T T C C A T C A T G C C G C C S 917 | A A A C T G A G C T G A T G A T A A T T A T T A T T C T A G G C C A C A G A A C T G A A A C A T C T T C A G T G T C T A S 918 | T A T A T A T C C A C A T G T G C T C A G T A C C A G G T G G G A G T T G A T T T G T C T T C T C A C T G T T C T C T T N 919 | C A C C T G G T G T C T G C T A C C A T G A G T G G G G T C A C C A C C T G C C T G C G C T T C C C A G G C C A G C T C N 920 | T T A T G T T G T A C C A A T C A C T G T T C T T C A C A G A T C A T T C T C A G C T T C C C A G G C A G A C C C A C T S 921 | C T C C A C C T C C G G C G G C T C C T C C T A G G A A A C C A G C T C G T G G G C C G G G A A C G G G G G A G A G A A N 922 | G A C G A T A A G G A G A C C T G C T T T G C C G A G G A G G T A C T A C A G T T C T C T T C A T T T T A A T A T G T C S 923 | G A T C C C C T G A A C C C C T G C C T C T G C C T C C A G A G T G C C C C T C C G G C C T C G C C A T G A G G C T C T S 924 | T C C A A C A T G G A G G A A A C G C A A C A G A A A T C C G T G A G T G G A T G C C G T C T C C C C T A G G C G G G G S 925 | A A T A T C T C A A A G A A T A A G C C A C T C T T G A A A T T C C A A G T A G G A T T A T T T T T C T A A A T A T A G N 926 | C A T T A G C A G C A T C C C T C T A C A A G G T G C A T T T A A C T A T A A G T A T A C T G C C T G C C T A T G T G A N 927 | T T G C C C A A A A C A T C T G G C A A A G T T G A A T T G C T T C C A A A A G T T C A C A T T T A T C A G A A G G A C N 928 | G C A T C T A T C G C A T G A T C A A G C T A G G T C T A G G T A A G T A G C T T T G G T A C T T G G T G T G G C A A G S 929 | T G A C T T T G C A C C T G C T C T G T G A T T A T G A C T A T C C C A C A G T C T C C T G G T T G T C T A C C C A T G N 930 | A A C T A A A T C A A G A A G A A A T C T C T G T C A A T G G A A G 
A A A T T A A G T T T G T A A T T T T A A A A A T C N 931 | A A C T G A A A G T A C T C C C T C C T T T T C T G G C A G G A C G A C A A C T T A A T G C C T G C C T A T T A C A A A S 932 | C C T A A A T G T T A T G A G G A T C A T C A A C G A G C C G T A A G T A T G A A A T T C A G G G A T A C G G C A T A T S 933 | G G C T T C C C G T G C A A C C A G T T T G G G C A T C A G G T G C G C C G G G C G G A G C G G G A C G G G A C G G G G S 934 | T T T A A A A T A T T T A T C T G A T T A A G T T G T C T A A A C A A T G C T G A T T T G G T G A C C A A C T G T C A C N 935 | A T T G A T C A G T C C A T T G A C T T C G A G A T C G A C T C T G C C A T T G A C C T C C A G A T C A A C A C A C A G N 936 | T G A A G T G T C T A A T G C A T T A A C T T T T G T A A G G T A C T G A A T A C T T A A T A T G T G G G A A A C C C T N 937 | T C T G G G C T C C A G G C A G A A G C A C A G C C T C C C C G A C C T G C C C T A C G A C T A C G G C G C C C T G G A N 938 | T A G C A C A T G T G A C T G G A C T C C T C T C C C C A G G T C G T C A G T G A G G C C A C A C A G C A G C A G C A T S 939 | C C T C T C T G T A A C T A C C T G T T G T C T C T C C A G C T G T C T A T C C A C G A G C G A G A A G A C A G C C C C S 940 | T C T T T G G A G G A A C A G C T C C C T A G T G G C T T C C T C C G T C T G C A A T G T C C C T T G C A C A G C C C A N 941 | C A T A C A C C A G A A T T C A G A T C A T G A A T G A C T G A C A G A A T A T T C T G T T G G G C A G T C C T G A C T N 942 | T G T G G A G A C A T G G C T G G A T T C C G C C A A A G A A A T A A A A A A G C A G G T T C G T G G T G T C C C T T G N 943 | T C T T C A T T A T G A T C T C C A T A T T G G A C A A A T G A A C T G A A C A G A G A T A A A A A T T C C C C A T C A N 944 | T T G C A T T G T G G T C T C C C A T C A A G A C T C A G C G G C A C T C G A T C T G G G A C A G A C C T C A C T C T C N 945 | T T G T G T A T G G T G T G A G G T A G G G C T T G A G G T T T A T G T T T T G T T T A T G G G T G T A C A G T T G T T N 946 | C T G A C A G A A T 
G A A C C T G C A G A C C C T G C G C G G C T A C T A C A A C C A G A G C G A G G C C A G T G A G T N 947 | C A G G G A G G C A C C C C C A C G C C T G G C T T G G C T G A T G T T T G T G T T T T T A G T A G G C A C G C C G T G N 948 | G C T A T T A C T A C C A A G C C T G T T A G T T A A G G G C A A A G G C A A G A A A T T G T A A T T T G G G G C T G T N 949 | T G T G G G C C A G T T C T G G G A G G A G G G C T G C G A T G T G T G C A C C T G C A C C G A C A T G G A G G A T G C N 950 | T G T T A A T C C T A T T T T A T T G A A A A T G A A C T A A G A C T C A G A G A G A T A A A G C T G T T G C C C A A T N 951 | G A C T A A C G A C A C A T A C A T G A A A T T T A G C T G G T T A A C G G T G C C A G A A G A G T C A C T G G A C A A N 952 | G C C C C C A C T G T G G A G A T A A G A A G G G G A T G G A A T G G G G G A A G A G G A G G A G C A G G A G G C C C T N 953 | C A G A T C T T C C T C C T C T C C C A A A A C C T C C A G T G T A T G C C A C G T T G A G G G A C C T G A A A A C A C S 954 | G G G G C A G G A C T C C A C C C G A T C A T T C C C C A G A T T C A G C A G C G A C T G C A G G A G G A G C T A G A C S 955 | G G A T T T C C A G C C T C C C C T C T G C A A A G A T G T G A A C G A G T G T G C A G A C G G T G C C C A C C C C C C N 956 | G A T T C C C C T G A T C T A G C A C C C C C T C T G C A G G C G C T G C G C C C C T C A T C C T G T C T C G G A T T G S 957 | C T G G G C A G G G G G A G T T C A G G G C C C T T A A T G C T C G C C T G C C C C T G A A C A C T G A T G C C T A C T N 958 | C T C C A G G G A G G A A A G G A C T T T G G C T T T C T A G C A G A T A A T C T T C C T T G C T A C T T G G A A G T C N 959 | G C C T T G C C A A G G T C C T C C C T C T G C A G C T G C C A G A A G C A G G A G T C C C A A G T G A C A G G A C C T N 960 | A A A A G T G C T A T C T T A G T G T G T A A A A G G T C A T T C A G T G G C A T G A C T T A G A G G G A T T A G A G T N 961 | A T G A A T T T G T T C A T G A A T A T T T T T C T G T A G T G T G A A A C A G C T G C C C T G T G 
T G G G A C T G A G S 962 | C C C T A T T T G T G C A G T T T C C A G T T G G A G A T T T C A A T C A C T T T G A G A C C A A A T G T A C A A A A G N 963 | A A A G C T C A C A T T T C C A G A A A C A T T C C A T T T C T G C C A G C A C C T A G A A G C C A A T A T T T T G C C N 964 | T C C T C G C G T C G C G G G T G T G C C C G G A G A G G C T G A G C A G C C T G C G C C T G A G C T G G T G G A G G T N 965 | C G G C T G T C T A C A C T T G C T C T A T T T C C T G C T G C G C C A G C C C C A G G C C T G G A A G G A C C G C T T N 966 | A A C T G A A A G T A C T C C C T C C T T T T C T G G C A G G A C G A C A A C T T A A T G C C T G C C T A T T A C A A A S 967 | C C A A C A T G C C T G C C T C A T T G A G G G C A A G G G G T A A G G A C T G G G G G G T G A G G G T T G G G A G G A S 968 | G C A C T A C C A C C C T C A C C C C T T C C T C A A C A G G T A G T G G G G A T C T G G G G T G G G G G G C A G T G G S 969 | T C T T C C T C A C C C T G T C C G T G A C G T G G A T T G G T G A G A G G G G C C A T G G T T G G G G G G A T G C A G S 970 | C C A T C G A G A A G C T C G C G G T G G A A G C C C T G T C C A G C C T G G A C G G C G A C C T G G C G G G C C G A T N 971 | A G A C A G C A A A T T C C C T A T T T T A T T T C T C A G A T T G T C A C T G C T G T T C C A A G G G C A C A C G C A S 972 | A A A G A G A A T A A A A T A C C T A G G A A T C C A A C T T A C A A G G G A T G T G A A G G A C C T C T T C A A G G A N 973 | A A G C T G A G A G T G G A C C C T G T C A A C T T C A A G G T G A G C C A C C A G T C G G G T G G G G A G G G T G A G S 974 | G G C C C C A T G C C A A C C T C A A G C A G A T C C G C C T C A T C G A G A A G C A G G C C C C T G A G A A C A A G G N 975 | G A T C A A A A A A C C A A T A A A G A T G T A T C T C T A C A A C G G C T G G T G G A G T G G T A G A G T G G A A A G S 976 | A C A G C T A A A A T C C T T G C C T G T G A A T G A A A A G A A G G A A A T T T A T T G A C A G A A C A G C A A A T G N 977 | A A G T C C A C C T G C C C C A T C C T C T G T T C 
C C A G G T G A T C C T A G G T G G A G G C C G A A A G T A C A T G S 978 | G A C A A C G A T A C C T T C T A C G A C C T G G G C T T G A T G G G C T A T G T C A G T G G C T T C G G G G T C A T G N 979 | A A T T G A A G C T T T T T C A G A G C A G A G C A A A G A C C T C C C T G C A G C T T T C T T G G C A C C A G A A T A N 980 | T T G A A G T T G C C T A G A C C A G A G G A C A T A A G T A T C A T G T C T C C T T T A A C T A G C A T A C C C C G A N 981 | T G T T T T C T C T T C T A T T C C C T T G G C T C A C A G G G A C C C G A G T G T A T A G C C C T C C A G A G T G G A S 982 | C T A G C G G G C A G C T C G A G G A A G T G A A A C T T A C A C G T T G G T C T C C T G T T T C C T T A C C A A G C T N 983 | G C C T C G G A C T T G G A A A C G T C C G G G T T A C A G G T G A G A G C G G A G G G C A G C T C A G G G G G A T T G S 984 | A C C A A G G C T G T C A C C A C A T T C A C C A T C A C T G T G C G T C C C A A G A A C G C A G C C C T G G G T C C C N 985 | C T C C C C A C C C A C C T G T C C A C C C G C C C G C A G A T C G C T T C C T G G A G C C A G G C A A G A A C T C C A S 986 | T G C G C G G C T A C T A C A A C C A G A G C G A G G C C A G T G A G T A A C T C C G G C C C A G G G A G C A G A T C A S 987 | C C T G A A C C A C C T T C A A C T T G T T C C A C A C A G G A T G C C A G G C C A A G G T G G A G C A A C C G G T G G S 988 | C A G G T T T C T G C G G C C C C C C G G A C A G T G G C T C T G A C G G C G T T A C T G A T G G T G C T G C T C A C A N 989 | G A G T A A A C T T T T G C T G G G C T C C A A G T G A C C G C C C A T A G T T T A T T A T A A A G G T G A C T G C A C N 990 | C C T T A T G A C C C C G G C C A C C T T C C T G C C C A G G C G G G G T C G C T A A G G C C T C A G G A G G A G A A A S 991 | C T G C C A C A T G A C A C C C C C T C A A T G T T C C A G G T C T C T G G A C A C T A T G G G C A C A C G A C T C C T S 992 | A T T G C T C T G A A G C A A C A G C G T A C C A A G A A A A A T A A A G A A G A G G C T G C A G A A T A T G C T A A A N 993 | G A 
T C C C C G G C C T G C C T G G G C C T G G G C C T T G G T G G G T T T G G T T T T G G T T T C C T T C T C T G T C S 994 | G G A G G C A G G A A T G C A G C A T C C C T G T C T G T G G T A G G C T G G G G G C A G T G G G G C G A C C C A T G A S 995 | C C A G A C C A T T G G C T T G A G T G C A G C C G C C C G C T T T A A C C A G T G C A A C A C G A C A C G C G G C A A N 996 | T C T T A G G A G G T T T T G T C T C T T C C C T T T T A G G A T C A A T C T C C C C A T C C A G A C C T A C T C T G C S 997 | T C A G G T T G T C T G A A G T C A C T G C A C A G T G C A T C T C A G C C C A C A T A G T G A T G G T T C C C C T G T N 998 | A C C C C C G C G G C T T C T A C C T C T T T G T G G A G G G T G C G T G G T G G C C C C T G G G G A G T G G A G G A A S 999 | T A T T C T G C C C A T T T G G G G A C A C T T C C C C G C C G C T G C C A G G A C C C G C T T C T C T G A A A G G C T N 1000 | A G C T G A A G C C G T G C C T G G C T G T C T T T G C A G A T C A T C G C A C C C C C A G A G C G C A A G T A C T C A S 1001 | -------------------------------------------------------------------------------- /data/ocr17.names: -------------------------------------------------------------------------------- 1 | 1 7 2 | 3 | 1:1 numeric 4 | 2:1 numeric 5 | 3:1 numeric 6 | 4:1 numeric 7 | 5:1 numeric 8 | 6:1 numeric 9 | 7:1 numeric 10 | 8:1 numeric 11 | 9:1 numeric 12 | 10:1 numeric 13 | 11:1 numeric 14 | 12:1 numeric 15 | 13:1 numeric 16 | 14:1 numeric 17 | 1:2 numeric 18 | 2:2 numeric 19 | 3:2 numeric 20 | 4:2 numeric 21 | 5:2 numeric 22 | 6:2 numeric 23 | 7:2 numeric 24 | 8:2 numeric 25 | 9:2 numeric 26 | 10:2 numeric 27 | 11:2 numeric 28 | 12:2 numeric 29 | 13:2 numeric 30 | 14:2 numeric 31 | 1:3 numeric 32 | 2:3 numeric 33 | 3:3 numeric 34 | 4:3 numeric 35 | 5:3 numeric 36 | 6:3 numeric 37 | 7:3 numeric 38 | 8:3 numeric 39 | 9:3 numeric 40 | 10:3 numeric 41 | 11:3 numeric 42 | 12:3 numeric 43 | 13:3 numeric 44 | 14:3 numeric 45 | 1:4 numeric 46 | 2:4 numeric 47 | 3:4 numeric 48 | 4:4 
numeric 49 | 5:4 numeric 50 | 6:4 numeric 51 | 7:4 numeric 52 | 8:4 numeric 53 | 9:4 numeric 54 | 10:4 numeric 55 | 11:4 numeric 56 | 12:4 numeric 57 | 13:4 numeric 58 | 14:4 numeric 59 | 1:5 numeric 60 | 2:5 numeric 61 | 3:5 numeric 62 | 4:5 numeric 63 | 5:5 numeric 64 | 6:5 numeric 65 | 7:5 numeric 66 | 8:5 numeric 67 | 9:5 numeric 68 | 10:5 numeric 69 | 11:5 numeric 70 | 12:5 numeric 71 | 13:5 numeric 72 | 14:5 numeric 73 | 1:6 numeric 74 | 2:6 numeric 75 | 3:6 numeric 76 | 4:6 numeric 77 | 5:6 numeric 78 | 6:6 numeric 79 | 7:6 numeric 80 | 8:6 numeric 81 | 9:6 numeric 82 | 10:6 numeric 83 | 11:6 numeric 84 | 12:6 numeric 85 | 13:6 numeric 86 | 14:6 numeric 87 | 1:7 numeric 88 | 2:7 numeric 89 | 3:7 numeric 90 | 4:7 numeric 91 | 5:7 numeric 92 | 6:7 numeric 93 | 7:7 numeric 94 | 8:7 numeric 95 | 9:7 numeric 96 | 10:7 numeric 97 | 11:7 numeric 98 | 12:7 numeric 99 | 13:7 numeric 100 | 14:7 numeric 101 | 1:8 numeric 102 | 2:8 numeric 103 | 3:8 numeric 104 | 4:8 numeric 105 | 5:8 numeric 106 | 6:8 numeric 107 | 7:8 numeric 108 | 8:8 numeric 109 | 9:8 numeric 110 | 10:8 numeric 111 | 11:8 numeric 112 | 12:8 numeric 113 | 13:8 numeric 114 | 14:8 numeric 115 | 1:9 numeric 116 | 2:9 numeric 117 | 3:9 numeric 118 | 4:9 numeric 119 | 5:9 numeric 120 | 6:9 numeric 121 | 7:9 numeric 122 | 8:9 numeric 123 | 9:9 numeric 124 | 10:9 numeric 125 | 11:9 numeric 126 | 12:9 numeric 127 | 13:9 numeric 128 | 14:9 numeric 129 | 1:10 numeric 130 | 2:10 numeric 131 | 3:10 numeric 132 | 4:10 numeric 133 | 5:10 numeric 134 | 6:10 numeric 135 | 7:10 numeric 136 | 8:10 numeric 137 | 9:10 numeric 138 | 10:10 numeric 139 | 11:10 numeric 140 | 12:10 numeric 141 | 13:10 numeric 142 | 14:10 numeric 143 | 1:11 numeric 144 | 2:11 numeric 145 | 3:11 numeric 146 | 4:11 numeric 147 | 5:11 numeric 148 | 6:11 numeric 149 | 7:11 numeric 150 | 8:11 numeric 151 | 9:11 numeric 152 | 10:11 numeric 153 | 11:11 numeric 154 | 12:11 numeric 155 | 13:11 numeric 156 | 14:11 numeric 157 | 1:12 numeric 158 | 2:12 
numeric 159 | 3:12 numeric 160 | 4:12 numeric 161 | 5:12 numeric 162 | 6:12 numeric 163 | 7:12 numeric 164 | 8:12 numeric 165 | 9:12 numeric 166 | 10:12 numeric 167 | 11:12 numeric 168 | 12:12 numeric 169 | 13:12 numeric 170 | 14:12 numeric 171 | 1:13 numeric 172 | 2:13 numeric 173 | 3:13 numeric 174 | 4:13 numeric 175 | 5:13 numeric 176 | 6:13 numeric 177 | 7:13 numeric 178 | 8:13 numeric 179 | 9:13 numeric 180 | 10:13 numeric 181 | 11:13 numeric 182 | 12:13 numeric 183 | 13:13 numeric 184 | 14:13 numeric 185 | 1:14 numeric 186 | 2:14 numeric 187 | 3:14 numeric 188 | 4:14 numeric 189 | 5:14 numeric 190 | 6:14 numeric 191 | 7:14 numeric 192 | 8:14 numeric 193 | 9:14 numeric 194 | 10:14 numeric 195 | 11:14 numeric 196 | 12:14 numeric 197 | 13:14 numeric 198 | 14:14 numeric 199 | -------------------------------------------------------------------------------- /data/ocr49.names: -------------------------------------------------------------------------------- 1 | 4 9 2 | 3 | 1:1 numeric 4 | 2:1 numeric 5 | 3:1 numeric 6 | 4:1 numeric 7 | 5:1 numeric 8 | 6:1 numeric 9 | 7:1 numeric 10 | 8:1 numeric 11 | 9:1 numeric 12 | 10:1 numeric 13 | 11:1 numeric 14 | 12:1 numeric 15 | 13:1 numeric 16 | 14:1 numeric 17 | 1:2 numeric 18 | 2:2 numeric 19 | 3:2 numeric 20 | 4:2 numeric 21 | 5:2 numeric 22 | 6:2 numeric 23 | 7:2 numeric 24 | 8:2 numeric 25 | 9:2 numeric 26 | 10:2 numeric 27 | 11:2 numeric 28 | 12:2 numeric 29 | 13:2 numeric 30 | 14:2 numeric 31 | 1:3 numeric 32 | 2:3 numeric 33 | 3:3 numeric 34 | 4:3 numeric 35 | 5:3 numeric 36 | 6:3 numeric 37 | 7:3 numeric 38 | 8:3 numeric 39 | 9:3 numeric 40 | 10:3 numeric 41 | 11:3 numeric 42 | 12:3 numeric 43 | 13:3 numeric 44 | 14:3 numeric 45 | 1:4 numeric 46 | 2:4 numeric 47 | 3:4 numeric 48 | 4:4 numeric 49 | 5:4 numeric 50 | 6:4 numeric 51 | 7:4 numeric 52 | 8:4 numeric 53 | 9:4 numeric 54 | 10:4 numeric 55 | 11:4 numeric 56 | 12:4 numeric 57 | 13:4 numeric 58 | 14:4 numeric 59 | 1:5 numeric 60 | 2:5 numeric 61 | 3:5 numeric 
62 | 4:5 numeric 63 | 5:5 numeric 64 | 6:5 numeric 65 | 7:5 numeric 66 | 8:5 numeric 67 | 9:5 numeric 68 | 10:5 numeric 69 | 11:5 numeric 70 | 12:5 numeric 71 | 13:5 numeric 72 | 14:5 numeric 73 | 1:6 numeric 74 | 2:6 numeric 75 | 3:6 numeric 76 | 4:6 numeric 77 | 5:6 numeric 78 | 6:6 numeric 79 | 7:6 numeric 80 | 8:6 numeric 81 | 9:6 numeric 82 | 10:6 numeric 83 | 11:6 numeric 84 | 12:6 numeric 85 | 13:6 numeric 86 | 14:6 numeric 87 | 1:7 numeric 88 | 2:7 numeric 89 | 3:7 numeric 90 | 4:7 numeric 91 | 5:7 numeric 92 | 6:7 numeric 93 | 7:7 numeric 94 | 8:7 numeric 95 | 9:7 numeric 96 | 10:7 numeric 97 | 11:7 numeric 98 | 12:7 numeric 99 | 13:7 numeric 100 | 14:7 numeric 101 | 1:8 numeric 102 | 2:8 numeric 103 | 3:8 numeric 104 | 4:8 numeric 105 | 5:8 numeric 106 | 6:8 numeric 107 | 7:8 numeric 108 | 8:8 numeric 109 | 9:8 numeric 110 | 10:8 numeric 111 | 11:8 numeric 112 | 12:8 numeric 113 | 13:8 numeric 114 | 14:8 numeric 115 | 1:9 numeric 116 | 2:9 numeric 117 | 3:9 numeric 118 | 4:9 numeric 119 | 5:9 numeric 120 | 6:9 numeric 121 | 7:9 numeric 122 | 8:9 numeric 123 | 9:9 numeric 124 | 10:9 numeric 125 | 11:9 numeric 126 | 12:9 numeric 127 | 13:9 numeric 128 | 14:9 numeric 129 | 1:10 numeric 130 | 2:10 numeric 131 | 3:10 numeric 132 | 4:10 numeric 133 | 5:10 numeric 134 | 6:10 numeric 135 | 7:10 numeric 136 | 8:10 numeric 137 | 9:10 numeric 138 | 10:10 numeric 139 | 11:10 numeric 140 | 12:10 numeric 141 | 13:10 numeric 142 | 14:10 numeric 143 | 1:11 numeric 144 | 2:11 numeric 145 | 3:11 numeric 146 | 4:11 numeric 147 | 5:11 numeric 148 | 6:11 numeric 149 | 7:11 numeric 150 | 8:11 numeric 151 | 9:11 numeric 152 | 10:11 numeric 153 | 11:11 numeric 154 | 12:11 numeric 155 | 13:11 numeric 156 | 14:11 numeric 157 | 1:12 numeric 158 | 2:12 numeric 159 | 3:12 numeric 160 | 4:12 numeric 161 | 5:12 numeric 162 | 6:12 numeric 163 | 7:12 numeric 164 | 8:12 numeric 165 | 9:12 numeric 166 | 10:12 numeric 167 | 11:12 numeric 168 | 12:12 numeric 169 | 13:12 numeric 170 | 14:12 
numeric 171 | 1:13 numeric 172 | 2:13 numeric 173 | 3:13 numeric 174 | 4:13 numeric 175 | 5:13 numeric 176 | 6:13 numeric 177 | 7:13 numeric 178 | 8:13 numeric 179 | 9:13 numeric 180 | 10:13 numeric 181 | 11:13 numeric 182 | 12:13 numeric 183 | 13:13 numeric 184 | 14:13 numeric 185 | 1:14 numeric 186 | 2:14 numeric 187 | 3:14 numeric 188 | 4:14 numeric 189 | 5:14 numeric 190 | 6:14 numeric 191 | 7:14 numeric 192 | 8:14 numeric 193 | 9:14 numeric 194 | 10:14 numeric 195 | 11:14 numeric 196 | 12:14 numeric 197 | 13:14 numeric 198 | 14:14 numeric 199 | -------------------------------------------------------------------------------- /results/forest_census.txt-plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/charliermarsh/java-ml/c81ff4f30b464da9ad3d4afbdc5d5621a88cae61/results/forest_census.txt-plot.png -------------------------------------------------------------------------------- /results/forest_dna.txt-plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/charliermarsh/java-ml/c81ff4f30b464da9ad3d4afbdc5d5621a88cae61/results/forest_dna.txt-plot.png -------------------------------------------------------------------------------- /results/forest_ocr17.txt-plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/charliermarsh/java-ml/c81ff4f30b464da9ad3d4afbdc5d5621a88cae61/results/forest_ocr17.txt-plot.png -------------------------------------------------------------------------------- /results/forest_ocr49.txt-plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/charliermarsh/java-ml/c81ff4f30b464da9ad3d4afbdc5d5621a88cae61/results/forest_ocr49.txt-plot.png -------------------------------------------------------------------------------- /src/Adaboost.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * This is a decision tree class used in a decision forest implementation. The 3 | * constructor takes the training set and the attributes to use for building 4 | * the tree. Basic algorithm based on R\&N 18.3: Page 702. 5 | * 6 | * http://csmr.ca.sandia.gov/~wpk/pubs/publications/pami06.pdf 7 | */ 8 | import java.io.*; 9 | import java.util.Random; 10 | import java.util.ArrayList; 11 | import java.util.HashSet; 12 | import java.util.Collections; 13 | public class Adaboost implements Classifier{ 14 | 15 | Random random; 16 | DecisionTree[] forest; 17 | 18 | public Adaboost(DataSet data, int forestSize) { 19 | random = new Random(); 20 | 21 | forest = new DecisionTree[forestSize]; 22 | 23 | /* 24 | * This is basically a nonsensical way of choosing attributes/examples 25 | * to train each tree on. Can't really find much on optimal values. 26 | * Want to make sure the tree works before tweaking, though. 27 | * 28 | * Picks a random number of attributes/examples to train each tree. 29 | */ 30 | ArrayList attributes = new ArrayList(data.numAttrs); 31 | ArrayList examples = new ArrayList(data.numTrainExs); 32 | for (int i = 0; i < data.numAttrs; i++) { attributes.add(i); } 33 | for (int i = 0; i < data.numTrainExs; i++) { examples.add(i); } 34 | 35 | //Train each tree by choosing a subset of features. Actually just using 36 | //every feature in this case. 
37 | int numFeatures = data.numAttrs; 38 | 39 | //Train each tree on a sample of 2/3 of the examples 40 | int numTrain = 2 * data.numTrainExs / 3; 41 | 42 | for (int cTree = 0; cTree < forestSize; cTree++) { 43 | 44 | /* Need to decide how to select number of features*/ 45 | //int numFeatures = random.nextInt(data.numAttrs - 1) + 1; 46 | //int numTrain = random.nextInt(data.numTrainExs); 47 | HashSet treeAttributes = new HashSet(numFeatures); 48 | ArrayList treeExamples = new ArrayList(numTrain); 49 | 50 | //Randomize the list 51 | Collections.shuffle(attributes); 52 | for (int i = 0; i < numFeatures; i++) { 53 | treeAttributes.add(attributes.get(i)); 54 | } 55 | 56 | Collections.shuffle(examples); 57 | for (int i = 0; i < numTrain; i++) { 58 | treeExamples.add(examples.get(i)); 59 | } 60 | 61 | //System.out.println(numFeatures + ":" + numTrain); 62 | forest[cTree] = new DecisionTree(data, treeAttributes, 63 | treeExamples, false); 64 | //forest[cTree] = new DecisionTree(data, treeAttributes); 65 | } 66 | } 67 | 68 | /* 69 | * Takes the majority vote of the decision trees 70 | */ 71 | public int predict(int[] ex) { 72 | int[] count = new int[2]; 73 | for (DecisionTree tree : forest) 74 | count[tree.predict(ex)]++; 75 | return (count[1] > count[0] ? 1 : 0); 76 | } 77 | 78 | /** This method should return a very brief but understandable 79 | * description of the learning algorithm that is being used, 80 | * appropriate for posting on the class website. 81 | */ 82 | public String algorithmDescription() { 83 | return "Basic decision forest - uses our DecisionTree"; 84 | } 85 | 86 | /** This method should return the "author" of this program as you 87 | * would like it to appear on the class website. You can use your 88 | * real name, or a pseudonym, or a name that identifies your 89 | * group. 90 | */ 91 | public String author() { 92 | return "dmrd"; 93 | } 94 | 95 | /* 96 | * Simple main for testing. 
97 | * 98 | */ 99 | public static void main(String argv[]) 100 | throws FileNotFoundException, IOException { 101 | 102 | if (argv.length < 2) { 103 | System.err.println("argument: filestem forestSize"); 104 | return; 105 | } 106 | 107 | String filestem = argv[0]; 108 | 109 | /* 110 | * Create a cross validation set - just takes the last crossSize 111 | * elements of the set as a cross set. 112 | */ 113 | DiscreteDataSet d = new DiscreteDataSet(filestem); 114 | 115 | /* 116 | * Do the Knuth Shuffle! It sounds like more fun than it is! 117 | */ 118 | //Set seed to constant to get the same result multiple times 119 | Random random = new Random(); 120 | for (int i = 0; i < d.numTrainExs; i++) { 121 | int swap = random.nextInt(d.numTrainExs - i); 122 | int[] tempEx = d.trainEx[swap]; 123 | d.trainEx[swap] = d.trainEx[d.numTrainExs - i - 1]; 124 | d.trainEx[d.numTrainExs - i - 1] = tempEx; 125 | int tempLabel = d.trainLabel[swap]; 126 | d.trainLabel[swap] = d.trainLabel[d.numTrainExs - i - 1]; 127 | d.trainLabel[d.numTrainExs - i - 1] = tempLabel; 128 | } 129 | 130 | int crossSize = d.numTrainExs/4; 131 | 132 | int[][] crossEx = new int[crossSize][]; 133 | int[] crossLabel = new int[crossSize]; 134 | 135 | int[][] dEx = new int[d.numTrainExs - crossSize][]; 136 | int[] dLabel = new int[d.numTrainExs - crossSize]; 137 | 138 | for (int i = 0; i < d.numTrainExs - crossSize; i++) { 139 | dEx[i] = d.trainEx[i]; 140 | dLabel[i] = d.trainLabel[i]; 141 | } 142 | 143 | for (int i = 0; i < crossSize; i++) { 144 | crossEx[i] = d.trainEx[d.numTrainExs - i - 1]; 145 | crossLabel[i] = d.trainLabel[d.numTrainExs - i - 1]; 146 | } 147 | 148 | //Modify original dataset 149 | d.numTrainExs = dEx.length; 150 | d.trainEx = dEx; 151 | d.trainLabel = dLabel; 152 | 153 | System.out.println("Training classifier on " + d.numTrainExs 154 | + " examples"); 155 | 156 | Classifier c = new DecisionForest(d,Integer.parseInt(argv[1])); 157 | 158 | System.out.println("Testing classifier on " + 
crossEx.length 159 | + " examples"); 160 | int correct = 0; 161 | for (int ex = 0; ex < crossEx.length; ex++) { 162 | if (c.predict(crossEx[ex]) == crossLabel[ex]) 163 | correct++; 164 | } 165 | System.out.println("Performance on cross set: " 166 | + (100*correct / crossEx.length) + "%"); 167 | } 168 | 169 | } 170 | -------------------------------------------------------------------------------- /src/BaselineClassifier.java: -------------------------------------------------------------------------------- 1 | import java.io.*; 2 | 3 | /** 4 | * This is the class for an extremely simple learning algorithm that 5 | * finds the most frequent class in the training data, and then 6 | * predicts that each new test example belongs to this class. 7 | */ 8 | public class BaselineClassifier implements Classifier { 9 | 10 | private int most_frequent_class; 11 | private String author = "Rob Schapire"; 12 | private String description = "A very simple learning algorithm that, " 13 | + "on each test example, predicts with the most frequent class seen " 14 | + "during training"; 15 | 16 | /** 17 | * This constructor takes as input a dataset and computes and 18 | * stores the most frequent class 19 | */ 20 | public BaselineClassifier(DataSet d) { 21 | int count[] = new int[2]; 22 | 23 | for (int i = 0; i < d.numTrainExs; i++) 24 | count[d.trainLabel[i]]++; 25 | 26 | most_frequent_class = (count[1] > count[0] ? 1 : 0); 27 | } 28 | 29 | /** The prediction method ignores the given example and predicts 30 | * with the most frequent class seen during training. 31 | */ 32 | public int predict(int[] ex) { 33 | return most_frequent_class; 34 | } 35 | 36 | /** This method returns a description of the learning algorithm. */ 37 | public String algorithmDescription() { 38 | return description; 39 | } 40 | 41 | /** This method returns the author of this program. */ 42 | public String author() { 43 | return author; 44 | } 45 | 46 | /** A simple main for testing this algorithm. 
This main reads a 47 | * filestem from the command line, runs the learning algorithm on 48 | * this dataset, and prints the test predictions to filestem.testout. 49 | */ 50 | public static void main(String argv[]) 51 | throws FileNotFoundException, IOException { 52 | 53 | if (argv.length < 1) { 54 | System.err.println("argument: filestem"); 55 | return; 56 | } 57 | 58 | String filestem = argv[0]; 59 | 60 | DataSet d = new DataSet(filestem); 61 | 62 | Classifier c = new BaselineClassifier(d); 63 | 64 | int correct = 0; 65 | for (int i = 0; i < d.numTrainExs; i++) { 66 | if (c.predict(d.trainEx[i]) == d.trainLabel[i]) 67 | correct++; 68 | } 69 | 70 | System.out.println((100*correct/d.numTrainExs) + "%"); 71 | 72 | d.printTestPredictions(c, filestem); 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/BinaryDataSet.java: -------------------------------------------------------------------------------- 1 | import java.io.*; 2 | 3 | /** 4 | * This is a subclass of NumericDataSet (which, in turn, is a 5 | * subclass of DataSet) representing a dataset all of whose 6 | * attributes are binary, i.e., numeric with only the two values 0 and 7 | * 1. In addition to all of the fields and methods inherited from 8 | * DataSet, the class includes a constructor that reads in 9 | * data from data files and converts all attributes to binary. 10 | **/ 11 | public class BinaryDataSet extends NumericDataSet { 12 | 13 | /** This constructor reads in data from the files 14 | * filestem.names, filestem.train and 15 | * filestem.test, converts all attributes to binary 16 | * format, and sets up all of the public fields. 
17 | **/ 18 | public BinaryDataSet(String filestem) 19 | throws FileNotFoundException, IOException { 20 | super(filestem); 21 | 22 | int new_numAttrs = 0; 23 | 24 | int[][] cont_vals = getContVals(); 25 | 26 | for (int a = 0; a < numAttrs; a++) 27 | new_numAttrs += cont_vals[a].length - 1; 28 | 29 | for (int traintest = 0; traintest < 2; traintest++) { 30 | int[][] exs = (traintest == 1 ? trainEx : testEx); 31 | for (int i = 0; i < exs.length; i++) { 32 | int[] new_ex = new int[new_numAttrs]; 33 | for (int a = 0, na = 0; a < numAttrs; a++) { 34 | for(int j = 1; j < cont_vals[a].length; j++) 35 | new_ex[na++] = (exs[i][a] >= cont_vals[a][j] 36 | ? 1 : 0); 37 | } 38 | exs[i] = new_ex; 39 | } 40 | } 41 | 42 | String[] new_attrName = new String[new_numAttrs]; 43 | for (int a = 0, na = 0; a < numAttrs; a++) { 44 | if (cont_vals[a].length == 2 45 | && cont_vals[a][0] == 0 46 | && cont_vals[a][1] == 1) 47 | new_attrName[na++] = attrName[a]; 48 | else 49 | for(int j = 1; j < cont_vals[a].length; j++) 50 | new_attrName[na++] = attrName[a] 51 | + ">=" + cont_vals[a][j]; 52 | } 53 | 54 | attrName = new_attrName; 55 | numAttrs = new_numAttrs; 56 | 57 | attrVals = new String[numAttrs][]; 58 | } 59 | 60 | /** This constructor creates an empty binary dataset. */ 61 | public BinaryDataSet() { 62 | super(); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/Classifier.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This is the interface for a classifier. A classifier only needs 3 | * three methods, one for evaluating examples, one for returning a 4 | * description of the learning algorithm used, and a third for 5 | * returning the "author" of the program. Generally, the actual 6 | * learning will go into the constructer so that the computed 7 | * classifier is returned. 
8 | */ 9 | public interface Classifier { 10 | 11 | /** A method for predicting the label of a given example ex 12 | * represented, as in the rest of the code, as an array of values 13 | * for each of the attributes. The method should return a 14 | * prediction, i.e., 0 or 1. 15 | */ 16 | public int predict(int[] ex); 17 | 18 | /** This method should return a very brief but understandable 19 | * description of the learning algorithm that is being used, 20 | * appropriate for posting on the class website. 21 | */ 22 | public String algorithmDescription(); 23 | 24 | /** This method should return the "author" of this program as you 25 | * would like it to appear on the class website. You can use your 26 | * real name, or a pseudonym, or a name that identifies your 27 | * group. 28 | */ 29 | public String author(); 30 | } 31 | -------------------------------------------------------------------------------- /src/DataSet.java: -------------------------------------------------------------------------------- 1 | import java.util.*; 2 | import java.io.*; 3 | 4 | /** 5 | * This class represents a dataset, including names for the classes, 6 | * all attributes and their values. The class also includes a 7 | * constructor that can read the dataset from data files, as well as a 8 | * method for printing the predictions of a classifier on each of the 9 | * test examples in the format required for submission. 
10 | **/ 11 | public class DataSet { 12 | 13 | /** number of training examples **/ 14 | public int numTrainExs; 15 | 16 | /** number of test examples **/ 17 | public int numTestExs; 18 | 19 | /** an array of training examples, each of which is itself an 20 | * array of integer values so that trainEx[i][a] is the 21 | * value of attribute a on example i **/ 22 | public int trainEx[][]; 23 | 24 | /** an array of labels for the training examples **/ 25 | public int trainLabel[]; 26 | 27 | /** an array of test examples, each one an array of integer values **/ 28 | public int testEx[][]; 29 | 30 | /** number of attributes **/ 31 | public int numAttrs; 32 | 33 | /** the names of the attributes **/ 34 | public String attrName[]; 35 | 36 | /** an array of names for the attribute values: 37 | * for discrete attribute a, attrVals[a][j] is 38 | * the name of the j-th value; if a is a numeric 39 | * attribute then attrVals[a] is null **/ 40 | public String attrVals[][]; 41 | 42 | /** names of the two classes **/ 43 | public String className[] = null; 44 | 45 | /** This constructor constructs an empty dataset with no training 46 | * examples, no test examples, no attributes, and two classes with 47 | * default names. 48 | */ 49 | public DataSet() { 50 | numTrainExs = 0; 51 | numTestExs = 0; 52 | trainEx = new int[0][]; 53 | trainLabel = new int[0]; 54 | testEx = new int[0][]; 55 | numAttrs = 0; 56 | attrName = new String[0]; 57 | attrVals = new String[0][]; 58 | className = new String[2]; 59 | className[0] = "0"; 60 | className[1] = "1"; 61 | } 62 | 63 | /** This constructor reads in data from the files 64 | * filestem.names, filestem.train and 65 | * filestem.test, and then sets up all of the public 66 | * fields. See assignment instructions for information on the 67 | * required format of these files. 
68 | **/ 69 | public DataSet(String filestem) 70 | throws FileNotFoundException, IOException { 71 | String[] words = null; 72 | 73 | // read .names file 74 | 75 | open_file(filestem + ".names"); 76 | ArrayList attr_list = new ArrayList(); 77 | 78 | String line; 79 | while((line = read_line()) != null) { 80 | line = line.trim( ); 81 | words = line.split("\\s+"); 82 | if (line.equals("")) 83 | continue; 84 | 85 | if (className == null) { 86 | if (words.length != 2) { 87 | String err = "expected two class names at line " 88 | + line_count + " in file " + filename; 89 | System.err.println(err); 90 | throw new RuntimeException(err); 91 | } 92 | className = words; 93 | } else { 94 | if (words.length <= 1) { 95 | String err = "expected attribute description at line " 96 | + line_count + " in file " + filename; 97 | System.err.println(err); 98 | throw new RuntimeException(err); 99 | } 100 | attr_list.add(words); 101 | numAttrs++; 102 | } 103 | } 104 | 105 | in.close(); 106 | 107 | attrName = new String[numAttrs]; 108 | attrVals = new String[numAttrs][]; 109 | 110 | for (int i = 0; i < numAttrs; i++) { 111 | words = attr_list.get(i); 112 | attrName[i] = words[0]; 113 | if (words[1].equals("numeric")) { 114 | attrVals[i] = null; 115 | } else { 116 | attrVals[i] = new String[words.length - 1]; 117 | for (int j = 1; j < words.length; j++) { 118 | attrVals[i][j-1] = words[j]; 119 | } 120 | } 121 | } 122 | 123 | // read data files 124 | 125 | for(int traintest = 0; traintest < 2; traintest++) { 126 | ArrayList ex_list = new ArrayList(); 127 | ArrayList lab_list = new ArrayList(); 128 | 129 | if (traintest == 1) 130 | open_file(filestem + ".train"); 131 | else 132 | try { 133 | open_file(filestem + ".test"); 134 | } catch (FileNotFoundException e) { 135 | System.err.print("Continuing without test file...\n"); 136 | numTestExs = 0; 137 | testEx = new int[0][]; 138 | continue; 139 | } 140 | 141 | while((line = read_line()) != null) { 142 | line = line.trim( ); 143 | if 
(line.equals("")) 144 | continue; 145 | 146 | words = line.split("\\s+"); 147 | if (words.length != numAttrs + traintest) { 148 | String err = "wrong number of tokens at line " 149 | + line_count + " in file " + filename; 150 | System.err.println(err); 151 | throw new RuntimeException(err); 152 | } 153 | 154 | int ex[] = new int[numAttrs]; 155 | for (int i = 0; i < numAttrs; i++) { 156 | if (attrVals[i] == null) { 157 | try { 158 | ex[i] = Integer.parseInt(words[i]); 159 | } catch (NumberFormatException e) { 160 | System.err.println("Expected integer in field " 161 | +(i+1)+" at line "+line_count+ 162 | " in file "+filename); 163 | throw e; 164 | } 165 | } else { 166 | int j; 167 | for (j = 0; 168 | j < attrVals[i].length 169 | && !attrVals[i][j].equals(words[i]); 170 | j++); 171 | if (j >= attrVals[i].length) { 172 | String err = "bad attribute value in field " 173 | +(i+1)+" at line "+line_count+" in file "+filename; 174 | System.err.println(err); 175 | throw new RuntimeException(err); 176 | } 177 | ex[i] = j; 178 | } 179 | } 180 | ex_list.add(ex); 181 | if (traintest == 1) { 182 | int lab; 183 | if (words[numAttrs].equals(className[0])) { 184 | lab = 0; 185 | } else if (words[numAttrs].equals(className[1])) { 186 | lab = 1; 187 | } else { 188 | String err = "unrecognized label at line "+line_count+ 189 | " in file "+filename; 190 | System.err.println(err); 191 | throw new RuntimeException(err); 192 | } 193 | lab_list.add(new Integer(lab)); 194 | } 195 | } 196 | 197 | if (traintest == 0) { 198 | numTestExs = ex_list.size(); 199 | testEx = new int[0][]; 200 | testEx = (int[][]) ex_list.toArray(testEx); 201 | } else { 202 | numTrainExs = ex_list.size(); 203 | trainEx = new int[0][]; 204 | trainEx = (int[][]) ex_list.toArray(trainEx); 205 | trainLabel = new int[numTrainExs]; 206 | for (int i = 0; i < numTrainExs; i++) { 207 | trainLabel[i] = (lab_list.get(i)).intValue(); 208 | } 209 | } 210 | in.close(); 211 | } 212 | in = null; 213 | filename = null; 214 | } 215 | 
216 | /** This method prints out the predictions of classifier 217 | * c on each of the test examples in the format required 218 | * for submission. The result is sent to the given 219 | * PrintStream. 220 | **/ 221 | public void printTestPredictions(Classifier c, 222 | PrintStream out) { 223 | out.println(c.author()); 224 | out.println("."); 225 | out.println(c.algorithmDescription()); 226 | out.println("."); 227 | for(int i = 0; i < numTestExs; i++) { 228 | out.println(className[c.predict(testEx[i])]); 229 | } 230 | } 231 | 232 | /** This method prints out the predictions of classifier 233 | * c on each of the test examples in the format required 234 | * for submission. The result is printed to the file 235 | * filestem.testout. 236 | **/ 237 | public void printTestPredictions(Classifier c, 238 | String filestem) 239 | throws FileNotFoundException { 240 | PrintStream out; 241 | 242 | try { 243 | out = new PrintStream(new BufferedOutputStream(new 244 | FileOutputStream(filestem + ".testout"))); 245 | } catch (FileNotFoundException e) { 246 | System.err.println("Cannot open file " + filestem + ".testout"); 247 | throw e; 248 | } 249 | printTestPredictions(c, out); 250 | 251 | out.close(); 252 | } 253 | 254 | /*********************** private ********************************/ 255 | 256 | private String filename; 257 | private int line_count; 258 | private BufferedReader in; 259 | 260 | private void open_file(String filename) throws FileNotFoundException { 261 | BufferedReader in; 262 | 263 | this.filename = filename; 264 | this.line_count = 0; 265 | 266 | try { 267 | in = new BufferedReader(new FileReader(filename)); 268 | } catch (FileNotFoundException e) { 269 | System.err.print("File "+filename+" not found.\n"); 270 | throw e; 271 | } 272 | this.in = in; 273 | } 274 | 275 | private String read_line() throws IOException { 276 | String line; 277 | 278 | line_count++; 279 | 280 | try { 281 | line = in.readLine(); 282 | } 283 | catch (IOException e) { 284 | 
System.err.println("Error reading line "+line_count+" in file "+filename); 285 | throw e; 286 | } 287 | return line; 288 | } 289 | 290 | 291 | protected int[][] getContVals() { 292 | int[][] vals = new int[numAttrs][]; 293 | for (int a = 0; a < numAttrs; a++) { 294 | if (attrVals[a] != null) 295 | continue; 296 | 297 | TreeSet t = new TreeSet(); 298 | 299 | for (int traintest = 0; traintest < 2; traintest++) { 300 | int[][] exs = (traintest == 1 ? trainEx : testEx); 301 | for (int i = 0; i < exs.length; i++) { 302 | t.add(new Integer(exs[i][a])); 303 | } 304 | } 305 | 306 | vals[a] = new int[t.size()]; 307 | 308 | Iterator it = t.iterator(); 309 | int i = 0; 310 | while(it.hasNext()) { 311 | vals[a][i++] = (it.next()).intValue(); 312 | } 313 | } 314 | return vals; 315 | } 316 | 317 | } 318 | -------------------------------------------------------------------------------- /src/DecisionForest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This is a decision tree class used in a decision forest implementation. The 3 | * constructor takes the training set and the attributes to use for building 4 | * the tree. Basic algorithm based on R\&N 18.3: Page 702. 5 | * 6 | * http://csmr.ca.sandia.gov/~wpk/pubs/publications/pami06.pdf 7 | */ 8 | import java.io.*; 9 | import java.util.Random; 10 | import java.util.ArrayList; 11 | import java.util.HashSet; 12 | import java.util.Collections; 13 | public class DecisionForest implements Classifier{ 14 | 15 | Random random; 16 | DecisionTree[] forest; 17 | 18 | public DecisionForest(DataSet data, int forestSize) { 19 | random = new Random(); 20 | 21 | forest = new DecisionTree[forestSize]; 22 | 23 | /* 24 | * This is basically a nonsensical way of choosing attributes/examples 25 | * to train each tree on. Can't really find much on optimal values. 26 | * Want to make sure the tree works before tweaking, though. 
27 | * 28 | * Picks a random number of attributes/examples to train each tree. 29 | */ 30 | ArrayList attributes = new ArrayList(data.numAttrs); 31 | ArrayList examples = new ArrayList(data.numTrainExs); 32 | for (int i = 0; i < data.numAttrs; i++) { attributes.add(i); } 33 | for (int i = 0; i < data.numTrainExs; i++) { examples.add(i); } 34 | 35 | //Train each tree by choosing a subset of features. Actually just using 36 | //every feature in this case. 37 | int numFeatures = data.numAttrs; 38 | 39 | //Train each tree on a sample of 2/3 of the examples 40 | int numTrain = 2 * data.numTrainExs / 3; 41 | 42 | for (int cTree = 0; cTree < forestSize; cTree++) { 43 | 44 | /* Need to decide how to select number of features*/ 45 | //int numFeatures = random.nextInt(data.numAttrs - 1) + 1; 46 | //int numTrain = random.nextInt(data.numTrainExs); 47 | HashSet treeAttributes = new HashSet(numFeatures); 48 | ArrayList treeExamples = new ArrayList(numTrain); 49 | 50 | //Randomize the list 51 | Collections.shuffle(attributes); 52 | for (int i = 0; i < numFeatures; i++) { 53 | treeAttributes.add(attributes.get(i)); 54 | } 55 | 56 | Collections.shuffle(examples); 57 | for (int i = 0; i < numTrain; i++) { 58 | treeExamples.add(examples.get(i)); 59 | } 60 | 61 | //System.out.println(numFeatures + ":" + numTrain); 62 | forest[cTree] = new DecisionTree(data, treeAttributes, 63 | treeExamples, true); 64 | //forest[cTree] = new DecisionTree(data, treeAttributes); 65 | } 66 | } 67 | 68 | /* 69 | * Takes the majority vote of the decision trees 70 | */ 71 | public int predict(int[] ex) { 72 | int[] count = new int[2]; 73 | for (DecisionTree tree : forest) 74 | count[tree.predict(ex)]++; 75 | return (count[1] > count[0] ? 1 : 0); 76 | } 77 | 78 | /** This method should return a very brief but understandable 79 | * description of the learning algorithm that is being used, 80 | * appropriate for posting on the class website. 
81 | */ 82 | public String algorithmDescription() { 83 | return "Random forest - combines bagging with unpruned random decision trees. Tree chooses best among subset of attributes to split on at each node."; 84 | } 85 | 86 | /** This method should return the "author" of this program as you 87 | * would like it to appear on the class website. You can use your 88 | * real name, or a pseudonym, or a name that identifies your 89 | * group. 90 | */ 91 | public String author() { 92 | return "crm & dmrd"; 93 | } 94 | 95 | /* 96 | * Simple main for testing. 97 | * 98 | */ 99 | public static void main(String argv[]) 100 | throws FileNotFoundException, IOException { 101 | 102 | if (argv.length < 2) { 103 | System.err.println("argument: filestem forestSize"); 104 | return; 105 | } 106 | 107 | String filestem = argv[0]; 108 | 109 | /* 110 | * Create a cross validation set - just takes the last crossSize 111 | * elements of the set as a cross set. 112 | */ 113 | DiscreteDataSet d = new DiscreteDataSet(filestem); 114 | 115 | System.out.println("Training classifier on " + d.numTrainExs 116 | + " examples"); 117 | 118 | Classifier c = new DecisionForest(d,Integer.parseInt(argv[1])); 119 | 120 | System.out.println("Running on test set..."); 121 | d.printTestPredictions(c, filestem); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/DecisionTree.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This is a decision tree class used in a decision forest implementation. The 3 | * constructor takes the training set and the attributes to use for building 4 | * the tree. Basic algorithm based on R\&N 18.3: Page 702. 
5 | */ 6 | import java.io.*; 7 | import java.util.Random; 8 | import java.util.ArrayList; 9 | import java.util.HashSet; 10 | public class DecisionTree implements Classifier{ 11 | 12 | Node treeRoot; 13 | 14 | Random random; 15 | 16 | /* 17 | * Randomize tree (for random forest)? If true, chooses some subset of 18 | * attributes to choose best from at each node 19 | */ 20 | boolean randomize; 21 | 22 | /* 23 | * How many features do we want to choose from at each node in randomized 24 | * tree? 25 | */ 26 | private int numFeatures(int total) { 27 | return (int)Math.sqrt(total) + 1; 28 | //return (int)Math.log(total) + 1; 29 | } 30 | 31 | /* 32 | * Inner class to represent tree structure - Splits on binary features 33 | */ 34 | private class Node { 35 | /* 36 | * What attribute does this branch split on? -1 indicates nothing 37 | */ 38 | public int attribute; 39 | /* 40 | * What is the label for this node? 41 | */ 42 | public int label; 43 | 44 | public Node[] children; 45 | 46 | /* 47 | * Entropy of boolean variable - n negative examples, p positive 48 | * examples 49 | */ 50 | double entropy(double n, double p) { 51 | if (n == 0 || p == 0) 52 | return 0.0; 53 | return -1.0 54 | * ( ((n/(n+p)) * Math.log(n/(n+p))) 55 | + ( (p/(n+p)) * Math.log(p/(n+p)))) / Math.log(2); 56 | } 57 | 58 | /* 59 | * Pick the most important attribute - want to get the best one to 60 | * split on - see R&N 18.3.4 on page 703. Trying to minimize 61 | * Remainder(A) since B(.) will remain the same for every 62 | * attribute. 63 | * 64 | * Calculates max gain - for a better explanation, see 65 | * http://dms.irb.hr/tutorial/tut_dtrees.php 66 | * http://decisiontrees.net/decision-trees-tutorial/tutorial-4-id3/ 67 | */ 68 | int chooseAttribute(DataSet data, HashSet attributes, 69 | ArrayList examples) { 70 | int bestAttr = -1; 71 | double bestGain = -1; 72 | int[] labelCount = new int[2]; 73 | for (int ex : examples) { 74 | //Yay array index beauty... 
75 | labelCount[data.trainLabel[ex]]++; 76 | } 77 | double setEntropy = entropy(labelCount[0], labelCount[1]); 78 | for (int attr : attributes) { 79 | //Set to something in the case that there is no gain 80 | //if (bestAttr == -1) { bestAttr = attr; } 81 | /* 82 | * [value][label] : so [0][0] + [0][1] would be the number of 83 | * examples with a value of 0 for the attributes. 84 | * [0][1] + [1][1] would be number of examples with label 1 in 85 | * examples. Use this to calculate entropy and gain. 86 | */ 87 | double[][] count = new double[data.attrVals[attr].length][2]; 88 | for (int ex : examples) { 89 | //Yay array index beauty... 90 | count[data.trainEx[ex][attr]][data.trainLabel[ex]]++; 91 | } 92 | 93 | //Not needed, but using until I am sure my code works 94 | //(recalculates current entropy every time) 95 | //double gain = entropy(count[0][0] + count[1][0], count[0][1] 96 | //+ count[1][1]); 97 | double gain = setEntropy; 98 | for (int val = 0; val < data.attrVals[attr].length; val++) { 99 | //Get number of examples with this attribute value 100 | gain -= ((count[val][0] + count[val][1]) / examples.size()) 101 | * entropy(count[val][0], count[val][1]); 102 | } 103 | 104 | if (gain >= bestGain) { 105 | bestAttr = attr; 106 | bestGain = gain; 107 | } 108 | //System.out.println(gain); 109 | } 110 | //System.out.println(bestAttr + "\t" + bestGain + "\t" + setEntropy 111 | //+ "\t" + attributes.size()); 112 | return bestAttr; 113 | } 114 | 115 | /* 116 | * Accepts the dataset then an array of indices for remaining examples. 117 | * So each integer in examples is just the index of a training example 118 | * in data.trainEx. Avoid making too many copies of the examples this 119 | * way. attributes is a set of all remaining attributes to split on. 120 | * This is modified and passed to children by each node. 
121 | */ 122 | Node(DataSet data, HashSet attributes, 123 | ArrayList examples) { 124 | 125 | this.label = -1; 126 | 127 | if (examples.size() == 0) { 128 | this.attribute = -1; 129 | this.label = 0; //To avoid crashes, although parent must set 130 | return; // the label on this node 131 | } 132 | 133 | /* 134 | * Calculates majority class 135 | */ 136 | int majority = 0; 137 | int count[] = new int[2]; 138 | 139 | for (int ex : examples) { 140 | count[data.trainLabel[ex]]++; 141 | } 142 | 143 | majority = (count[1] > count[0] ? 1 : 0); 144 | 145 | /* 146 | * They all have the same label or there are no more attributes to 147 | * split on 148 | */ 149 | if (count[majority] == examples.size() || attributes.size() == 0) { 150 | //System.out.println(count[0] + ":" + count[1]); 151 | this.attribute = -1; 152 | this.label = majority; 153 | return; 154 | } 155 | 156 | /* 157 | * If randomization is on (i.e. being used in a random forest), then 158 | * we want to choose some random subset of features to choose best 159 | * split feature in. 
160 | */ 161 | if (randomize) { 162 | int numAttr = numFeatures(attributes.size()); 163 | //More efficients ways to do this, but this works well enough 164 | HashSet attrSample = new HashSet(numAttr); 165 | for (int attr : attributes) { 166 | /* 167 | * Add each with a probability of numAttr/number attr 168 | * Also adds some variance so we slightly randomize the 169 | * exact number 170 | */ 171 | if (random.nextInt(attributes.size()) < numAttr) { 172 | attrSample.add(attr); 173 | } 174 | } 175 | //System.out.println(numAttr + " : " + attrSample.size()); 176 | this.attribute = chooseAttribute(data, attrSample, examples); 177 | } else { 178 | this.attribute = chooseAttribute(data, attributes, examples); 179 | } 180 | 181 | /*No best attribute*/ 182 | if (this.attribute == -1) { 183 | this.label = majority; 184 | return; 185 | } 186 | 187 | //Remove the attribute so it cannot be used again in child branches 188 | //Add it back in before returning 189 | attributes.remove(this.attribute); 190 | 191 | //Initialize list of examples that will be passed to children 192 | ArrayList> childExamples = new 193 | ArrayList> 194 | (data.attrVals[this.attribute].length); 195 | for (int i = 0; i < data.attrVals[this.attribute].length; i++) { 196 | childExamples.add(new ArrayList()); 197 | } 198 | //for (ArrayList l : childExamples) { 199 | //l = new ArrayList(); 200 | //} 201 | 202 | /* 203 | * Split examples based on the chosen attribute 204 | */ 205 | for (int ex : examples) { 206 | childExamples.get(data.trainEx[ex][this.attribute]).add(ex); 207 | } 208 | 209 | children = new Node[data.attrVals[this.attribute].length]; 210 | /*Create children trees*/ 211 | for (int i = 0; i < data.attrVals[this.attribute].length; i++) { 212 | children[i] = new Node(data, 213 | attributes, 214 | childExamples.get(i)); 215 | /* 216 | * Need to set child label if they don't have any examples to train 217 | * on 218 | */ 219 | if (childExamples.get(i).size() == 0) { 220 | children[i].label = 
majority; 221 | } 222 | } 223 | attributes.add(this.attribute); 224 | } 225 | } 226 | 227 | /*Just takes dataset - uses all attributes in training*/ 228 | public DecisionTree(DataSet data, boolean rand) { 229 | random = new Random(); 230 | 231 | this.randomize = rand; 232 | HashSet attributes = new HashSet(data.numAttrs); 233 | ArrayList examples = new ArrayList(data.numTrainExs); 234 | 235 | /*Initialize example and attribute lists*/ 236 | for (int i = 0; i < data.numAttrs; i++) { attributes.add(i); } 237 | for (int i = 0; i < data.numTrainExs; i++) { examples.add(i); } 238 | 239 | treeRoot = new Node(data, attributes, examples); 240 | } 241 | 242 | /*Takes the dataset and attributes to use in training*/ 243 | public DecisionTree(DataSet data, HashSet attributes, boolean rand) { 244 | random = new Random(); 245 | 246 | this.randomize = rand; //Randomized tree? 247 | 248 | /*Initialize example lists to include all examples*/ 249 | ArrayList examples = new ArrayList(data.numTrainExs); 250 | for (int i = 0; i < data.numTrainExs; i++) { examples.add(i); } 251 | 252 | treeRoot = new Node(data, attributes, examples); 253 | } 254 | 255 | /*Take both attributes and examples to use for training*/ 256 | public DecisionTree(DataSet data, HashSet attributes, 257 | ArrayList examples, boolean rand) { 258 | random = new Random(); 259 | this.randomize = rand; 260 | treeRoot = new Node(data, attributes, examples); 261 | } 262 | 263 | /** 264 | * Walks down the generated tree to return a label for the example. 
265 | */ 266 | public int predict(int[] ex) { 267 | Node current = treeRoot; 268 | int depth = 0; 269 | while (current.attribute != -1) { 270 | current = current.children[ex[current.attribute]]; 271 | } 272 | //System.out.println(depth); 273 | //System.out.println(current.attribute + "\t" + current.label); 274 | return current.label; 275 | } 276 | 277 | /** This method should return a very brief but understandable 278 | * description of the learning algorithm that is being used, 279 | * appropriate for posting on the class website. 280 | */ 281 | public String algorithmDescription() { 282 | return "Basic decision tree for use with random forests"; 283 | } 284 | 285 | /** This method should return the "author" of this program as you 286 | * would like it to appear on the class website. You can use your 287 | * real name, or a pseudonym, or a name that identifies your 288 | * group. 289 | */ 290 | public String author() { 291 | return "dmrd"; 292 | } 293 | 294 | /* 295 | * Simple main for testing. 296 | */ 297 | public static void main(String argv[]) 298 | throws FileNotFoundException, IOException { 299 | 300 | if (argv.length < 1) { 301 | System.err.println("argument: filestem"); 302 | return; 303 | } 304 | 305 | String filestem = argv[0]; 306 | 307 | /* 308 | * Create a cross validation set - just takes the last crossSize 309 | * elements of the set as a cross set. 310 | */ 311 | DiscreteDataSet d = new DiscreteDataSet(filestem); 312 | 313 | /* 314 | * Do the Knuth Shuffle! It sounds like more fun than it is! 
315 | */ 316 | //Set seed to constant to get the same result multiple times 317 | Random random = new Random(); 318 | for (int i = 0; i < d.numTrainExs; i++) { 319 | int swap = random.nextInt(d.numTrainExs - i); 320 | int[] tempEx = d.trainEx[swap]; 321 | d.trainEx[swap] = d.trainEx[d.numTrainExs - i - 1]; 322 | d.trainEx[d.numTrainExs - i - 1] = tempEx; 323 | int tempLabel = d.trainLabel[swap]; 324 | d.trainLabel[swap] = d.trainLabel[d.numTrainExs - i - 1]; 325 | d.trainLabel[d.numTrainExs - i - 1] = tempLabel; 326 | } 327 | 328 | //What proportion of the dataset to use for testing 329 | int crossSize = d.numTrainExs/8; 330 | 331 | int[][] crossEx = new int[crossSize][]; 332 | int[] crossLabel = new int[crossSize]; 333 | 334 | int[][] dEx = new int[d.numTrainExs - crossSize][]; 335 | int[] dLabel = new int[d.numTrainExs - crossSize]; 336 | 337 | for (int i = 0; i < d.numTrainExs - crossSize; i++) { 338 | dEx[i] = d.trainEx[i]; 339 | dLabel[i] = d.trainLabel[i]; 340 | } 341 | 342 | for (int i = 0; i < crossSize; i++) { 343 | crossEx[i] = d.trainEx[d.numTrainExs - i - 1]; 344 | crossLabel[i] = d.trainLabel[d.numTrainExs - i - 1]; 345 | } 346 | 347 | //Modify original dataset 348 | d.numTrainExs = dEx.length; 349 | d.trainEx = dEx; 350 | d.trainLabel = dLabel; 351 | System.out.println("Training classifier on " + d.numTrainExs 352 | + " examples"); 353 | 354 | Classifier c = new DecisionTree(d, false); 355 | 356 | System.out.println("Testing classifier on " + crossEx.length 357 | + " examples"); 358 | int correct = 0; 359 | for (int ex = 0; ex < crossEx.length; ex++) { 360 | if (c.predict(crossEx[ex]) == crossLabel[ex]) 361 | correct++; 362 | } 363 | System.out.println("Performance on cross set: " 364 | + (100*correct / crossEx.length) + "%"); 365 | } 366 | } 367 | -------------------------------------------------------------------------------- /src/DiscreteDataSet.java: -------------------------------------------------------------------------------- 1 | import 
java.io.*; 2 | 3 | /** 4 | * This is a subclass of DataSet representing a dataset all 5 | * of whose attributes are discrete. In addition to all of the fields 6 | * and methods inherited from DataSet, the class includes a 7 | * constructor that reads in data from data files and converts all 8 | * numeric attributes to discrete. 9 | **/ 10 | public class DiscreteDataSet extends DataSet { 11 | 12 | /** This constructor reads in data from the files 13 | * filestem.names, filestem.train and 14 | * filestem.test, converts all attributes to discrete 15 | * format, and sets up all of the public fields. 16 | **/ 17 | public DiscreteDataSet(String filestem) 18 | throws FileNotFoundException, IOException { 19 | super(filestem); 20 | 21 | int[][] cont_vals = getContVals(); 22 | 23 | for (int j = 0; j < numAttrs; j++) { 24 | if (attrVals[j] != null) 25 | continue; 26 | attrVals[j] = new String[cont_vals[j].length]; 27 | for (int k = 0; k < cont_vals[j].length; k++) { 28 | attrVals[j][k] = Integer.toString(cont_vals[j][k]); 29 | } 30 | for (int traintest = 0; traintest < 2; traintest++) { 31 | int[][] exs = (traintest == 1 ? trainEx : testEx); 32 | for (int i = 0; i < exs.length; i++) { 33 | int k = 0; 34 | while(exs[i][j] != cont_vals[j][k]) 35 | k++; 36 | exs[i][j] = k; 37 | } 38 | } 39 | } 40 | } 41 | 42 | /** This constructor creates an empty discrete dataset. 
*/ 43 | public DiscreteDataSet() { 44 | super(); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | javac BaselineClassifier.java BinaryDataSet.java Classifier.java DataSet.java DiscreteDataSet.java NumericDataSet.java -d ../bin/ 3 | clean: 4 | rm ../bin/*.class 5 | -------------------------------------------------------------------------------- /src/MultiLayerNeuralNet.java: -------------------------------------------------------------------------------- 1 | import java.io.FileNotFoundException; 2 | import java.io.IOException; 3 | import java.util.LinkedList; 4 | import java.util.Arrays; 5 | 6 | public class MultiLayerNeuralNet implements Classifier { 7 | /* algorithm's learning rate. */ 8 | private final double learningRate = 0.5; 9 | /* algorithm's momentum parameter. */ 10 | private final double momentumFactor = 0.1; 11 | /* weights[i][j] is the weight on edge from node i -> j. 12 | * as a special case, weights[i][i] is the threshold value 13 | * of the perceptron i. */ 14 | private double[][] weights; 15 | /* outgoingEdges[i] is a list of edges from i. */ 16 | private final LinkedList[] outgoingEdges; 17 | /* incomingEdges[i] is a list of edges to i. */ 18 | private final LinkedList[] incomingEdges; 19 | /* layer[i] is a list of nodes in layer i. */ 20 | private final LinkedList[] layer; 21 | /* data set on which to make predictions. */ 22 | private final DataSet d; 23 | /* number of attributes in data set. */ 24 | private final int N; 25 | /* number of nodes in the network. */ 26 | private final int numNodes; 27 | 28 | /** Calculates the error on the training examples of 29 | * data set d. 
*/ 30 | private double error(DataSet d) { 31 | if (d.numTrainExs == 0) return 0.0; 32 | double sum = 0.0; 33 | for (int i = 0; i < d.numTrainExs; i++) 34 | sum += Math.abs(d.trainLabel[i] - predict(d.trainEx[i])); 35 | return sum/d.numTrainExs; 36 | } 37 | 38 | /** Runs an activation threshold function g on some 39 | * input value d. 40 | */ 41 | private double g(double d) { 42 | return 1.0/(1.0 + Math.exp(-d)); 43 | } 44 | 45 | /** Runs an activation threshold function g's derivative 46 | * on some input value d. 47 | */ 48 | private double gPrime(double d) { 49 | double g = g(d); 50 | return g * (1.0 - g); 51 | } 52 | 53 | /** 54 | * Trains the neural network on every example in data set d 55 | * using previous deltas prevDelta. 56 | */ 57 | private void backProp(DataSet d, double[] prevDelta) { 58 | for (int i = 0; i < d.numTrainExs; i++) 59 | backProp(d.trainEx[i], d.trainLabel[i], prevDelta); 60 | } 61 | 62 | /** Trains the neural network on an example ex by using 63 | * the back propagation technique to adjust the net's weights. 64 | * Example ex is known to be of classification label. Uses 65 | * 0 for all previous deltas. 66 | */ 67 | private void backProp(int[] ex, int label) { 68 | backProp(ex, label, new double[this.numNodes]); 69 | } 70 | 71 | /** Trains the neural network on an example ex by using 72 | * the back propagation technique to adjust the net's weights. 73 | * Example ex is known to be of classification label. It is 74 | * assumed that prevDelta are the deltas for time t-1, used 75 | * to add momentum to the gradient descent calculation. Stores 76 | * the calculated deltas in prevDelta for future use. 
77 | */ 78 | private void backProp(int[] ex, int label, double[] prevDelta) { 79 | // output of each node 80 | double[] a = new double[this.numNodes]; 81 | // input to each node 82 | double[] in = new double[this.numNodes]; 83 | // delta for adjusting each edge weight 84 | double[] delta = new double[this.numNodes]; 85 | 86 | // first N nodes are input nodes 87 | for (int i = 0; i < this.N; i++) 88 | a[i] = ex[i]; 89 | 90 | // compute outputs by propagating inputs forward 91 | for (int l = 1; l < this.layer.length; l++) { 92 | for (int dest : this.layer[l]) { 93 | for (int src : this.incomingEdges[dest]) { 94 | in[dest] += this.weights[src][dest]*a[src]; 95 | } 96 | // subtract threshold value 97 | in[dest] -= this.weights[dest][dest]; 98 | a[dest] = g(in[dest]); 99 | } 100 | } 101 | // return if accurate prediction 102 | if (predict(a[this.numNodes - 1]) == label) return; 103 | 104 | // compute deltas by propagating backward 105 | // degin with delta of output layer as base case 106 | delta[this.numNodes - 1] = 107 | gPrime(a[this.numNodes - 1])*(label - (int)Math.round(a[this.numNodes - 1])); 108 | for (int l = this.layer.length - 2; l >= 0; l--) { 109 | for (int src : this.layer[l]) { 110 | double sum = 0; 111 | for (int dest : this.outgoingEdges[src]) { 112 | sum += this.weights[src][dest]*delta[dest]; 113 | } 114 | // compute delta and add momentum factor 115 | delta[src] = gPrime(a[src])*sum; 116 | delta[src] += this.momentumFactor*prevDelta[src]; 117 | // store momentum for future use 118 | prevDelta[src] = delta[src]; 119 | } 120 | } 121 | 122 | // adjust weights 123 | for (int i = 0; i < this.weights.length; i++) { 124 | for (int j = i+1; j < this.weights.length; j++) { 125 | this.weights[i][j] += this.learningRate*a[i]*delta[j]; 126 | this.weights[j][i] = this.weights[i][j]; 127 | } 128 | } 129 | } 130 | 131 | /** Returns a random weight for an edge. 
*/ 132 | private double randomWeight() { 133 | return 0.5 - Math.random(); 134 | } 135 | 136 | 137 | /** Resets the weights of a neural network to avoid getting caught 138 | * in a local minimum. 139 | */ 140 | private void randomizeWeights() { 141 | for (int i = 0; i < this.weights.length; i++) { 142 | for (int j = i+1; j < this.weights.length; j++) { 143 | this.weights[i][j] = randomWeight(); 144 | this.weights[j][i] = this.weights[i][j]; 145 | } 146 | } 147 | } 148 | 149 | /** Constructor for the MultiLayerNeuralNet class that 150 | * creates a multi-layer, feed-forward neural network 151 | * from a data set. 152 | */ 153 | @SuppressWarnings("unchecked") 154 | public MultiLayerNeuralNet(DataSet d) { 155 | this.d = d; 156 | this.N = this.d.numAttrs; 157 | // number of nodes in hidden layer 158 | int numHidden = this.N; 159 | this.numNodes = this.N + numHidden + 1; 160 | this.weights = new double[this.numNodes][this.numNodes]; 161 | randomizeWeights(); 162 | // create and initialize list of edges 163 | this.incomingEdges = (LinkedList[]) new LinkedList[this.numNodes]; 164 | this.outgoingEdges = (LinkedList[]) new LinkedList[this.numNodes]; 165 | for (int i = 0; i < this.numNodes; i++) { 166 | this.incomingEdges[i] = new LinkedList(); 167 | this.outgoingEdges[i] = new LinkedList(); 168 | } 169 | // number of layers to be included 170 | int numLayers = 3; 171 | this.layer = (LinkedList[]) new LinkedList[numLayers]; 172 | for (int i = 0; i < numLayers; i++) 173 | this.layer[i] = new LinkedList(); 174 | 175 | /* Create first layer and links to hidden layer. */ 176 | for (int i = 0; i < this.N; i++) { 177 | this.layer[0].add(i); 178 | // add incoming and outgoing edges 179 | for (int j = this.N; j < this.N + numHidden; j++) { 180 | this.outgoingEdges[i].add(j); 181 | this.incomingEdges[j].add(i); 182 | } 183 | } 184 | 185 | /* Create second layer and links to third layer. 
*/ 186 | for (int i = this.N; i < this.N + numHidden; i++) { 187 | this.layer[1].add(i); 188 | this.incomingEdges[this.numNodes - 1].add(i); 189 | this.outgoingEdges[i].add(this.numNodes - 1); 190 | } 191 | 192 | /* Create list of third layer (output node). */ 193 | this.layer[2].add(this.numNodes - 1); 194 | 195 | // train neural net on each training example 196 | // run until epsilon threshold error is breached 197 | double epsilon = 0.05; 198 | double minError = Double.MAX_VALUE; 199 | double lastError = Double.MAX_VALUE; 200 | double[][] bestWeights = new double[this.weights.length][this.weights.length]; 201 | double[] prevDelta = new double[this.numNodes]; 202 | int maxRuns = 100; 203 | for (int runs = 0; runs < maxRuns; runs++) { 204 | // run back prop 205 | backProp(this.d, prevDelta); 206 | double error = error(this.d); 207 | // if error is sufficiently low, cut-off 208 | if (error < epsilon) { 209 | bestWeights = this.weights; 210 | break; 211 | } 212 | // if error has not improved, reset 213 | else if (error >= lastError) { 214 | randomizeWeights(); 215 | lastError = Double.MAX_VALUE; 216 | } 217 | else { 218 | lastError = error; 219 | // if error is best seen, remember weights 220 | if (error < minError) { 221 | minError = error; 222 | for (int i = 0; i < this.weights.length; i++) 223 | System.arraycopy(this.weights[i], 0, bestWeights[i], 0, this.weights.length); 224 | } 225 | } 226 | } 227 | // assign permanent weights to the best weights observed 228 | this.weights = bestWeights; 229 | } 230 | 231 | /** A method for predicting the label of a given example ex 232 | * represented, as in the rest of the code, as an array of values 233 | * for each of the attributes. The method should return a 234 | * prediction, i.e., 0 or 1. 
235 | */ 236 | public int predict(int[] ex) { 237 | double[] a = new double[this.numNodes]; 238 | double[] in = new double[this.numNodes]; 239 | 240 | // First N nodes are input nodes 241 | for (int i = 0; i < this.N; i++) 242 | a[i] = ex[i]; 243 | 244 | // Compute outputs by propagating inputs forward 245 | for (int l = 1; l < this.layer.length; l++) { 246 | for (int dest : this.layer[l]) { 247 | for (int src : this.incomingEdges[dest]) { 248 | in[dest] += this.weights[src][dest]*a[src]; 249 | } 250 | a[dest] = g(in[dest]); 251 | } 252 | } 253 | 254 | // Return based on output of sigmoid function 255 | return predict(a[this.numNodes - 1]); 256 | } 257 | 258 | /** Makes a prediction based on some input value a, which 259 | * should--in practice--be the output value of the final 260 | * perceptron. 261 | */ 262 | private int predict(double a) { 263 | return (int)Math.round(a); 264 | } 265 | 266 | /** This method should return a very brief but understandable 267 | * description of the learning algorithm that is being used, 268 | * appropriate for posting on the class website. 269 | */ 270 | public String algorithmDescription() { 271 | return "A multi layer neural network."; 272 | } 273 | 274 | /** This method should return the "author" of this program as you 275 | * would like it to appear on the class website. You can use your 276 | * real name, or a pseudonym, or a name that identifies your 277 | * group. 278 | */ 279 | public String author() { 280 | return "crm"; 281 | } 282 | 283 | /** A simple main for testing this algorithm. This main reads a 284 | * filestem from the command line, runs the learning algorithm on 285 | * this dataset, and prints the test predictions to filestem.testout. 
286 | */ 287 | public static void main(String argv[]) 288 | throws FileNotFoundException, IOException { 289 | if (argv.length < 1) { 290 | System.err.println("argument: filestem"); 291 | return; 292 | } 293 | 294 | String filestem = argv[0]; 295 | 296 | DataSet d = new BinaryDataSet(filestem); 297 | 298 | Classifier c = new MultiLayerNeuralNet(d); 299 | 300 | d.printTestPredictions(c, filestem); 301 | } 302 | } 303 | -------------------------------------------------------------------------------- /src/NumericDataSet.java: -------------------------------------------------------------------------------- 1 | import java.io.*; 2 | 3 | /** 4 | * This is a subclass of DataSet representing a dataset all 5 | * of whose attributes are numeric. In addition to all of the fields 6 | * and methods inherited from DataSet, the class includes a 7 | * constructor that reads in data from data files and converts all 8 | * discrete attributes to numeric. 9 | **/ 10 | public class NumericDataSet extends DataSet { 11 | 12 | /** This constructor reads in data from the files 13 | * filestem.names, filestem.train and 14 | * filestem.test, converts all attributes to numeric * format, and sets up all of the public fields. 15 | **/ 16 | public NumericDataSet(String filestem) 17 | throws FileNotFoundException, IOException { 18 | super(filestem); 19 | 20 | int new_numAttrs = 0; 21 | 22 | for (int a = 0; a < numAttrs; a++) 23 | new_numAttrs += (attrVals[a] == null ? 1 : attrVals[a].length); 24 | 25 | for (int traintest = 0; traintest < 2; traintest++) { 26 | int[][] exs = (traintest == 1 ? trainEx : testEx); 27 | for (int i = 0; i < exs.length; i++) { 28 | int[] new_ex = new int[new_numAttrs]; 29 | for (int a = 0, na = 0; a < numAttrs; a++) { 30 | if (attrVals[a] == null) 31 | new_ex[na++] = exs[i][a]; 32 | else 33 | for(int j = 0; j < attrVals[a].length; j++) 34 | new_ex[na++] = (exs[i][a] == j ? 
1 : 0); 35 | } 36 | exs[i] = new_ex; 37 | } 38 | } 39 | 40 | String[] new_attrName = new String[new_numAttrs]; 41 | for (int a = 0, na = 0; a < numAttrs; a++) { 42 | if (attrVals[a] == null) 43 | new_attrName[na++] = attrName[a]; 44 | else 45 | for(int j = 0; j < attrVals[a].length; j++) 46 | new_attrName[na++] = attrName[a] + "=" + attrVals[a][j]; 47 | } 48 | 49 | attrName = new_attrName; 50 | numAttrs = new_numAttrs; 51 | 52 | attrVals = new String[numAttrs][]; 53 | } 54 | 55 | /** This constructor creates an empty numeric dataset. */ 56 | public NumericDataSet() { 57 | super(); 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /src/SingleLayerNeuralNet.java: -------------------------------------------------------------------------------- 1 | import java.io.FileNotFoundException; 2 | import java.io.IOException; 3 | 4 | public class SingleLayerNeuralNet implements Classifier { 5 | /* algorithm's learning rate. */ 6 | private final double learning_rate = .1; 7 | /* weights[i] is the weight from input perceptron i to 8 | the output perceptron. */ 9 | private final double[] weights; 10 | /* data set on which to make predictions. */ 11 | private final DataSet d; 12 | /* number of attributes in data set. */ 13 | private final int N; 14 | 15 | /** Calculates the error on the training examples of 16 | * data set d. */ 17 | private double error(DataSet d) { 18 | double sum = 0.0; 19 | for (int i = 0; i < d.numTrainExs; i++) 20 | sum += Math.abs(d.trainLabel[i] - predict(d.trainEx[i])); 21 | return sum/d.numTrainExs; 22 | } 23 | 24 | /** Trains the neural network on an example ex by using 25 | * the back propagation technique to adjust the net's weights. 26 | * Example ex is known to be of classification label. 
27 | */ 28 | private void back_prop(int[] ex, int label) { 29 | int output = predict(ex); 30 | if (output == label) return; 31 | 32 | for (int i = 0; i < this.N; i++) { 33 | double delta = this.learning_rate*(label-output)*ex[i]; 34 | this.weights[i] += delta; 35 | } 36 | } 37 | 38 | /** Constructor for the SingleLayerNeuralNet class that 39 | * creates a single layer neural network from a data set. 40 | */ 41 | public SingleLayerNeuralNet(DataSet d) { 42 | this.d = d; 43 | this.N = this.d.numAttrs; 44 | // N+1 perceptrons, with perceptron N = output 45 | this.weights = new double[this.N]; 46 | double epsilon = 0.05; 47 | int maxRuns = 100000; 48 | 49 | int runs = 0; 50 | // train neural net on each training example 51 | while (runs < maxRuns && error(this.d) > epsilon) { 52 | for (int i = 0; i < this.d.numTrainExs; i++) 53 | back_prop(this.d.trainEx[i], this.d.trainLabel[i]); 54 | runs++; 55 | } 56 | } 57 | 58 | /** A method for predicting the label of a given example ex 59 | * represented, as in the rest of the code, as an array of values 60 | * for each of the attributes. The method should return a 61 | * prediction, i.e., 0 or 1. Example ex is guaranteed to be represented 62 | * as an array of binary values. 63 | */ 64 | public int predict(int[] ex) { 65 | double sum = 0.0; 66 | for (int i = 0; i < this.N; i++) 67 | sum += weights[i]*ex[i]; 68 | if (sum > 0) return 1; 69 | return 0; 70 | } 71 | 72 | /** This method should return a very brief but understandable 73 | * description of the learning algorithm that is being used, 74 | * appropriate for posting on the class website. 75 | */ 76 | public String algorithmDescription() { 77 | return "A single layer neural network."; 78 | } 79 | 80 | /** This method should return the "author" of this program as you 81 | * would like it to appear on the class website. You can use your 82 | * real name, or a pseudonym, or a name that identifies your 83 | * group. 
84 | */ 85 | public String author() { 86 | return "crm"; 87 | } 88 | 89 | /** A simple main for testing this algorithm. This main reads a 90 | * filestem from the command line, runs the learning algorithm on 91 | * this dataset, and prints the test predictions to filestem.testout. 92 | */ 93 | public static void main(String argv[]) 94 | throws FileNotFoundException, IOException { 95 | if (argv.length < 1) { 96 | System.err.println("argument: filestem"); 97 | return; 98 | } 99 | 100 | String filestem = argv[0]; 101 | 102 | DataSet d = new BinaryDataSet(filestem); 103 | 104 | Classifier c = new SingleLayerNeuralNet(d); 105 | 106 | d.printTestPredictions(c, filestem); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/TestForestSize.java: -------------------------------------------------------------------------------- 1 | import java.io.FileNotFoundException; 2 | import java.io.IOException; 3 | 4 | public class TestForestSize { 5 | public static void main(String[] argv) throws FileNotFoundException, IOException { 6 | if (argv.length < 4) { 7 | System.err.println("argument: filestem forestMin forestMax increment numTrials"); 8 | return; 9 | } 10 | 11 | // data set from filestem 12 | DataSet d = new DiscreteDataSet(argv[0]); 13 | // min and max sizes for decision forest 14 | int forestMin = Integer.parseInt(argv[1]); 15 | int forestMax = Integer.parseInt(argv[2]); 16 | int increment = Integer.parseInt(argv[3]); 17 | // number of trials to be run per forest size 18 | int numTrials = Integer.parseInt(argv[4]); 19 | 20 | System.out.println("Data set contains " + d.numTrainExs + " examples."); 21 | System.out.println("[forest size], [trialNum], [training error], [cross-set error]"); 22 | for (int forestSize = forestMin; forestSize <= forestMax; forestSize += increment) { 23 | // data set from filestem 24 | d = new DiscreteDataSet(argv[0]); 25 | double[][] error = TestHarness.computeError(d, numTrials, 
TestHarness.classifier.DF, forestSize, false, 0, 0); 26 | for (int j = 0; j < numTrials; j++) { 27 | System.out.printf("%d, %d, %f, %f\n", forestSize, j, error[j][0], error[j][1]); 28 | } 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/TestHarness.java: -------------------------------------------------------------------------------- 1 | /* 2 | * A basic test harness for the different algorithms 3 | */ 4 | import java.util.*; 5 | import java.io.*; 6 | public class TestHarness { 7 | 8 | private static int FOLDNUM = 1; 9 | private static int FOLDDENOM = 8; 10 | private static int numTrees = 100; 11 | 12 | public enum classifier { 13 | DT, DF, SLNN, MLNN, KNN, BASE 14 | }; 15 | 16 | static classifier algo; 17 | 18 | /* Computes trial results for a data set, returning an array 19 | * where a[i][0] is the training error for the ith trial 20 | * and a[i][1] is the error on a cross set for the ith trial. 21 | * Uses an algorithm specified by algo, and allows the user 22 | * to set the forest size or randomization of decision forests 23 | * and decision trees (respectively); these arguments are ignored 24 | * if a different algorithm is used. 
*/ 25 | public static double[][] computeError(DataSet d, int numTrials, classifier algo, 26 | int numTrees, boolean rand, int numIters, int k) { 27 | double[][] results = new double[numTrials][2]; 28 | Random random = new Random(); 29 | int crossSize = FOLDNUM * d.numTrainExs / FOLDDENOM; 30 | int[][] oEx = new int[d.numTrainExs][]; 31 | int[] oLabel = new int[d.numTrainExs]; 32 | for (int i = 0; i < d.numTrainExs; i++) { 33 | oEx[i] = d.trainEx[i]; 34 | oLabel[i] = d.trainLabel[i]; 35 | } 36 | 37 | d.numTrainExs -= crossSize; 38 | d.trainEx = new int[d.numTrainExs][]; 39 | d.trainLabel = new int[d.numTrainExs]; 40 | 41 | for (int trial = 0; trial < numTrials; trial++) { 42 | 43 | /*Shuffle the dataset to get a training/test set for each trial*/ 44 | for (int i = 0; i < oEx.length; i++) { 45 | int swap = random.nextInt(oEx.length - i); 46 | int[] tempEx = oEx[swap]; 47 | oEx[swap] = oEx[oEx.length - i - 1]; 48 | oEx[oEx.length - i - 1] = tempEx; 49 | 50 | /*Same for labels*/ 51 | int tempLabel = oLabel[swap]; 52 | oLabel[swap] = oLabel[oEx.length - i - 1]; 53 | oLabel[oEx.length - i - 1] = tempLabel; 54 | } 55 | 56 | for (int i = 0; i < d.numTrainExs; i++) { 57 | d.trainEx[i] = oEx[i]; 58 | d.trainLabel[i] = oLabel[i]; 59 | } 60 | 61 | Classifier c; 62 | switch (algo) { 63 | case DT: 64 | c = new DecisionTree(d, false); 65 | break; 66 | case DF: 67 | c = new DecisionForest(d, numTrees); 68 | break; 69 | case KNN: 70 | c = new kNN(d, k, numIters); 71 | break; 72 | case SLNN: 73 | c = new SingleLayerNeuralNet(d); 74 | break; 75 | case MLNN: 76 | c = new MultiLayerNeuralNet(d); 77 | break; 78 | default: 79 | c = new BaselineClassifier(d); 80 | } 81 | 82 | int correct = 0; 83 | for (int ex = 0; ex < d.numTrainExs; ex++) { 84 | if (c.predict(d.trainEx[ex]) == d.trainLabel[ex]) 85 | correct++; 86 | } 87 | results[trial][0] = 100.0 - (100.0*correct/d.numTrainExs); 88 | 89 | correct = 0; 90 | for (int ex = oEx.length - crossSize; ex < oEx.length; ex++) { 91 | if 
(c.predict(oEx[ex]) == oLabel[ex]) 92 | correct++; 93 | } 94 | 95 | results[trial][1] = 100.0 - (100.0*correct / crossSize); 96 | } 97 | 98 | return results; 99 | } 100 | 101 | /* Prints trial and cross error on data set d. */ 102 | public static void runTrials(DataSet d, int numTrials) { 103 | Random random = new Random(); 104 | int crossSize = FOLDNUM * d.numTrainExs / FOLDDENOM; 105 | int[][] oEx = new int[d.numTrainExs][]; 106 | int[] oLabel = new int[d.numTrainExs]; 107 | for (int i = 0; i < d.numTrainExs; i++) { 108 | oEx[i] = d.trainEx[i]; 109 | oLabel[i] = d.trainLabel[i]; 110 | } 111 | 112 | d.numTrainExs -= crossSize; 113 | d.trainEx = new int[d.numTrainExs][]; 114 | d.trainLabel = new int[d.numTrainExs]; 115 | 116 | System.out.println("Training classifier on " + d.numTrainExs 117 | + " examples with " + numTrials + " trials. Testing on " 118 | + crossSize + " examples"); 119 | int totalCorrect = 0; 120 | for (int trial = 0; trial < numTrials; trial++) { 121 | 122 | /*Shuffle the dataset to get a training/test set for each trial*/ 123 | for (int i = 0; i < oEx.length; i++) { 124 | int swap = random.nextInt(oEx.length - i); 125 | int[] tempEx = oEx[swap]; 126 | oEx[swap] = oEx[oEx.length - i - 1]; 127 | oEx[oEx.length - i - 1] = tempEx; 128 | 129 | /*Same for labels*/ 130 | int tempLabel = oLabel[swap]; 131 | oLabel[swap] = oLabel[oEx.length - i - 1]; 132 | oLabel[oEx.length - i - 1] = tempLabel; 133 | } 134 | 135 | for (int i = 0; i < d.numTrainExs; i++) { 136 | d.trainEx[i] = oEx[i]; 137 | d.trainLabel[i] = oLabel[i]; 138 | } 139 | 140 | Classifier c; 141 | switch (algo) { 142 | case DT: 143 | c = new DecisionTree(d, false); 144 | break; 145 | case DF: 146 | c = new DecisionForest(d, numTrees); 147 | break; 148 | case KNN: 149 | c = new kNN(d); 150 | break; 151 | case SLNN: 152 | c = new SingleLayerNeuralNet(d); 153 | break; 154 | case MLNN: 155 | c = new MultiLayerNeuralNet(d); 156 | break; 157 | default: 158 | c = new BaselineClassifier(d); 159 | } 160 
| 161 | System.out.println("Trial " + (trial + 1) + ": "); 162 | int correct = 0; 163 | for (int ex = 0; ex < d.numTrainExs; ex++) { 164 | if (c.predict(d.trainEx[ex]) == d.trainLabel[ex]) 165 | correct++; 166 | } 167 | System.out.println("\tPerformance on train set: " 168 | + (100.0*correct/d.numTrainExs) + "%"); 169 | 170 | correct = 0; 171 | for (int ex = oEx.length - crossSize; ex < oEx.length; ex++) { 172 | if (c.predict(oEx[ex]) == oLabel[ex]) 173 | correct++; 174 | } 175 | 176 | totalCorrect += correct; 177 | System.out.println("\tPerformance on cross set: " 178 | + (100.0*correct / crossSize) + "%"); 179 | } 180 | 181 | System.out.println("Average percent correct: " 182 | + (100.0*totalCorrect / (crossSize * numTrials)) + "%"); 183 | return; 184 | } 185 | 186 | /* 187 | * Simple main for testing. 188 | */ 189 | public static void main(String argv[]) 190 | throws FileNotFoundException, IOException { 191 | 192 | if (argv.length < 3) { 193 | System.err.println("argument: filestem classifier #runs classifierArgs"); 194 | System.err.println("Classifier options: dt, df, knn, slnn, mlnn"); 195 | return; 196 | } 197 | 198 | DataSet d; 199 | if (argv[1].equals("dt")) { 200 | System.out.print("Using decision tree"); 201 | algo = classifier.DT; 202 | d = new DiscreteDataSet(argv[0]); 203 | } else if (argv[1].equals("df")) { 204 | System.out.print("Using decision forest"); 205 | if (argv.length == 4) { numTrees = Integer.parseInt(argv[3]); } 206 | algo = classifier.DF; 207 | //d = new DiscreteDataSet(argv[0]); 208 | d = new DiscreteDataSet(argv[0]); 209 | } else if (argv[1].equals("knn")) { 210 | System.out.print("Using k-nearest-neighbor"); 211 | algo = classifier.KNN; 212 | d = new BinaryDataSet(argv[0]); 213 | } else if (argv[1].equals("slnn")) { 214 | System.out.print("Using single layer neural net"); 215 | algo = classifier.SLNN; 216 | d = new BinaryDataSet(argv[0]); 217 | } else if (argv[1].equals("mlnn")) { 218 | System.out.print("Using multilayer neural net"); 
219 | algo = classifier.MLNN; 220 | d = new BinaryDataSet(argv[0]); 221 | } else { 222 | System.out.print("Using baseline classifier"); 223 | algo = classifier.BASE; 224 | d = new DataSet(argv[0]); 225 | } 226 | System.out.println(" on " + argv[0]); 227 | 228 | runTrials(d, Integer.parseInt(argv[2])); 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /src/TestkNN.java: -------------------------------------------------------------------------------- 1 | import java.io.FileNotFoundException; 2 | import java.io.IOException; 3 | 4 | public class TestkNN { 5 | public static void main(String[] argv) throws FileNotFoundException, IOException { 6 | if (argv.length < 5) { 7 | System.err.println("argument: filestem k iterMin iterMax increment numTrials"); 8 | return; 9 | } 10 | 11 | // data set from filestem 12 | DataSet d = new DiscreteDataSet(argv[0]); 13 | // number of data points to take for kNN 14 | int k = Integer.parseInt(argv[1]); 15 | // min and max number of iterations for weight training 16 | int iterMin = Integer.parseInt(argv[2]); 17 | int iterMax = Integer.parseInt(argv[3]); 18 | int increment = Integer.parseInt(argv[4]); 19 | // number of trials to be run per forest size 20 | int numTrials = Integer.parseInt(argv[5]); 21 | 22 | System.out.println("Data set contains " + d.numTrainExs + " examples."); 23 | System.out.println("Using " + k + " nearest points."); 24 | System.out.println("[numIters], [trialNum], [training error], [cross-set error]"); 25 | for (int numIters = iterMin; numIters <= iterMax; numIters += increment) { 26 | // data set from filestem 27 | d = new DiscreteDataSet(argv[0]); 28 | double[][] error = TestHarness.computeError(d, numTrials, TestHarness.classifier.KNN, 0, false, k, numIters); 29 | for (int j = 0; j < numTrials; j++) { 30 | System.out.printf("%d, %d, %f, %f\n", numIters, j, error[j][0], error[j][1]); 31 | } 32 | } 33 | } 34 | } 35 | 
import csv
import sys
import matplotlib.pyplot as plt

# Plots mean train / cross-set accuracy against forest size.
#
# Input (sys.argv[1]) is the text printed by TestForestSize: a few textual
# header lines followed by rows of
#   [forest size], [trialNum], [training error], [cross-set error]
# with both errors given in percent.

cTrees = -1
current = []  # [numTrees, [training errors], [cross-set errors]]
total = [[], [], []]  # [numTrees, mean train accuracy, mean cross-set accuracy]


def appendPoint():
    """Average the trials collected in `current` and append one plot point.

    The stored values are percent errors, so 100 - mean is percent accuracy.
    """
    total[0].append(cTrees)
    total[1].append(100 - sum(current[1]) / len(current[1]))
    total[2].append(100 - sum(current[2]) / len(current[2]))


with open(sys.argv[1]) as f:
    for row in csv.reader(f, delimiter=","):
        # Skip blank lines and the textual header lines; data rows start
        # with a digit.
        if not row or not row[0][:1].isdigit():
            continue
        if int(row[0]) != cTrees:
            # A new forest size begins: flush the previous group first.
            if cTrees > 0:
                appendPoint()
            current = [int(row[0]), [], []]
            cTrees = current[0]
        current[1].append(float(row[2]))  # training error
        current[2].append(float(row[3]))  # cross-set (test) error

# Flush the final group.
if cTrees > 0:
    appendPoint()

ax = plt.subplot(1, 1, 1)
# The plotted values are 100 - error, i.e. accuracy.
ax.set_ylabel("Accuracy (%)")
ax.set_xlabel("Number trees")
p1 = ax.plot(total[0], total[2], color='black', label="Test")
p2 = ax.plot(total[0], total[1], color='black', ls=':', label="Train")
handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels, loc=4)

plt.show()
minimum possible value of k 13 | private int kMin = 1; 14 | // maximum possible value of k 15 | private int kMax = 15; 16 | // cross-validated, optimized value of k 17 | private int kOpt = 7; 18 | // elimAttr[i] is true if attribute i has been eliminated 19 | private boolean[] elimAttr; 20 | // learning rate for weight training 21 | private double learningRate = 0.05; 22 | // instanceWeights for training examples 23 | private double[] instanceWeights; 24 | 25 | 26 | /** Constructor for the kNN machine learning algorithm. 27 | * Takes as argument a data set. From then on, examples 28 | * in the data set can be fed to predict() in return for 29 | * classifications. 30 | */ 31 | public kNN(DataSet d) { 32 | /* Setup array labelledData so that it contains all the training 33 | data attributes along with that example's label. */ 34 | this.d = d; 35 | this.elimAttr = new boolean[this.d.numAttrs]; 36 | this.instanceWeights = new double[this.d.numTrainExs]; 37 | for (int i = 0; i < this.instanceWeights.length; i++) 38 | this.instanceWeights[i] = 1.0; 39 | 40 | this.kOpt = optimizeK(this.kMin, this.kMax); 41 | backwardsElimination(); 42 | traininstanceWeights(100000); 43 | } 44 | 45 | /** Constructor for the kNN machine learning algorithm. 46 | * Mainly used for testing the weight training heuristic. 47 | */ 48 | public kNN(DataSet d, int kOpt, int T) { 49 | /* Setup array labelledData so that it contains all the training 50 | data attributes along with that example's label. */ 51 | this.d = d; 52 | this.elimAttr = new boolean[this.d.numAttrs]; 53 | this.instanceWeights = new double[this.d.numTrainExs]; 54 | for (int i = 0; i < this.instanceWeights.length; i++) 55 | this.instanceWeights[i] = 1.0; 56 | 57 | this.kOpt = kOpt; 58 | traininstanceWeights(T); 59 | } 60 | 61 | /** Constructor for the kNN machine learning algorithm. 62 | * Takes as argument a data set and two indices, ignoring 63 | * training examples between those indices 64 | * to allow for cross validation. 
Additionally, takes 65 | * an array of eliminated attributes, instanceWeights for each training 66 | * example, and an optimal k value. From then on, examples 67 | * in the data set can be fed to predict() in return for 68 | * classifications. This constructor is used when a kNN 69 | * instance is required for which no optimizations should 70 | * be performed. 71 | */ 72 | public kNN(DataSet d, int from, int to, int kOpt, 73 | boolean[] elimAttr, double[] instanceWeights) { 74 | /* Setup array labelledData so that it contains all the training 75 | data attributes along with that example's label. */ 76 | // create data set, excluding firstEx to lastEx examples 77 | DataSet subset = new DataSet(); 78 | subset.numAttrs = d.numAttrs; 79 | subset.numTrainExs = d.numTrainExs - (to - from); 80 | subset.trainEx = new int[subset.numTrainExs][subset.numAttrs]; 81 | subset.trainLabel = new int[subset.numTrainExs]; 82 | for (int i = 0; i < from; i++) { 83 | subset.trainEx[i] = d.trainEx[i]; 84 | subset.trainLabel[i] = d.trainLabel[i]; 85 | } 86 | for (int i = to; i < d.numTrainExs; i++) { 87 | subset.trainEx[i - to + from] = d.trainEx[i]; 88 | subset.trainLabel[i - to + from] = d.trainLabel[i]; 89 | } 90 | 91 | this.d = subset; 92 | this.kOpt = kOpt; 93 | this.elimAttr = elimAttr; 94 | this.instanceWeights = instanceWeights; 95 | } 96 | 97 | /** Computes the squared distance between two integer 98 | * vectors a and b. 99 | */ 100 | private double dist(int[] a, int[] b) { 101 | int len = Math.min(a.length, b.length); 102 | int sum = 0; 103 | for (int i = 0; i < len; i++) { 104 | // skip if attribute is eliminated 105 | if (this.elimAttr[i]) continue; 106 | sum += Math.abs(a[i] - b[i]); 107 | } 108 | return sum; 109 | } 110 | 111 | /** Calculates the error over a labeled data set, returning 112 | * a double that represents the percent error. 
113 | */ 114 | private double error() { 115 | double error = 0.0; 116 | 117 | // use 8 different sets for cross validation 118 | int numSets = 8; 119 | for (int setNum = 0; setNum < numSets; setNum++) { 120 | int from = setNum*this.d.numTrainExs/numSets; 121 | int to = (setNum+1)*this.d.numTrainExs/numSets; 122 | 123 | // create new kNN using subset of data set 124 | kNN knn = new kNN(this.d, from, to, this.kOpt, 125 | this.elimAttr, this.instanceWeights); 126 | 127 | for (int t = from; t < to; t++) { 128 | if (knn.predict(this.d.trainEx[t]) != this.d.trainLabel[t]) 129 | error++; 130 | } 131 | } 132 | return error; 133 | } 134 | 135 | /** Calculates the error over a labeled data set using pre-computed 136 | * set of indices a in which a[i][j] represents index in training 137 | * set d of the jth closest example to i. Returns a double that 138 | * represents the percent error. 139 | */ 140 | private double error(int[][] a) { 141 | double error = 0.0; 142 | for (int i = 0; i < this.d.numTrainExs; i++) { 143 | if (voteCount(a[i]) != this.d.trainLabel[i]) 144 | error++; 145 | } 146 | return error; 147 | } 148 | 149 | /** Trains the instanceWeights of the attributes using backwards 150 | * propagation on data set d, running T iterations. 
151 | */ 152 | private void traininstanceWeights(int T) { 153 | // get k nearest indices for each training example 154 | // as determined by cross validation 155 | int[][] kBest = new int[this.d.numTrainExs][this.kOpt]; 156 | 157 | // use 8 different sets for cross validation 158 | int numSets = 8; 159 | for (int setNum = 0; setNum < numSets; setNum++) { 160 | int from = setNum*this.d.numTrainExs/numSets; 161 | int to = (setNum+1)*this.d.numTrainExs/numSets; 162 | 163 | // create new kNN using subset of data set 164 | kNN knn = new kNN(this.d, from, to, this.kOpt, 165 | this.elimAttr, this.instanceWeights); 166 | 167 | for (int t = from; t < to; t++) 168 | kBest[t] = 169 | knn.kNearest(this.kOpt, this.d.trainEx[t]); 170 | } 171 | 172 | // run T iterations of weight training 173 | for (int t = 0; t < T; t++) { 174 | 175 | // alter instanceWeights on each example 176 | for (int i = 0; i < this.d.numTrainExs; i++) { 177 | 178 | // modify instanceWeights to satisfy example 179 | while (this.d.trainLabel[i] != voteCount(kBest[i])) { 180 | for (int k = 0; k < this.kOpt; k++) { 181 | int neighborIndex = kBest[i][k]; 182 | if (this.d.trainLabel[neighborIndex] != this.d.trainLabel[i]) 183 | this.instanceWeights[neighborIndex] -= this.learningRate; 184 | else 185 | this.instanceWeights[neighborIndex] += this.learningRate; 186 | } 187 | } 188 | } 189 | } 190 | //System.out.println("instanceWeights trained."); 191 | } 192 | 193 | /** Uses backwards elimination to remove attributes from consideration 194 | * that decrease the classifier's performance. To avoid recomputing 195 | * distances, employs a linear-time distance update that just alters 196 | * pre-computed distances based on attribute in question. 
    /** Uses backwards elimination to remove attributes from consideration
     *  that decrease the classifier's performance.  Attributes are tried
     *  one at a time in index order; an attribute stays eliminated only
     *  if the number of misclassified training examples strictly drops.
     *  To avoid recomputing distances, each trial derives its distance
     *  matrix from the current one by subtracting the attribute's
     *  per-pair contribution.
     */
    private void backwardsElimination() {
        // calculate all distances to avoid recomputation
        double[][] dists = new double[this.d.numTrainExs][this.d.numTrainExs];

        // use 8 different sets for cross validation (set dist to infinity):
        // pairs inside the same fold get infinite distance so that they
        // sort behind every example from another fold
        int numSets = 8;
        for (int setNum = 0; setNum < numSets; setNum++) {
            int from = setNum*this.d.numTrainExs/numSets;
            int to = (setNum+1)*this.d.numTrainExs/numSets;

            for (int t = from; t < to; t++) {
                for (int s = t+1; s < to; s++) {
                    dists[t][s] = Double.POSITIVE_INFINITY;
                    dists[s][t] = dists[t][s];
                }
            }
        }

        // fill in the real distance for every cross-fold pair (symmetric)
        for (int i = 0; i < dists.length; i++) {
            for (int j = i+1; j < dists.length; j++) {
                if (dists[i][j] == Double.POSITIVE_INFINITY) continue;

                dists[i][j] = dist(this.d.trainEx[i], this.d.trainEx[j]);
                dists[j][i] = dists[i][j];
            }
        }

        // orderedIndices[i][j] is index of jth closest example to i
        int[][] orderedIndices = new int[this.d.numTrainExs][this.d.numTrainExs];
        for (int i = 0; i < orderedIndices.length; i++) {
            // Arrays.sort with a comparator needs boxed Integers
            Integer[] a = new Integer[this.d.numTrainExs];
            for (int j = 0; j < orderedIndices.length; j++) {
                a[j] = j;
            }
            // with descending set, exComparator orders by increasing
            // distance; index i itself is always placed last
            exComparator comp = new exComparator(dists[i], i);
            comp.descending = true;
            Arrays.sort(a, comp);
            for (int j = 0; j < orderedIndices.length; j++) {
                orderedIndices[i][j] = a[j];
            }
        }

        // calculate base error with no attribute elimination
        double baselineError = error(orderedIndices);

        // number of attributes eliminated (only used by the debug print)
        int sum = 0;
        // iterate over each attribute
        for (int m = 0; m < this.d.numAttrs; m++) {
            double[][] newDists = new double[this.d.numTrainExs][this.d.numTrainExs];

            // linear-time distance update: drop attribute m's contribution
            // from every pair (infinite entries stay infinite)
            for (int i = 0; i < dists.length; i++) {
                for (int j = i+1; j < dists.length; j++) {
                    newDists[i][j] = dists[i][j] -
                        Math.abs(this.d.trainEx[i][m] - this.d.trainEx[j][m]);
                    newDists[j][i] = newDists[i][j];
                }
            }

            // compute new k nearest: re-sort every neighbour list, starting
            // from its current order, by the trial distances
            for (int i = 0; i < this.d.numTrainExs; i++) {
                // Arrays.sort with a comparator needs boxed Integers
                Integer[] a = new Integer[this.d.numTrainExs];
                for (int j = 0; j < orderedIndices.length; j++) {
                    a[j] = orderedIndices[i][j];
                }
                exComparator comp = new exComparator(newDists[i], i);
                comp.descending = true;
                Arrays.sort(a, comp);
                for (int j = 0; j < orderedIndices.length; j++) {
                    orderedIndices[i][j] = a[j];
                }
            }

            double adjustedError = error(orderedIndices);

            // if error improved, keep attribute eliminated; else, retain
            // (on rejection dists is left untouched, but orderedIndices
            // keeps the trial ordering until the next re-sort)
            if (adjustedError < baselineError) {
                this.elimAttr[m] = true;
                baselineError = adjustedError;
                dists = newDists;
                sum++;
            }
        }
        //System.out.printf("%d attributes removed.\n", sum);
    }
288 | */ 289 | private void forwardsSelection() { 290 | // remove all attributes 291 | for (int i = 0; i < this.elimAttr.length; i++) 292 | this.elimAttr[i] = true; 293 | 294 | // calculate all distances to avoid recomputation 295 | double[][] dists = new double[this.d.numTrainExs][this.d.numTrainExs]; 296 | 297 | // use 8 different sets for cross validation (set dist to infinity) 298 | int numSets = 8; 299 | for (int setNum = 0; setNum < numSets; setNum++) { 300 | int from = setNum*this.d.numTrainExs/numSets; 301 | int to = (setNum+1)*this.d.numTrainExs/numSets; 302 | 303 | for (int t = from; t < to; t++) { 304 | for (int s = t+1; s < to; s++) { 305 | dists[t][s] = Double.POSITIVE_INFINITY; 306 | dists[s][t] = dists[t][s]; 307 | } 308 | } 309 | } 310 | 311 | // orderedIndices[i][j] is index of jth closest example to i 312 | int[][] orderedIndices = new int[this.d.numTrainExs][this.d.numTrainExs]; 313 | for (int i = 0; i < orderedIndices.length; i++) { 314 | // annoying Integer to int casting issues 315 | Integer[] a = new Integer[this.d.numTrainExs]; 316 | for (int j = 0; j < orderedIndices.length; j++) { 317 | a[j] = j; 318 | } 319 | exComparator comp = new exComparator(dists[i], i); 320 | comp.descending = true; 321 | Arrays.sort(a, comp); 322 | for (int j = 0; j < orderedIndices.length; j++) { 323 | orderedIndices[i][j] = a[j]; 324 | } 325 | } 326 | 327 | // calculate base error with no attribute elimination 328 | double baselineError = error(orderedIndices); 329 | boolean attributeAdded; 330 | 331 | do { 332 | attributeAdded = false; 333 | double minError = Double.POSITIVE_INFINITY; 334 | int minErrorIndex = -1; 335 | double[][] minErrorDists = new double[this.d.numTrainExs][this.d.numTrainExs]; 336 | 337 | // iterate over each attribute 338 | for (int m = 0; m < this.d.numAttrs; m++) { 339 | if (!this.elimAttr[m]) continue; 340 | 341 | double[][] newDists = new double[this.d.numTrainExs][this.d.numTrainExs]; 342 | 343 | // linear-time distance update 344 | 
for (int i = 0; i < dists.length; i++) { 345 | for (int j = i+1; j < dists.length; j++) { 346 | newDists[i][j] = dists[i][j] + 347 | Math.abs(this.d.trainEx[i][m] - this.d.trainEx[j][m]); 348 | newDists[j][i] = newDists[i][j]; 349 | } 350 | } 351 | 352 | // compute new k nearest 353 | for (int i = 0; i < this.d.numTrainExs; i++) { 354 | // annoying Integer to int casting issues 355 | Integer[] a = new Integer[this.d.numTrainExs]; 356 | for (int j = 0; j < orderedIndices.length; j++) { 357 | a[j] = orderedIndices[i][j]; 358 | } 359 | exComparator comp = new exComparator(newDists[i], i); 360 | comp.descending = true; 361 | Arrays.sort(a, comp); 362 | for (int j = 0; j < orderedIndices.length; j++) { 363 | orderedIndices[i][j] = a[j]; 364 | } 365 | } 366 | 367 | double adjustedError = error(orderedIndices); 368 | 369 | // if error improved, keep attribute eliminated; else, retain 370 | if (adjustedError < minError) { 371 | minError = adjustedError; 372 | minErrorIndex = m; 373 | minErrorDists = newDists; 374 | } 375 | } 376 | 377 | if (minError < baselineError) { 378 | this.elimAttr[minErrorIndex] = false; 379 | dists = minErrorDists; 380 | attributeAdded = true; 381 | //System.out.println("Added attribute " + minErrorIndex); 382 | } 383 | } while (attributeAdded); 384 | } 385 | 386 | /** Uses a cross-validation technique to find the optimal value of 387 | * k for the kNN algorithm on data set d. Returns an integer k 388 | * between k_min and k_max (inclusive). 
389 | */ 390 | private int optimizeK(int kMin, int kMax) { 391 | assert(kMax >= kMin); 392 | 393 | // use 8 different sets for cross validation 394 | int numSets = 8; 395 | int[] kErrors = new int[kMax - kMin + 1]; 396 | 397 | for (int setNum = 0; setNum < numSets; setNum++) { 398 | int from = setNum*this.d.numTrainExs/numSets; 399 | int to = (setNum+1)*this.d.numTrainExs/numSets; 400 | 401 | // create new kNN using subset of data set 402 | kNN knn = new kNN(this.d, from, to, this.kOpt, 403 | this.elimAttr, this.instanceWeights); 404 | 405 | // test on held-out training examples 406 | for (int t = from; t < to; t++) { 407 | 408 | // get k_max best examples 409 | int[] best = knn.kNearest(kMax, this.d.trainEx[t]); 410 | 411 | // count votes by value of k 412 | double vote_0 = 0; 413 | double vote_1 = 0; 414 | for (int k = 0; k < kMax; k++) { 415 | int i = best[k]; 416 | 417 | // track errors for appropriate k 418 | if (k >= kMin) { 419 | int result = (vote_1 > vote_0)? 1 : 0; 420 | if (result != this.d.trainLabel[t]) 421 | kErrors[k - kMin]++; 422 | } 423 | 424 | // continue to increment vote counts 425 | if (this.d.trainLabel[i] == 1) 426 | vote_1 += this.instanceWeights[i]; 427 | else 428 | vote_0 += this.instanceWeights[i]; 429 | } 430 | int result = (vote_1 > vote_0)? 1 : 0; 431 | if (result != this.d.trainLabel[t]) 432 | kErrors[kMax - kMin]++; 433 | } 434 | } 435 | // set k to that of minimized error 436 | double min = Double.MAX_VALUE; 437 | int minK = 0; 438 | for (int k = kMin; k <= kMax; k++) { 439 | if (kErrors[k - kMin] < min) { 440 | min = kErrors[k - kMin]; 441 | minK = k; 442 | } 443 | } 444 | //System.out.printf("Optimal k chosen at k = %d\n", minK); 445 | return minK; 446 | } 447 | 448 | /** A class used to modularize comparisons for training 449 | * examples based on a specific reference example ex. 450 | * ex_index is used to avoid using the same training 451 | * example as the reference example. 
452 | */ 453 | private class exComparator implements Comparator { 454 | private double[] dists; 455 | private int[] ex; 456 | private int exIndex = -1; 457 | public boolean descending = false; 458 | 459 | private exComparator(int[] ex) { 460 | this.ex = ex; 461 | } 462 | 463 | /* Constructor which assumes base ex may be used in comparison */ 464 | private exComparator(int[] ex, int exIndex) { 465 | this.ex = ex; 466 | this.exIndex = exIndex; 467 | } 468 | 469 | /* Constructor which allows for precomputed distances */ 470 | private exComparator(double[] dists, int exIndex) { 471 | this.dists = dists; 472 | this.exIndex = exIndex; 473 | } 474 | 475 | public int compare(Object o1, Object o2) { 476 | int i = (int)(Integer)o1; 477 | int j = (int)(Integer)o2; 478 | 479 | // ignore if training example is in data set 480 | if (i == this.exIndex) return 1; 481 | if (j == this.exIndex) return -1; 482 | 483 | // take column of min distance 484 | double d1; 485 | double d2; 486 | if (this.dists == null) { 487 | d1 = dist(d.trainEx[i], this.ex); 488 | d2 = dist(d.trainEx[j], this.ex); 489 | } 490 | else { 491 | d1 = this.dists[i]; 492 | d2 = this.dists[j]; 493 | } 494 | int result = 0; 495 | if (d1 > d2) result = -1; 496 | if (d2 > d1) result = 1; 497 | if (this.descending) result *= -1; 498 | return result; 499 | } 500 | } 501 | 502 | /** Calculates the indices of the k nearest training 503 | * examples in data set d to example ex. Returns an 504 | * array a in which a[i] is the index of the ith 505 | * closest training example. 
506 | */ 507 | private int[] kNearest(int k, int[] ex) { 508 | // indices of k best examples 509 | int[] indices = new int[k]; 510 | 511 | // record distances to avoid recalculation 512 | double[] dists = new double[this.d.numTrainExs]; 513 | 514 | // store indices in priority queue, sorted by distance to ex 515 | PriorityQueue pq = new PriorityQueue(k, new exComparator(ex)); 516 | 517 | // search every example 518 | for (int i = 0; i < this.d.numTrainExs; i++) { 519 | dists[i] = dist(this.d.trainEx[i], ex); 520 | if (pq.size() >= k) { 521 | if (dists[i] < dists[pq.peek()]) { 522 | pq.remove(); 523 | pq.add(i); 524 | } 525 | } 526 | else { 527 | pq.add(i); 528 | } 529 | } 530 | 531 | // pq returns worst index first; store backwards in indices 532 | for (int i = indices.length - 1; i >= 0; i--) 533 | indices[i] = pq.remove(); 534 | return indices; 535 | } 536 | 537 | /** Counts up the votes for the training examples with labels 538 | * at indices listed in array a. Returns 0 or 1. 539 | */ 540 | private int voteCount(int[] a) { 541 | double vote_1 = 0; 542 | double vote_0 = 0; 543 | int len = Math.min(a.length, this.kOpt); 544 | for (int k = 0; k < len; k++) { 545 | int i = a[k]; 546 | if (this.d.trainLabel[i] == 1) 547 | vote_1 += this.instanceWeights[i]; 548 | else 549 | vote_0 += this.instanceWeights[i]; 550 | } 551 | return (vote_1 > vote_0)? 1 : 0; 552 | } 553 | 554 | /** A method for predicting the label of a given example ex 555 | * represented, as in the rest of the code, as an array of values 556 | * for each of the attributes. The method should return a 557 | * prediction, i.e., 0 or 1. 558 | */ 559 | public int predict(int[] ex) { 560 | int[] indices = kNearest(this.kOpt, ex); 561 | return voteCount(indices); 562 | } 563 | 564 | /** This method should return a very brief but understandable 565 | * description of the learning algorithm that is being used, 566 | * appropriate for posting on the class website. 
*/
    public String algorithmDescription() {
        final String description =
            "A kNN implementation with cross-validation, backwards elimination, and weight training.";
        return description;
    }

    /** Returns the "author" credit for this program as it should
     *  appear on the class website (a real name, a pseudonym, or a
     *  group name).
     */
    public String author() {
        final String credit = "crm & dmrd";
        return credit;
    }

    /** A simple main for testing this algorithm. Expects a filestem
     *  as the first command-line argument, loads the data set for
     *  that filestem, trains a kNN classifier on it, and prints the
     *  test predictions to filestem.testout. With no argument it
     *  prints a usage message to standard error and does nothing else.
     */
    public static void main(String argv[])
        throws FileNotFoundException, IOException {
        if (argv.length >= 1) {
            final String filestem = argv[0];

            DataSet data = new BinaryDataSet(filestem);
            Classifier learner = new kNN(data);

            data.printTestPredictions(learner, filestem);
        }
        else {
            System.err.println("argument: filestem");
        }
    }
}

--------------------------------------------------------------------------------