├── .gitignore ├── AdaBoost ├── AdaBoost.py ├── horseColicTest2.txt └── horseColicTraining2.txt ├── Bayes ├── NativeBayes.py └── email │ ├── ham │ ├── 1.txt │ ├── 10.txt │ ├── 11.txt │ ├── 12.txt │ ├── 13.txt │ ├── 14.txt │ ├── 15.txt │ ├── 16.txt │ ├── 17.txt │ ├── 18.txt │ ├── 19.txt │ ├── 2.txt │ ├── 20.txt │ ├── 21.txt │ ├── 22.txt │ ├── 23.txt │ ├── 24.txt │ ├── 25.txt │ ├── 3.txt │ ├── 4.txt │ ├── 5.txt │ ├── 6.txt │ ├── 7.txt │ ├── 8.txt │ └── 9.txt │ └── spam │ ├── 1.txt │ ├── 10.txt │ ├── 11.txt │ ├── 12.txt │ ├── 13.txt │ ├── 14.txt │ ├── 15.txt │ ├── 16.txt │ ├── 17.txt │ ├── 18.txt │ ├── 19.txt │ ├── 2.txt │ ├── 20.txt │ ├── 21.txt │ ├── 22.txt │ ├── 23.txt │ ├── 24.txt │ ├── 25.txt │ ├── 3.txt │ ├── 4.txt │ ├── 5.txt │ ├── 6.txt │ ├── 7.txt │ ├── 8.txt │ └── 9.txt ├── CF ├── 0_5.txt ├── CFItemBased.py └── __init__.py ├── DecisionTree ├── C45.py ├── ID3.py ├── __init__.py ├── randomforest.py ├── trainset.txt └── treePlotter.py ├── Kmeans ├── 1.jpg ├── KmeansImg.py └── test.py ├── LogicRegression ├── EXTRAS │ ├── README.txt │ ├── plot2D.py │ ├── plotGD.py │ ├── plotSDerror.py │ └── sigmoidPlot.py ├── horseColicTest.txt ├── horseColicTraining.txt ├── logicRegression.py └── testSet.txt ├── MaxEntropy ├── MaxEntropy.py └── train.txt ├── MaxEntropyWeb ├── MaxEnt.py ├── MaxEnt.pyc ├── MaxEntMain.py ├── __init__.py ├── __init__.pyc └── train.txt ├── MutualInformation └── MI.py ├── NoneParamDP └── CRP.py ├── README.md ├── Regression ├── abalone.txt ├── ex0.txt ├── ex1.txt └── regression.py ├── TreesRegression ├── bikeSpeedVsIq_test.txt ├── bikeSpeedVsIq_train.txt ├── cart.py ├── ex0.txt ├── ex00.txt ├── ex2.txt ├── ex2test.txt ├── exp.txt ├── exp2.txt ├── expTest.txt └── sine.txt ├── greenhat.py ├── htmlProcess.py ├── realwork.txt ├── taskAssignment.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .ropeproject 3 | *.swp 4 | *.pyc 5 | -------------------------------------------------------------------------------- /AdaBoost/AdaBoost.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | 3 | 4 | import numpy as np 5 | 6 | 7 | def loadSimpData(): 8 | dataMat = np.mat([[1., 2.1], 9 | [2., 1.1], 10 | [1.3, 1.], 11 | [1., 1.], 12 | [2., 1.]]) 13 | labelMat = np.mat([1.0, 1.0, -1.0, -1.0, 1.0]).T 14 | return dataMat, labelMat 15 | 16 | 17 | def loadDataFromFile(filename): 18 | fr = open(filename) 19 | dataArr = [] 20 | labelArr = [] 21 | for line in fr.readlines(): 22 | rowArr = [] 23 | lineArr = line.strip().split('\t') 24 | for i in range(len(lineArr) - 1): 25 | rowArr.append(float(lineArr[i])) 26 | dataArr.append(rowArr) 27 | labelArr.append(float(lineArr[-1])) 28 | return np.mat(dataArr), np.mat(labelArr).T 29 | 30 | 31 | def stumpClassify(dataMat, dim, threshVal, threshIneq): 32 | m = np.shape(dataMat)[0] 33 | retArr = np.ones((m, 1)) # predicted class vector 34 | if threshIneq == 'lt': 35 | retArr[dataMat[:, dim] <= threshVal] = -1.0 36 | else: 37 | retArr[dataMat[:, dim] > threshVal] = -1.0 38 | return retArr 39 | 40 | 41 | def buildStump(dataMat, labelMat, D): 42 | m, n = np.shape(dataMat) 43 | numSteps = 10.0 44 | bestStump = {} 45 | bestClassEst = np.mat(np.zeros((m, 1))) 46 | minError = np.inf 47 | for i in range(n): 48 | rangeMin = dataMat[:, i].min() 49 | rangeMax = dataMat[:, i].max() 50 | stepSize = (rangeMax - rangeMin) / numSteps 51 | # loop from min to max in current dimension 52 | for j in range(-1, int(numSteps) + 1): 53 | for threshIneq in 
['lt', 'gt']: 54 | threshVal = rangeMin + float(j) * stepSize 55 | predictedVals = stumpClassify(dataMat, i, 56 | threshVal, threshIneq) 57 | errArr = np.mat(np.ones((m, 1))) 58 | errArr[predictedVals == labelMat] = 0 59 | # calculate the total error by matrix multiplication 60 | weightedErr = D.T * errArr 61 | if weightedErr < minError: 62 | minError = weightedErr 63 | bestClassEst = predictedVals.copy() 64 | bestStump['dim'] = i 65 | bestStump['threshVal'] = threshVal 66 | bestStump['threshIneq'] = threshIneq 67 | return bestStump, minError, bestClassEst 68 | 69 | 70 | def adaBoostTrainDS(dataMat, labelMat, numIter=40): 71 | """ 72 | " param dataMat: 73 | " param labelMat: column vector 74 | """ 75 | weakClassArr = [] 76 | m = np.shape(dataMat)[0] 77 | D = np.mat(np.ones((m, 1)) / m) 78 | aggClassEst = np.mat(np.zeros((m, 1))) 79 | for i in range(numIter): 80 | bestStump, error, classEst = buildStump(dataMat, labelMat, D) 81 | alpha = float(0.5 * np.log((1.0 - error) / max(error, 1e-16))) 82 | bestStump['alpha'] = alpha 83 | weakClassArr.append(bestStump) 84 | # calculate the exponent of D 85 | expon = np.multiply(-1 * alpha * labelMat, classEst) 86 | D = np.multiply(D, np.exp(expon)) 87 | D = D / D.sum() 88 | 89 | # calculate the aggregate error 90 | aggClassEst += alpha * classEst 91 | aggErrors = np.multiply( 92 | np.sign(aggClassEst) != labelMat, np.ones((m, 1))) 93 | errorRate = aggErrors.sum() / m 94 | print 'total error: ', errorRate 95 | if errorRate == 0.0: 96 | break 97 | return weakClassArr, aggClassEst 98 | 99 | 100 | def adaClassify(dataToClass, classifierArr): 101 | """ 102 | " predict function 103 | """ 104 | dataMat = np.mat(dataToClass) 105 | m = np.shape(dataMat)[0] 106 | aggClassEst = np.mat(np.zeros((m, 1))) 107 | for classifier in classifierArr: 108 | classEst = stumpClassify( 109 | dataMat, classifier['dim'], 110 | classifier['threshVal'], classifier['threshIneq']) 111 | aggClassEst += classifier['alpha'] * classEst 112 | # print aggClassEst 113 | return np.sign(aggClassEst) 114 | 115 | if __name__ == '__main__': 116 | print 'start' 117 | # dataMat, labelMat = loadSimpData() 118 | # classifierArr, aggClassEst = adaBoostTrainDS(dataMat, labelMat) 119 | # result = adaClassify([0, 0], classifierArr) 120 | # result = adaClassify([[5, 5], [0, 0]], classifierArr) 121 | # print result 122 | 123 | trainMat, trainLabelMat = loadDataFromFile('horseColicTraining2.txt') 124 | testMat, testLabelMat = loadDataFromFile('horseColicTest2.txt') 125 | classifierArr, aggClassEst = adaBoostTrainDS(trainMat, trainLabelMat, 70) 126 | result = adaClassify(testMat, classifierArr) 127 | errorArr = np.mat(np.ones((67, 1))) 128 | errorNum = errorArr[result != testLabelMat].sum() 129 | print 'test error: ', errorNum / 67.00 130 | # print isinstance(1.0, int) 131 | -------------------------------------------------------------------------------- /AdaBoost/horseColicTest2.txt: -------------------------------------------------------------------------------- 1 | 2.000000 1.000000 38.500000 54.000000 20.000000 0.000000 1.000000 2.000000 2.000000 3.000000 4.000000 1.000000 2.000000 2.000000 5.900000 0.000000 2.000000 42.000000 6.300000 0.000000 0.000000 1.000000 2 | 2.000000 1.000000 37.600000 48.000000 36.000000 0.000000 0.000000 1.000000 1.000000 0.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 44.000000 6.300000 1.000000 5.000000 1.000000 3 | 1.000000 1.000000 37.700000 44.000000 28.000000 0.000000 4.000000 3.000000 2.000000 5.000000 4.000000 4.000000 1.000000 1.000000 
0.000000 3.000000 5.000000 45.000000 70.000000 3.000000 2.000000 1.000000 4 | 1.000000 1.000000 37.000000 56.000000 24.000000 3.000000 1.000000 4.000000 2.000000 4.000000 4.000000 3.000000 1.000000 1.000000 0.000000 0.000000 0.000000 35.000000 61.000000 3.000000 2.000000 -1.000000 5 | 2.000000 1.000000 38.000000 42.000000 12.000000 3.000000 0.000000 3.000000 1.000000 1.000000 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 2.000000 37.000000 5.800000 0.000000 0.000000 1.000000 6 | 1.000000 1.000000 0.000000 60.000000 40.000000 3.000000 0.000000 1.000000 1.000000 0.000000 4.000000 0.000000 3.000000 2.000000 0.000000 0.000000 5.000000 42.000000 72.000000 0.000000 0.000000 1.000000 7 | 2.000000 1.000000 38.400000 80.000000 60.000000 3.000000 2.000000 2.000000 1.000000 3.000000 2.000000 1.000000 2.000000 2.000000 0.000000 1.000000 1.000000 54.000000 6.900000 0.000000 0.000000 1.000000 8 | 2.000000 1.000000 37.800000 48.000000 12.000000 2.000000 1.000000 2.000000 1.000000 3.000000 0.000000 1.000000 2.000000 0.000000 0.000000 2.000000 0.000000 48.000000 7.300000 1.000000 0.000000 1.000000 9 | 2.000000 1.000000 37.900000 45.000000 36.000000 3.000000 3.000000 3.000000 2.000000 2.000000 3.000000 1.000000 2.000000 1.000000 0.000000 3.000000 0.000000 33.000000 5.700000 3.000000 0.000000 1.000000 10 | 2.000000 1.000000 39.000000 84.000000 12.000000 3.000000 1.000000 5.000000 1.000000 2.000000 4.000000 2.000000 1.000000 2.000000 7.000000 0.000000 4.000000 62.000000 5.900000 2.000000 2.200000 -1.000000 11 | 2.000000 1.000000 38.200000 60.000000 24.000000 3.000000 1.000000 3.000000 2.000000 3.000000 3.000000 2.000000 3.000000 3.000000 0.000000 4.000000 4.000000 53.000000 7.500000 2.000000 1.400000 1.000000 12 | 1.000000 1.000000 0.000000 140.000000 0.000000 0.000000 0.000000 4.000000 2.000000 5.000000 4.000000 4.000000 1.000000 1.000000 0.000000 0.000000 5.000000 30.000000 69.000000 0.000000 0.000000 -1.000000 13 | 1.000000 1.000000 37.900000 120.000000 60.000000 3.000000 3.000000 3.000000 1.000000 5.000000 4.000000 4.000000 2.000000 2.000000 7.500000 4.000000 5.000000 52.000000 6.600000 3.000000 1.800000 -1.000000 14 | 2.000000 1.000000 38.000000 72.000000 36.000000 1.000000 1.000000 3.000000 1.000000 3.000000 0.000000 2.000000 2.000000 1.000000 0.000000 3.000000 5.000000 38.000000 6.800000 2.000000 2.000000 1.000000 15 | 2.000000 9.000000 38.000000 92.000000 28.000000 1.000000 1.000000 2.000000 1.000000 1.000000 3.000000 2.000000 3.000000 0.000000 7.200000 0.000000 0.000000 37.000000 6.100000 1.000000 1.100000 1.000000 16 | 1.000000 1.000000 38.300000 66.000000 30.000000 2.000000 3.000000 1.000000 1.000000 2.000000 4.000000 3.000000 3.000000 2.000000 8.500000 4.000000 5.000000 37.000000 6.000000 0.000000 0.000000 1.000000 17 | 2.000000 1.000000 37.500000 48.000000 24.000000 3.000000 1.000000 1.000000 1.000000 2.000000 1.000000 0.000000 1.000000 1.000000 0.000000 3.000000 2.000000 43.000000 6.000000 1.000000 2.800000 1.000000 18 | 1.000000 1.000000 37.500000 88.000000 20.000000 2.000000 3.000000 3.000000 1.000000 4.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 35.000000 6.400000 1.000000 0.000000 -1.000000 19 | 2.000000 9.000000 0.000000 150.000000 60.000000 4.000000 4.000000 4.000000 2.000000 5.000000 4.000000 4.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1.000000 20 | 1.000000 1.000000 39.700000 100.000000 30.000000 0.000000 0.000000 6.000000 2.000000 4.000000 4.000000 3.000000 1.000000 0.000000 0.000000 4.000000 5.000000 
65.000000 75.000000 0.000000 0.000000 -1.000000 21 | 1.000000 1.000000 38.300000 80.000000 0.000000 3.000000 3.000000 4.000000 2.000000 5.000000 4.000000 3.000000 2.000000 1.000000 0.000000 4.000000 4.000000 45.000000 7.500000 2.000000 4.600000 1.000000 22 | 2.000000 1.000000 37.500000 40.000000 32.000000 3.000000 1.000000 3.000000 1.000000 3.000000 2.000000 3.000000 2.000000 1.000000 0.000000 0.000000 5.000000 32.000000 6.400000 1.000000 1.100000 1.000000 23 | 1.000000 1.000000 38.400000 84.000000 30.000000 3.000000 1.000000 5.000000 2.000000 4.000000 3.000000 3.000000 2.000000 3.000000 6.500000 4.000000 4.000000 47.000000 7.500000 3.000000 0.000000 -1.000000 24 | 1.000000 1.000000 38.100000 84.000000 44.000000 4.000000 0.000000 4.000000 2.000000 5.000000 3.000000 1.000000 1.000000 3.000000 5.000000 0.000000 4.000000 60.000000 6.800000 0.000000 5.700000 -1.000000 25 | 2.000000 1.000000 38.700000 52.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 3.000000 1.000000 0.000000 0.000000 0.000000 1.000000 3.000000 4.000000 74.000000 0.000000 0.000000 1.000000 26 | 2.000000 1.000000 38.100000 44.000000 40.000000 2.000000 1.000000 3.000000 1.000000 3.000000 3.000000 1.000000 0.000000 0.000000 0.000000 1.000000 3.000000 35.000000 6.800000 0.000000 0.000000 1.000000 27 | 2.000000 1.000000 38.400000 52.000000 20.000000 2.000000 1.000000 3.000000 1.000000 1.000000 3.000000 2.000000 2.000000 1.000000 0.000000 3.000000 5.000000 41.000000 63.000000 1.000000 1.000000 1.000000 28 | 1.000000 1.000000 38.200000 60.000000 0.000000 1.000000 0.000000 3.000000 1.000000 2.000000 1.000000 1.000000 1.000000 1.000000 0.000000 4.000000 4.000000 43.000000 6.200000 2.000000 3.900000 1.000000 29 | 2.000000 1.000000 37.700000 40.000000 18.000000 1.000000 1.000000 1.000000 0.000000 3.000000 2.000000 1.000000 1.000000 1.000000 0.000000 3.000000 3.000000 36.000000 3.500000 0.000000 0.000000 1.000000 30 | 1.000000 1.000000 39.100000 60.000000 10.000000 0.000000 1.000000 1.000000 0.000000 2.000000 3.000000 0.000000 0.000000 0.000000 0.000000 4.000000 4.000000 0.000000 0.000000 0.000000 0.000000 1.000000 31 | 2.000000 1.000000 37.800000 48.000000 16.000000 1.000000 1.000000 1.000000 1.000000 0.000000 1.000000 1.000000 2.000000 1.000000 0.000000 4.000000 3.000000 43.000000 7.500000 0.000000 0.000000 1.000000 32 | 1.000000 1.000000 39.000000 120.000000 0.000000 4.000000 3.000000 5.000000 2.000000 2.000000 4.000000 3.000000 2.000000 3.000000 8.000000 0.000000 0.000000 65.000000 8.200000 3.000000 4.600000 1.000000 33 | 1.000000 1.000000 38.200000 76.000000 0.000000 2.000000 3.000000 2.000000 1.000000 5.000000 3.000000 3.000000 1.000000 2.000000 6.000000 1.000000 5.000000 35.000000 6.500000 2.000000 0.900000 1.000000 34 | 2.000000 1.000000 38.300000 88.000000 0.000000 0.000000 0.000000 6.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -1.000000 35 | 1.000000 1.000000 38.000000 80.000000 30.000000 3.000000 3.000000 3.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 6.000000 0.000000 0.000000 48.000000 8.300000 0.000000 4.300000 1.000000 36 | 1.000000 1.000000 0.000000 0.000000 0.000000 3.000000 1.000000 1.000000 1.000000 2.000000 3.000000 3.000000 1.000000 3.000000 6.000000 4.000000 4.000000 0.000000 0.000000 2.000000 0.000000 -1.000000 37 | 1.000000 1.000000 37.600000 40.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 1.000000 1.000000 0.000000 0.000000 2.000000 2.100000 
1.000000 38 | 2.000000 1.000000 37.500000 44.000000 0.000000 1.000000 1.000000 1.000000 1.000000 3.000000 3.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 45.000000 5.800000 2.000000 1.400000 1.000000 39 | 2.000000 1.000000 38.200000 42.000000 16.000000 1.000000 1.000000 3.000000 1.000000 1.000000 3.000000 1.000000 0.000000 0.000000 0.000000 1.000000 0.000000 35.000000 60.000000 1.000000 1.000000 1.000000 40 | 2.000000 1.000000 38.000000 56.000000 44.000000 3.000000 3.000000 3.000000 0.000000 0.000000 1.000000 1.000000 2.000000 1.000000 0.000000 4.000000 0.000000 47.000000 70.000000 2.000000 1.000000 1.000000 41 | 2.000000 1.000000 38.300000 45.000000 20.000000 3.000000 3.000000 2.000000 2.000000 2.000000 4.000000 1.000000 2.000000 0.000000 0.000000 4.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 42 | 1.000000 1.000000 0.000000 48.000000 96.000000 1.000000 1.000000 3.000000 1.000000 0.000000 4.000000 1.000000 2.000000 1.000000 0.000000 1.000000 4.000000 42.000000 8.000000 1.000000 0.000000 1.000000 43 | 1.000000 1.000000 37.700000 55.000000 28.000000 2.000000 1.000000 2.000000 1.000000 2.000000 3.000000 3.000000 0.000000 3.000000 5.000000 4.000000 5.000000 0.000000 0.000000 0.000000 0.000000 1.000000 44 | 2.000000 1.000000 36.000000 100.000000 20.000000 4.000000 3.000000 6.000000 2.000000 2.000000 4.000000 3.000000 1.000000 1.000000 0.000000 4.000000 5.000000 74.000000 5.700000 2.000000 2.500000 -1.000000 45 | 1.000000 1.000000 37.100000 60.000000 20.000000 2.000000 0.000000 4.000000 1.000000 3.000000 0.000000 3.000000 0.000000 2.000000 5.000000 3.000000 4.000000 64.000000 8.500000 2.000000 0.000000 1.000000 46 | 2.000000 1.000000 37.100000 114.000000 40.000000 3.000000 0.000000 3.000000 2.000000 2.000000 2.000000 1.000000 0.000000 0.000000 0.000000 0.000000 3.000000 32.000000 0.000000 3.000000 6.500000 1.000000 47 | 1.000000 1.000000 38.100000 72.000000 30.000000 3.000000 3.000000 3.000000 1.000000 4.000000 4.000000 3.000000 2.000000 1.000000 0.000000 3.000000 5.000000 37.000000 56.000000 3.000000 1.000000 1.000000 48 | 1.000000 1.000000 37.000000 44.000000 12.000000 3.000000 1.000000 1.000000 2.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 4.000000 2.000000 40.000000 6.700000 3.000000 8.000000 1.000000 49 | 1.000000 1.000000 38.600000 48.000000 20.000000 3.000000 1.000000 1.000000 1.000000 4.000000 3.000000 1.000000 0.000000 0.000000 0.000000 3.000000 0.000000 37.000000 75.000000 0.000000 0.000000 1.000000 50 | 1.000000 1.000000 0.000000 82.000000 72.000000 3.000000 1.000000 4.000000 1.000000 2.000000 3.000000 3.000000 0.000000 3.000000 0.000000 4.000000 4.000000 53.000000 65.000000 3.000000 2.000000 -1.000000 51 | 1.000000 9.000000 38.200000 78.000000 60.000000 4.000000 4.000000 6.000000 0.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 1.000000 0.000000 59.000000 5.800000 3.000000 3.100000 -1.000000 52 | 2.000000 1.000000 37.800000 60.000000 16.000000 1.000000 1.000000 3.000000 1.000000 2.000000 3.000000 2.000000 1.000000 2.000000 0.000000 3.000000 0.000000 41.000000 73.000000 0.000000 0.000000 -1.000000 53 | 1.000000 1.000000 38.700000 34.000000 30.000000 2.000000 0.000000 3.000000 1.000000 2.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 33.000000 69.000000 0.000000 2.000000 -1.000000 54 | 1.000000 1.000000 0.000000 36.000000 12.000000 1.000000 1.000000 1.000000 1.000000 1.000000 2.000000 1.000000 1.000000 1.000000 0.000000 1.000000 5.000000 44.000000 0.000000 0.000000 0.000000 1.000000 55 | 2.000000 
1.000000 38.300000 44.000000 60.000000 0.000000 0.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 6.400000 36.000000 0.000000 0.000000 1.000000 56 | 2.000000 1.000000 37.400000 54.000000 18.000000 3.000000 0.000000 1.000000 1.000000 3.000000 4.000000 3.000000 2.000000 2.000000 0.000000 4.000000 5.000000 30.000000 7.100000 2.000000 0.000000 1.000000 57 | 1.000000 1.000000 0.000000 0.000000 0.000000 4.000000 3.000000 0.000000 2.000000 2.000000 4.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 54.000000 76.000000 3.000000 2.000000 1.000000 58 | 1.000000 1.000000 36.600000 48.000000 16.000000 3.000000 1.000000 3.000000 1.000000 4.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 27.000000 56.000000 0.000000 0.000000 -1.000000 59 | 1.000000 1.000000 38.500000 90.000000 0.000000 1.000000 1.000000 3.000000 1.000000 3.000000 3.000000 3.000000 2.000000 3.000000 2.000000 4.000000 5.000000 47.000000 79.000000 0.000000 0.000000 1.000000 60 | 1.000000 1.000000 0.000000 75.000000 12.000000 1.000000 1.000000 4.000000 1.000000 5.000000 3.000000 3.000000 0.000000 3.000000 5.800000 0.000000 0.000000 58.000000 8.500000 1.000000 0.000000 1.000000 61 | 2.000000 1.000000 38.200000 42.000000 0.000000 3.000000 1.000000 1.000000 1.000000 1.000000 1.000000 2.000000 2.000000 1.000000 0.000000 3.000000 2.000000 35.000000 5.900000 2.000000 0.000000 1.000000 62 | 1.000000 9.000000 38.200000 78.000000 60.000000 4.000000 4.000000 6.000000 0.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 1.000000 0.000000 59.000000 5.800000 3.000000 3.100000 -1.000000 63 | 2.000000 1.000000 38.600000 60.000000 30.000000 1.000000 1.000000 3.000000 1.000000 4.000000 2.000000 2.000000 1.000000 1.000000 0.000000 0.000000 0.000000 40.000000 6.000000 1.000000 0.000000 1.000000 64 | 2.000000 1.000000 37.800000 42.000000 40.000000 1.000000 1.000000 1.000000 1.000000 1.000000 3.000000 1.000000 0.000000 0.000000 0.000000 3.000000 3.000000 36.000000 6.200000 0.000000 0.000000 1.000000 65 | 1.000000 1.000000 38.000000 60.000000 12.000000 1.000000 1.000000 2.000000 1.000000 2.000000 1.000000 1.000000 1.000000 1.000000 0.000000 1.000000 4.000000 44.000000 65.000000 3.000000 2.000000 -1.000000 66 | 2.000000 1.000000 38.000000 42.000000 12.000000 3.000000 0.000000 3.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 1.000000 37.000000 5.800000 0.000000 0.000000 1.000000 67 | 2.000000 1.000000 37.600000 88.000000 36.000000 3.000000 1.000000 1.000000 1.000000 3.000000 3.000000 2.000000 1.000000 3.000000 1.500000 0.000000 0.000000 44.000000 6.000000 0.000000 0.000000 -1.000000 68 | -------------------------------------------------------------------------------- /Bayes/NativeBayes.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import numpy as np 4 | 5 | def createVocabList(dataSet): 6 | vocabSet = set([]) 7 | for document in dataSet: 8 | vocabSet = vocabSet | set(document) 9 | return list(vocabSet) 10 | 11 | 12 | def bagOfWords2VecMN(vocabList, inputSet): 13 | returnVec = [0] * len(vocabList) 14 | for word in inputSet: 15 | if word in vocabList: 16 | returnVec[vocabList.index(word)] += 1 17 | return returnVec 18 | 19 | 20 | def trainNB(trainMat, labelMat): 21 | numTrainDocs = len(trainMat) 22 | numWords = len(trainMat[0]) 23 | # in labelMat, 1 stand for abusive; 0 for normal 24 | pClass1 = sum(labelMat) / float(numTrainDocs) 25 | p0Num = np.ones(numWords) 26 | p1Num = 
np.ones(numWords) 27 | p0Denom = 2.0 28 | p1Denom = 2.0 29 | for i in range(numTrainDocs): 30 | if labelMat[i] == 1: 31 | p1Num += trainMat[i] 32 | p1Denom += sum(trainMat[i]) 33 | else: 34 | p0Num += trainMat[i] 35 | p0Denom += sum(trainMat[i]) 36 | # use log probabilities to avoid floating-point underflow 37 | # when many small conditional probabilities are multiplied 38 | p0Vec = np.log(p0Num / p0Denom) 39 | p1Vec = np.log(p1Num / p1Denom) 40 | return p0Vec, p1Vec, pClass1 41 | 42 | 43 | def classifyBN(vec2Classify, p0Vec, p1Vec, pClass1): 44 | p0 = sum(vec2Classify * p0Vec) + np.log(1 - pClass1) 45 | p1 = sum(vec2Classify * p1Vec) + np.log(pClass1) 46 | if p0 > p1: 47 | return 0 48 | else: 49 | return 1 50 | 51 | 52 | def textParse(bigString): 53 | """ 54 | " filter out too-short words 55 | " input is a big string, output is a list of tokens 56 | """ 57 | import re 58 | listOfTokens = re.split(r'\W*', bigString) 59 | return [tok.lower() for tok in listOfTokens if len(tok) > 2] 60 | 61 | 62 | def spamTest(): 63 | docList = [] 64 | classList = [] 65 | for i in range(1, 26): 66 | wordList = textParse(open('email/spam/%d.txt' % i).read()) 67 | docList.append(wordList) 68 | classList.append(1) 69 | 70 | wordList = textParse(open('email/ham/%d.txt' % i).read()) 71 | docList.append(wordList) 72 | classList.append(0) 73 | 74 | vocabList = createVocabList(docList) 75 | trainingSet = range(50) 76 | testSet = [] 77 | for i in range(10): 78 | randIndex = int(np.random.uniform(0, len(trainingSet))) 79 | testSet.append(trainingSet[randIndex]) 80 | del(trainingSet[randIndex]) 81 | 82 | trainMat = [] 83 | labelMat = [] 84 | for docId in trainingSet: 85 | trainMat.append(bagOfWords2VecMN(vocabList, docList[docId])) 86 | labelMat.append(classList[docId]) 87 | 88 | p0Vec, p1Vec, pSpam = trainNB(np.array(trainMat), np.array(labelMat)) 89 | errorCount = 0 90 | for docId in testSet: 91 | wordVec = bagOfWords2VecMN(vocabList, docList[docId]) 92 | if classifyBN(wordVec, p0Vec, p1Vec, pSpam) != classList[docId]: 93 | errorCount += 1 94 | print "classify wrongly", docList[docId] 95 | 96 | print 'the error rate is: ', float(errorCount) / len(testSet) 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | if __name__ == '__main__': 107 | print 'begin' 108 | spamTest() 109 | print 'end' 110 | 111 | 112 | -------------------------------------------------------------------------------- /Bayes/email/ham/1.txt: -------------------------------------------------------------------------------- 1 | Hi Peter, 2 | 3 | With Jose out of town, do you want to 4 | meet once in a while to keep things 5 | going and do some interesting stuff? 6 | 7 | Let me know 8 | Eugene -------------------------------------------------------------------------------- /Bayes/email/ham/10.txt: -------------------------------------------------------------------------------- 1 | Ryan Whybrew commented on your status. 2 | 3 | Ryan wrote: 4 | "turd ferguson or butt horn." 5 | -------------------------------------------------------------------------------- /Bayes/email/ham/11.txt: -------------------------------------------------------------------------------- 1 | Arvind Thirumalai commented on your status. 2 | 3 | Arvind wrote: 4 | ""you know"" 5 | 6 | 7 | Reply to this email to comment on this status. 8 | 9 | -------------------------------------------------------------------------------- /Bayes/email/ham/12.txt: -------------------------------------------------------------------------------- 1 | Thanks Peter. 2 | 3 | I'll definitely check in on this. How is your book 4 | going? 
I heard chapter 1 came in and it was in 5 | good shape. ;-) 6 | 7 | I hope you are doing well. 8 | 9 | Cheers, 10 | 11 | Troy -------------------------------------------------------------------------------- /Bayes/email/ham/13.txt: -------------------------------------------------------------------------------- 1 | Jay Stepp commented on your status. 2 | 3 | Jay wrote: 4 | ""to the" ???" 5 | 6 | 7 | Reply to this email to comment on this status. 8 | 9 | To see the comment thread, follow the link below: 10 | 11 | -------------------------------------------------------------------------------- /Bayes/email/ham/14.txt: -------------------------------------------------------------------------------- 1 | LinkedIn 2 | 3 | Kerry Haloney requested to add you as a connection on LinkedIn: 4 | 5 | Peter, 6 | 7 | I'd like to add you to my professional network on LinkedIn. 8 | 9 | - Kerry Haloney 10 | 11 | -------------------------------------------------------------------------------- /Bayes/email/ham/15.txt: -------------------------------------------------------------------------------- 1 | Hi Peter, 2 | 3 | The hotels are the ones that rent out the tent. They are all lined up on the hotel grounds : )) So much for being one with nature, more like being one with a couple dozen tour groups and nature. 4 | I have about 100M of pictures from that trip. I can go through them and get you jpgs of my favorite scenic pictures. 5 | 6 | Where are you and Jocelyn now? New York? Will you come to Tokyo for Chinese New Year? Perhaps to see the two of you then. I will go to Thailand for winter holiday to see my mom : ) 7 | 8 | Take care, 9 | D 10 | -------------------------------------------------------------------------------- /Bayes/email/ham/16.txt: -------------------------------------------------------------------------------- 1 | yeah I am ready. I may not be here because Jar Jar has plane tickets to Germany for me. -------------------------------------------------------------------------------- /Bayes/email/ham/17.txt: -------------------------------------------------------------------------------- 1 | Benoit Mandelbrot 1924-2010 2 | 3 | Benoit Mandelbrot 1924-2010 4 | 5 | Wilmott Team 6 | 7 | Benoit Mandelbrot, the mathematician, the father of fractal mathematics, and advocate of more sophisticated modelling in quantitative finance, died on 14th October 2010 aged 85. 8 | 9 | Wilmott magazine has often featured Mandelbrot, his ideas, and the work of others inspired by his fundamental insights. 10 | 11 | You must be logged on to view these articles from past issues of Wilmott Magazine. -------------------------------------------------------------------------------- /Bayes/email/ham/18.txt: -------------------------------------------------------------------------------- 1 | Hi Peter, 2 | 3 | Sure thing. Sounds good. Let me know what time would be good for you. 4 | I will come prepared with some ideas and we can go from there. 5 | 6 | Regards, 7 | 8 | -Vivek. -------------------------------------------------------------------------------- /Bayes/email/ham/19.txt: -------------------------------------------------------------------------------- 1 | LinkedIn 2 | 3 | Julius O requested to add you as a connection on LinkedIn: 4 | 5 | Hi Peter. 6 | 7 | Looking forward to the book! 
8 | 9 | 10 | Accept View invitation from Julius O 11 | -------------------------------------------------------------------------------- /Bayes/email/ham/2.txt: -------------------------------------------------------------------------------- 1 | Yay to you both doing fine! 2 | 3 | I'm working on an MBA in Design Strategy at CCA (top art school.) It's a new program focusing on more of a right-brained creative and strategic approach to management. I'm an 1/8 of the way done today! -------------------------------------------------------------------------------- /Bayes/email/ham/20.txt: -------------------------------------------------------------------------------- 1 | I've thought about this and think it's possible. We should get another 2 | lunch. I have a car now and could come pick you up this time. Does 3 | this wednesday work? 11:50? 4 | 5 | Can I have a signed copy of you book? -------------------------------------------------------------------------------- /Bayes/email/ham/21.txt: -------------------------------------------------------------------------------- 1 | we saw this on the way to the coast...thought u might like it 2 | 3 | hangzhou is huge, one day wasn't enough, but we got a glimpse... 4 | 5 | we went inside the china pavilion at expo, it is pretty interesting, 6 | each province has an exhibit... -------------------------------------------------------------------------------- /Bayes/email/ham/22.txt: -------------------------------------------------------------------------------- 1 | Hi Hommies, 2 | 3 | Just got a phone call from the roofer, they will come and spaying the foaming today. it will be dusty. pls close all the doors and windows. 4 | Could you help me to close my bathroom window, cat window and the sliding door behind the TV? 5 | I don't know how can those 2 cats survive...... 6 | 7 | Sorry for any inconvenience! -------------------------------------------------------------------------------- /Bayes/email/ham/23.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchuandong/NLPinPython/b9baf77d848e8acbf8b5a6459947c853c1b4a7fc/Bayes/email/ham/23.txt -------------------------------------------------------------------------------- /Bayes/email/ham/24.txt: -------------------------------------------------------------------------------- 1 | Ok I will be there by 10:00 at the latest. -------------------------------------------------------------------------------- /Bayes/email/ham/25.txt: -------------------------------------------------------------------------------- 1 | That is cold. Is there going to be a retirement party? 2 | Are the leaves changing color? -------------------------------------------------------------------------------- /Bayes/email/ham/3.txt: -------------------------------------------------------------------------------- 1 | WHat is going on there? 2 | I talked to John on email. We talked about some computer stuff that's it. 3 | 4 | I went bike riding in the rain, it was not that cold. 5 | 6 | We went to the museum in SF yesterday it was $3 to get in and they had 7 | free food. At the same time was a SF Giants game, when we got done we 8 | had to take the train with all the Giants fans, they are 1/2 drunk. -------------------------------------------------------------------------------- /Bayes/email/ham/4.txt: -------------------------------------------------------------------------------- 1 | Yo. I've been working on my running website. I'm using jquery and the jqplot plugin. 
I'm not too far away from having a prototype to launch. 2 | 3 | You used jqplot right? If not, I think you would like it. -------------------------------------------------------------------------------- /Bayes/email/ham/5.txt: -------------------------------------------------------------------------------- 1 | There was a guy at the gas station who told me that if I knew Mandarin 2 | and Python I could get a job with the FBI. -------------------------------------------------------------------------------- /Bayes/email/ham/6.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchuandong/NLPinPython/b9baf77d848e8acbf8b5a6459947c853c1b4a7fc/Bayes/email/ham/6.txt -------------------------------------------------------------------------------- /Bayes/email/ham/7.txt: -------------------------------------------------------------------------------- 1 | Zach Hamm commented on your status. 2 | 3 | Zach wrote: 4 | "doggy style - enough said, thank you & good night" 5 | 6 | 7 | -------------------------------------------------------------------------------- /Bayes/email/ham/8.txt: -------------------------------------------------------------------------------- 1 | This e-mail was sent from a notification-only address that cannot accept incoming e-mail. Please do not reply to this message. 2 | 3 | Thank you for your online reservation. The store you selected has located the item you requested and has placed it on hold in your name. Please note that all items are held for 1 day. Please note store prices may differ from those online. 4 | 5 | If you have questions or need assistance with your reservation, please contact the store at the phone number listed below. You can also access store information, such as store hours and location, on the web at http://www.borders.com/online/store/StoreDetailView_98. -------------------------------------------------------------------------------- /Bayes/email/ham/9.txt: -------------------------------------------------------------------------------- 1 | Hi Peter, 2 | 3 | These are the only good scenic ones and it's too bad there was a girl's back in one of them. Just try to enjoy the blue sky : )) 4 | 5 | D -------------------------------------------------------------------------------- /Bayes/email/spam/1.txt: -------------------------------------------------------------------------------- 1 | --- Codeine 15mg -- 30 for $203.70 -- VISA Only!!! -- 2 | 3 | -- Codeine (Methylmorphine) is a narcotic (opioid) pain reliever 4 | -- We have 15mg & 30mg pills -- 30/15mg for $203.70 - 60/15mg for $385.80 - 90/15mg for $562.50 -- VISA Only!!! --- -------------------------------------------------------------------------------- /Bayes/email/spam/10.txt: -------------------------------------------------------------------------------- 1 | OrderCializViagra Online & Save 75-90% 2 | 3 | 0nline Pharmacy NoPrescription required 4 | Buy Canadian Drugs at Wholesale Prices and Save 75-90% 5 | FDA-Approved drugs + Superb Quality Drugs only! 6 | Accept all major credit cards -------------------------------------------------------------------------------- /Bayes/email/spam/11.txt: -------------------------------------------------------------------------------- 1 | You Have Everything To Gain! 
2 | 3 | Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY 4 | 5 | Amazing increase in thickness of yourPenis, up to 30% 6 | BetterEjacu1ation control 7 | Experience Rock-HardErecetions 8 | Explosive, intenseOrgasns 9 | Increase volume ofEjacu1ate 10 | Doctor designed and endorsed 11 | 100% herbal, 100% Natural, 100% Safe 12 | The proven NaturalPenisEnhancement that works! 13 | 100% MoneyBack Guaranteeed -------------------------------------------------------------------------------- /Bayes/email/spam/12.txt: -------------------------------------------------------------------------------- 1 | Buy Ambiem (Zolpidem) 5mg/10mg @ $2.39/- pill 2 | 3 | 30 pills x 5 mg - $129.00 4 | 60 pills x 5 mg - $199.20 5 | 180 pills x 5 mg - $430.20 6 | 30 pills x 10 mg - $ 138.00 7 | 120 pills x 10 mg - $ 322.80 -------------------------------------------------------------------------------- /Bayes/email/spam/13.txt: -------------------------------------------------------------------------------- 1 | OrderCializViagra Online & Save 75-90% 2 | 3 | 0nline Pharmacy NoPrescription required 4 | Buy Canadian Drugs at Wholesale Prices and Save 75-90% 5 | FDA-Approved drugs + Superb Quality Drugs only! 6 | Accept all major credit cards 7 | Order Today! From $1.38 8 | -------------------------------------------------------------------------------- /Bayes/email/spam/14.txt: -------------------------------------------------------------------------------- 1 | BuyVIAGRA 25mg, 50mg, 100mg, 2 | BrandViagra, FemaleViagra from $1.15 per pill 3 | 4 | 5 | ViagraNoPrescription needed - from Certified Canadian Pharmacy 6 | 7 | Buy Here... We accept VISA, AMEX, E-Check... Worldwide Delivery -------------------------------------------------------------------------------- /Bayes/email/spam/15.txt: -------------------------------------------------------------------------------- 1 | You Have Everything To Gain! 2 | 3 | Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY 4 | 5 | Amazing increase in thickness of yourPenis, up to 30% 6 | BetterEjacu1ation control 7 | Experience Rock-HardErecetions 8 | Explosive, intenseOrgasns 9 | Increase volume ofEjacu1ate 10 | Doctor designed and endorsed 11 | 100% herbal, 100% Natural, 100% Safe -------------------------------------------------------------------------------- /Bayes/email/spam/16.txt: -------------------------------------------------------------------------------- 1 | You Have Everything To Gain! 2 | 3 | Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY 4 | 5 | Amazing increase in thickness of yourPenis, up to 30% 6 | BetterEjacu1ation control 7 | Experience Rock-HardErecetions 8 | Explosive, intenseOrgasns 9 | Increase volume ofEjacu1ate 10 | Doctor designed and endorsed 11 | 100% herbal, 100% Natural, 100% Safe -------------------------------------------------------------------------------- /Bayes/email/spam/17.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchuandong/NLPinPython/b9baf77d848e8acbf8b5a6459947c853c1b4a7fc/Bayes/email/spam/17.txt -------------------------------------------------------------------------------- /Bayes/email/spam/18.txt: -------------------------------------------------------------------------------- 1 | Codeine (the most competitive price on NET!) 
2 | 3 | Codeine (WILSON) 30mg x 30 $156.00 4 | Codeine (WILSON) 30mg x 60 $291.00 (+4 FreeViagra pills) 5 | Codeine (WILSON) 30mg x 90 $396.00 (+4 FreeViagra pills) 6 | Codeine (WILSON) 30mg x 120 $492.00 (+10 FreeViagra pills) -------------------------------------------------------------------------------- /Bayes/email/spam/19.txt: -------------------------------------------------------------------------------- 1 | Get Up to 75% OFF at Online WatchesStore 2 | 3 | Discount Watches for All Famous Brands 4 | 5 | * Watches: aRolexBvlgari, Dior, Hermes, Oris, Cartier, AP and more brands 6 | * Louis Vuitton Bags & Wallets 7 | * Gucci Bags 8 | * Tiffany & Co Jewerly 9 | 10 | Enjoy a full 1 year WARRANTY 11 | Shipment via reputable courier: FEDEX, UPS, DHL and EMS Speedpost 12 | You will 100% recieve your order 13 | Save Up to 75% OFF Quality Watches -------------------------------------------------------------------------------- /Bayes/email/spam/2.txt: -------------------------------------------------------------------------------- 1 | Hydrocodone/Vicodin ES/Brand Watson 2 | 3 | Vicodin ES - 7.5/750 mg: 30 - $195 / 120 $570 4 | Brand Watson - 7.5/750 mg: 30 - $195 / 120 $570 5 | Brand Watson - 10/325 mg: 30 - $199 / 120 - $588 6 | NoPrescription Required 7 | FREE Express FedEx (3-5 days Delivery) for over $200 order 8 | Major Credit Cards + E-CHECK -------------------------------------------------------------------------------- /Bayes/email/spam/20.txt: -------------------------------------------------------------------------------- 1 | Get Up to 75% OFF at Online WatchesStore 2 | 3 | Discount Watches for All Famous Brands 4 | 5 | * Watches: aRolexBvlgari, Dior, Hermes, Oris, Cartier, AP and more brands 6 | * Louis Vuitton Bags & Wallets 7 | * Gucci Bags 8 | * Tiffany & Co Jewerly 9 | 10 | Enjoy a full 1 year WARRANTY 11 | Shipment via reputable courier: FEDEX, UPS, DHL and EMS Speedpost 12 | You will 100% recieve your order -------------------------------------------------------------------------------- /Bayes/email/spam/21.txt: -------------------------------------------------------------------------------- 1 | Percocet 10/625 mg withoutPrescription 30 tabs - $225! 2 | Percocet, a narcotic analgesic, is used to treat moderate to moderately SeverePain 3 | Top Quality, EXPRESS Shipping, 100% Safe & Discreet & Private. 4 | Buy Cheap Percocet Online -------------------------------------------------------------------------------- /Bayes/email/spam/22.txt: -------------------------------------------------------------------------------- 1 | Get Up to 75% OFF at Online WatchesStore 2 | 3 | Discount Watches for All Famous Brands 4 | 5 | * Watches: aRolexBvlgari, Dior, Hermes, Oris, Cartier, AP and more brands 6 | * Louis Vuitton Bags & Wallets 7 | * Gucci Bags 8 | * Tiffany & Co Jewerly 9 | 10 | Enjoy a full 1 year WARRANTY 11 | Shipment via reputable courier: FEDEX, UPS, DHL and EMS Speedpost 12 | You will 100% recieve your order -------------------------------------------------------------------------------- /Bayes/email/spam/23.txt: -------------------------------------------------------------------------------- 1 | You Have Everything To Gain! 
2 | 3 | Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY 4 | 5 | Amazing increase in thickness of yourPenis, up to 30% 6 | BetterEjacu1ation control 7 | Experience Rock-HardErecetions 8 | Explosive, intenseOrgasns 9 | Increase volume ofEjacu1ate 10 | Doctor designed and endorsed 11 | 100% herbal, 100% Natural, 100% Safe -------------------------------------------------------------------------------- /Bayes/email/spam/24.txt: -------------------------------------------------------------------------------- 1 | You Have Everything To Gain! 2 | 3 | Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY 4 | 5 | Amazing increase in thickness of yourPenis, up to 30% 6 | BetterEjacu1ation control 7 | Experience Rock-HardErecetions 8 | Explosive, intenseOrgasns 9 | Increase volume ofEjacu1ate 10 | Doctor designed and endorsed 11 | 100% herbal, 100% Natural, 100% Safe -------------------------------------------------------------------------------- /Bayes/email/spam/25.txt: -------------------------------------------------------------------------------- 1 | Experience with BiggerPenis Today! Grow 3-inches more 2 | 3 | The Safest & Most Effective Methods Of_PenisEn1argement. 4 | Save your time and money! 5 | BetterErections with effective Ma1eEnhancement products. 6 | 7 | #1 Ma1eEnhancement Supplement. Trusted by Millions. Buy Today! -------------------------------------------------------------------------------- /Bayes/email/spam/3.txt: -------------------------------------------------------------------------------- 1 | You Have Everything To Gain! 2 | 3 | Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY 4 | 5 | Amazing increase in thickness of yourPenis, up to 30% 6 | BetterEjacu1ation control 7 | Experience Rock-HardErecetions 8 | Explosive, intenseOrgasns 9 | Increase volume ofEjacu1ate 10 | Doctor designed and endorsed 11 | 100% herbal, 100% Natural, 100% Safe 12 | The proven NaturalPenisEnhancement that works! 13 | 100% MoneyBack Guaranteeed -------------------------------------------------------------------------------- /Bayes/email/spam/4.txt: -------------------------------------------------------------------------------- 1 | Percocet 10/625 mg withoutPrescription 30 tabs - $225! 2 | Percocet, a narcotic analgesic, is used to treat moderate to moderately SeverePain 3 | Top Quality, EXPRESS Shipping, 100% Safe & Discreet & Private. 4 | Buy Cheap Percocet Online -------------------------------------------------------------------------------- /Bayes/email/spam/5.txt: -------------------------------------------------------------------------------- 1 | --- Codeine 15mg -- 30 for $203.70 -- VISA Only!!! -- 2 | 3 | -- Codeine (Methylmorphine) is a narcotic (opioid) pain reliever 4 | -- We have 15mg & 30mg pills -- 30/15mg for $203.70 - 60/15mg for $385.80 - 90/15mg for $562.50 -- VISA Only!!! 
--- -------------------------------------------------------------------------------- /Bayes/email/spam/6.txt: -------------------------------------------------------------------------------- 1 | OEM Adobe & Microsoft softwares 2 | Fast order and download 3 | 4 | Microsoft Office Professional Plus 2007/2010 $129 5 | Microsoft Windows 7 Ultimate $119 6 | Adobe Photoshop CS5 Extended 7 | Adobe Acrobat 9 Pro Extended 8 | Windows XP Professional & thousand more titles -------------------------------------------------------------------------------- /Bayes/email/spam/7.txt: -------------------------------------------------------------------------------- 1 | Bargains Here! Buy Phentermin 37.5 mg (K-25) 2 | 3 | Buy Genuine Phentermin at Low Cost 4 | VISA Accepted 5 | 30 - $130.50 6 | 60 - $219.00 7 | 90 - $292.50 8 | 120 - $366.00 9 | 180 - $513.00 -------------------------------------------------------------------------------- /Bayes/email/spam/8.txt: -------------------------------------------------------------------------------- 1 | You Have Everything To Gain! 2 | 3 | Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY 4 | 5 | Amazing increase in thickness of yourPenis, up to 30% 6 | BetterEjacu1ation control 7 | Experience Rock-HardErecetions 8 | Explosive, intenseOrgasns 9 | Increase volume ofEjacu1ate 10 | Doctor designed and endorsed 11 | 100% herbal, 100% Natural, 100% Safe -------------------------------------------------------------------------------- /Bayes/email/spam/9.txt: -------------------------------------------------------------------------------- 1 | Bargains Here! Buy Phentermin 37.5 mg (K-25) 2 | 3 | Buy Genuine Phentermin at Low Cost 4 | VISA Accepted 5 | 30 - $130.50 6 | 60 - $219.00 7 | 90 - $292.50 8 | 120 - $366.00 9 | 180 - $513.00 -------------------------------------------------------------------------------- /CF/0_5.txt: -------------------------------------------------------------------------------- 1 | 00000000000000110000000000000000 2 | 00000000000011111100000000000000 3 | 00000000000111111110000000000000 4 | 00000000001111111111000000000000 5 | 00000000111111111111100000000000 6 | 00000001111111111111110000000000 7 | 00000000111111111111111000000000 8 | 00000000111111100001111100000000 9 | 00000001111111000001111100000000 10 | 00000011111100000000111100000000 11 | 00000011111100000000111110000000 12 | 00000011111100000000011110000000 13 | 00000011111100000000011110000000 14 | 00000001111110000000001111000000 15 | 00000011111110000000001111000000 16 | 00000011111100000000001111000000 17 | 00000001111100000000001111000000 18 | 00000011111100000000001111000000 19 | 00000001111100000000001111000000 20 | 00000001111100000000011111000000 21 | 00000000111110000000001111100000 22 | 00000000111110000000001111100000 23 | 00000000111110000000001111100000 24 | 00000000111110000000011111000000 25 | 00000000111110000000111111000000 26 | 00000000111111000001111110000000 27 | 00000000011111111111111110000000 28 | 00000000001111111111111110000000 29 | 00000000001111111111111110000000 30 | 00000000000111111111111000000000 31 | 00000000000011111111110000000000 32 | 00000000000000111111000000000000 33 | -------------------------------------------------------------------------------- /CF/CFItemBased.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | 3 | import numpy as np 4 | import numpy.linalg as la 5 | 6 | 7 | def loadExData(): 8 | return[[0, 0, 0, 2, 2], 9 | [0, 0, 0, 3, 3], 10 | [0, 0, 0, 1, 1], 11 | [1, 
1, 1, 0, 0], 12 | [2, 2, 2, 0, 0], 13 | [5, 5, 5, 0, 0], 14 | [1, 1, 1, 0, 0]] 15 | 16 | 17 | def ecludSim(inA, inB): 18 | return 1.0 / (1.0 + la.norm(inA - inB)) 19 | 20 | 21 | def pearsSim(inA, inB): 22 | if len(inA) < 3: 23 | return 1.0 24 | return 0.5 + 0.5 * np.corrcoef(inA, inB, rowvar=0)[0][1] 25 | 26 | 27 | def cosSim(inA, inB): 28 | num = float(inA.T * inB) 29 | denom = la.norm(inA) * la.norm(inB) 30 | return 0.5 + 0.5 * (num / denom) 31 | 32 | 33 | def standEst(dataMat, user, simMeas, item): 34 | n = np.shape(dataMat)[1] 35 | simTotal = 0.0 36 | ratSimTotal = 0.0 37 | for j in range(n): 38 | userRating = dataMat[user, j] 39 | if userRating == 0: 40 | continue 41 | overLap = np.nonzero(np.logical_and( 42 | dataMat[:, item].A > 0, 43 | dataMat[:, j].A > 0 44 | ))[0] 45 | if len(overLap) == 0: 46 | similarity = 0 47 | else: 48 | similarity = simMeas(dataMat[overLap, item], 49 | dataMat[overLap, j]) 50 | print 'the item %d and %d similarity is: %f' % (item, j, similarity) 51 | simTotal += similarity 52 | ratSimTotal += similarity * userRating 53 | if simTotal == 0: 54 | return 0 55 | else: 56 | return ratSimTotal / simTotal 57 | 58 | def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst): 59 | unratedItems = np.nonzero(dataMat[user, :].A == 0)[1] 60 | if len(unratedItems) == 0: 61 | print 'every items are rated' 62 | return [] 63 | itemScores = [] 64 | for item in unratedItems: 65 | estimatedScore = estMethod(dataMat, user, simMeas, item) 66 | itemScores.append((item, estimatedScore)) 67 | 68 | return sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:N] 69 | 70 | 71 | def printMat(inMat, thresh=0.8): 72 | for i in range(32): 73 | for k in range(32): 74 | if float(inMat[i, k]) > thresh: 75 | print 1, 76 | else: 77 | print 0, 78 | print '' 79 | 80 | 81 | def imgCompress(numSV=3, thresh=0.8): 82 | myl = [] 83 | for line in open('0_5.txt').readlines(): 84 | newRow = [] 85 | for i in range(32): 86 | newRow.append(int(line[i])) 87 | myl.append(newRow) 88 | myMat = np.mat(myl) 89 | print "****original matrix******" 90 | printMat(myMat, thresh) 91 | U, Sigma, VT = la.svd(myMat) 92 | SigRecon = np.mat(np.zeros((numSV, numSV))) 93 | for k in range(numSV): # construct diagonal matrix from vector 94 | SigRecon[k, k] = Sigma[k] 95 | reconMat = U[:, :numSV] * SigRecon * VT[:numSV, :] 96 | print "****reconstructed matrix using %d singular values******" % numSV 97 | printMat(reconMat, thresh) 98 | 99 | def main(): 100 | # dataMat = np.mat(loadExData()) 101 | # dataMat[0, 1] = dataMat[0, 0] = dataMat[1, 0] = dataMat[2, 0] = 4 102 | # dataMat[3, 3] = 2 103 | dataMat = np.mat([ 104 | [4, 4, 0, 2, 2], 105 | [4, 0, 0, 3, 3], 106 | [4, 0, 0, 1, 1], 107 | [1, 1, 1, 2, 0], 108 | [2, 2, 2, 0, 0], 109 | [1, 1, 1, 0, 0], 110 | [5, 5, 5, 0, 0]]) 111 | 112 | inA = dataMat[:, 0] 113 | inB = dataMat[:, 4] 114 | 115 | imgCompress() 116 | # print ecludSim(inA, inB) 117 | # print pearsSim(inA, inB) 118 | # print cosSim(inA, inB) 119 | 120 | # print standEst(dataMat, 2, cosSim, 2) 121 | # print recommend(dataMat, 2) 122 | 123 | return 124 | 125 | if __name__ == '__main__': 126 | main() 127 | -------------------------------------------------------------------------------- /CF/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchuandong/NLPinPython/b9baf77d848e8acbf8b5a6459947c853c1b4a7fc/CF/__init__.py -------------------------------------------------------------------------------- /DecisionTree/C45.py: 
-------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | __author__ = 'yinchuandong' 3 | 4 | import json 5 | from math import log 6 | 7 | from ID3 import calcEntropy, splitDataSet, loadFile 8 | 9 | 10 | def selectMaxGaniRatio(dataSet): 11 | numsOfCol = len(dataSet[0]) - 1 12 | numsOfData = len(dataSet) 13 | 14 | maxGainRatio = 0.0 15 | maxFeatCol = 0 16 | entropyS = calcEntropy(dataSet) 17 | for col in range(numsOfCol): 18 | subData = splitDataSet(dataSet, col) 19 | gain = entropyS 20 | splitInfo = 0.0 21 | for key in subData: 22 | entropy = calcEntropy(subData[key]) 23 | prob = len(subData[key]) / float(numsOfData) 24 | gain -= prob * entropy 25 | splitInfo -= prob * log(prob) 26 | gainRatio = gain / splitInfo 27 | if gainRatio > maxGainRatio: 28 | maxGainRatio = gainRatio 29 | maxFeatCol = col 30 | return maxFeatCol 31 | 32 | 33 | def createTree(dataSet, labels): 34 | classList = [featVec[-1] for featVec in dataSet] 35 | if len(set(classList)) == 1: 36 | return classList[0] 37 | bestCol = selectMaxGaniRatio(dataSet) 38 | bestColLabel = labels[bestCol] 39 | tree = {} 40 | tree[bestColLabel] = {} 41 | labels = labels[:] # copy in case of bad reference 42 | del(labels[bestCol]) # del after splitting on it 43 | featVals = [featVec[bestCol] for featVec in dataSet] 44 | uniqueVals = set(featVals) 45 | subData = splitDataSet(dataSet, bestCol) 46 | for val in uniqueVals: 47 | subtree = createTree(subData[val], labels) 48 | tree[bestColLabel][val] = subtree 49 | return tree 50 | 51 | 52 | def main(): 53 | dataSet = loadFile('trainset.txt') 54 | labels = ['Outlook', 'Temperature', 'Humidity', 'Wind'] 55 | tree = createTree(dataSet, labels) 56 | print json.dumps(tree, indent=4) 57 | return 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /DecisionTree/ID3.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | import json 3 | from math import log 4 | import operator 5 | 6 | 7 | def loadFile(filename): 8 | """ 9 | Load the data file 10 | :param filename: path of the data file 11 | :return: void 12 | """ 13 | fr = open(filename) 14 | lines = fr.readlines() 15 | 16 | dataSet = [] 17 | for lineStr in lines: 18 | lineArr = lineStr.strip().split(' ') 19 | dataSet.append(lineArr) 20 | return dataSet 21 | 22 | 23 | def calcEntropy(dataSet): 24 | nums = len(dataSet) 25 | labelsCount = {} 26 | for featVec in dataSet: 27 | label = featVec[-1] 28 | if label not in labelsCount: 29 | labelsCount[label] = 1 30 | else: 31 | labelsCount[label] += 1 32 | entropy = 0.0 33 | for label in labelsCount: 34 | prob = float(labelsCount[label]) / nums 35 | entropy -= prob * log(prob) 36 | return entropy 37 | 38 | 39 | def splitDataSet(dataSet, col): 40 | result = {} 41 | for featVec in dataSet: 42 | key = featVec[col] 43 | tmpArr = featVec[:col] 44 | tmpArr.extend(featVec[col + 1:]) 45 | if key not in result: 46 | result[key] = [] 47 | result[key].append(tmpArr) 48 | else: 49 | result[key].append(tmpArr) 50 | return result 51 | 52 | 53 | def selectMaxGainCol(dataSet): 54 | """ 55 | Select the column index with the maximum information gain 56 | :param dataSet: 57 | :return: 58 | """ 59 | numsOfCol = len(dataSet[0]) - 1 # number of feature columns; the last column is the class label 60 | numsOfData = len(dataSet) # number of records in the data set 61 | maxGain = 0.0 # maximum information gain 62 | maxFeatCol = -1 # column with the maximum information gain 63 | # Entropy(S) 64 | entropyS = calcEntropy(dataSet) 65 | for col in range(0, numsOfCol): 66 | featDict = splitDataSet(dataSet, col) 67 | tmpGain = entropyS 68 | for key in 
featDict.keys(): 69 | featArr = featDict[key] 70 | entropyFeat = calcEntropy(featArr) 71 | delta = (len(featArr) / float(numsOfData)) * entropyFeat 72 | tmpGain -= delta 73 | # print "Gain(", labels[col], ") =", tmpGain 74 | if tmpGain > maxGain: 75 | maxGain = tmpGain 76 | maxFeatCol = col 77 | return maxFeatCol 78 | 79 | 80 | def majorityCnt(classList): 81 | clsMap = {} 82 | for cls in classList: 83 | if cls not in clsMap: 84 | clsMap[cls] = 1 85 | else: 86 | clsMap[cls] += 1 87 | sortedClsMap = sorted(clsMap.iteritems(), key=operator.itemgetter(1), reverse=True) 88 | return sortedClsMap[0][0] 89 | 90 | 91 | def createTree(dataSet, labels): 92 | classList = [featVec[-1] for featVec in dataSet] 93 | if len(set(classList)) == 1: 94 | return classList[0] 95 | bestCol = selectMaxGainCol(dataSet) 96 | if len(dataSet[0]) == 1 or bestCol == -1: 97 | return majorityCnt(classList) 98 | bestColLabel = labels[bestCol] 99 | tree = {} 100 | tree[bestColLabel] = {} 101 | labels = labels[:] # copy in case of bad reference 102 | del(labels[bestCol]) # del after spliting on it 103 | featVals = [featVec[bestCol] for featVec in dataSet] 104 | uniqueVals = set(featVals) 105 | subData = splitDataSet(dataSet, bestCol) 106 | for val in uniqueVals: 107 | subtree = createTree(subData[val], labels) 108 | tree[bestColLabel][val] = subtree 109 | return tree 110 | 111 | 112 | def classify(tree, featLabels, testVec): 113 | firstKey = tree.keys()[0] 114 | secondDict = tree[firstKey] 115 | featIndex = featLabels.index(firstKey) 116 | key = testVec[featIndex] 117 | featValue = secondDict[key] 118 | if isinstance(featValue, dict): 119 | classLabel = classify(featValue, featLabels, testVec) 120 | else: 121 | classLabel = featValue 122 | return classLabel 123 | 124 | 125 | if __name__ == '__main__': 126 | 127 | dataSet = loadFile('trainset.txt') 128 | featLabels = ['Outlook', 'Temperature', 'Humidity', 'Wind'] 129 | testVec = "Rain Mild High Weak".split(" ") 130 | testVec2 = "Overcast Mild High Weak".split(" ") 131 | tree = createTree(dataSet, featLabels) 132 | print json.dumps(tree, indent=4) 133 | result = classify(tree, featLabels, testVec) 134 | print result 135 | -------------------------------------------------------------------------------- /DecisionTree/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'wangjiewen' 2 | -------------------------------------------------------------------------------- /DecisionTree/randomforest.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | # 1.choose N examples randomly 3 | # 2.choose m variables (M totally): round(log(M)/log(2)+1); 4 | # 3.repeat 1-2 for k times, and create k decision trees 5 | # 4.out of bag errors 6 | import random 7 | import math 8 | import json 9 | from ID3 import loadFile, createTree, classify 10 | 11 | 12 | def bagNExample(dataset, labels, K, M): 13 | N = len(dataset) - 1 14 | m = int(round(math.log(M) / math.log(2) + 1)) 15 | T = [] 16 | L = [] 17 | for k in range(K): 18 | # bag of example 19 | subset = [] 20 | for i in range(N): 21 | randId = random.randint(0, N - 1) 22 | subset.append(randId) 23 | # bag of variable 24 | varset = [] 25 | for j in range(m): 26 | randId = random.randint(0, M - 1) 27 | while(randId in varset): 28 | randId = random.randint(0, M - 1) 29 | varset.append(randId) 30 | varset = sorted(varset) 31 | 32 | Tk = [] 33 | for i in subset: 34 | vec = [] 35 | for j in varset: 36 | vec.append(dataset[i][j]) 37 | 
vec.append(dataset[i][-1]) 38 | Tk.append(vec) 39 | T.append(Tk) 40 | Lk = [labels[j] for j in varset] 41 | L.append(Lk) 42 | return T, L 43 | 44 | 45 | def buildForest(dataset, labels, K, M): 46 | T, L = bagNExample(dataset, labels, K=K, M=M) 47 | forest = [] 48 | for k in range(K): 49 | dtree = createTree(T[k], L[k]) 50 | # if each in Tk is identical, it just return Yes. Just skip it 51 | if isinstance(dtree, dict): 52 | forest.append(dtree) 53 | # print '----------' 54 | return forest 55 | 56 | 57 | def voteMajority(forest, labels, testVec): 58 | result = {} 59 | for i, dtree in enumerate(forest): 60 | try: 61 | cls = classify(dtree, labels, testVec) 62 | except Exception, e: 63 | # test vector may has some values not included in dtree 64 | # haven't come up a good solution, just mark it 65 | print 'id:', i, '--', e 66 | # print json.dumps(dtree, indent=4) 67 | else: 68 | if cls not in result: 69 | result[cls] = 1 70 | else: 71 | result[cls] += 1 72 | print result 73 | return 74 | 75 | 76 | def main(): 77 | dataset = loadFile('trainset.txt') 78 | labels = ['Outlook', 'Temperature', 'Humidity', 'Wind'] 79 | testVec = "Rain Mild High Weak".split(" ") 80 | forest = buildForest(dataset, labels, K=100, M=4) 81 | # print json.dumps(forest, indent=4) 82 | voteMajority(forest, labels, testVec) 83 | # print forest 84 | 85 | if __name__ == '__main__': 86 | print "start" 87 | main() 88 | -------------------------------------------------------------------------------- /DecisionTree/trainset.txt: -------------------------------------------------------------------------------- 1 | Sunny Hot High Weak No 2 | Sunny Hot High Strong No 3 | Overcast Hot High Weak Yes 4 | Rain Mild High Weak Yes 5 | Rain Cool Normal Weak Yes 6 | Rain Cool Normal Strong No 7 | Overcast Cool Normal Strong Yes 8 | Sunny Mild High Weak No 9 | Sunny Cool Normal Weak Yes 10 | Rain Mild Normal Weak Yes 11 | Sunny Mild Normal Strong Yes 12 | Overcast Mild High Strong Yes 13 | Overcast Hot Normal Weak Yes 14 | Rain Mild High Strong No -------------------------------------------------------------------------------- /DecisionTree/treePlotter.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Oct 14, 2010 3 | 4 | @author: Peter Harrington 5 | ''' 6 | import matplotlib.pyplot as plt 7 | 8 | decisionNode = dict(boxstyle="sawtooth", fc="0.8") 9 | leafNode = dict(boxstyle="round4", fc="0.8") 10 | arrow_args = dict(arrowstyle="<-") 11 | 12 | def getNumLeafs(myTree): 13 | numLeafs = 0 14 | firstStr = myTree.keys()[0] 15 | secondDict = myTree[firstStr] 16 | for key in secondDict.keys(): 17 | if type(secondDict[key]).__name__=='dict':#test to see if the nodes are dictonaires, if not they are leaf nodes 18 | numLeafs += getNumLeafs(secondDict[key]) 19 | else: numLeafs +=1 20 | return numLeafs 21 | 22 | def getTreeDepth(myTree): 23 | maxDepth = 0 24 | firstStr = myTree.keys()[0] 25 | secondDict = myTree[firstStr] 26 | for key in secondDict.keys(): 27 | if type(secondDict[key]).__name__=='dict':#test to see if the nodes are dictonaires, if not they are leaf nodes 28 | thisDepth = 1 + getTreeDepth(secondDict[key]) 29 | else: thisDepth = 1 30 | if thisDepth > maxDepth: maxDepth = thisDepth 31 | return maxDepth 32 | 33 | def plotNode(nodeTxt, centerPt, parentPt, nodeType): 34 | createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords='axes fraction', 35 | xytext=centerPt, textcoords='axes fraction', 36 | va="center", ha="center", bbox=nodeType, arrowprops=arrow_args ) 37 | 38 | def 
plotMidText(cntrPt, parentPt, txtString): 39 | xMid = (parentPt[0]-cntrPt[0])/2.0 + cntrPt[0] 40 | yMid = (parentPt[1]-cntrPt[1])/2.0 + cntrPt[1] 41 | createPlot.ax1.text(xMid, yMid, txtString, va="center", ha="center", rotation=30) 42 | 43 | def plotTree(myTree, parentPt, nodeTxt):#if the first key tells you what feat was split on 44 | numLeafs = getNumLeafs(myTree) #this determines the x width of this tree 45 | depth = getTreeDepth(myTree) 46 | firstStr = myTree.keys()[0] #the text label for this node should be this 47 | cntrPt = (plotTree.xOff + (1.0 + float(numLeafs))/2.0/plotTree.totalW, plotTree.yOff) 48 | plotMidText(cntrPt, parentPt, nodeTxt) 49 | plotNode(firstStr, cntrPt, parentPt, decisionNode) 50 | secondDict = myTree[firstStr] 51 | plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD 52 | for key in secondDict.keys(): 53 | if type(secondDict[key]).__name__=='dict':#test to see if the nodes are dictonaires, if not they are leaf nodes 54 | plotTree(secondDict[key],cntrPt,str(key)) #recursion 55 | else: #it's a leaf node print the leaf node 56 | plotTree.xOff = plotTree.xOff + 1.0/plotTree.totalW 57 | plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff), cntrPt, leafNode) 58 | plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key)) 59 | plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD 60 | #if you do get a dictonary you know it's a tree, and the first element will be another dict 61 | 62 | def createPlot(inTree): 63 | fig = plt.figure(1, facecolor='white') 64 | fig.clf() 65 | axprops = dict(xticks=[], yticks=[]) 66 | createPlot.ax1 = plt.subplot(111, frameon=False, **axprops) #no ticks 67 | #createPlot.ax1 = plt.subplot(111, frameon=False) #ticks for demo puropses 68 | plotTree.totalW = float(getNumLeafs(inTree)) 69 | plotTree.totalD = float(getTreeDepth(inTree)) 70 | plotTree.xOff = -0.5/plotTree.totalW; plotTree.yOff = 1.0; 71 | plotTree(inTree, (0.5,1.0), '') 72 | plt.show() 73 | 74 | def createPlotOld(): 75 | fig = plt.figure(1, facecolor='white') 76 | fig.clf() 77 | createPlot.ax1 = plt.subplot(111, frameon=False) #ticks for demo puropses 78 | plotNode('a decision node', (0.5, 0.1), (0.1, 0.5), decisionNode) 79 | plotNode('a leaf node', (0.8, 0.1), (0.3, 0.8), leafNode) 80 | plt.show() 81 | 82 | def retrieveTree(i): 83 | listOfTrees =[{'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}, 84 | {'no surfacing': {0: 'no', 1: {'flippers': {0: {'head': {0: 'no', 1: 'yes'}}, 1: 'no'}}}} 85 | ] 86 | return listOfTrees[i] 87 | 88 | 89 | 90 | # createPlot(thisTree) 91 | # createPlotOldR -------------------------------------------------------------------------------- /Kmeans/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchuandong/NLPinPython/b9baf77d848e8acbf8b5a6459947c853c1b4a7fc/Kmeans/1.jpg -------------------------------------------------------------------------------- /Kmeans/KmeansImg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | from sklearn import metrics 4 | from sklearn.cluster import KMeans 5 | 6 | 7 | def main(): 8 | im = Image.open('1.jpg') 9 | mat = np.array(im) 10 | print np.shape(mat) 11 | 12 | return 13 | 14 | def testKMeans(): 15 | im = Image.open('1.jpg') 16 | data = im.getdata() 17 | km = KMeans() 18 | rt = km.fit(data) 19 | print rt.labels_[200:500] 20 | return 21 | 22 | def kmeans_in_matrix(): 23 | # 5 points, 2 feature 24 | X = [ 25 | [1, -1, 0, 0, 0], 26 | [0, 0, 2, 
3, 4]] 27 | A = [ 28 | [1, 0], 29 | [1, 0], 30 | [0, 1], 31 | [0, 1], 32 | [0, 1] 33 | ] 34 | # A = [ 35 | # [1, 0, 0], 36 | # [1, 0, 0], 37 | # [0, 1, 0], 38 | # [0, 1, 0], 39 | # [0, 0, 1] 40 | # ] 41 | X = np.mat(X) 42 | A = np.mat(A) 43 | n1 = np.ones((5, 1)) 44 | n1 = np.mat(n1) 45 | t = (n1.T * A) 46 | t = np.diag(t.A1) 47 | t = np.mat(t) 48 | C = X * A * t.I 49 | print C 50 | D = X - C * A.T 51 | print D 52 | r = np.power(D, 2) 53 | print np.sum(r) 54 | 55 | return 56 | 57 | if __name__ == '__main__': 58 | # main() 59 | # testKMeans() -------------------------------------------------------------------------------- /Kmeans/test.py: -------------------------------------------------------------------------------- 1 | __author__ = 'wangjiewen' 2 | 3 | from numpy import * 4 | 5 | dataSet = mat([[0, 2], [0, 1], [1, 1], [2, 1], [3, 2]]) 6 | 7 | best = mat([[3, 4], [5, 6], [7, 8]]) 8 | 9 | dataSet[nonzero(dataSet[:, 1].A == 1)[0], :] = best 10 | 11 | print dataSet 12 | -------------------------------------------------------------------------------- /LogicRegression/EXTRAS/README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchuandong/NLPinPython/b9baf77d848e8acbf8b5a6459947c853c1b4a7fc/LogicRegression/EXTRAS/README.txt -------------------------------------------------------------------------------- /LogicRegression/EXTRAS/plot2D.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Oct 6, 2010 3 | 4 | @author: Peter 5 | ''' 6 | from numpy import * 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | from matplotlib.patches import Rectangle 10 | import logRegres 11 | 12 | dataMat,labelMat=logRegres.loadDataSet() 13 | dataArr = array(dataMat) 14 | weights = logRegres.stocGradAscent0(dataArr,labelMat) 15 | 16 | n = shape(dataArr)[0] #number of points to create 17 | xcord1 = []; ycord1 = [] 18 | xcord2 = []; ycord2 = [] 19 | 20 | markers =[] 21 | colors =[] 22 | for i in range(n): 23 | if int(labelMat[i])== 1: 24 | xcord1.append(dataArr[i,1]); ycord1.append(dataArr[i,2]) 25 | else: 26 | xcord2.append(dataArr[i,1]); ycord2.append(dataArr[i,2]) 27 | 28 | fig = plt.figure() 29 | ax = fig.add_subplot(111) 30 | #ax.scatter(xcord,ycord, c=colors, s=markers) 31 | type1 = ax.scatter(xcord1, ycord1, s=30, c='red', marker='s') 32 | type2 = ax.scatter(xcord2, ycord2, s=30, c='green') 33 | x = arange(-3.0, 3.0, 0.1) 34 | #weights = [-2.9, 0.72, 1.29] 35 | #weights = [-5, 1.09, 1.42] 36 | weights = [13.03822793, 1.32877317, -1.96702074] 37 | weights = [4.12, 0.48, -0.6168] 38 | y = (-weights[0]-weights[1]*x)/weights[2] 39 | type3 = ax.plot(x, y) 40 | #ax.legend([type1, type2, type3], ["Did Not Like", "Liked in Small Doses", "Liked in Large Doses"], loc=2) 41 | #ax.axis([-5000,100000,-2,25]) 42 | plt.xlabel('X1') 43 | plt.ylabel('X2') 44 | plt.show() -------------------------------------------------------------------------------- /LogicRegression/EXTRAS/plotGD.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Oct 28, 2010 3 | 4 | @author: Peter 5 | ''' 6 | import matplotlib 7 | import numpy as np 8 | import matplotlib.cm as cm 9 | import matplotlib.mlab as mlab 10 | import matplotlib.pyplot as plt 11 | 12 | leafNode = dict(boxstyle="round4", fc="0.8") 13 | arrow_args = dict(arrowstyle="<-") 14 | 15 | matplotlib.rcParams['xtick.direction'] = 'out' 16 | matplotlib.rcParams['ytick.direction'] = 'out' 17 | 18 | delta 
= 0.025 19 | x = np.arange(-2.0, 2.0, delta) 20 | y = np.arange(-2.0, 2.0, delta) 21 | X, Y = np.meshgrid(x, y) 22 | Z1 = -((X-1)**2) 23 | Z2 = -(Y**2) 24 | #Z1 = mlab.bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0) 25 | #Z2 = mlab.bivariate_normal(X, Y, 1.5, 0.5, 1, 1) 26 | # difference of Gaussians 27 | Z = 1.0 * (Z2 + Z1)+5.0 28 | 29 | # Create a simple contour plot with labels using default colors. The 30 | # inline argument to clabel will control whether the labels are draw 31 | # over the line segments of the contour, removing the lines beneath 32 | # the label 33 | plt.figure() 34 | CS = plt.contour(X, Y, Z) 35 | plt.annotate('', xy=(0.05, 0.05), xycoords='axes fraction', 36 | xytext=(0.2,0.2), textcoords='axes fraction', 37 | va="center", ha="center", bbox=leafNode, arrowprops=arrow_args ) 38 | plt.text(-1.9, -1.8, 'P0') 39 | plt.annotate('', xy=(0.2,0.2), xycoords='axes fraction', 40 | xytext=(0.35,0.3), textcoords='axes fraction', 41 | va="center", ha="center", bbox=leafNode, arrowprops=arrow_args ) 42 | plt.text(-1.35, -1.23, 'P1') 43 | plt.annotate('', xy=(0.35,0.3), xycoords='axes fraction', 44 | xytext=(0.45,0.35), textcoords='axes fraction', 45 | va="center", ha="center", bbox=leafNode, arrowprops=arrow_args ) 46 | plt.text(-0.7, -0.8, 'P2') 47 | plt.text(-0.3, -0.6, 'P3') 48 | plt.clabel(CS, inline=1, fontsize=10) 49 | plt.title('Gradient Ascent') 50 | plt.xlabel('x') 51 | plt.ylabel('y') 52 | plt.show() 53 | -------------------------------------------------------------------------------- /LogicRegression/EXTRAS/plotSDerror.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Oct 6, 2010 3 | 4 | @author: Peter 5 | ''' 6 | from numpy import * 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | from matplotlib.patches import Rectangle 10 | import logRegres 11 | 12 | def stocGradAscent0(dataMatrix, classLabels): 13 | m,n = shape(dataMatrix) 14 | alpha = 0.5 15 | weights = ones(n) #initialize to all ones 16 | weightsHistory=zeros((500*m,n)) 17 | for j in range(500): 18 | for i in range(m): 19 | h = logRegres.sigmoid(sum(dataMatrix[i]*weights)) 20 | error = classLabels[i] - h 21 | weights = weights + alpha * error * dataMatrix[i] 22 | weightsHistory[j*m + i,:] = weights 23 | return weightsHistory 24 | 25 | def stocGradAscent1(dataMatrix, classLabels): 26 | m,n = shape(dataMatrix) 27 | alpha = 0.4 28 | weights = ones(n) #initialize to all ones 29 | weightsHistory=zeros((40*m,n)) 30 | for j in range(40): 31 | dataIndex = range(m) 32 | for i in range(m): 33 | alpha = 4/(1.0+j+i)+0.01 34 | randIndex = int(random.uniform(0,len(dataIndex))) 35 | h = logRegres.sigmoid(sum(dataMatrix[randIndex]*weights)) 36 | error = classLabels[randIndex] - h 37 | #print error 38 | weights = weights + alpha * error * dataMatrix[randIndex] 39 | weightsHistory[j*m + i,:] = weights 40 | del(dataIndex[randIndex]) 41 | print weights 42 | return weightsHistory 43 | 44 | 45 | dataMat,labelMat=logRegres.loadDataSet() 46 | dataArr = array(dataMat) 47 | myHist = stocGradAscent1(dataArr,labelMat) 48 | 49 | 50 | n = shape(dataArr)[0] #number of points to create 51 | xcord1 = []; ycord1 = [] 52 | xcord2 = []; ycord2 = [] 53 | 54 | markers =[] 55 | colors =[] 56 | 57 | 58 | fig = plt.figure() 59 | ax = fig.add_subplot(311) 60 | type1 = ax.plot(myHist[:,0]) 61 | plt.ylabel('X0') 62 | ax = fig.add_subplot(312) 63 | type1 = ax.plot(myHist[:,1]) 64 | plt.ylabel('X1') 65 | ax = fig.add_subplot(313) 66 | type1 = ax.plot(myHist[:,2]) 67 | plt.xlabel('iteration') 
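# The three stacked panels (311/312/313) plot the weight trajectories recorded by
# stocGradAscent1, one panel per weight (X0, X1, X2), against the stochastic update index.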
68 | plt.ylabel('X2') 69 | plt.show() -------------------------------------------------------------------------------- /LogicRegression/EXTRAS/sigmoidPlot.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Oct 6, 2010 3 | 4 | @author: Peter 5 | ''' 6 | import sys 7 | from pylab import * 8 | 9 | t = arange(-60.0, 60.3, 0.1) 10 | s = 1/(1 + exp(-t)) 11 | ax = subplot(211) 12 | ax.plot(t,s) 13 | ax.axis([-5,5,0,1]) 14 | plt.xlabel('x') 15 | plt.ylabel('Sigmoid(x)') 16 | ax = subplot(212) 17 | ax.plot(t,s) 18 | ax.axis([-60,60,0,1]) 19 | plt.xlabel('x') 20 | plt.ylabel('Sigmoid(x)') 21 | show() -------------------------------------------------------------------------------- /LogicRegression/horseColicTest.txt: -------------------------------------------------------------------------------- 1 | 2 1 38.50 54 20 0 1 2 2 3 4 1 2 2 5.90 0 2 42.00 6.30 0 0 1 2 | 2 1 37.60 48 36 0 0 1 1 0 3 0 0 0 0 0 0 44.00 6.30 1 5.00 1 3 | 1 1 37.7 44 28 0 4 3 2 5 4 4 1 1 0 3 5 45 70 3 2 1 4 | 1 1 37 56 24 3 1 4 2 4 4 3 1 1 0 0 0 35 61 3 2 0 5 | 2 1 38.00 42 12 3 0 3 1 1 0 1 0 0 0 0 2 37.00 5.80 0 0 1 6 | 1 1 0 60 40 3 0 1 1 0 4 0 3 2 0 0 5 42 72 0 0 1 7 | 2 1 38.40 80 60 3 2 2 1 3 2 1 2 2 0 1 1 54.00 6.90 0 0 1 8 | 2 1 37.80 48 12 2 1 2 1 3 0 1 2 0 0 2 0 48.00 7.30 1 0 1 9 | 2 1 37.90 45 36 3 3 3 2 2 3 1 2 1 0 3 0 33.00 5.70 3 0 1 10 | 2 1 39.00 84 12 3 1 5 1 2 4 2 1 2 7.00 0 4 62.00 5.90 2 2.20 0 11 | 2 1 38.20 60 24 3 1 3 2 3 3 2 3 3 0 4 4 53.00 7.50 2 1.40 1 12 | 1 1 0 140 0 0 0 4 2 5 4 4 1 1 0 0 5 30 69 0 0 0 13 | 1 1 37.90 120 60 3 3 3 1 5 4 4 2 2 7.50 4 5 52.00 6.60 3 1.80 0 14 | 2 1 38.00 72 36 1 1 3 1 3 0 2 2 1 0 3 5 38.00 6.80 2 2.00 1 15 | 2 9 38.00 92 28 1 1 2 1 1 3 2 3 0 7.20 0 0 37.00 6.10 1 1.10 1 16 | 1 1 38.30 66 30 2 3 1 1 2 4 3 3 2 8.50 4 5 37.00 6.00 0 0 1 17 | 2 1 37.50 48 24 3 1 1 1 2 1 0 1 1 0 3 2 43.00 6.00 1 2.80 1 18 | 1 1 37.50 88 20 2 3 3 1 4 3 3 0 0 0 0 0 35.00 6.40 1 0 0 19 | 2 9 0 150 60 4 4 4 2 5 4 4 0 0 0 0 0 0 0 0 0 0 20 | 1 1 39.7 100 30 0 0 6 2 4 4 3 1 0 0 4 5 65 75 0 0 0 21 | 1 1 38.30 80 0 3 3 4 2 5 4 3 2 1 0 4 4 45.00 7.50 2 4.60 1 22 | 2 1 37.50 40 32 3 1 3 1 3 2 3 2 1 0 0 5 32.00 6.40 1 1.10 1 23 | 1 1 38.40 84 30 3 1 5 2 4 3 3 2 3 6.50 4 4 47.00 7.50 3 0 0 24 | 1 1 38.10 84 44 4 0 4 2 5 3 1 1 3 5.00 0 4 60.00 6.80 0 5.70 0 25 | 2 1 38.70 52 0 1 1 1 1 1 3 1 0 0 0 1 3 4.00 74.00 0 0 1 26 | 2 1 38.10 44 40 2 1 3 1 3 3 1 0 0 0 1 3 35.00 6.80 0 0 1 27 | 2 1 38.4 52 20 2 1 3 1 1 3 2 2 1 0 3 5 41 63 1 1 1 28 | 1 1 38.20 60 0 1 0 3 1 2 1 1 1 1 0 4 4 43.00 6.20 2 3.90 1 29 | 2 1 37.70 40 18 1 1 1 0 3 2 1 1 1 0 3 3 36.00 3.50 0 0 1 30 | 1 1 39.1 60 10 0 1 1 0 2 3 0 0 0 0 4 4 0 0 0 0 1 31 | 2 1 37.80 48 16 1 1 1 1 0 1 1 2 1 0 4 3 43.00 7.50 0 0 1 32 | 1 1 39.00 120 0 4 3 5 2 2 4 3 2 3 8.00 0 0 65.00 8.20 3 4.60 1 33 | 1 1 38.20 76 0 2 3 2 1 5 3 3 1 2 6.00 1 5 35.00 6.50 2 0.90 1 34 | 2 1 38.30 88 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 35 | 1 1 38.00 80 30 3 3 3 1 0 0 0 0 0 6.00 0 0 48.00 8.30 0 4.30 1 36 | 1 1 0 0 0 3 1 1 1 2 3 3 1 3 6.00 4 4 0 0 2 0 0 37 | 1 1 37.60 40 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 2 2.10 1 38 | 2 1 37.50 44 0 1 1 1 1 3 3 2 0 0 0 0 0 45.00 5.80 2 1.40 1 39 | 2 1 38.2 42 16 1 1 3 1 1 3 1 0 0 0 1 0 35 60 1 1 1 40 | 2 1 38 56 44 3 3 3 0 0 1 1 2 1 0 4 0 47 70 2 1 1 41 | 2 1 38.30 45 20 3 3 2 2 2 4 1 2 0 0 4 0 0 0 0 0 1 42 | 1 1 0 48 96 1 1 3 1 0 4 1 2 1 0 1 4 42.00 8.00 1 0 1 43 | 1 1 37.70 55 28 2 1 2 1 2 3 3 0 3 5.00 4 5 0 0 0 0 1 44 | 2 1 36.00 100 20 4 3 6 2 2 4 3 1 1 0 4 5 74.00 5.70 2 2.50 0 45 | 1 1 37.10 60 
20 2 0 4 1 3 0 3 0 2 5.00 3 4 64.00 8.50 2 0 1 46 | 2 1 37.10 114 40 3 0 3 2 2 2 1 0 0 0 0 3 32.00 0 3 6.50 1 47 | 1 1 38.1 72 30 3 3 3 1 4 4 3 2 1 0 3 5 37 56 3 1 1 48 | 1 1 37.00 44 12 3 1 1 2 1 1 1 0 0 0 4 2 40.00 6.70 3 8.00 1 49 | 1 1 38.6 48 20 3 1 1 1 4 3 1 0 0 0 3 0 37 75 0 0 1 50 | 1 1 0 82 72 3 1 4 1 2 3 3 0 3 0 4 4 53 65 3 2 0 51 | 1 9 38.20 78 60 4 4 6 0 3 3 3 0 0 0 1 0 59.00 5.80 3 3.10 0 52 | 2 1 37.8 60 16 1 1 3 1 2 3 2 1 2 0 3 0 41 73 0 0 0 53 | 1 1 38.7 34 30 2 0 3 1 2 3 0 0 0 0 0 0 33 69 0 2 0 54 | 1 1 0 36 12 1 1 1 1 1 2 1 1 1 0 1 5 44.00 0 0 0 1 55 | 2 1 38.30 44 60 0 0 1 1 0 0 0 0 0 0 0 0 6.40 36.00 0 0 1 56 | 2 1 37.40 54 18 3 0 1 1 3 4 3 2 2 0 4 5 30.00 7.10 2 0 1 57 | 1 1 0 0 0 4 3 0 2 2 4 1 0 0 0 0 0 54 76 3 2 1 58 | 1 1 36.6 48 16 3 1 3 1 4 1 1 1 1 0 0 0 27 56 0 0 0 59 | 1 1 38.5 90 0 1 1 3 1 3 3 3 2 3 2 4 5 47 79 0 0 1 60 | 1 1 0 75 12 1 1 4 1 5 3 3 0 3 5.80 0 0 58.00 8.50 1 0 1 61 | 2 1 38.20 42 0 3 1 1 1 1 1 2 2 1 0 3 2 35.00 5.90 2 0 1 62 | 1 9 38.20 78 60 4 4 6 0 3 3 3 0 0 0 1 0 59.00 5.80 3 3.10 0 63 | 2 1 38.60 60 30 1 1 3 1 4 2 2 1 1 0 0 0 40.00 6.00 1 0 1 64 | 2 1 37.80 42 40 1 1 1 1 1 3 1 0 0 0 3 3 36.00 6.20 0 0 1 65 | 1 1 38 60 12 1 1 2 1 2 1 1 1 1 0 1 4 44 65 3 2 0 66 | 2 1 38.00 42 12 3 0 3 1 1 1 1 0 0 0 0 1 37.00 5.80 0 0 1 67 | 2 1 37.60 88 36 3 1 1 1 3 3 2 1 3 1.50 0 0 44.00 6.00 0 0 0 -------------------------------------------------------------------------------- /LogicRegression/logicRegression.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | 4 | import numpy as np 5 | 6 | 7 | def loadDataSet(): 8 | dataMat = [] 9 | labelMat = [] 10 | fr = open('testSet.txt') 11 | for line in fr.readlines(): 12 | lineArr = line.strip().split() 13 | dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])]) 14 | labelMat.append(int(lineArr[2])) 15 | return dataMat, labelMat 16 | 17 | 18 | def sigmoid(inX): 19 | return 1.0 / (1.0 + np.exp(-inX)) 20 | 21 | 22 | def gradAscent(dataMatIn, labelMatIn): 23 | dataMat = np.mat(dataMatIn) 24 | labelMat = np.mat(labelMatIn).T 25 | m, n = np.shape(dataMat) 26 | alpha = 0.001 27 | maxCycles = 500 28 | weights = np.ones((n, 1)) 29 | for k in range(maxCycles): 30 | h = sigmoid(dataMat * weights) 31 | error = (labelMat - h) 32 | weights = weights + alpha * dataMat.T * error 33 | return weights 34 | 35 | 36 | def stocGradAscent(dataMatIn, labelMatIn, numIter=150): 37 | m, n = np.shape(dataMatIn) 38 | weights = np.ones(n) 39 | for j in range(numIter): 40 | dataIndex = range(m) 41 | for i in range(m): 42 | alpha = 4.0 / (1.0 + j + i) + 0.0001 43 | randIndex = int(np.random.uniform(0, len(dataIndex))) 44 | h = sigmoid(sum(dataMatIn[randIndex] * weights)) 45 | error = labelMatIn[randIndex] - h 46 | weights = weights + alpha * error * dataMatIn[randIndex] 47 | del(dataIndex[randIndex]) 48 | return weights 49 | 50 | 51 | def classifyVector(inX, weights): 52 | prob = sigmoid(sum(inX * weights)) 53 | if prob > 0.5: 54 | return 1.0 55 | else: 56 | return 0.0 57 | 58 | 59 | def horseColicTest(): 60 | frTrain = open('horseColicTraining.txt') 61 | frTest = open('horseColicTest.txt') 62 | trainSet = [] 63 | trainLabel = [] 64 | for line in frTrain.readlines(): 65 | curLine = line.strip().split('\t') 66 | lineArr = [] 67 | for i in range(21): 68 | lineArr.append(float(curLine[i])) 69 | trainSet.append(lineArr) 70 | trainLabel.append(float(curLine[21])) 71 | 72 | trainWeights = stocGradAscent(np.array(trainSet), trainLabel, 1000) 73 | errorCount = 0 74 | numTestVec = 0.0 75 | for 
line in frTest.readlines(): 76 | numTestVec += 1.0 77 | curLine = line.strip().split('\t') 78 | lineArr = [] 79 | for i in range(21): 80 | lineArr.append(float(curLine[i])) 81 | if int(classifyVector(np.array(lineArr), trainWeights)) != int(curLine[21]): 82 | errorCount += 1 83 | errorRate = float(errorCount) / numTestVec 84 | print 'error rate is: %f' % errorRate 85 | return errorRate 86 | 87 | 88 | def multiTest(): 89 | numTests = 10 90 | errorSum = 0.0 91 | for k in range(numTests): 92 | errorSum += horseColicTest() 93 | print 'after %d iterations, the average error rate is: %f' % (numTests, errorSum / float(numTests)) 94 | 95 | 96 | 97 | def plotBestFit(weights): 98 | import matplotlib.pyplot as plt 99 | dataMat, labelMat = loadDataSet() 100 | dataArr = np.array(dataMat) 101 | n = np.shape(dataArr)[0] 102 | xcord1 = [] 103 | ycord1 = [] 104 | xcord2 = [] 105 | ycord2 = [] 106 | for i in range(n): 107 | if int(labelMat[i]) == 1: 108 | xcord1.append(dataArr[i, 1]) 109 | ycord1.append(dataArr[i, 2]) 110 | else: 111 | xcord2.append(dataArr[i, 1]) 112 | ycord2.append(dataArr[i, 2]) 113 | fig = plt.figure() 114 | ax = fig.add_subplot(111) 115 | ax.scatter(xcord1, ycord1, s=30, c='red', marker='s') 116 | ax.scatter(xcord2, ycord2, s=30, c='green') 117 | x = np.arange(-3.0, 3.0, 0.1) 118 | y = (-weights[0] - weights[1] * x) / weights[2] 119 | ax.plot(x, y) 120 | plt.xlabel('X1') 121 | plt.ylabel('X2') 122 | plt.show() 123 | 124 | 125 | 126 | if __name__ == '__main__': 127 | dataMat, labelMat = loadDataSet() 128 | weights = gradAscent(dataMat, labelMat) 129 | plotBestFit(weights.getA()) 130 | weights = stocGradAscent(np.array(dataMat), labelMat, 200) 131 | weights = horseColicTest() 132 | plotBestFit(weights) 133 | print weights 134 | 135 | # multiTest() 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /LogicRegression/testSet.txt: -------------------------------------------------------------------------------- 1 | -0.017612 14.053064 0 2 | -1.395634 4.662541 1 3 | -0.752157 6.538620 0 4 | -1.322371 7.152853 0 5 | 0.423363 11.054677 0 6 | 0.406704 7.067335 1 7 | 0.667394 12.741452 0 8 | -2.460150 6.866805 1 9 | 0.569411 9.548755 0 10 | -0.026632 10.427743 0 11 | 0.850433 6.920334 1 12 | 1.347183 13.175500 0 13 | 1.176813 3.167020 1 14 | -1.781871 9.097953 0 15 | -0.566606 5.749003 1 16 | 0.931635 1.589505 1 17 | -0.024205 6.151823 1 18 | -0.036453 2.690988 1 19 | -0.196949 0.444165 1 20 | 1.014459 5.754399 1 21 | 1.985298 3.230619 1 22 | -1.693453 -0.557540 1 23 | -0.576525 11.778922 0 24 | -0.346811 -1.678730 1 25 | -2.124484 2.672471 1 26 | 1.217916 9.597015 0 27 | -0.733928 9.098687 0 28 | -3.642001 -1.618087 1 29 | 0.315985 3.523953 1 30 | 1.416614 9.619232 0 31 | -0.386323 3.989286 1 32 | 0.556921 8.294984 1 33 | 1.224863 11.587360 0 34 | -1.347803 -2.406051 1 35 | 1.196604 4.951851 1 36 | 0.275221 9.543647 0 37 | 0.470575 9.332488 0 38 | -1.889567 9.542662 0 39 | -1.527893 12.150579 0 40 | -1.185247 11.309318 0 41 | -0.445678 3.297303 1 42 | 1.042222 6.105155 1 43 | -0.618787 10.320986 0 44 | 1.152083 0.548467 1 45 | 0.828534 2.676045 1 46 | -1.237728 10.549033 0 47 | -0.683565 -2.166125 1 48 | 0.229456 5.921938 1 49 | -0.959885 11.555336 0 50 | 0.492911 10.993324 0 51 | 0.184992 8.721488 0 52 | -0.355715 10.325976 0 53 | -0.397822 8.058397 0 54 | 0.824839 13.730343 0 55 | 1.507278 5.027866 1 56 | 0.099671 6.835839 1 57 | -0.344008 10.717485 0 58 | 1.785928 7.718645 1 59 | -0.918801 11.560217 0 60 | -0.364009 
4.747300 1 61 | -0.841722 4.119083 1 62 | 0.490426 1.960539 1 63 | -0.007194 9.075792 0 64 | 0.356107 12.447863 0 65 | 0.342578 12.281162 0 66 | -0.810823 -1.466018 1 67 | 2.530777 6.476801 1 68 | 1.296683 11.607559 0 69 | 0.475487 12.040035 0 70 | -0.783277 11.009725 0 71 | 0.074798 11.023650 0 72 | -1.337472 0.468339 1 73 | -0.102781 13.763651 0 74 | -0.147324 2.874846 1 75 | 0.518389 9.887035 0 76 | 1.015399 7.571882 0 77 | -1.658086 -0.027255 1 78 | 1.319944 2.171228 1 79 | 2.056216 5.019981 1 80 | -0.851633 4.375691 1 81 | -1.510047 6.061992 0 82 | -1.076637 -3.181888 1 83 | 1.821096 10.283990 0 84 | 3.010150 8.401766 1 85 | -1.099458 1.688274 1 86 | -0.834872 -1.733869 1 87 | -0.846637 3.849075 1 88 | 1.400102 12.628781 0 89 | 1.752842 5.468166 1 90 | 0.078557 0.059736 1 91 | 0.089392 -0.715300 1 92 | 1.825662 12.693808 0 93 | 0.197445 9.744638 0 94 | 0.126117 0.922311 1 95 | -0.679797 1.220530 1 96 | 0.677983 2.556666 1 97 | 0.761349 10.693862 0 98 | -2.168791 0.143632 1 99 | 1.388610 9.341997 0 100 | 0.317029 14.739025 0 101 | -------------------------------------------------------------------------------- /MaxEntropy/MaxEntropy.py: -------------------------------------------------------------------------------- 1 | #encoding:utf-8 2 | __author__ = 'wangjiewen' 3 | 4 | from collections import defaultdict 5 | import math 6 | 7 | class MaxEntropy(object): 8 | 9 | def __init__(self): 10 | self.trainList = [] 11 | self.featureDict = defaultdict(int) 12 | self.indexDict = defaultdict(int) 13 | self.labels = set() 14 | 15 | self.ep = [] 16 | self.ep_ = [] 17 | self.lambdaNew = [] 18 | self.lambdaOld = [] 19 | self.epsilon = 0.01 20 | self.maxIterator = 1000 21 | self.N = 0 22 | self.C = 0 23 | 24 | 25 | def loadData(self, filepath): 26 | files = open(filepath) 27 | for line in files: 28 | fields = line.strip().split(' ') 29 | label = fields[0] 30 | self.labels.add(label) 31 | 32 | for feature in fields[1:]: 33 | self.featureDict[(label, feature)] += 1 34 | 35 | self.trainList.append(fields) 36 | 37 | def initParams(self): 38 | self.N = len(self.trainList) 39 | self.C = max([len(record) - 1 for record in self.trainList]) 40 | 41 | self.ep_ = [0.0] * len(self.featureDict) 42 | for i, feature in enumerate(self.featureDict): 43 | prob = float(self.featureDict[feature]) / float(self.N) 44 | self.ep_[i] = prob 45 | self.indexDict[feature] = i 46 | 47 | self.lambdaNew = [0.0] * len(self.featureDict) 48 | self.lambdaOld = self.lambdaNew 49 | 50 | 51 | def zFunc(self, features, label): 52 | """ 53 | 计算Z里面的 exp(∑ λi*fi(a,b)) 54 | 55 | :param features: string[] 特征的数组 56 | :param label: string 57 | :return: double 58 | """ 59 | weight = 0.0 60 | for f in features: 61 | if (label, f) in self.featureDict: 62 | index = self.indexDict[(label, f)] 63 | weight += self.lambdaNew[index] 64 | 65 | return math.exp(weight) 66 | 67 | 68 | def pFunc(self, features, label): 69 | Z = 0.0 70 | for l in self.labels: 71 | Z += self.zFunc(features, l) 72 | 73 | prob = (1.0 / Z) * self.zFunc(features,label) 74 | return prob 75 | 76 | def calcEp(self): 77 | ep = [0.0] * len(self.featureDict) 78 | 79 | for record in self.trainList: 80 | features = record[1:] 81 | for label in self.labels: 82 | prob = self.pFunc(features, label) 83 | 84 | # ∑ p(a) * p(b|a) * f(a,b), p(a) = 1/N 85 | for f in features: 86 | if (label, f) in self.featureDict: 87 | index = self.indexDict[(label, f)] 88 | ep[index] += (1.0 / self.N) * prob 89 | return ep 90 | 91 | def isConvergent(self, lambdaNew, lambdaOld): 92 | for l1, l2 in zip(lambdaNew, 
lambdaOld): 93 | if abs(l1 - l2) >= self.epsilon: 94 | return False 95 | return True 96 | 97 | def train(self): 98 | self.initParams() 99 | 100 | for k in range(0, self.maxIterator): 101 | self.ep = self.calcEp() 102 | self.lambdaOld = self.lambdaNew[:] 103 | 104 | for i, l1 in enumerate(self.lambdaNew): 105 | delta = 1.0 / self.C * math.log(self.ep_[i] / self.ep[i]) 106 | self.lambdaNew[i] += delta 107 | 108 | if self.isConvergent(self.lambdaNew, self.lambdaOld): 109 | break 110 | 111 | 112 | def predict(self, features): 113 | for label in self.labels: 114 | prob = self.pFunc(features, label) 115 | print (prob, label) 116 | 117 | 118 | model = MaxEntropy() 119 | model.loadData('train.txt') 120 | model.train() 121 | 122 | model.predict(['Sunny', 'Happy']) 123 | -------------------------------------------------------------------------------- /MaxEntropy/train.txt: -------------------------------------------------------------------------------- 1 | Outdoor Sunny Happy 2 | Outdoor Sunny Happy Dry 3 | Outdoor Sunny Happy Humid 4 | Outdoor Sunny Sad Dry 5 | Outdoor Sunny Sad Humid 6 | Outdoor Cloudy Happy Humid 7 | Outdoor Cloudy Happy Humid 8 | Outdoor Cloudy Sad Humid 9 | Outdoor Cloudy Sad Humid 10 | Indoor Rainy Happy Humid 11 | Indoor Rainy Happy Dry 12 | Indoor Rainy Sad Dry 13 | Indoor Rainy Sad Humid 14 | Indoor Cloudy Sad Humid 15 | Indoor Cloudy Sad Humid -------------------------------------------------------------------------------- /MaxEntropyWeb/MaxEnt.py: -------------------------------------------------------------------------------- 1 | __author__ = 'wangjiewen' 2 | 3 | from collections import defaultdict 4 | import math 5 | 6 | class MaxEnt(object): 7 | def __init__(self): 8 | # 9 | self.feats = defaultdict(int) 10 | self.trainset = [] 11 | self.labels = set() 12 | 13 | #load train file 14 | def load_data(self,file): 15 | for line in open(file): 16 | fields = line.strip().split() 17 | # at least two columns 18 | if len(fields) < 2: continue 19 | # the first column is label 20 | label = fields[0] 21 | self.labels.add(label) 22 | for f in set(fields[1:]): 23 | # (label,f) tuple is feature 24 | self.feats[(label,f)] += 1 25 | self.trainset.append(fields) 26 | 27 | def _initparams(self): 28 | self.size = len(self.trainset) 29 | # M param for GIS training algorithm 30 | self.M = max([len(record)-1 for record in self.trainset]) 31 | self.ep_ = [0.0]*len(self.feats) 32 | for i,f in enumerate(self.feats): 33 | # calculate feature expectation on empirical distribution 34 | self.ep_[i] = float(self.feats[f])/float(self.size) 35 | # each feature function correspond to id 36 | self.feats[f] = i 37 | # init weight for each feature 38 | self.w = [0.0]*len(self.feats) 39 | self.lastw = self.w 40 | 41 | def probwgt(self,features,label): 42 | wgt = 0.0 43 | for f in features: 44 | if (label,f) in self.feats: 45 | wgt += self.w[self.feats[(label,f)]] 46 | return math.exp(wgt) 47 | 48 | def calprob(self,features): 49 | wgts = [(self.probwgt(features, l),l) for l in self.labels] 50 | Z = sum([ w for w,l in wgts]) 51 | prob = [ (w/Z,l) for w,l in wgts] 52 | return prob 53 | 54 | """ 55 | calculate feature expectation on model distribution 56 | """ 57 | def Ep(self): 58 | ep = [0.0]*len(self.feats) 59 | for record in self.trainset: 60 | features = record[1:] 61 | # calculate p(y|x) 62 | prob = self.calprob(features) 63 | for f in features: 64 | for w,l in prob: 65 | # only focus on features from training data. 
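                    # i.e. accumulate the model expectation
                    #   E_p[f_i] = sum_x ptilde(x) * sum_y p(y|x) * f_i(x, y)
                    # where the empirical ptilde(x) is taken as 1/N for each training record.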
66 | if (l,f) in self.feats: 67 | # get feature id 68 | idx = self.feats[(l,f)] 69 | # sum(1/N * f(y,x)*p(y|x)), p(x) = 1/N 70 | ep[idx] += w * (1.0/self.size) 71 | return ep 72 | 73 | def _convergence(self,lastw,w): 74 | for w1,w2 in zip(lastw,w): 75 | if abs(w1-w2) >= 0.01: 76 | return False 77 | return True 78 | 79 | 80 | def train(self, max_iter =1000): 81 | self._initparams() 82 | for i in range(max_iter): 83 | print 'iter %d ...'%(i+1) 84 | # calculate feature expectation on model distribution 85 | self.ep = self.Ep() 86 | self.lastw = self.w[:] 87 | for i,w in enumerate(self.w): 88 | delta = 1.0/self.M * math.log(self.ep_[i]/self.ep[i]) 89 | # update w 90 | self.w[i] += delta 91 | print self.w 92 | # test if the algorithm is convergence 93 | if self._convergence(self.lastw,self.w): 94 | break 95 | 96 | 97 | def predict(self,input): 98 | features = input.strip().split() 99 | prob = self.calprob(features) 100 | prob.sort(reverse=True) 101 | return prob -------------------------------------------------------------------------------- /MaxEntropyWeb/MaxEnt.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchuandong/NLPinPython/b9baf77d848e8acbf8b5a6459947c853c1b4a7fc/MaxEntropyWeb/MaxEnt.pyc -------------------------------------------------------------------------------- /MaxEntropyWeb/MaxEntMain.py: -------------------------------------------------------------------------------- 1 | from MaxEntropyWeb import MaxEnt 2 | 3 | __author__ = 'wangjiewen' 4 | 5 | model = MaxEnt.MaxEnt() 6 | 7 | model.load_data('train.txt') 8 | model.train() 9 | 10 | print 11 | print '---------------------' 12 | 13 | probA = model.predict('Sunny') 14 | print probA 15 | 16 | probB = model.predict('Rainy') 17 | print probB -------------------------------------------------------------------------------- /MaxEntropyWeb/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'wangjiewen' 2 | -------------------------------------------------------------------------------- /MaxEntropyWeb/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinchuandong/NLPinPython/b9baf77d848e8acbf8b5a6459947c853c1b4a7fc/MaxEntropyWeb/__init__.pyc -------------------------------------------------------------------------------- /MaxEntropyWeb/train.txt: -------------------------------------------------------------------------------- 1 | Outdoor Sunny Happy 2 | Outdoor Sunny Happy Dry 3 | Outdoor Sunny Happy Humid 4 | Outdoor Sunny Sad Dry 5 | Outdoor Sunny Sad Humid 6 | Outdoor Cloudy Happy Humid 7 | Outdoor Cloudy Happy Humid 8 | Outdoor Cloudy Sad Humid 9 | Outdoor Cloudy Sad Humid 10 | Indoor Rainy Happy Humid 11 | Indoor Rainy Happy Dry 12 | Indoor Rainy Sad Dry 13 | Indoor Rainy Sad Humid 14 | Indoor Cloudy Sad Humid 15 | Indoor Cloudy Sad Humid -------------------------------------------------------------------------------- /MutualInformation/MI.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to calculate Mutual Information between two discrete random variables 3 | 4 | Roberto maestre - rmaestre@gmail.com 5 | Bojan Mihaljevic - boki.mihaljevic@gmail.com 6 | """ 7 | from __future__ import division 8 | from numpy import array, shape, where, in1d 9 | import math 10 | import time 11 | import nose 12 | 13 | class InformationTheoryTool: 14 | 15 | def __init__(self, data): 16 | """ 
17 | """ 18 | # Check if all rows have the same length 19 | assert (len(data.shape) == 2) 20 | # Save data 21 | self.data = data 22 | self.n_rows = data.shape[0] 23 | self.n_cols = data.shape[1] 24 | 25 | 26 | def single_entropy(self, x_index, log_base, debug = False): 27 | """ 28 | Calculate the entropy of a random variable 29 | """ 30 | # Check if index are into the bounds 31 | assert (x_index >= 0 and x_index <= self.n_rows) 32 | # Variable to return entropy 33 | summation = 0.0 34 | # Get uniques values of random variables 35 | values_x = set(data[x_index]) 36 | # Print debug info 37 | if debug: 38 | print 'Entropy of' 39 | print data[x_index] 40 | # For each random 41 | for value_x in values_x: 42 | px = shape(where(data[x_index]==value_x))[1] / self.n_cols 43 | if px > 0.0: 44 | summation += px * math.log(px, log_base) 45 | if debug: 46 | print '(%d) px:%f' % (value_x, px) 47 | if summation == 0.0: 48 | return summation 49 | else: 50 | return - summation 51 | 52 | 53 | def entropy(self, x_index, y_index, log_base, debug = False): 54 | """ 55 | Calculate the entropy between two random variable 56 | """ 57 | assert (x_index >= 0 and x_index <= self.n_rows) 58 | assert (y_index >= 0 and y_index <= self.n_rows) 59 | # Variable to return MI 60 | summation = 0.0 61 | # Get uniques values of random variables 62 | values_x = set(data[x_index]) 63 | values_y = set(data[y_index]) 64 | # Print debug info 65 | if debug: 66 | print 'Entropy between' 67 | print data[x_index] 68 | print data[y_index] 69 | # For each random 70 | for value_x in values_x: 71 | for value_y in values_y: 72 | pxy = len(where(in1d(where(data[x_index]==value_x)[0], 73 | where(data[y_index]==value_y)[0])==True)[0]) / self.n_cols 74 | if pxy > 0.0: 75 | summation += pxy * math.log(pxy, log_base) 76 | if debug: 77 | print '(%d,%d) pxy:%f' % (value_x, value_y, pxy) 78 | if summation == 0.0: 79 | return summation 80 | else: 81 | return - summation 82 | 83 | 84 | 85 | def mutual_information(self, x_index, y_index, log_base, debug = False): 86 | """ 87 | Calculate and return Mutual information between two random variables 88 | """ 89 | # Check if index are into the bounds 90 | assert (x_index >= 0 and x_index <= self.n_rows) 91 | assert (y_index >= 0 and y_index <= self.n_rows) 92 | # Variable to return MI 93 | summation = 0.0 94 | # Get uniques values of random variables 95 | values_x = set(data[x_index]) 96 | values_y = set(data[y_index]) 97 | # Print debug info 98 | if debug: 99 | print 'MI between' 100 | print data[x_index] 101 | print data[y_index] 102 | # For each random 103 | for value_x in values_x: 104 | for value_y in values_y: 105 | if value_y != 3: continue 106 | px = shape(where(data[x_index]==value_x))[1] / self.n_cols 107 | py = shape(where(data[y_index]==value_y))[1] / self.n_cols 108 | pxy = len(where(in1d(where(data[x_index]==value_x)[0], 109 | where(data[y_index]==value_y)[0])==True)[0]) / self.n_cols 110 | print where(in1d(where(data[x_index]==value_x)[0], 111 | where(data[y_index]==value_y)[0])) 112 | if pxy > 0.0: 113 | summation += pxy * math.log((pxy / (px*py)), log_base) 114 | if debug: 115 | print '(%d,%d) px:%f py:%f pxy:%f' % (value_x, value_y, px, py, pxy) 116 | import sys 117 | sys.exit() 118 | return summation 119 | 120 | 121 | 122 | # Define data array 123 | data = array( [ (0, 0, 1, 1, 0, 1, 1, 2, 2, 2), 124 | (3, 4, 5, 5, 3, 2, 2, 6, 6, 1), 125 | (7, 2, 1, 3, 2, 8, 9, 1, 2, 0), 126 | (7, 7, 7, 7, 7, 7, 7, 7, 7, 7), 127 | (0, 1, 2, 3, 4, 5, 6, 7, 1, 1)]) 128 | # Create object 129 | it_tool = 
InformationTheoryTool(data) 130 | 131 | 132 | # --- Checking single random var entropy 133 | 134 | # entropy of X_1 (3, 4, 5, 5, 3, 2, 2, 6, 6, 1) 135 | # t_start = time.time() 136 | # print 'Entropy(X_1): %f' % it_tool.single_entropy(1, 10, False) 137 | # print 'Elapsed time: %f\n' % (time.time() - t_start) 138 | 139 | # # entropy of X_3 (7, 7, 7, 7, 7, 7, 7, 7, 7, 7) 140 | # t_start = time.time() 141 | # print 'Entropy(X_3): %f' % it_tool.single_entropy(3, 10) 142 | # print 'Elapsed time: %f\n' % (time.time() - t_start) 143 | 144 | # # entropy of X_4 (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) 145 | # t_start = time.time() 146 | # print 'Entropy(X_4): %f' % it_tool.single_entropy(4, 10) 147 | # print 'Elapsed time: %f\n' % (time.time() - t_start) 148 | 149 | 150 | 151 | # # --- Checking entropy between two random variables 152 | 153 | # # entropy of X_0 (0, 0, 1, 1, 0, 1, 1, 2, 2, 2) and X_1 (3, 4, 5, 5, 3, 2, 2, 6, 6, 1) 154 | # t_start = time.time() 155 | # print 'Entropy(X_0, X_1): %f' % it_tool.entropy(0, 1, 10) 156 | # print 'Elapsed time: %f\n' % (time.time() - t_start) 157 | 158 | # # entropy of X_3 (7, 7, 7, 7, 7, 7, 7, 7, 7, 7) and X_3 (7, 7, 7, 7, 7, 7, 7, 7, 7, 7) 159 | # t_start = time.time() 160 | # print 'Entropy(X_3, X_3): %f' % it_tool.entropy(3, 3, 10) 161 | # print 'Elapsed time: %f\n' % (time.time() - t_start) 162 | 163 | 164 | 165 | # ---Checking Mutual Information between two random variables 166 | 167 | # Print mutual information between X_0 (0,0,1,1,0,1,1,2,2,2) and X_1 (3,4,5,5,3,2,2,6,6,1) 168 | t_start = time.time() 169 | print 'MI(X_0, X_1): %f' % it_tool.mutual_information(0, 1, 10, True) 170 | print 'Elapsed time: %f\n' % (time.time() - t_start) 171 | 172 | # Print mutual information between X_1 (3,4,5,5,3,2,2,6,6,1) and X_2 (7,2,1,3,2,8,9,1,2,0) 173 | # t_start = time.time() 174 | # print 'MI(X_1, X_2): %f' % it_tool.mutual_information(1, 2, 10) 175 | # print 'Elapsed time: %f\n' % (time.time() - t_start) 176 | 177 | 178 | 179 | # --- Checking results 180 | 181 | # Checking entropy results 182 | for i in range(0,data.shape[0]): 183 | assert(it_tool.entropy(i, i, 10) == it_tool.single_entropy(i, 10)) 184 | 185 | # Checking mutual information results 186 | # MI(X,Y) = H(X) + H(Y) - H(X,Y) 187 | n_rows = data.shape[0] 188 | i = 0 189 | while i < n_rows: 190 | j = i + 1 191 | while j < n_rows: 192 | if j != i: 193 | nose.tools.assert_almost_equal(it_tool.mutual_information(i, j, 10), 194 | it_tool.single_entropy(i, 10)+it_tool.single_entropy(j, 10)-it_tool.entropy(i, j, 10)) 195 | j += 1 196 | i += 1 197 | 198 | 199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /NoneParamDP/CRP.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | def Chinese_Restaurant_Process(num_customer, alpha): 5 | if num_customer == 0: 6 | return [] 7 | table_assignments = [1] 8 | next_open_table = 2 9 | for i in range(1, num_customer - 1): 10 | prob = float(alpha) / (alpha + i) 11 | rand = random.uniform(0, 1) 12 | if rand < prob: 13 | table_assignments.append(next_open_table) 14 | next_open_table = next_open_table + 1 15 | else: 16 | randId = int(random.uniform(0, len(table_assignments))) 17 | which_table = table_assignments[randId] 18 | table_assignments.append(which_table) 19 | return table_assignments 20 | 21 | if __name__ == '__main__': 22 | print Chinese_Restaurant_Process(10, 3) 23 | 
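# Sampling rule implemented above: the first customer sits at table 1; each later
# customer i opens a new table with probability alpha / (alpha + i) and otherwise
# joins the table of a uniformly chosen earlier customer, so well-populated tables
# attract more customers (rich-get-richer). The number of occupied tables therefore
# grows roughly like alpha * log(num_customer).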
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | NLPinPython 2 | =========== 3 | 4 | 自然语言处理的一些算法,用Python实现 5 | -------------------------------------------------------------------------------- /Regression/ex0.txt: -------------------------------------------------------------------------------- 1 | 1.000000 0.067732 3.176513 2 | 1.000000 0.427810 3.816464 3 | 1.000000 0.995731 4.550095 4 | 1.000000 0.738336 4.256571 5 | 1.000000 0.981083 4.560815 6 | 1.000000 0.526171 3.929515 7 | 1.000000 0.378887 3.526170 8 | 1.000000 0.033859 3.156393 9 | 1.000000 0.132791 3.110301 10 | 1.000000 0.138306 3.149813 11 | 1.000000 0.247809 3.476346 12 | 1.000000 0.648270 4.119688 13 | 1.000000 0.731209 4.282233 14 | 1.000000 0.236833 3.486582 15 | 1.000000 0.969788 4.655492 16 | 1.000000 0.607492 3.965162 17 | 1.000000 0.358622 3.514900 18 | 1.000000 0.147846 3.125947 19 | 1.000000 0.637820 4.094115 20 | 1.000000 0.230372 3.476039 21 | 1.000000 0.070237 3.210610 22 | 1.000000 0.067154 3.190612 23 | 1.000000 0.925577 4.631504 24 | 1.000000 0.717733 4.295890 25 | 1.000000 0.015371 3.085028 26 | 1.000000 0.335070 3.448080 27 | 1.000000 0.040486 3.167440 28 | 1.000000 0.212575 3.364266 29 | 1.000000 0.617218 3.993482 30 | 1.000000 0.541196 3.891471 31 | 1.000000 0.045353 3.143259 32 | 1.000000 0.126762 3.114204 33 | 1.000000 0.556486 3.851484 34 | 1.000000 0.901144 4.621899 35 | 1.000000 0.958476 4.580768 36 | 1.000000 0.274561 3.620992 37 | 1.000000 0.394396 3.580501 38 | 1.000000 0.872480 4.618706 39 | 1.000000 0.409932 3.676867 40 | 1.000000 0.908969 4.641845 41 | 1.000000 0.166819 3.175939 42 | 1.000000 0.665016 4.264980 43 | 1.000000 0.263727 3.558448 44 | 1.000000 0.231214 3.436632 45 | 1.000000 0.552928 3.831052 46 | 1.000000 0.047744 3.182853 47 | 1.000000 0.365746 3.498906 48 | 1.000000 0.495002 3.946833 49 | 1.000000 0.493466 3.900583 50 | 1.000000 0.792101 4.238522 51 | 1.000000 0.769660 4.233080 52 | 1.000000 0.251821 3.521557 53 | 1.000000 0.181951 3.203344 54 | 1.000000 0.808177 4.278105 55 | 1.000000 0.334116 3.555705 56 | 1.000000 0.338630 3.502661 57 | 1.000000 0.452584 3.859776 58 | 1.000000 0.694770 4.275956 59 | 1.000000 0.590902 3.916191 60 | 1.000000 0.307928 3.587961 61 | 1.000000 0.148364 3.183004 62 | 1.000000 0.702180 4.225236 63 | 1.000000 0.721544 4.231083 64 | 1.000000 0.666886 4.240544 65 | 1.000000 0.124931 3.222372 66 | 1.000000 0.618286 4.021445 67 | 1.000000 0.381086 3.567479 68 | 1.000000 0.385643 3.562580 69 | 1.000000 0.777175 4.262059 70 | 1.000000 0.116089 3.208813 71 | 1.000000 0.115487 3.169825 72 | 1.000000 0.663510 4.193949 73 | 1.000000 0.254884 3.491678 74 | 1.000000 0.993888 4.533306 75 | 1.000000 0.295434 3.550108 76 | 1.000000 0.952523 4.636427 77 | 1.000000 0.307047 3.557078 78 | 1.000000 0.277261 3.552874 79 | 1.000000 0.279101 3.494159 80 | 1.000000 0.175724 3.206828 81 | 1.000000 0.156383 3.195266 82 | 1.000000 0.733165 4.221292 83 | 1.000000 0.848142 4.413372 84 | 1.000000 0.771184 4.184347 85 | 1.000000 0.429492 3.742878 86 | 1.000000 0.162176 3.201878 87 | 1.000000 0.917064 4.648964 88 | 1.000000 0.315044 3.510117 89 | 1.000000 0.201473 3.274434 90 | 1.000000 0.297038 3.579622 91 | 1.000000 0.336647 3.489244 92 | 1.000000 0.666109 4.237386 93 | 1.000000 0.583888 3.913749 94 | 1.000000 0.085031 3.228990 95 | 1.000000 0.687006 4.286286 96 | 1.000000 0.949655 4.628614 97 | 1.000000 0.189912 3.239536 98 | 1.000000 0.844027 4.457997 99 | 
1.000000 0.333288 3.513384 100 | 1.000000 0.427035 3.729674 101 | 1.000000 0.466369 3.834274 102 | 1.000000 0.550659 3.811155 103 | 1.000000 0.278213 3.598316 104 | 1.000000 0.918769 4.692514 105 | 1.000000 0.886555 4.604859 106 | 1.000000 0.569488 3.864912 107 | 1.000000 0.066379 3.184236 108 | 1.000000 0.335751 3.500796 109 | 1.000000 0.426863 3.743365 110 | 1.000000 0.395746 3.622905 111 | 1.000000 0.694221 4.310796 112 | 1.000000 0.272760 3.583357 113 | 1.000000 0.503495 3.901852 114 | 1.000000 0.067119 3.233521 115 | 1.000000 0.038326 3.105266 116 | 1.000000 0.599122 3.865544 117 | 1.000000 0.947054 4.628625 118 | 1.000000 0.671279 4.231213 119 | 1.000000 0.434811 3.791149 120 | 1.000000 0.509381 3.968271 121 | 1.000000 0.749442 4.253910 122 | 1.000000 0.058014 3.194710 123 | 1.000000 0.482978 3.996503 124 | 1.000000 0.466776 3.904358 125 | 1.000000 0.357767 3.503976 126 | 1.000000 0.949123 4.557545 127 | 1.000000 0.417320 3.699876 128 | 1.000000 0.920461 4.613614 129 | 1.000000 0.156433 3.140401 130 | 1.000000 0.656662 4.206717 131 | 1.000000 0.616418 3.969524 132 | 1.000000 0.853428 4.476096 133 | 1.000000 0.133295 3.136528 134 | 1.000000 0.693007 4.279071 135 | 1.000000 0.178449 3.200603 136 | 1.000000 0.199526 3.299012 137 | 1.000000 0.073224 3.209873 138 | 1.000000 0.286515 3.632942 139 | 1.000000 0.182026 3.248361 140 | 1.000000 0.621523 3.995783 141 | 1.000000 0.344584 3.563262 142 | 1.000000 0.398556 3.649712 143 | 1.000000 0.480369 3.951845 144 | 1.000000 0.153350 3.145031 145 | 1.000000 0.171846 3.181577 146 | 1.000000 0.867082 4.637087 147 | 1.000000 0.223855 3.404964 148 | 1.000000 0.528301 3.873188 149 | 1.000000 0.890192 4.633648 150 | 1.000000 0.106352 3.154768 151 | 1.000000 0.917886 4.623637 152 | 1.000000 0.014855 3.078132 153 | 1.000000 0.567682 3.913596 154 | 1.000000 0.068854 3.221817 155 | 1.000000 0.603535 3.938071 156 | 1.000000 0.532050 3.880822 157 | 1.000000 0.651362 4.176436 158 | 1.000000 0.901225 4.648161 159 | 1.000000 0.204337 3.332312 160 | 1.000000 0.696081 4.240614 161 | 1.000000 0.963924 4.532224 162 | 1.000000 0.981390 4.557105 163 | 1.000000 0.987911 4.610072 164 | 1.000000 0.990947 4.636569 165 | 1.000000 0.736021 4.229813 166 | 1.000000 0.253574 3.500860 167 | 1.000000 0.674722 4.245514 168 | 1.000000 0.939368 4.605182 169 | 1.000000 0.235419 3.454340 170 | 1.000000 0.110521 3.180775 171 | 1.000000 0.218023 3.380820 172 | 1.000000 0.869778 4.565020 173 | 1.000000 0.196830 3.279973 174 | 1.000000 0.958178 4.554241 175 | 1.000000 0.972673 4.633520 176 | 1.000000 0.745797 4.281037 177 | 1.000000 0.445674 3.844426 178 | 1.000000 0.470557 3.891601 179 | 1.000000 0.549236 3.849728 180 | 1.000000 0.335691 3.492215 181 | 1.000000 0.884739 4.592374 182 | 1.000000 0.918916 4.632025 183 | 1.000000 0.441815 3.756750 184 | 1.000000 0.116598 3.133555 185 | 1.000000 0.359274 3.567919 186 | 1.000000 0.814811 4.363382 187 | 1.000000 0.387125 3.560165 188 | 1.000000 0.982243 4.564305 189 | 1.000000 0.780880 4.215055 190 | 1.000000 0.652565 4.174999 191 | 1.000000 0.870030 4.586640 192 | 1.000000 0.604755 3.960008 193 | 1.000000 0.255212 3.529963 194 | 1.000000 0.730546 4.213412 195 | 1.000000 0.493829 3.908685 196 | 1.000000 0.257017 3.585821 197 | 1.000000 0.833735 4.374394 198 | 1.000000 0.070095 3.213817 199 | 1.000000 0.527070 3.952681 200 | 1.000000 0.116163 3.129283 201 | -------------------------------------------------------------------------------- /Regression/ex1.txt: -------------------------------------------------------------------------------- 1 | 
1.000000 0.635975 4.093119 2 | 1.000000 0.552438 3.804358 3 | 1.000000 0.855922 4.456531 4 | 1.000000 0.083386 3.187049 5 | 1.000000 0.975802 4.506176 6 | 1.000000 0.181269 3.171914 7 | 1.000000 0.129156 3.053996 8 | 1.000000 0.605648 3.974659 9 | 1.000000 0.301625 3.542525 10 | 1.000000 0.698805 4.234199 11 | 1.000000 0.226419 3.405937 12 | 1.000000 0.519290 3.932469 13 | 1.000000 0.354424 3.514051 14 | 1.000000 0.118380 3.105317 15 | 1.000000 0.512811 3.843351 16 | 1.000000 0.236795 3.576074 17 | 1.000000 0.353509 3.544471 18 | 1.000000 0.481447 3.934625 19 | 1.000000 0.060509 3.228226 20 | 1.000000 0.174090 3.300232 21 | 1.000000 0.806818 4.331785 22 | 1.000000 0.531462 3.908166 23 | 1.000000 0.853167 4.386918 24 | 1.000000 0.304804 3.617260 25 | 1.000000 0.612021 4.082411 26 | 1.000000 0.620880 3.949470 27 | 1.000000 0.580245 3.984041 28 | 1.000000 0.742443 4.251907 29 | 1.000000 0.110770 3.115214 30 | 1.000000 0.742687 4.234319 31 | 1.000000 0.574390 3.947544 32 | 1.000000 0.986378 4.532519 33 | 1.000000 0.294867 3.510392 34 | 1.000000 0.472125 3.927832 35 | 1.000000 0.872321 4.631825 36 | 1.000000 0.843537 4.482263 37 | 1.000000 0.864577 4.487656 38 | 1.000000 0.341874 3.486371 39 | 1.000000 0.097980 3.137514 40 | 1.000000 0.757874 4.212660 41 | 1.000000 0.877656 4.506268 42 | 1.000000 0.457993 3.800973 43 | 1.000000 0.475341 3.975979 44 | 1.000000 0.848391 4.494447 45 | 1.000000 0.746059 4.244715 46 | 1.000000 0.153462 3.019251 47 | 1.000000 0.694256 4.277945 48 | 1.000000 0.498712 3.812414 49 | 1.000000 0.023580 3.116973 50 | 1.000000 0.976826 4.617363 51 | 1.000000 0.624004 4.005158 52 | 1.000000 0.472220 3.874188 53 | 1.000000 0.390551 3.630228 54 | 1.000000 0.021349 3.145849 55 | 1.000000 0.173488 3.192618 56 | 1.000000 0.971028 4.540226 57 | 1.000000 0.595302 3.835879 58 | 1.000000 0.097638 3.141948 59 | 1.000000 0.745972 4.323316 60 | 1.000000 0.676390 4.204829 61 | 1.000000 0.488949 3.946710 62 | 1.000000 0.982873 4.666332 63 | 1.000000 0.296060 3.482348 64 | 1.000000 0.228008 3.451286 65 | 1.000000 0.671059 4.186388 66 | 1.000000 0.379419 3.595223 67 | 1.000000 0.285170 3.534446 68 | 1.000000 0.236314 3.420891 69 | 1.000000 0.629803 4.115553 70 | 1.000000 0.770272 4.257463 71 | 1.000000 0.493052 3.934798 72 | 1.000000 0.631592 4.154963 73 | 1.000000 0.965676 4.587470 74 | 1.000000 0.598675 3.944766 75 | 1.000000 0.351997 3.480517 76 | 1.000000 0.342001 3.481382 77 | 1.000000 0.661424 4.253286 78 | 1.000000 0.140912 3.131670 79 | 1.000000 0.373574 3.527099 80 | 1.000000 0.223166 3.378051 81 | 1.000000 0.908785 4.578960 82 | 1.000000 0.915102 4.551773 83 | 1.000000 0.410940 3.634259 84 | 1.000000 0.754921 4.167016 85 | 1.000000 0.764453 4.217570 86 | 1.000000 0.101534 3.237201 87 | 1.000000 0.780368 4.353163 88 | 1.000000 0.819868 4.342184 89 | 1.000000 0.173990 3.236950 90 | 1.000000 0.330472 3.509404 91 | 1.000000 0.162656 3.242535 92 | 1.000000 0.476283 3.907937 93 | 1.000000 0.636391 4.108455 94 | 1.000000 0.758737 4.181959 95 | 1.000000 0.778372 4.251103 96 | 1.000000 0.936287 4.538462 97 | 1.000000 0.510904 3.848193 98 | 1.000000 0.515737 3.974757 99 | 1.000000 0.437823 3.708323 100 | 1.000000 0.828607 4.385210 101 | 1.000000 0.556100 3.927788 102 | 1.000000 0.038209 3.187881 103 | 1.000000 0.321993 3.444542 104 | 1.000000 0.067288 3.199263 105 | 1.000000 0.774989 4.285745 106 | 1.000000 0.566077 3.878557 107 | 1.000000 0.796314 4.155745 108 | 1.000000 0.746600 4.197772 109 | 1.000000 0.360778 3.524928 110 | 1.000000 0.397321 3.525692 111 | 1.000000 0.062142 3.211318 112 
| 1.000000 0.379250 3.570495 113 | 1.000000 0.248238 3.462431 114 | 1.000000 0.682561 4.206177 115 | 1.000000 0.355393 3.562322 116 | 1.000000 0.889051 4.595215 117 | 1.000000 0.733806 4.182694 118 | 1.000000 0.153949 3.320695 119 | 1.000000 0.036104 3.122670 120 | 1.000000 0.388577 3.541312 121 | 1.000000 0.274481 3.502135 122 | 1.000000 0.319401 3.537559 123 | 1.000000 0.431653 3.712609 124 | 1.000000 0.960398 4.504875 125 | 1.000000 0.083660 3.262164 126 | 1.000000 0.122098 3.105583 127 | 1.000000 0.415299 3.742634 128 | 1.000000 0.854192 4.566589 129 | 1.000000 0.925574 4.630884 130 | 1.000000 0.109306 3.190539 131 | 1.000000 0.805161 4.289105 132 | 1.000000 0.344474 3.406602 133 | 1.000000 0.769116 4.251899 134 | 1.000000 0.182003 3.183214 135 | 1.000000 0.225972 3.342508 136 | 1.000000 0.413088 3.747926 137 | 1.000000 0.964444 4.499998 138 | 1.000000 0.203334 3.350089 139 | 1.000000 0.285574 3.539554 140 | 1.000000 0.850209 4.443465 141 | 1.000000 0.061561 3.290370 142 | 1.000000 0.426935 3.733302 143 | 1.000000 0.389376 3.614803 144 | 1.000000 0.096918 3.175132 145 | 1.000000 0.148938 3.164284 146 | 1.000000 0.893738 4.619629 147 | 1.000000 0.195527 3.426648 148 | 1.000000 0.407248 3.670722 149 | 1.000000 0.224357 3.412571 150 | 1.000000 0.045963 3.110330 151 | 1.000000 0.944647 4.647928 152 | 1.000000 0.756552 4.164515 153 | 1.000000 0.432098 3.730603 154 | 1.000000 0.990511 4.609868 155 | 1.000000 0.649699 4.094111 156 | 1.000000 0.584879 3.907636 157 | 1.000000 0.785934 4.240814 158 | 1.000000 0.029945 3.106915 159 | 1.000000 0.075747 3.201181 160 | 1.000000 0.408408 3.872302 161 | 1.000000 0.583851 3.860890 162 | 1.000000 0.497759 3.884108 163 | 1.000000 0.421301 3.696816 164 | 1.000000 0.140320 3.114540 165 | 1.000000 0.546465 3.791233 166 | 1.000000 0.843181 4.443487 167 | 1.000000 0.295390 3.535337 168 | 1.000000 0.825059 4.417975 169 | 1.000000 0.946343 4.742471 170 | 1.000000 0.350404 3.470964 171 | 1.000000 0.042787 3.113381 172 | 1.000000 0.352487 3.594600 173 | 1.000000 0.590736 3.914875 174 | 1.000000 0.120748 3.108492 175 | 1.000000 0.143140 3.152725 176 | 1.000000 0.511926 3.994118 177 | 1.000000 0.496358 3.933417 178 | 1.000000 0.382802 3.510829 179 | 1.000000 0.252464 3.498402 180 | 1.000000 0.845894 4.460441 181 | 1.000000 0.132023 3.245277 182 | 1.000000 0.442301 3.771067 183 | 1.000000 0.266889 3.434771 184 | 1.000000 0.008575 2.999612 185 | 1.000000 0.897632 4.454221 186 | 1.000000 0.533171 3.985348 187 | 1.000000 0.285243 3.557982 188 | 1.000000 0.377258 3.625972 189 | 1.000000 0.486995 3.922226 190 | 1.000000 0.305993 3.547421 191 | 1.000000 0.277528 3.580944 192 | 1.000000 0.750899 4.268081 193 | 1.000000 0.694756 4.278096 194 | 1.000000 0.870158 4.517640 195 | 1.000000 0.276457 3.555461 196 | 1.000000 0.017761 3.055026 197 | 1.000000 0.802046 4.354819 198 | 1.000000 0.559275 3.894387 199 | 1.000000 0.941305 4.597773 200 | 1.000000 0.856877 4.523616 201 | -------------------------------------------------------------------------------- /Regression/regression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def loadData(filename): 5 | dataArr = [] 6 | labelArr = [] 7 | fr = open(filename) 8 | for line in fr.readlines(): 9 | lineArr = line.strip().split('\t') 10 | row = [] 11 | for i in range(len(lineArr) - 1): 12 | row.append(float(lineArr[i])) 13 | dataArr.append(row) 14 | labelArr.append(float(lineArr[-1])) 15 | return np.mat(dataArr), np.mat(labelArr).T 16 | 17 | 18 | def 
standardRegression(dataMat, labelMat): 19 | xTx = dataMat.T * dataMat 20 | if np.linalg.det(xTx) == 0: 21 | raise NameError('x is a singular matrix') 22 | theta = xTx.I * dataMat.T * labelMat 23 | return theta 24 | 25 | 26 | def gradDescent(dataMat, labelMat, numIter=1000): 27 | alpha = 0.001 28 | m, n = np.shape(dataMat) 29 | theta = np.ones((n, 1)) # shape: n x 1 30 | for j in range(numIter): 31 | h = dataMat * theta 32 | error = h - labelMat # shape: m x 1 33 | # shape: (n x m) * (m x 1) 34 | theta = theta - alpha * dataMat.T * error 35 | return theta 36 | 37 | 38 | def stocGradDescent(dataMat, labelMat, numIter=1000): 39 | m, n = np.shape(dataMat) 40 | theta = np.ones((n, 1)) # shape: n x 1 41 | for j in range(numIter): 42 | dataIndex = range(m) 43 | for i in range(m): 44 | alpha = 4 / (1.0 + i + j) + 0.0001 45 | randId = int(np.random.uniform(0, len(dataIndex))) 46 | h = dataMat[randId] * theta # shape:(1 x n) * (n * 1) 47 | # shape: 1 x 1 48 | error = h - labelMat[randId] 49 | # shape: (n x 1) = (n x 1) * (1 x 1) 50 | theta = theta - alpha * dataMat[randId].T * error 51 | del(dataIndex[randId]) 52 | return theta 53 | 54 | 55 | def testForSimppleRegression(): 56 | dataMat, labelMat = loadData('ex0.txt') 57 | 58 | theta = standardRegression(dataMat, labelMat) 59 | print theta 60 | 61 | theta = gradDescent(dataMat, labelMat) 62 | print theta 63 | 64 | theta = stocGradDescent(dataMat, labelMat) 65 | print theta 66 | 67 | 68 | def gradLWLR(inX, dataMat, labelMat, k=0.01, numIter=500): 69 | alpha = 0.001 70 | m, n = np.shape(dataMat) 71 | theta = np.ones((n, 1)) 72 | W = np.eye(m) 73 | for i in range(m): 74 | diffMat = inX - dataMat[i, :] 75 | W[i, i] = np.exp((diffMat * diffMat.T) / (-2.0 * k ** 2)) 76 | for j in range(numIter): 77 | h = dataMat * theta 78 | error = h - labelMat 79 | theta = theta - alpha * dataMat.T * W * error 80 | return inX * theta 81 | 82 | def testLWLR(): 83 | dataMat, labelMat = loadData('ex0.txt') 84 | print labelMat[0] 85 | outY = gradLWLR(dataMat[0], dataMat, labelMat, 1) 86 | print outY 87 | 88 | 89 | def ridgeRegression(dataMat, labelMat, lam=0.2, numIter=500): 90 | m, n = np.shape(dataMat) 91 | alpha = 0.001 92 | theta = np.ones((n, 1)) 93 | for i in range(numIter): 94 | h = dataMat * theta 95 | error = h - labelMat 96 | theta = theta - alpha * (dataMat.T * error + lam * theta) 97 | return theta 98 | 99 | def testRidgeRegress(): 100 | dataMat, labelMat = loadData('ex0.txt') 101 | theta = ridgeRegression(dataMat, labelMat, 0.2) 102 | print theta 103 | 104 | if __name__ == '__main__': 105 | print 'start:' 106 | # testForSimppleRegression() 107 | # testLWLR() 108 | testRidgeRegress() 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /TreesRegression/bikeSpeedVsIq_test.txt: -------------------------------------------------------------------------------- 1 | 12.000000 121.010516 2 | 19.000000 157.337044 3 | 12.000000 116.031825 4 | 15.000000 132.124872 5 | 2.000000 52.719612 6 | 6.000000 39.058368 7 | 3.000000 50.757763 8 | 20.000000 166.740333 9 | 11.000000 115.808227 10 | 21.000000 165.582995 11 | 3.000000 41.956087 12 | 3.000000 34.432370 13 | 13.000000 116.954676 14 | 1.000000 32.112553 15 | 7.000000 50.380243 16 | 7.000000 94.107791 17 | 23.000000 188.943179 18 | 18.000000 152.637773 19 | 9.000000 104.122082 20 | 18.000000 127.805226 21 | 0.000000 83.083232 22 | 15.000000 148.180104 23 | 3.000000 38.480247 24 | 8.000000 77.597839 25 | 7.000000 75.625803 26 | 11.000000 124.620208 27 | 13.000000 
125.186698 28 | 5.000000 51.165922 29 | 3.000000 31.179113 30 | 15.000000 132.505727 31 | 19.000000 137.978043 32 | 9.000000 106.481123 33 | 20.000000 172.149955 34 | 11.000000 104.116556 35 | 4.000000 22.457996 36 | 20.000000 175.735047 37 | 18.000000 165.350412 38 | 22.000000 177.461724 39 | 16.000000 138.672986 40 | 17.000000 156.791788 41 | 19.000000 150.327544 42 | 19.000000 156.992196 43 | 23.000000 163.624262 44 | 8.000000 92.537227 45 | 3.000000 32.341399 46 | 16.000000 144.445614 47 | 11.000000 119.985586 48 | 16.000000 145.149335 49 | 12.000000 113.284662 50 | 5.000000 47.742716 51 | 11.000000 115.852585 52 | 3.000000 31.579325 53 | 1.000000 43.758671 54 | 1.000000 61.049125 55 | 13.000000 132.751826 56 | 23.000000 163.233087 57 | 12.000000 115.134296 58 | 8.000000 91.370839 59 | 8.000000 86.137955 60 | 14.000000 120.857934 61 | 3.000000 33.777477 62 | 10.000000 110.831763 63 | 10.000000 104.174775 64 | 20.000000 155.920696 65 | 4.000000 30.619132 66 | 0.000000 71.880474 67 | 7.000000 86.399516 68 | 7.000000 72.632906 69 | 5.000000 58.632985 70 | 18.000000 143.584511 71 | 23.000000 187.059504 72 | 6.000000 65.067119 73 | 6.000000 69.110280 74 | 19.000000 142.388056 75 | 15.000000 137.174489 76 | 21.000000 159.719092 77 | 9.000000 102.179638 78 | 20.000000 176.416294 79 | 21.000000 146.516385 80 | 18.000000 147.808343 81 | 23.000000 154.790810 82 | 16.000000 137.385285 83 | 18.000000 166.885975 84 | 15.000000 136.989000 85 | 20.000000 144.668679 86 | 14.000000 137.060671 87 | 19.000000 140.468283 88 | 11.000000 98.344084 89 | 16.000000 132.497910 90 | 1.000000 59.143101 91 | 20.000000 152.299381 92 | 13.000000 134.487271 93 | 0.000000 77.805718 94 | 3.000000 28.543764 95 | 10.000000 97.751817 96 | 4.000000 41.223659 97 | 11.000000 110.017015 98 | 12.000000 119.391386 99 | 20.000000 158.872126 100 | 2.000000 38.776222 101 | 19.000000 150.496148 102 | 15.000000 131.505967 103 | 22.000000 179.856157 104 | 13.000000 143.090102 105 | 14.000000 142.611861 106 | 13.000000 120.757410 107 | 4.000000 27.929324 108 | 16.000000 151.530849 109 | 15.000000 148.149702 110 | 5.000000 44.188084 111 | 16.000000 141.135406 112 | 12.000000 119.817665 113 | 8.000000 80.991524 114 | 3.000000 29.308640 115 | 6.000000 48.203468 116 | 8.000000 92.179834 117 | 22.000000 162.720371 118 | 10.000000 91.971158 119 | 2.000000 33.481943 120 | 8.000000 88.528612 121 | 1.000000 54.042173 122 | 8.000000 92.002928 123 | 5.000000 45.614646 124 | 3.000000 34.319635 125 | 14.000000 129.140558 126 | 17.000000 146.807901 127 | 17.000000 157.694058 128 | 4.000000 37.080929 129 | 20.000000 169.942381 130 | 10.000000 114.675638 131 | 5.000000 34.913029 132 | 14.000000 137.889747 133 | 0.000000 79.043129 134 | 16.000000 139.084390 135 | 6.000000 53.340135 136 | 13.000000 142.772612 137 | 0.000000 73.103173 138 | 3.000000 37.717487 139 | 15.000000 134.116395 140 | 18.000000 138.748257 141 | 23.000000 180.779121 142 | 10.000000 93.721894 143 | 23.000000 166.958335 144 | 6.000000 74.473589 145 | 6.000000 73.006291 146 | 3.000000 34.178656 147 | 1.000000 33.395482 148 | 22.000000 149.933384 149 | 18.000000 154.858982 150 | 6.000000 66.121084 151 | 1.000000 60.816800 152 | 5.000000 55.681020 153 | 6.000000 61.251558 154 | 15.000000 125.452206 155 | 16.000000 134.310255 156 | 19.000000 167.999681 157 | 5.000000 40.074830 158 | 22.000000 162.658997 159 | 12.000000 109.473909 160 | 4.000000 44.743405 161 | 11.000000 122.419496 162 | 14.000000 139.852014 163 | 21.000000 160.045407 164 | 15.000000 131.999358 165 | 15.000000 135.577799 
166 | 20.000000 173.494629 167 | 8.000000 82.497177 168 | 12.000000 123.122032 169 | 10.000000 97.592026 170 | 16.000000 141.345706 171 | 8.000000 79.588881 172 | 3.000000 54.308878 173 | 4.000000 36.112937 174 | 19.000000 165.005336 175 | 23.000000 172.198031 176 | 15.000000 127.699625 177 | 1.000000 47.305217 178 | 13.000000 115.489379 179 | 8.000000 103.956569 180 | 4.000000 53.669477 181 | 0.000000 76.220652 182 | 12.000000 114.153306 183 | 6.000000 74.608728 184 | 3.000000 41.339299 185 | 5.000000 21.944048 186 | 22.000000 181.455655 187 | 20.000000 171.691444 188 | 10.000000 104.299002 189 | 21.000000 168.307123 190 | 20.000000 169.556523 191 | 23.000000 175.960552 192 | 1.000000 42.554778 193 | 14.000000 137.286185 194 | 16.000000 136.126561 195 | 12.000000 119.269042 196 | 6.000000 63.426977 197 | 4.000000 27.728212 198 | 4.000000 32.687588 199 | 23.000000 151.153204 200 | 15.000000 129.767331 201 | -------------------------------------------------------------------------------- /TreesRegression/bikeSpeedVsIq_train.txt: -------------------------------------------------------------------------------- 1 | 3.000000 46.852122 2 | 23.000000 178.676107 3 | 0.000000 86.154024 4 | 6.000000 68.707614 5 | 15.000000 139.737693 6 | 17.000000 141.988903 7 | 12.000000 94.477135 8 | 8.000000 86.083788 9 | 9.000000 97.265824 10 | 7.000000 80.400027 11 | 8.000000 83.414554 12 | 1.000000 52.525471 13 | 16.000000 127.060008 14 | 9.000000 101.639269 15 | 14.000000 146.412680 16 | 15.000000 144.157101 17 | 17.000000 152.699910 18 | 19.000000 136.669023 19 | 21.000000 166.971736 20 | 21.000000 165.467251 21 | 3.000000 38.455193 22 | 6.000000 75.557721 23 | 4.000000 22.171763 24 | 5.000000 50.321915 25 | 0.000000 74.412428 26 | 5.000000 42.052392 27 | 1.000000 42.489057 28 | 14.000000 139.185416 29 | 21.000000 140.713725 30 | 5.000000 63.222944 31 | 5.000000 56.294626 32 | 9.000000 91.674826 33 | 22.000000 173.497655 34 | 17.000000 152.692482 35 | 9.000000 113.920633 36 | 1.000000 51.552411 37 | 9.000000 100.075315 38 | 16.000000 137.803868 39 | 18.000000 135.925777 40 | 3.000000 45.550762 41 | 16.000000 149.933224 42 | 2.000000 27.914173 43 | 6.000000 62.103546 44 | 20.000000 173.942381 45 | 12.000000 119.200505 46 | 6.000000 70.730214 47 | 16.000000 156.260832 48 | 15.000000 132.467643 49 | 19.000000 161.164086 50 | 17.000000 138.031844 51 | 23.000000 169.747881 52 | 11.000000 116.761920 53 | 4.000000 34.305905 54 | 6.000000 68.841160 55 | 10.000000 119.535227 56 | 20.000000 158.104763 57 | 18.000000 138.390511 58 | 5.000000 59.375794 59 | 7.000000 80.802300 60 | 11.000000 108.611485 61 | 10.000000 91.169028 62 | 15.000000 154.104819 63 | 5.000000 51.100287 64 | 3.000000 32.334330 65 | 15.000000 150.551655 66 | 10.000000 111.023073 67 | 0.000000 87.489950 68 | 2.000000 46.726299 69 | 7.000000 92.540440 70 | 15.000000 135.715438 71 | 19.000000 152.960552 72 | 19.000000 162.789223 73 | 21.000000 167.176240 74 | 22.000000 164.323358 75 | 12.000000 104.823071 76 | 1.000000 35.554328 77 | 11.000000 114.784640 78 | 1.000000 36.819570 79 | 12.000000 130.266826 80 | 12.000000 126.053312 81 | 18.000000 153.378289 82 | 7.000000 70.089159 83 | 15.000000 139.528624 84 | 19.000000 157.137999 85 | 23.000000 183.595248 86 | 7.000000 73.431043 87 | 11.000000 128.176167 88 | 22.000000 183.181247 89 | 13.000000 112.685801 90 | 18.000000 161.634783 91 | 6.000000 63.169478 92 | 7.000000 63.393975 93 | 19.000000 165.779578 94 | 14.000000 143.973398 95 | 22.000000 185.131852 96 | 3.000000 45.275591 97 | 6.000000 62.018003 
98 | 0.000000 83.193398 99 | 7.000000 76.847802 100 | 19.000000 147.087386 101 | 7.000000 62.812086 102 | 1.000000 49.910068 103 | 11.000000 102.169335 104 | 11.000000 105.108121 105 | 6.000000 63.429817 106 | 12.000000 121.301542 107 | 17.000000 163.253962 108 | 13.000000 119.588698 109 | 0.000000 87.333807 110 | 20.000000 144.484066 111 | 21.000000 168.792482 112 | 23.000000 159.751246 113 | 20.000000 162.843592 114 | 14.000000 145.664069 115 | 19.000000 146.838515 116 | 12.000000 132.049377 117 | 18.000000 155.756119 118 | 22.000000 155.686345 119 | 7.000000 73.913958 120 | 1.000000 66.761881 121 | 7.000000 65.855450 122 | 6.000000 56.271026 123 | 19.000000 155.308523 124 | 12.000000 124.372873 125 | 17.000000 136.025960 126 | 14.000000 132.996861 127 | 21.000000 172.639791 128 | 17.000000 135.672594 129 | 8.000000 90.323742 130 | 5.000000 62.462698 131 | 16.000000 159.048794 132 | 14.000000 139.991227 133 | 3.000000 37.026678 134 | 9.000000 100.839901 135 | 9.000000 93.097395 136 | 15.000000 123.645221 137 | 15.000000 147.327185 138 | 1.000000 40.055830 139 | 0.000000 88.192829 140 | 17.000000 139.174517 141 | 22.000000 169.354493 142 | 17.000000 136.354272 143 | 9.000000 90.692829 144 | 7.000000 63.987997 145 | 14.000000 128.972231 146 | 10.000000 108.433394 147 | 2.000000 49.321034 148 | 19.000000 171.615671 149 | 9.000000 97.894855 150 | 0.000000 68.962453 151 | 9.000000 72.063371 152 | 22.000000 157.000070 153 | 12.000000 114.461754 154 | 6.000000 58.239465 155 | 9.000000 104.601048 156 | 8.000000 90.772359 157 | 22.000000 164.428791 158 | 5.000000 34.804083 159 | 5.000000 37.089459 160 | 22.000000 177.987605 161 | 10.000000 89.439608 162 | 6.000000 70.711362 163 | 23.000000 181.731482 164 | 20.000000 151.538932 165 | 7.000000 66.067228 166 | 6.000000 61.565125 167 | 20.000000 184.441687 168 | 9.000000 91.569158 169 | 9.000000 98.833425 170 | 17.000000 144.352866 171 | 9.000000 94.498314 172 | 15.000000 121.922732 173 | 18.000000 166.408274 174 | 10.000000 89.571299 175 | 8.000000 75.373772 176 | 22.000000 161.001478 177 | 8.000000 90.594227 178 | 5.000000 57.180933 179 | 20.000000 161.643007 180 | 8.000000 87.197370 181 | 8.000000 95.584308 182 | 15.000000 126.207221 183 | 7.000000 84.528209 184 | 18.000000 161.056986 185 | 10.000000 86.762615 186 | 1.000000 33.325906 187 | 9.000000 105.095502 188 | 2.000000 22.440421 189 | 9.000000 93.449284 190 | 14.000000 106.249595 191 | 21.000000 163.254385 192 | 22.000000 161.746628 193 | 20.000000 152.973085 194 | 17.000000 122.918987 195 | 7.000000 58.536412 196 | 1.000000 45.013277 197 | 13.000000 137.294148 198 | 10.000000 88.123737 199 | 2.000000 45.847376 200 | 20.000000 163.385797 201 | -------------------------------------------------------------------------------- /TreesRegression/cart.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | import time 4 | 5 | 6 | def loadData(filename): 7 | dataArr = [] 8 | fr = open(filename) 9 | for line in fr.readlines(): 10 | lineArr = line.strip().split('\t') 11 | row = map(float, lineArr) 12 | dataArr.append(row) 13 | return np.mat(dataArr) 14 | 15 | 16 | def binSplit(dataMat, featureId, value): 17 | idx0 = np.nonzero(dataMat[:, featureId] > value)[0] 18 | idx1 = np.nonzero(dataMat[:, featureId] <= value)[0] 19 | return dataMat[idx0], dataMat[idx1] 20 | 21 | 22 | def regLeaf(dataMat): 23 | # caluate the mean value for each leaf 24 | return np.mean(dataMat[:, -1]) 25 | 26 | 27 | def regError(dataMat): 28 | # calculate the 
variance of the target times the number of rows (the node's total squared error) 29 | return np.var(dataMat[:, -1]) * np.shape(dataMat)[0] 30 | 31 | 32 | def chooseBestFeature(dataMat, leafType=regLeaf, errType=regError, ops=(1, 4)): 33 | """ 34 | Parameters 35 | ------------ 36 | dataMat : numpy.matrix 37 | leafType : regLeaf(dataMat), optional 38 | errType : regError(dataMat), optional 39 | ops: (tolS, tolN), optional 40 | for pre-pruning trees 41 | tolS is a tolerance on the error reduction 42 | tolN is the minimum number of instances to include in a split 43 | """ 44 | tolS = ops[0] 45 | tolN = ops[1] 46 | # if all values are the same, quit and return the values 47 | if len(set(dataMat[:, -1].T.tolist()[0])) == 1: 48 | return None, leafType(dataMat) 49 | m, n = np.shape(dataMat) 50 | S = errType(dataMat) # total error of dataMat 51 | bestS = np.inf 52 | bestIndex = 0 53 | bestValue = 0 54 | for featIndex in range(n - 1): 55 | for featValue in set(dataMat[:, featIndex].T.tolist()[0]): 56 | mat0, mat1 = binSplit(dataMat, featIndex, featValue) 57 | if np.shape(mat0)[0] < tolN or np.shape(mat1)[0] < tolN: 58 | continue 59 | newS = errType(mat0) + errType(mat1) 60 | if newS < bestS: 61 | bestS = newS 62 | bestIndex = featIndex 63 | bestValue = featValue 64 | 65 | # if the decrease (S - bestS) is less than threshold tolS, 66 | # then stop splitting 67 | if (S - bestS) < tolS: 68 | return None, leafType(dataMat) 69 | 70 | mat0, mat1 = binSplit(dataMat, bestIndex, bestValue) 71 | if np.shape(mat0)[0] < tolN or np.shape(mat1)[0] < tolN: 72 | return None, leafType(dataMat) 73 | return bestIndex, bestValue 74 | 75 | 76 | def createTree(dataMat, leafType=regLeaf, errType=regError, ops=(1, 4)): 77 | featIndex, featValue = chooseBestFeature(dataMat, leafType, errType, ops) 78 | if featIndex is None: 79 | return featValue 80 | 81 | retTree = {} 82 | retTree['featIndex'] = featIndex 83 | retTree['featValue'] = featValue 84 | 85 | nodeStack = [] 86 | nodeStack.append(retTree) 87 | dataStack = [] 88 | dataStack.append(dataMat) 89 | while len(nodeStack) != 0: 90 | curNode = nodeStack.pop() 91 | if curNode['featIndex'] is None: 92 | continue 93 | # cannot always binSplit the full dataMat; keep each pending node's subset on a stack 94 | curMat = dataStack.pop() 95 | leftMat, rightMat = binSplit(curMat, curNode['featIndex'], curNode['featValue']) 96 | leftIndex, leftValue = chooseBestFeature(leftMat, leafType, errType, ops) 97 | rightIndex, rightValue = chooseBestFeature(rightMat, leafType, errType, ops) 98 | # check whether each child split returned a leaf (its index is None) 99 | if rightIndex is None: 100 | curNode['right'] = rightValue 101 | else: 102 | rightTree = {} 103 | rightTree['featIndex'] = rightIndex 104 | rightTree['featValue'] = rightValue 105 | curNode['right'] = rightTree 106 | dataStack.append(rightMat) 107 | nodeStack.append(rightTree) 108 | 109 | if leftIndex is None: 110 | curNode['left'] = leftValue 111 | else: 112 | leftTree = {} 113 | leftTree['featIndex'] = leftIndex 114 | leftTree['featValue'] = leftValue 115 | curNode['left'] = leftTree 116 | dataStack.append(leftMat) 117 | nodeStack.append(leftTree) 118 | 119 | return retTree 120 | 121 | 122 | def isTree(obj): 123 | return type(obj).__name__ == 'dict' 124 | 125 | 126 | def getMean2(tree): 127 | """ 128 | @deprecated 129 | non-recursive algorithm, but costs more time on small data sets 130 | """ 131 | nodeStack = [] 132 | nodeStack.append(tree) 133 | stateStack = [] 134 | stateStack.append(None) 135 | curNode = tree 136 | while len(nodeStack) != 0: 137 | if isTree(curNode['left']): 138 | curNode = curNode['left'] 139 | nodeStack.append(curNode) 140 | stateStack.append('left')
141 | if isTree(curNode['right']): 142 | curNode = curNode['right'] 143 | nodeStack.append(curNode) 144 | stateStack.append('right') 145 | if not isTree(curNode['left']) and not isTree(curNode['right']): 146 | curNode = nodeStack.pop() 147 | tmp = stateStack.pop() 148 | parent = nodeStack[-1] if len(nodeStack) != 0 else None 149 | if tmp is not None: 150 | parent[tmp] = (curNode['left'] + curNode['right']) / 2.0 151 | curNode = parent 152 | else: 153 | tree = (curNode['left'] + curNode['right']) / 2.0 154 | return tree 155 | 156 | 157 | def getMean(tree): 158 | if isTree(tree['right']): 159 | tree['right'] = getMean(tree['right']) 160 | if isTree(tree['left']): 161 | tree['left'] = getMean(tree['left']) 162 | return (tree['left'] + tree['right']) / 2.0 163 | 164 | 165 | def prune(tree, dataMat): 166 | # no data to split 167 | if np.shape(dataMat)[0] == 0: 168 | return getMean(tree) 169 | if isTree(tree['left']) or isTree(tree['right']): 170 | lMat, rMat = binSplit(dataMat, tree['featIndex'], tree['featValue']) 171 | if isTree(tree['left']): 172 | tree['left'] = prune(tree['left'], lMat) 173 | if isTree(tree['right']): 174 | tree['right'] = prune(tree['right'], rMat) 175 | 176 | if not isTree(tree['left']) and not isTree(tree['right']): 177 | lMat, rMat = binSplit(dataMat, tree['featIndex'], tree['featValue']) 178 | errNoMerge = np.sum(np.power(lMat[:, -1] - tree['left'], 2)) 179 | errNoMerge = np.sum(np.power(rMat[:, -1] - tree['right'], 2)) + errNoMerge 180 | treeMean = (tree['left'] + tree['right']) / 2.0 181 | errMerge = np.sum(np.power(dataMat[:, -1] - treeMean, 2)) 182 | if errMerge < errNoMerge: 183 | print 'merging' 184 | return treeMean 185 | else: 186 | return tree 187 | else: 188 | return tree 189 | 190 | 191 | if __name__ == '__main__': 192 | print 'start' 193 | dataMat = loadData('ex2test.txt') 194 | # subMat1, subMat2 = binSplit(dataMat, 2, 2.4) 195 | retTree = createTree(dataMat, ops=(0, 1)) 196 | retTree = prune(retTree, dataMat) 197 | print json.dumps(retTree, indent=4) 198 | # start = time.clock() 199 | # print getMean2(retTree) 200 | # print 'time:', time.clock() - start 201 | # start = time.clock() 202 | # print getMean(retTree) 203 | # print 'time:', time.clock() - start 204 | -------------------------------------------------------------------------------- /TreesRegression/ex0.txt: -------------------------------------------------------------------------------- 1 | 1.000000 0.409175 1.883180 2 | 1.000000 0.182603 0.063908 3 | 1.000000 0.663687 3.042257 4 | 1.000000 0.517395 2.305004 5 | 1.000000 0.013643 -0.067698 6 | 1.000000 0.469643 1.662809 7 | 1.000000 0.725426 3.275749 8 | 1.000000 0.394350 1.118077 9 | 1.000000 0.507760 2.095059 10 | 1.000000 0.237395 1.181912 11 | 1.000000 0.057534 0.221663 12 | 1.000000 0.369820 0.938453 13 | 1.000000 0.976819 4.149409 14 | 1.000000 0.616051 3.105444 15 | 1.000000 0.413700 1.896278 16 | 1.000000 0.105279 -0.121345 17 | 1.000000 0.670273 3.161652 18 | 1.000000 0.952758 4.135358 19 | 1.000000 0.272316 0.859063 20 | 1.000000 0.303697 1.170272 21 | 1.000000 0.486698 1.687960 22 | 1.000000 0.511810 1.979745 23 | 1.000000 0.195865 0.068690 24 | 1.000000 0.986769 4.052137 25 | 1.000000 0.785623 3.156316 26 | 1.000000 0.797583 2.950630 27 | 1.000000 0.081306 0.068935 28 | 1.000000 0.659753 2.854020 29 | 1.000000 0.375270 0.999743 30 | 1.000000 0.819136 4.048082 31 | 1.000000 0.142432 0.230923 32 | 1.000000 0.215112 0.816693 33 | 1.000000 0.041270 0.130713 34 | 1.000000 0.044136 -0.537706 35 | 1.000000 0.131337 -0.339109 36 | 1.000000 
0.463444 2.124538 37 | 1.000000 0.671905 2.708292 38 | 1.000000 0.946559 4.017390 39 | 1.000000 0.904176 4.004021 40 | 1.000000 0.306674 1.022555 41 | 1.000000 0.819006 3.657442 42 | 1.000000 0.845472 4.073619 43 | 1.000000 0.156258 0.011994 44 | 1.000000 0.857185 3.640429 45 | 1.000000 0.400158 1.808497 46 | 1.000000 0.375395 1.431404 47 | 1.000000 0.885807 3.935544 48 | 1.000000 0.239960 1.162152 49 | 1.000000 0.148640 -0.227330 50 | 1.000000 0.143143 -0.068728 51 | 1.000000 0.321582 0.825051 52 | 1.000000 0.509393 2.008645 53 | 1.000000 0.355891 0.664566 54 | 1.000000 0.938633 4.180202 55 | 1.000000 0.348057 0.864845 56 | 1.000000 0.438898 1.851174 57 | 1.000000 0.781419 2.761993 58 | 1.000000 0.911333 4.075914 59 | 1.000000 0.032469 0.110229 60 | 1.000000 0.499985 2.181987 61 | 1.000000 0.771663 3.152528 62 | 1.000000 0.670361 3.046564 63 | 1.000000 0.176202 0.128954 64 | 1.000000 0.392170 1.062726 65 | 1.000000 0.911188 3.651742 66 | 1.000000 0.872288 4.401950 67 | 1.000000 0.733107 3.022888 68 | 1.000000 0.610239 2.874917 69 | 1.000000 0.732739 2.946801 70 | 1.000000 0.714825 2.893644 71 | 1.000000 0.076386 0.072131 72 | 1.000000 0.559009 1.748275 73 | 1.000000 0.427258 1.912047 74 | 1.000000 0.841875 3.710686 75 | 1.000000 0.558918 1.719148 76 | 1.000000 0.533241 2.174090 77 | 1.000000 0.956665 3.656357 78 | 1.000000 0.620393 3.522504 79 | 1.000000 0.566120 2.234126 80 | 1.000000 0.523258 1.859772 81 | 1.000000 0.476884 2.097017 82 | 1.000000 0.176408 0.001794 83 | 1.000000 0.303094 1.231928 84 | 1.000000 0.609731 2.953862 85 | 1.000000 0.017774 -0.116803 86 | 1.000000 0.622616 2.638864 87 | 1.000000 0.886539 3.943428 88 | 1.000000 0.148654 -0.328513 89 | 1.000000 0.104350 -0.099866 90 | 1.000000 0.116868 -0.030836 91 | 1.000000 0.516514 2.359786 92 | 1.000000 0.664896 3.212581 93 | 1.000000 0.004327 0.188975 94 | 1.000000 0.425559 1.904109 95 | 1.000000 0.743671 3.007114 96 | 1.000000 0.935185 3.845834 97 | 1.000000 0.697300 3.079411 98 | 1.000000 0.444551 1.939739 99 | 1.000000 0.683753 2.880078 100 | 1.000000 0.755993 3.063577 101 | 1.000000 0.902690 4.116296 102 | 1.000000 0.094491 -0.240963 103 | 1.000000 0.873831 4.066299 104 | 1.000000 0.991810 4.011834 105 | 1.000000 0.185611 0.077710 106 | 1.000000 0.694551 3.103069 107 | 1.000000 0.657275 2.811897 108 | 1.000000 0.118746 -0.104630 109 | 1.000000 0.084302 0.025216 110 | 1.000000 0.945341 4.330063 111 | 1.000000 0.785827 3.087091 112 | 1.000000 0.530933 2.269988 113 | 1.000000 0.879594 4.010701 114 | 1.000000 0.652770 3.119542 115 | 1.000000 0.879338 3.723411 116 | 1.000000 0.764739 2.792078 117 | 1.000000 0.504884 2.192787 118 | 1.000000 0.554203 2.081305 119 | 1.000000 0.493209 1.714463 120 | 1.000000 0.363783 0.885854 121 | 1.000000 0.316465 1.028187 122 | 1.000000 0.580283 1.951497 123 | 1.000000 0.542898 1.709427 124 | 1.000000 0.112661 0.144068 125 | 1.000000 0.816742 3.880240 126 | 1.000000 0.234175 0.921876 127 | 1.000000 0.402804 1.979316 128 | 1.000000 0.709423 3.085768 129 | 1.000000 0.867298 3.476122 130 | 1.000000 0.993392 3.993679 131 | 1.000000 0.711580 3.077880 132 | 1.000000 0.133643 -0.105365 133 | 1.000000 0.052031 -0.164703 134 | 1.000000 0.366806 1.096814 135 | 1.000000 0.697521 3.092879 136 | 1.000000 0.787262 2.987926 137 | 1.000000 0.476710 2.061264 138 | 1.000000 0.721417 2.746854 139 | 1.000000 0.230376 0.716710 140 | 1.000000 0.104397 0.103831 141 | 1.000000 0.197834 0.023776 142 | 1.000000 0.129291 -0.033299 143 | 1.000000 0.528528 1.942286 144 | 1.000000 0.009493 -0.006338 145 | 1.000000 0.998533 
3.808753 146 | 1.000000 0.363522 0.652799 147 | 1.000000 0.901386 4.053747 148 | 1.000000 0.832693 4.569290 149 | 1.000000 0.119002 -0.032773 150 | 1.000000 0.487638 2.066236 151 | 1.000000 0.153667 0.222785 152 | 1.000000 0.238619 1.089268 153 | 1.000000 0.208197 1.487788 154 | 1.000000 0.750921 2.852033 155 | 1.000000 0.183403 0.024486 156 | 1.000000 0.995608 3.737750 157 | 1.000000 0.151311 0.045017 158 | 1.000000 0.126804 0.001238 159 | 1.000000 0.983153 3.892763 160 | 1.000000 0.772495 2.819376 161 | 1.000000 0.784133 2.830665 162 | 1.000000 0.056934 0.234633 163 | 1.000000 0.425584 1.810782 164 | 1.000000 0.998709 4.237235 165 | 1.000000 0.707815 3.034768 166 | 1.000000 0.413816 1.742106 167 | 1.000000 0.217152 1.169250 168 | 1.000000 0.360503 0.831165 169 | 1.000000 0.977989 3.729376 170 | 1.000000 0.507953 1.823205 171 | 1.000000 0.920771 4.021970 172 | 1.000000 0.210542 1.262939 173 | 1.000000 0.928611 4.159518 174 | 1.000000 0.580373 2.039114 175 | 1.000000 0.841390 4.101837 176 | 1.000000 0.681530 2.778672 177 | 1.000000 0.292795 1.228284 178 | 1.000000 0.456918 1.736620 179 | 1.000000 0.134128 -0.195046 180 | 1.000000 0.016241 -0.063215 181 | 1.000000 0.691214 3.305268 182 | 1.000000 0.582002 2.063627 183 | 1.000000 0.303102 0.898840 184 | 1.000000 0.622598 2.701692 185 | 1.000000 0.525024 1.992909 186 | 1.000000 0.996775 3.811393 187 | 1.000000 0.881025 4.353857 188 | 1.000000 0.723457 2.635641 189 | 1.000000 0.676346 2.856311 190 | 1.000000 0.254625 1.352682 191 | 1.000000 0.488632 2.336459 192 | 1.000000 0.519875 2.111651 193 | 1.000000 0.160176 0.121726 194 | 1.000000 0.609483 3.264605 195 | 1.000000 0.531881 2.103446 196 | 1.000000 0.321632 0.896855 197 | 1.000000 0.845148 4.220850 198 | 1.000000 0.012003 -0.217283 199 | 1.000000 0.018883 -0.300577 200 | 1.000000 0.071476 0.006014 201 | -------------------------------------------------------------------------------- /TreesRegression/ex00.txt: -------------------------------------------------------------------------------- 1 | 0.036098 0.155096 2 | 0.993349 1.077553 3 | 0.530897 0.893462 4 | 0.712386 0.564858 5 | 0.343554 -0.371700 6 | 0.098016 -0.332760 7 | 0.691115 0.834391 8 | 0.091358 0.099935 9 | 0.727098 1.000567 10 | 0.951949 0.945255 11 | 0.768596 0.760219 12 | 0.541314 0.893748 13 | 0.146366 0.034283 14 | 0.673195 0.915077 15 | 0.183510 0.184843 16 | 0.339563 0.206783 17 | 0.517921 1.493586 18 | 0.703755 1.101678 19 | 0.008307 0.069976 20 | 0.243909 -0.029467 21 | 0.306964 -0.177321 22 | 0.036492 0.408155 23 | 0.295511 0.002882 24 | 0.837522 1.229373 25 | 0.202054 -0.087744 26 | 0.919384 1.029889 27 | 0.377201 -0.243550 28 | 0.814825 1.095206 29 | 0.611270 0.982036 30 | 0.072243 -0.420983 31 | 0.410230 0.331722 32 | 0.869077 1.114825 33 | 0.620599 1.334421 34 | 0.101149 0.068834 35 | 0.820802 1.325907 36 | 0.520044 0.961983 37 | 0.488130 -0.097791 38 | 0.819823 0.835264 39 | 0.975022 0.673579 40 | 0.953112 1.064690 41 | 0.475976 -0.163707 42 | 0.273147 -0.455219 43 | 0.804586 0.924033 44 | 0.074795 -0.349692 45 | 0.625336 0.623696 46 | 0.656218 0.958506 47 | 0.834078 1.010580 48 | 0.781930 1.074488 49 | 0.009849 0.056594 50 | 0.302217 -0.148650 51 | 0.678287 0.907727 52 | 0.180506 0.103676 53 | 0.193641 -0.327589 54 | 0.343479 0.175264 55 | 0.145809 0.136979 56 | 0.996757 1.035533 57 | 0.590210 1.336661 58 | 0.238070 -0.358459 59 | 0.561362 1.070529 60 | 0.377597 0.088505 61 | 0.099142 0.025280 62 | 0.539558 1.053846 63 | 0.790240 0.533214 64 | 0.242204 0.209359 65 | 0.152324 0.132858 66 | 0.252649 -0.055613 67 | 
0.895930 1.077275 68 | 0.133300 -0.223143 69 | 0.559763 1.253151 70 | 0.643665 1.024241 71 | 0.877241 0.797005 72 | 0.613765 1.621091 73 | 0.645762 1.026886 74 | 0.651376 1.315384 75 | 0.697718 1.212434 76 | 0.742527 1.087056 77 | 0.901056 1.055900 78 | 0.362314 -0.556464 79 | 0.948268 0.631862 80 | 0.000234 0.060903 81 | 0.750078 0.906291 82 | 0.325412 -0.219245 83 | 0.726828 1.017112 84 | 0.348013 0.048939 85 | 0.458121 -0.061456 86 | 0.280738 -0.228880 87 | 0.567704 0.969058 88 | 0.750918 0.748104 89 | 0.575805 0.899090 90 | 0.507940 1.107265 91 | 0.071769 -0.110946 92 | 0.553520 1.391273 93 | 0.401152 -0.121640 94 | 0.406649 -0.366317 95 | 0.652121 1.004346 96 | 0.347837 -0.153405 97 | 0.081931 -0.269756 98 | 0.821648 1.280895 99 | 0.048014 0.064496 100 | 0.130962 0.184241 101 | 0.773422 1.125943 102 | 0.789625 0.552614 103 | 0.096994 0.227167 104 | 0.625791 1.244731 105 | 0.589575 1.185812 106 | 0.323181 0.180811 107 | 0.822443 1.086648 108 | 0.360323 -0.204830 109 | 0.950153 1.022906 110 | 0.527505 0.879560 111 | 0.860049 0.717490 112 | 0.007044 0.094150 113 | 0.438367 0.034014 114 | 0.574573 1.066130 115 | 0.536689 0.867284 116 | 0.782167 0.886049 117 | 0.989888 0.744207 118 | 0.761474 1.058262 119 | 0.985425 1.227946 120 | 0.132543 -0.329372 121 | 0.346986 -0.150389 122 | 0.768784 0.899705 123 | 0.848921 1.170959 124 | 0.449280 0.069098 125 | 0.066172 0.052439 126 | 0.813719 0.706601 127 | 0.661923 0.767040 128 | 0.529491 1.022206 129 | 0.846455 0.720030 130 | 0.448656 0.026974 131 | 0.795072 0.965721 132 | 0.118156 -0.077409 133 | 0.084248 -0.019547 134 | 0.845815 0.952617 135 | 0.576946 1.234129 136 | 0.772083 1.299018 137 | 0.696648 0.845423 138 | 0.595012 1.213435 139 | 0.648675 1.287407 140 | 0.897094 1.240209 141 | 0.552990 1.036158 142 | 0.332982 0.210084 143 | 0.065615 -0.306970 144 | 0.278661 0.253628 145 | 0.773168 1.140917 146 | 0.203693 -0.064036 147 | 0.355688 -0.119399 148 | 0.988852 1.069062 149 | 0.518735 1.037179 150 | 0.514563 1.156648 151 | 0.976414 0.862911 152 | 0.919074 1.123413 153 | 0.697777 0.827805 154 | 0.928097 0.883225 155 | 0.900272 0.996871 156 | 0.344102 -0.061539 157 | 0.148049 0.204298 158 | 0.130052 -0.026167 159 | 0.302001 0.317135 160 | 0.337100 0.026332 161 | 0.314924 -0.001952 162 | 0.269681 -0.165971 163 | 0.196005 -0.048847 164 | 0.129061 0.305107 165 | 0.936783 1.026258 166 | 0.305540 -0.115991 167 | 0.683921 1.414382 168 | 0.622398 0.766330 169 | 0.902532 0.861601 170 | 0.712503 0.933490 171 | 0.590062 0.705531 172 | 0.723120 1.307248 173 | 0.188218 0.113685 174 | 0.643601 0.782552 175 | 0.520207 1.209557 176 | 0.233115 -0.348147 177 | 0.465625 -0.152940 178 | 0.884512 1.117833 179 | 0.663200 0.701634 180 | 0.268857 0.073447 181 | 0.729234 0.931956 182 | 0.429664 -0.188659 183 | 0.737189 1.200781 184 | 0.378595 -0.296094 185 | 0.930173 1.035645 186 | 0.774301 0.836763 187 | 0.273940 -0.085713 188 | 0.824442 1.082153 189 | 0.626011 0.840544 190 | 0.679390 1.307217 191 | 0.578252 0.921885 192 | 0.785541 1.165296 193 | 0.597409 0.974770 194 | 0.014083 -0.132525 195 | 0.663870 1.187129 196 | 0.552381 1.369630 197 | 0.683886 0.999985 198 | 0.210334 -0.006899 199 | 0.604529 1.212685 200 | 0.250744 0.046297 201 | -------------------------------------------------------------------------------- /TreesRegression/ex2.txt: -------------------------------------------------------------------------------- 1 | 0.228628 -2.266273 2 | 0.965969 112.386764 3 | 0.342761 -31.584855 4 | 0.901444 87.300625 5 | 0.585413 125.295113 6 | 0.334900 18.976650 7 | 
0.769043 64.041941 8 | 0.297107 -1.798377 9 | 0.901421 100.133819 10 | 0.176523 0.946348 11 | 0.710234 108.553919 12 | 0.981980 86.399637 13 | 0.085873 -10.137104 14 | 0.537834 90.995536 15 | 0.806158 62.877698 16 | 0.708890 135.416767 17 | 0.787755 118.642009 18 | 0.463241 17.171057 19 | 0.300318 -18.051318 20 | 0.815215 118.319942 21 | 0.139880 7.336784 22 | 0.068373 -15.160836 23 | 0.457563 -34.044555 24 | 0.665652 105.547997 25 | 0.084661 -24.132226 26 | 0.954711 100.935789 27 | 0.953902 130.926480 28 | 0.487381 27.729263 29 | 0.759504 81.106762 30 | 0.454312 -20.360067 31 | 0.295993 -14.988279 32 | 0.156067 7.557349 33 | 0.428582 15.224266 34 | 0.847219 76.240984 35 | 0.499171 11.924204 36 | 0.203993 -22.379119 37 | 0.548539 83.114502 38 | 0.790312 110.159730 39 | 0.937766 119.949824 40 | 0.218321 1.410768 41 | 0.223200 15.501642 42 | 0.896683 107.001620 43 | 0.582311 82.589328 44 | 0.698920 92.470636 45 | 0.823848 59.342323 46 | 0.385021 24.816941 47 | 0.061219 6.695567 48 | 0.841547 115.669032 49 | 0.763328 115.199195 50 | 0.934853 115.753994 51 | 0.222271 -9.255852 52 | 0.217214 -3.958752 53 | 0.706961 106.180427 54 | 0.888426 94.896354 55 | 0.549814 137.267576 56 | 0.107960 -1.293195 57 | 0.085111 37.820659 58 | 0.388789 21.578007 59 | 0.467383 -9.712925 60 | 0.623909 87.181863 61 | 0.373501 -8.228297 62 | 0.513332 101.075609 63 | 0.350725 -40.086564 64 | 0.716211 103.345308 65 | 0.731636 73.912028 66 | 0.273863 -9.457556 67 | 0.211633 -8.332207 68 | 0.944221 100.120253 69 | 0.053764 -13.731698 70 | 0.126833 22.891675 71 | 0.952833 100.649591 72 | 0.391609 3.001104 73 | 0.560301 82.903945 74 | 0.124723 -1.402796 75 | 0.465680 -23.777531 76 | 0.699873 115.586605 77 | 0.164134 -27.405211 78 | 0.455761 9.841938 79 | 0.508542 96.403373 80 | 0.138619 -29.087463 81 | 0.335182 2.768225 82 | 0.908629 118.513475 83 | 0.546601 96.319043 84 | 0.378965 13.583555 85 | 0.968621 98.648346 86 | 0.637999 91.656617 87 | 0.350065 -1.319852 88 | 0.632691 93.645293 89 | 0.936524 65.548418 90 | 0.310956 -49.939516 91 | 0.437652 19.745224 92 | 0.166765 -14.740059 93 | 0.571214 114.872056 94 | 0.952377 73.520802 95 | 0.665329 121.980607 96 | 0.258070 -20.425137 97 | 0.912161 85.005351 98 | 0.777582 100.838446 99 | 0.642707 82.500766 100 | 0.885676 108.045948 101 | 0.080061 2.229873 102 | 0.039914 11.220099 103 | 0.958512 135.837013 104 | 0.377383 5.241196 105 | 0.661073 115.687524 106 | 0.454375 3.043912 107 | 0.412516 -26.419289 108 | 0.854970 89.209930 109 | 0.698472 120.521925 110 | 0.465561 30.051931 111 | 0.328890 39.783113 112 | 0.309133 8.814725 113 | 0.418943 44.161493 114 | 0.553797 120.857321 115 | 0.799873 91.368473 116 | 0.811363 112.981216 117 | 0.785574 107.024467 118 | 0.949198 105.752508 119 | 0.666452 120.014736 120 | 0.652462 112.715799 121 | 0.290749 -14.391613 122 | 0.508548 93.292829 123 | 0.680486 110.367074 124 | 0.356790 -19.526539 125 | 0.199903 -3.372472 126 | 0.264926 5.280579 127 | 0.166431 -6.512506 128 | 0.370042 -32.124495 129 | 0.628061 117.628346 130 | 0.228473 19.425158 131 | 0.044737 3.855393 132 | 0.193282 18.208423 133 | 0.519150 116.176162 134 | 0.351478 -0.461116 135 | 0.872199 111.552716 136 | 0.115150 13.795828 137 | 0.324274 -13.189243 138 | 0.446196 -5.108172 139 | 0.613004 168.180746 140 | 0.533511 129.766743 141 | 0.740859 93.773929 142 | 0.667851 92.449664 143 | 0.900699 109.188248 144 | 0.599142 130.378529 145 | 0.232802 1.222318 146 | 0.838587 134.089674 147 | 0.284794 35.623746 148 | 0.130626 -39.524461 149 | 0.642373 140.613941 150 | 0.786865 100.598825 
151 | 0.403228 -1.729244 152 | 0.883615 95.348184 153 | 0.910975 106.814667 154 | 0.819722 70.054508 155 | 0.798198 76.853728 156 | 0.606417 93.521396 157 | 0.108801 -16.106164 158 | 0.318309 -27.605424 159 | 0.856421 107.166848 160 | 0.842940 95.893131 161 | 0.618868 76.917665 162 | 0.531944 124.795495 163 | 0.028546 -8.377094 164 | 0.915263 96.717610 165 | 0.925782 92.074619 166 | 0.624827 105.970743 167 | 0.331364 -1.290825 168 | 0.341700 -23.547711 169 | 0.342155 -16.930416 170 | 0.729397 110.902830 171 | 0.640515 82.713621 172 | 0.228751 -30.812912 173 | 0.948822 69.318649 174 | 0.706390 105.062147 175 | 0.079632 29.420068 176 | 0.451087 -28.724685 177 | 0.833026 76.723835 178 | 0.589806 98.674874 179 | 0.426711 -21.594268 180 | 0.872883 95.887712 181 | 0.866451 94.402102 182 | 0.960398 123.559747 183 | 0.483803 5.224234 184 | 0.811602 99.841379 185 | 0.757527 63.549854 186 | 0.569327 108.435392 187 | 0.841625 60.552308 188 | 0.264639 2.557923 189 | 0.202161 -1.983889 190 | 0.055862 -3.131497 191 | 0.543843 98.362010 192 | 0.689099 112.378209 193 | 0.956951 82.016541 194 | 0.382037 -29.007783 195 | 0.131833 22.478291 196 | 0.156273 0.225886 197 | 0.000256 9.668106 198 | 0.892999 82.436686 199 | 0.206207 -12.619036 200 | 0.487537 5.149336 201 | -------------------------------------------------------------------------------- /TreesRegression/ex2test.txt: -------------------------------------------------------------------------------- 1 | 0.421862 10.830241 2 | 0.105349 -2.241611 3 | 0.155196 21.872976 4 | 0.161152 2.015418 5 | 0.382632 -38.778979 6 | 0.017710 20.109113 7 | 0.129656 15.266887 8 | 0.613926 111.900063 9 | 0.409277 1.874731 10 | 0.807556 111.223754 11 | 0.593722 133.835486 12 | 0.953239 110.465070 13 | 0.257402 15.332899 14 | 0.645385 93.983054 15 | 0.563460 93.645277 16 | 0.408338 -30.719878 17 | 0.874394 91.873505 18 | 0.263805 -0.192752 19 | 0.411198 10.751118 20 | 0.449884 9.211901 21 | 0.646315 113.533660 22 | 0.673718 125.135638 23 | 0.805148 113.300462 24 | 0.759327 72.668572 25 | 0.519172 82.131698 26 | 0.741031 106.777146 27 | 0.030937 9.859127 28 | 0.268848 -34.137955 29 | 0.474901 -11.201301 30 | 0.588266 120.501998 31 | 0.893936 142.826476 32 | 0.870990 105.751746 33 | 0.430763 39.146258 34 | 0.057665 15.371897 35 | 0.100076 9.131761 36 | 0.980716 116.145896 37 | 0.235289 -13.691224 38 | 0.228098 16.089151 39 | 0.622248 99.345551 40 | 0.401467 -1.694383 41 | 0.960334 110.795415 42 | 0.031214 -5.330042 43 | 0.504228 96.003525 44 | 0.779660 75.921582 45 | 0.504496 101.341462 46 | 0.850974 96.293064 47 | 0.701119 102.333839 48 | 0.191551 5.072326 49 | 0.667116 92.310019 50 | 0.555584 80.367129 51 | 0.680006 132.965442 52 | 0.393899 38.605283 53 | 0.048940 -9.861871 54 | 0.963282 115.407485 55 | 0.655496 104.269918 56 | 0.576463 141.127267 57 | 0.675708 96.227996 58 | 0.853457 114.252288 59 | 0.003933 -12.182861 60 | 0.549512 97.927224 61 | 0.218967 -4.712462 62 | 0.659972 120.950439 63 | 0.008256 8.026816 64 | 0.099500 -14.318434 65 | 0.352215 -3.747546 66 | 0.874926 89.247356 67 | 0.635084 99.496059 68 | 0.039641 14.147109 69 | 0.665111 103.298719 70 | 0.156583 -2.540703 71 | 0.648843 119.333019 72 | 0.893237 95.209585 73 | 0.128807 5.558479 74 | 0.137438 5.567685 75 | 0.630538 98.462792 76 | 0.296084 -41.799438 77 | 0.632099 84.895098 78 | 0.987681 106.726447 79 | 0.744909 111.279705 80 | 0.862030 104.581156 81 | 0.080649 -7.679985 82 | 0.831277 59.053356 83 | 0.198716 26.878801 84 | 0.860932 90.632930 85 | 0.883250 92.759595 86 | 0.818003 110.272219 87 | 
0.949216 115.200237 88 | 0.460078 -35.957981 89 | 0.561077 93.545761 90 | 0.863767 114.125786 91 | 0.476891 -29.774060 92 | 0.537826 81.587922 93 | 0.686224 110.911198 94 | 0.982327 119.114523 95 | 0.944453 92.033481 96 | 0.078227 30.216873 97 | 0.782937 92.588646 98 | 0.465886 2.222139 99 | 0.885024 90.247890 100 | 0.186077 7.144415 101 | 0.915828 84.010074 102 | 0.796649 115.572156 103 | 0.127821 28.933688 104 | 0.433429 6.782575 105 | 0.946796 108.574116 106 | 0.386915 -17.404601 107 | 0.561192 92.142700 108 | 0.182490 10.764616 109 | 0.878792 95.289476 110 | 0.381342 -6.177464 111 | 0.358474 -11.731754 112 | 0.270647 13.793201 113 | 0.488904 -17.641832 114 | 0.106773 5.684757 115 | 0.270112 4.335675 116 | 0.754985 75.860433 117 | 0.585174 111.640154 118 | 0.458821 12.029692 119 | 0.218017 -26.234872 120 | 0.583887 99.413850 121 | 0.923626 107.802298 122 | 0.833620 104.179678 123 | 0.870691 93.132591 124 | 0.249896 -8.618404 125 | 0.748230 109.160652 126 | 0.019365 34.048884 127 | 0.837588 101.239275 128 | 0.529251 115.514729 129 | 0.742898 67.038771 130 | 0.522034 64.160799 131 | 0.498982 3.983061 132 | 0.479439 24.355908 133 | 0.314834 -14.256200 134 | 0.753251 85.017092 135 | 0.479362 -17.480446 136 | 0.950593 99.072784 137 | 0.718623 58.080256 138 | 0.218720 -19.605593 139 | 0.664113 94.437159 140 | 0.942900 131.725134 141 | 0.314226 18.904871 142 | 0.284509 11.779346 143 | 0.004962 -14.624176 144 | 0.224087 -50.547649 145 | 0.974331 112.822725 146 | 0.894610 112.863995 147 | 0.167350 0.073380 148 | 0.753644 105.024456 149 | 0.632241 108.625812 150 | 0.314189 -6.090797 151 | 0.965527 87.418343 152 | 0.820919 94.610538 153 | 0.144107 -4.748387 154 | 0.072556 -5.682008 155 | 0.002447 29.685714 156 | 0.851007 79.632376 157 | 0.458024 -12.326026 158 | 0.627503 139.458881 159 | 0.422259 -29.827405 160 | 0.714659 63.480271 161 | 0.672320 93.608554 162 | 0.498592 37.112975 163 | 0.698906 96.282845 164 | 0.861441 99.699230 165 | 0.112425 -12.419909 166 | 0.164784 5.244704 167 | 0.481531 -18.070497 168 | 0.375482 1.779411 169 | 0.089325 -14.216755 170 | 0.036609 -6.264372 171 | 0.945004 54.723563 172 | 0.136608 14.970936 173 | 0.292285 -41.723711 174 | 0.029195 -0.660279 175 | 0.998307 100.124230 176 | 0.303928 -5.492264 177 | 0.957863 117.824392 178 | 0.815089 113.377704 179 | 0.466399 -10.249874 180 | 0.876693 115.617275 181 | 0.536121 102.997087 182 | 0.373984 -37.359936 183 | 0.565162 74.967476 184 | 0.085412 -21.449563 185 | 0.686411 64.859620 186 | 0.908752 107.983366 187 | 0.982829 98.005424 188 | 0.052766 -42.139502 189 | 0.777552 91.899340 190 | 0.374316 -3.522501 191 | 0.060231 10.008227 192 | 0.526225 87.317722 193 | 0.583872 67.104433 194 | 0.238276 10.615159 195 | 0.678747 60.624273 196 | 0.067649 15.947398 197 | 0.530182 105.030933 198 | 0.869389 104.969996 199 | 0.698410 75.460417 200 | 0.549430 82.558068 201 | -------------------------------------------------------------------------------- /TreesRegression/exp.txt: -------------------------------------------------------------------------------- 1 | 0.529582 100.737303 2 | 0.985730 103.106872 3 | 0.797869 99.666151 4 | 0.393473 -1.773056 5 | 0.272568 -1.170222 6 | 0.758825 96.752440 7 | 0.218359 2.337347 8 | 0.926357 98.343231 9 | 0.726881 99.633009 10 | 0.805311 102.253834 11 | 0.208632 0.493174 12 | 0.184921 -2.231071 13 | 0.660135 100.139355 14 | 0.871875 96.637420 15 | 0.657182 100.345442 16 | 0.942481 97.751546 17 | 0.427843 -1.380170 18 | 0.845958 98.195303 19 | 0.878696 99.380485 20 | 0.582034 100.971036 21 | 0.118114 
2.397033 22 | 0.144718 1.304535 23 | 0.576046 101.624714 24 | 0.750305 97.601324 25 | 0.518281 100.093634 26 | 0.260793 -1.361888 27 | 0.390245 -2.973759 28 | 0.963020 98.877859 29 | 0.880661 97.631997 30 | 0.291780 -1.638124 31 | 0.192903 -2.221257 32 | 0.461442 -1.074725 33 | 0.821171 99.372052 34 | 0.144557 2.589464 35 | 0.379346 0.991090 36 | 0.383822 1.832389 37 | 0.055406 -1.870700 38 | 0.084308 -0.611701 39 | 0.719578 100.087948 40 | 0.417471 -0.510292 41 | 0.477894 -3.426525 42 | 0.871228 100.307522 43 | 0.113074 -1.011079 44 | 0.409434 -0.616173 45 | 0.967141 96.551856 46 | 0.938254 97.052196 47 | 0.079989 2.083496 48 | 0.150207 1.285491 49 | 0.417339 -0.462985 50 | 0.038787 -2.237234 51 | 0.954657 102.111432 52 | 0.844894 98.350138 53 | 0.106770 -0.998182 54 | 0.247831 2.483594 55 | 0.108687 -0.920229 56 | 0.758165 98.079399 57 | 0.199978 -3.490410 58 | 0.600602 99.850119 59 | 0.026466 1.342825 60 | 0.141239 -0.949858 61 | 0.181437 -2.223725 62 | 0.352656 2.251362 63 | 0.803371 99.647157 64 | 0.677303 100.414859 65 | 0.561674 99.133372 66 | 0.497533 -3.764935 67 | 0.523327 98.452850 68 | 0.507075 103.807755 69 | 0.791978 99.414598 70 | 0.956890 95.977239 71 | 0.487927 1.199149 72 | 0.788795 100.012047 73 | 0.554283 98.522458 74 | 0.814361 97.642150 75 | 0.788940 97.399942 76 | 0.515845 102.240479 77 | 0.758538 97.461917 78 | 0.041824 -3.294141 79 | 0.341352 1.246559 80 | 0.194801 -2.285278 81 | 0.805528 99.023113 82 | 0.435762 0.361749 83 | 0.941615 100.746547 84 | 0.478234 0.791146 85 | 0.057445 -4.266792 86 | 0.510079 98.845273 87 | 0.209900 -0.861890 88 | 0.902668 101.429190 89 | 0.456602 -2.856392 90 | 0.997595 99.828241 91 | 0.048240 -0.268920 92 | 0.319531 0.896696 93 | 0.264929 -1.000487 94 | 0.432727 -4.630489 95 | 0.419828 1.260534 96 | 0.667056 99.456518 97 | 0.488173 1.574322 98 | 0.746300 100.563503 99 | 0.528660 100.736739 100 | 0.624185 99.562872 101 | 0.169411 1.809929 102 | 0.011025 4.132846 103 | 0.974164 98.706049 104 | 0.267957 0.297803 105 | 0.726093 99.381040 106 | 0.465163 -2.344545 107 | 0.993698 101.507792 108 | 0.816513 99.903496 109 | 0.398756 0.378060 110 | 0.054974 -0.588770 111 | 0.857067 100.322945 112 | 0.362328 2.551786 113 | 0.316961 -0.528283 114 | 0.167881 -0.376517 115 | 0.393776 3.658204 116 | 0.739991 100.426554 117 | 0.457949 0.857428 118 | 0.060635 2.484776 119 | 0.942634 101.254420 120 | 0.553691 102.467820 121 | 0.394694 -0.248353 122 | 0.714625 99.650556 123 | 0.273503 1.111820 124 | 0.471886 -5.665559 125 | 0.746476 98.720163 126 | 0.140209 0.471820 127 | 0.024197 -2.854251 128 | 0.521287 99.703915 129 | 0.672280 100.463227 130 | 0.380342 -0.785713 131 | 0.956380 99.482209 132 | 0.455254 1.613841 133 | 0.647551 101.591193 134 | 0.682498 98.267734 135 | 0.054839 -2.286019 136 | 0.716849 100.614510 137 | 0.217732 -2.161633 138 | 0.918885 100.260067 139 | 0.576026 101.719788 140 | 0.868511 100.669152 141 | 0.661135 97.637969 142 | 0.166334 1.374014 143 | 0.106850 -3.658050 144 | 0.768242 104.193841 145 | 0.240916 -0.368100 146 | 0.124957 2.821672 147 | 0.984335 98.571444 148 | 0.908524 101.777344 149 | 0.861217 98.656403 150 | 0.944295 100.154508 151 | 0.527278 101.052710 152 | 0.717072 100.788373 153 | 0.130227 0.115694 154 | 0.494734 -1.220681 155 | 0.498733 0.961514 156 | 0.519411 101.331622 157 | 0.712409 104.891067 158 | 0.933858 98.180299 159 | 0.266051 0.398961 160 | 0.153690 -0.657128 161 | 0.209181 1.486816 162 | 0.942699 102.187578 163 | 0.766799 100.213348 164 | 0.862578 101.816969 165 | 0.223266 2.854445 166 | 0.611394 
103.428497 167 | 0.996212 98.494158 168 | 0.724945 99.098450 169 | 0.399346 0.879259 170 | 0.750510 98.729864 171 | 0.446060 0.639843 172 | 0.999913 101.502887 173 | 0.111561 3.256383 174 | 0.094755 0.170475 175 | 0.366547 0.488994 176 | 0.179924 -0.871567 177 | 0.969023 99.982789 178 | 0.941420 100.416754 179 | 0.656851 98.520940 180 | 0.983166 99.546591 181 | 0.167843 0.033922 182 | 0.316245 2.171137 183 | 0.817118 102.849575 184 | 0.173642 1.209173 185 | 0.411030 2.022640 186 | 0.265041 2.216470 187 | 0.779660 98.475428 188 | 0.059354 -0.929568 189 | 0.722092 97.974003 190 | 0.511958 101.924447 191 | 0.371938 -0.640602 192 | 0.851009 97.873330 193 | 0.375918 -5.308115 194 | 0.797332 99.763778 195 | 0.107749 -3.770092 196 | 0.156937 -0.876724 197 | 0.960447 99.597097 198 | 0.413434 2.408090 199 | 0.644257 100.453125 200 | 0.119332 -0.495588 201 | -------------------------------------------------------------------------------- /TreesRegression/exp2.txt: -------------------------------------------------------------------------------- 1 | 0.070670 3.470829 2 | 0.534076 6.377132 3 | 0.747221 8.949407 4 | 0.668970 8.034081 5 | 0.586082 6.997721 6 | 0.764962 9.318110 7 | 0.658125 7.880333 8 | 0.346734 4.213359 9 | 0.313967 3.762496 10 | 0.601418 7.188805 11 | 0.404396 4.893403 12 | 0.154345 3.683175 13 | 0.984061 11.712928 14 | 0.597514 7.146694 15 | 0.005144 3.333150 16 | 0.142295 3.743681 17 | 0.280007 3.737376 18 | 0.542008 6.494275 19 | 0.466781 5.532255 20 | 0.706970 8.476718 21 | 0.191038 3.673921 22 | 0.756591 9.176722 23 | 0.912879 10.850358 24 | 0.524701 6.067444 25 | 0.306090 3.681148 26 | 0.429009 5.032168 27 | 0.695091 8.209058 28 | 0.984495 11.909595 29 | 0.702748 8.298454 30 | 0.551771 6.715210 31 | 0.272894 3.983313 32 | 0.014611 3.559081 33 | 0.699852 8.417306 34 | 0.309710 3.739053 35 | 0.444877 5.219649 36 | 0.717509 8.483072 37 | 0.576550 6.894860 38 | 0.284200 3.792626 39 | 0.675922 8.067282 40 | 0.304401 3.671373 41 | 0.233675 3.795962 42 | 0.453779 5.477533 43 | 0.900938 10.701447 44 | 0.502418 6.046703 45 | 0.781843 9.254690 46 | 0.226271 3.546938 47 | 0.619535 7.703312 48 | 0.519998 6.202835 49 | 0.399447 4.934647 50 | 0.785298 9.497564 51 | 0.010767 3.565835 52 | 0.696399 8.307487 53 | 0.524366 6.266060 54 | 0.396583 4.611390 55 | 0.059988 3.484805 56 | 0.946702 11.263118 57 | 0.417559 4.895128 58 | 0.609194 7.239316 59 | 0.730687 8.858371 60 | 0.586694 7.061601 61 | 0.829567 9.937968 62 | 0.964229 11.521595 63 | 0.276813 3.756406 64 | 0.987041 11.947913 65 | 0.876107 10.440538 66 | 0.747582 8.942278 67 | 0.117348 3.567821 68 | 0.188617 3.976420 69 | 0.416655 4.928907 70 | 0.192995 3.978365 71 | 0.244888 3.777018 72 | 0.806349 9.685831 73 | 0.417555 4.990148 74 | 0.233805 3.740022 75 | 0.357325 4.325355 76 | 0.190201 3.638493 77 | 0.705127 8.432886 78 | 0.336599 3.868493 79 | 0.473786 5.871813 80 | 0.384794 4.830712 81 | 0.502217 6.117244 82 | 0.788220 9.454959 83 | 0.478773 5.681631 84 | 0.064296 3.642040 85 | 0.332143 3.886628 86 | 0.618869 7.312725 87 | 0.854981 10.306697 88 | 0.570000 6.764615 89 | 0.512739 6.166836 90 | 0.112285 3.545863 91 | 0.723700 8.526944 92 | 0.192256 3.661033 93 | 0.181268 3.678579 94 | 0.196731 3.916622 95 | 0.510342 6.026652 96 | 0.263713 3.723018 97 | 0.141105 3.529595 98 | 0.150262 3.552314 99 | 0.824724 9.973690 100 | 0.588088 6.893128 101 | 0.411291 4.856380 102 | 0.763717 9.199101 103 | 0.212118 3.740024 104 | 0.264587 3.742917 105 | 0.973524 11.683243 106 | 0.250670 3.679117 107 | 0.823460 9.743861 108 | 0.253752 3.781488 109 | 
0.838332 10.172180 110 | 0.501156 6.113263 111 | 0.097275 3.472367 112 | 0.667199 7.948868 113 | 0.487320 6.022060 114 | 0.654640 7.809457 115 | 0.906907 10.775188 116 | 0.821941 9.936140 117 | 0.859396 10.428255 118 | 0.078696 3.490510 119 | 0.938092 11.252471 120 | 0.998868 11.863062 121 | 0.025501 3.515624 122 | 0.451806 5.441171 123 | 0.883872 10.498912 124 | 0.583567 6.912334 125 | 0.823688 10.003723 126 | 0.891032 10.818109 127 | 0.879259 10.639263 128 | 0.163007 3.662715 129 | 0.344263 4.169705 130 | 0.796083 9.422591 131 | 0.903683 10.978834 132 | 0.050129 3.575105 133 | 0.605553 7.306014 134 | 0.628951 7.556742 135 | 0.877052 10.444055 136 | 0.829402 9.856432 137 | 0.121422 3.638276 138 | 0.721517 8.663569 139 | 0.066532 3.673471 140 | 0.996587 11.782002 141 | 0.653384 7.804568 142 | 0.739494 8.817809 143 | 0.640341 7.636812 144 | 0.337828 3.971613 145 | 0.220512 3.713645 146 | 0.368815 4.381696 147 | 0.782509 9.349428 148 | 0.645825 7.790882 149 | 0.277391 3.834258 150 | 0.092569 3.643274 151 | 0.284320 3.609353 152 | 0.344465 4.023259 153 | 0.182523 3.749195 154 | 0.385001 4.426970 155 | 0.747609 8.966676 156 | 0.188907 3.711018 157 | 0.806244 9.610438 158 | 0.014211 3.517818 159 | 0.574813 7.040672 160 | 0.714500 8.525624 161 | 0.538982 6.393940 162 | 0.384638 4.649362 163 | 0.915586 10.936577 164 | 0.883513 10.441493 165 | 0.804148 9.742851 166 | 0.466011 5.833439 167 | 0.800574 9.638874 168 | 0.654980 8.028558 169 | 0.348564 4.064616 170 | 0.978595 11.720218 171 | 0.915906 10.833902 172 | 0.285477 3.818961 173 | 0.988631 11.684010 174 | 0.531069 6.305005 175 | 0.181658 3.806995 176 | 0.039657 3.356861 177 | 0.893344 10.776799 178 | 0.355214 4.263666 179 | 0.783508 9.475445 180 | 0.039768 3.429691 181 | 0.546308 6.472749 182 | 0.786882 9.398951 183 | 0.168282 3.564189 184 | 0.374900 4.399040 185 | 0.737767 8.888536 186 | 0.059849 3.431537 187 | 0.861891 10.246888 188 | 0.597578 7.112627 189 | 0.126050 3.611641 190 | 0.074795 3.609222 191 | 0.634401 7.627416 192 | 0.831633 9.926548 193 | 0.019095 3.470285 194 | 0.396533 4.773104 195 | 0.794973 9.492009 196 | 0.889088 10.420003 197 | 0.003174 3.587139 198 | 0.176767 3.554071 199 | 0.943730 11.227731 200 | 0.758564 8.885337 201 | -------------------------------------------------------------------------------- /TreesRegression/expTest.txt: -------------------------------------------------------------------------------- 1 | 0.042621 0.705087 2 | 0.140649 1.676077 3 | 0.729711 98.287450 4 | 0.420368 0.893020 5 | 0.055112 -1.784342 6 | 0.335700 -2.039774 7 | 0.480745 -1.165972 8 | 0.039408 -2.453546 9 | 0.713000 99.181124 10 | 0.437107 2.288551 11 | 0.553328 99.909260 12 | 0.146352 -3.900741 13 | 0.753615 97.640436 14 | 0.739062 100.411664 15 | 0.391077 0.380562 16 | 0.887119 102.018433 17 | 0.090234 -1.872570 18 | 0.870459 97.253294 19 | 0.174066 0.716029 20 | 0.698476 96.591450 21 | 0.463064 0.197371 22 | 0.201708 -3.424533 23 | 0.335499 -2.823621 24 | 0.873611 101.105294 25 | 0.315239 1.893852 26 | 0.258688 -0.604888 27 | 0.331030 2.185822 28 | 0.938692 98.758321 29 | 0.390971 5.619469 30 | 0.946373 101.358201 31 | 0.841116 100.136301 32 | 0.652268 101.167615 33 | 0.488903 1.912745 34 | 0.076776 0.631315 35 | 0.078587 -0.173226 36 | 0.690439 103.351735 37 | 0.992771 99.322329 38 | 0.357646 -1.662827 39 | 0.996224 100.969483 40 | 0.431983 -2.332204 41 | 0.084956 -0.153660 42 | 0.416978 -3.185275 43 | 0.483920 -0.400342 44 | 0.351282 -0.212100 45 | 0.696687 100.399345 46 | 0.610816 100.447063 47 | 0.876386 97.717446 48 | 0.290065 
-1.402790 49 | 0.561540 97.719979 50 | 0.521387 102.671802 51 | 0.124250 -1.447424 52 | 0.760795 100.973153 53 | 0.813137 98.418078 54 | 0.322203 -0.210448 55 | 0.222080 -2.382876 56 | 0.012078 0.145758 57 | 0.215864 -1.753234 58 | 0.286381 -0.029690 59 | 0.504148 100.382630 60 | 0.853875 97.561672 61 | 0.077604 1.836922 62 | 0.533825 100.804076 63 | 0.197164 -1.982653 64 | 0.915268 96.773211 65 | 0.637298 98.012823 66 | 0.222793 0.879413 67 | 0.403267 1.696757 68 | 0.365798 -1.228388 69 | 0.470756 -3.196883 70 | 0.007890 -0.725592 71 | 0.348122 3.658900 72 | 0.816112 102.003904 73 | 0.752076 101.766783 74 | 0.722139 99.311245 75 | 0.050637 -0.053007 76 | 0.794114 96.183380 77 | 0.416684 -2.133790 78 | 0.019078 -2.772976 79 | 0.875982 99.771033 80 | 0.393920 -0.334077 81 | 0.240991 -1.351481 82 | 0.975677 98.774986 83 | 0.790547 99.321853 84 | 0.437987 -1.925655 85 | 0.164944 1.045779 86 | 0.197404 0.812910 87 | 0.679754 101.643453 88 | 0.579659 101.453164 89 | 0.022060 -0.116585 90 | 0.181261 -2.269127 91 | 0.223999 -2.179047 92 | 0.409925 1.365931 93 | 0.360634 -4.286442 94 | 0.164986 -0.749713 95 | 0.583409 99.378572 96 | 0.741431 102.861904 97 | 0.494034 -1.145858 98 | 0.411789 2.687350 99 | 0.940651 102.052953 100 | 0.680743 99.299124 101 | 0.453674 -3.107414 102 | 0.164892 1.666987 103 | 0.778335 99.863542 104 | 0.336990 0.938736 105 | 0.501560 101.008483 106 | 0.855588 101.125709 107 | 0.654224 100.980805 108 | 0.653707 98.019920 109 | 0.588863 96.945577 110 | 0.385631 3.146359 111 | 0.050457 -0.106757 112 | 0.822597 100.607049 113 | 0.208452 -0.460245 114 | 0.040589 0.069251 115 | 0.731871 104.981635 116 | 0.427191 -3.934995 117 | 0.623521 97.676660 118 | 0.203501 -0.529907 119 | 0.181543 0.705354 120 | 0.289069 1.085134 121 | 0.652419 98.896461 122 | 0.111964 3.514297 123 | 0.277014 2.301090 124 | 0.497381 -1.877630 125 | 0.994973 98.092916 126 | 0.084255 3.147329 127 | 0.084836 -2.263086 128 | 0.629725 103.448042 129 | 0.741841 99.908137 130 | 0.788823 99.790969 131 | 0.063125 -2.847334 132 | 0.413608 -2.245895 133 | 0.527976 101.466569 134 | 0.596276 101.079191 135 | 0.845748 100.308275 136 | 0.976452 100.197745 137 | 0.475051 2.563985 138 | 0.694542 99.125422 139 | 0.390583 -1.652652 140 | 0.580233 99.861938 141 | 0.622445 97.933787 142 | 0.744950 102.392552 143 | 0.414662 -1.727387 144 | 0.648774 101.371751 145 | 0.013468 -1.718182 146 | 0.781245 98.393098 147 | 0.871697 103.241025 148 | 0.198555 0.407556 149 | 0.427669 -1.826682 150 | 0.281457 0.137682 151 | 0.837984 98.909162 152 | 0.424066 1.060564 153 | 0.837252 100.688719 154 | 0.369463 1.061182 155 | 0.034532 -0.423989 156 | 0.481137 -0.008675 157 | 0.156752 -0.713391 158 | 0.661411 99.255937 159 | 0.176114 -0.302831 160 | 0.478959 -0.367422 161 | 0.874168 97.783253 162 | 0.167500 -0.829583 163 | 0.864995 99.961025 164 | 0.915850 99.090509 165 | 0.717802 100.059025 166 | 0.497465 -2.379605 167 | 0.351879 -1.832181 168 | 0.600021 99.967671 169 | 0.653842 100.114605 170 | 0.235046 -0.002983 171 | 0.608262 99.428381 172 | 0.979362 95.533709 173 | 0.178479 -0.697517 174 | 0.770679 99.313631 175 | 0.605045 101.927861 176 | 0.342313 -1.473575 177 | 0.927246 101.401583 178 | 0.623712 100.875627 179 | 0.764501 97.575820 180 | 0.670568 101.465970 181 | 0.799404 100.978750 182 | 0.999679 95.883283 183 | 0.341203 -2.047895 184 | 0.640206 98.109133 185 | 0.898167 100.648327 186 | 0.538279 97.178557 187 | 0.804254 102.052744 188 | 0.641926 99.911401 189 | 0.248823 -1.025944 190 | 0.830591 100.349505 191 | 0.468414 -2.691770 192 | 
0.492944 0.405210 193 | 0.309762 1.995071 194 | 0.951799 99.978873 195 | 0.935369 104.094296 196 | 0.336673 -4.239911 197 | 0.872527 102.585224 198 | 0.837085 103.322194 199 | 0.525039 99.419610 200 | 0.504804 102.986424 201 | -------------------------------------------------------------------------------- /TreesRegression/sine.txt: -------------------------------------------------------------------------------- 1 | 0.190350 0.878049 2 | 0.306657 -0.109413 3 | 0.017568 0.030917 4 | 0.122328 0.951109 5 | 0.076274 0.774632 6 | 0.614127 -0.250042 7 | 0.220722 0.807741 8 | 0.089430 0.840491 9 | 0.278817 0.342210 10 | 0.520287 -0.950301 11 | 0.726976 0.852224 12 | 0.180485 1.141859 13 | 0.801524 1.012061 14 | 0.474273 -1.311226 15 | 0.345116 -0.319911 16 | 0.981951 -0.374203 17 | 0.127349 1.039361 18 | 0.757120 1.040152 19 | 0.345419 -0.429760 20 | 0.314532 -0.075762 21 | 0.250828 0.657169 22 | 0.431255 -0.905443 23 | 0.386669 -0.508875 24 | 0.143794 0.844105 25 | 0.470839 -0.951757 26 | 0.093065 0.785034 27 | 0.205377 0.715400 28 | 0.083329 0.853025 29 | 0.243475 0.699252 30 | 0.062389 0.567589 31 | 0.764116 0.834931 32 | 0.018287 0.199875 33 | 0.973603 -0.359748 34 | 0.458826 -1.113178 35 | 0.511200 -1.082561 36 | 0.712587 0.615108 37 | 0.464745 -0.835752 38 | 0.984328 -0.332495 39 | 0.414291 -0.808822 40 | 0.799551 1.072052 41 | 0.499037 -0.924499 42 | 0.966757 -0.191643 43 | 0.756594 0.991844 44 | 0.444938 -0.969528 45 | 0.410167 -0.773426 46 | 0.532335 -0.631770 47 | 0.343909 -0.313313 48 | 0.854302 0.719307 49 | 0.846882 0.916509 50 | 0.740758 1.009525 51 | 0.150668 0.832433 52 | 0.177606 0.893017 53 | 0.445289 -0.898242 54 | 0.734653 0.787282 55 | 0.559488 -0.663482 56 | 0.232311 0.499122 57 | 0.934435 -0.121533 58 | 0.219089 0.823206 59 | 0.636525 0.053113 60 | 0.307605 0.027500 61 | 0.713198 0.693978 62 | 0.116343 1.242458 63 | 0.680737 0.368910 64 | 0.484730 -0.891940 65 | 0.929408 0.234913 66 | 0.008507 0.103505 67 | 0.872161 0.816191 68 | 0.755530 0.985723 69 | 0.620671 0.026417 70 | 0.472260 -0.967451 71 | 0.257488 0.630100 72 | 0.130654 1.025693 73 | 0.512333 -0.884296 74 | 0.747710 0.849468 75 | 0.669948 0.413745 76 | 0.644856 0.253455 77 | 0.894206 0.482933 78 | 0.820471 0.899981 79 | 0.790796 0.922645 80 | 0.010729 0.032106 81 | 0.846777 0.768675 82 | 0.349175 -0.322929 83 | 0.453662 -0.957712 84 | 0.624017 -0.169913 85 | 0.211074 0.869840 86 | 0.062555 0.607180 87 | 0.739709 0.859793 88 | 0.985896 -0.433632 89 | 0.782088 0.976380 90 | 0.642561 0.147023 91 | 0.779007 0.913765 92 | 0.185631 1.021408 93 | 0.525250 -0.706217 94 | 0.236802 0.564723 95 | 0.440958 -0.993781 96 | 0.397580 -0.708189 97 | 0.823146 0.860086 98 | 0.370173 -0.649231 99 | 0.791675 1.162927 100 | 0.456647 -0.956843 101 | 0.113350 0.850107 102 | 0.351074 -0.306095 103 | 0.182684 0.825728 104 | 0.914034 0.305636 105 | 0.751486 0.898875 106 | 0.216572 0.974637 107 | 0.013273 0.062439 108 | 0.469726 -1.226188 109 | 0.060676 0.599451 110 | 0.776310 0.902315 111 | 0.061648 0.464446 112 | 0.714077 0.947507 113 | 0.559264 -0.715111 114 | 0.121876 0.791703 115 | 0.330586 -0.165819 116 | 0.662909 0.379236 117 | 0.785142 0.967030 118 | 0.161352 0.979553 119 | 0.985215 -0.317699 120 | 0.457734 -0.890725 121 | 0.171574 0.963749 122 | 0.334277 -0.266228 123 | 0.501065 -0.910313 124 | 0.988736 -0.476222 125 | 0.659242 0.218365 126 | 0.359861 -0.338734 127 | 0.790434 0.843387 128 | 0.462458 -0.911647 129 | 0.823012 0.813427 130 | 0.594668 -0.603016 131 | 0.498207 -0.878847 132 | 0.574882 -0.419598 133 | 0.570048 -0.442087 
134 | 0.331570 -0.347567 135 | 0.195407 0.822284 136 | 0.814327 0.974355 137 | 0.641925 0.073217 138 | 0.238778 0.657767 139 | 0.400138 -0.715598 140 | 0.670479 0.469662 141 | 0.069076 0.680958 142 | 0.294373 0.145767 143 | 0.025628 0.179822 144 | 0.697772 0.506253 145 | 0.729626 0.786519 146 | 0.293071 0.259997 147 | 0.531802 -1.095833 148 | 0.487338 -1.034481 149 | 0.215780 0.933506 150 | 0.625818 0.103845 151 | 0.179389 0.892237 152 | 0.192552 0.915516 153 | 0.671661 0.330361 154 | 0.952391 -0.060263 155 | 0.795133 0.945157 156 | 0.950494 -0.071855 157 | 0.194894 1.000860 158 | 0.351460 -0.227946 159 | 0.863456 0.648456 160 | 0.945221 -0.045667 161 | 0.779840 0.979954 162 | 0.996606 -0.450501 163 | 0.632184 -0.036506 164 | 0.790898 0.994890 165 | 0.022503 0.386394 166 | 0.318983 -0.152749 167 | 0.369633 -0.423960 168 | 0.157300 0.962858 169 | 0.153223 0.882873 170 | 0.360068 -0.653742 171 | 0.433917 -0.872498 172 | 0.133461 0.879002 173 | 0.757252 1.123667 174 | 0.309391 -0.102064 175 | 0.195586 0.925339 176 | 0.240259 0.689117 177 | 0.340591 -0.455040 178 | 0.243436 0.415760 179 | 0.612755 -0.180844 180 | 0.089407 0.723702 181 | 0.469695 -0.987859 182 | 0.943560 -0.097303 183 | 0.177241 0.918082 184 | 0.317756 -0.222902 185 | 0.515337 -0.733668 186 | 0.344773 -0.256893 187 | 0.537029 -0.797272 188 | 0.626878 0.048719 189 | 0.208940 0.836531 190 | 0.470697 -1.080283 191 | 0.054448 0.624676 192 | 0.109230 0.816921 193 | 0.158325 1.044485 194 | 0.976650 -0.309060 195 | 0.643441 0.267336 196 | 0.215841 1.018817 197 | 0.905337 0.409871 198 | 0.154354 0.920009 199 | 0.947922 -0.112378 200 | 0.201391 0.768894 201 | -------------------------------------------------------------------------------- /greenhat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015 Angus H. (4148) 2 | # Distributed under the GNU General Public License v3.0 (GPLv3). 3 | 4 | from datetime import date, timedelta 5 | from random import randint 6 | from time import sleep 7 | import sys 8 | import subprocess 9 | import os 10 | 11 | # returns a date string for the date that is N days before STARTDATE 12 | def get_date_string(n, startdate): 13 | d = startdate - timedelta(days=n) 14 | rtn = d.strftime("%a %b %d %X %Y %z -0400") 15 | return rtn 16 | 17 | # main app 18 | def main(argv): 19 | if len(argv) < 1 or len(argv) > 2: 20 | print "Error: Bad input." 
21 | sys.exit(1) 22 | n = int(argv[0]) 23 | if len(argv) == 1: 24 | startdate = date.today() 25 | if len(argv) == 2: 26 | startdate = date(int(argv[1][0:4]), int( 27 | argv[1][5:7]), int(argv[1][8:10])) 28 | i = 0 29 | while i <= n: 30 | curdate = get_date_string(i, startdate) 31 | num_commits = randint(1, 10) 32 | for commit in range(0, num_commits): 33 | subprocess.call("echo '" + curdate + str(randint(0, 1000000)) + "' > realwork.txt; git add -A; GIT_AUTHOR_DATE='" + 34 | curdate + "' GIT_COMMITTER_DATE='" + curdate + "' git commit -m 'update'; git push;", shell=True) 35 | sleep(.5) 36 | i += 1 37 | 38 | if __name__ == "__main__": 39 | main(sys.argv[1:]) 40 | -------------------------------------------------------------------------------- /htmlProcess.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | __author__ = 'wangjiewen' 3 | 4 | import BeautifulSoup 5 | import nltk, re, pprint 6 | from urllib import urlopen 7 | 8 | 9 | def test_parse_html(): 10 | urlpath = 'http://www.superlib.cn' 11 | html = urlopen(urlpath).read() 12 | soup = BeautifulSoup.BeautifulSoup(html.decode('utf-8')) 13 | title = soup.find('title') 14 | print soup.title.text 15 | print(title.name) 16 | print(title.text) 17 | 18 | detail = soup.find('div', {'class': 'detail'}) 19 | span = detail.find('p') 20 | print span.text 21 | return 22 | 23 | if __name__ == '__main__': 24 | test_parse_html() -------------------------------------------------------------------------------- /realwork.txt: -------------------------------------------------------------------------------- 1 | Sun Oct 04 00:00:00 2015 -0400677583 2 | -------------------------------------------------------------------------------- /taskAssignment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def loadData(): 5 | # mat = [ 6 | # [13, 16, 12, 11], 7 | # [15, 17, 12, 12], 8 | # [14, 14, 13, 13], 9 | # [13, 10, 10, 11] 10 | # ] 11 | mat = [ 12 | [9, 2, 7, 8], 13 | [6, 4, 3, 7], 14 | [5, 8, 1, 8], 15 | [7, 6, 9, 4] 16 | ] 17 | return mat 18 | 19 | def test(): 20 | from munkres import Munkres, print_matrix 21 | 22 | matrix = loadData() 23 | 24 | m = Munkres() 25 | indexes = m.compute(matrix) 26 | print_matrix(matrix, msg='Lowest cost through this matrix:') 27 | total = 0 28 | for row, column in indexes: 29 | value = matrix[row][column] 30 | total += value 31 | print '(%d, %d) -> %d' % (row, column, value) 32 | print 'total cost: %d' % total 33 | 34 | 35 | 36 | if __name__ == '__main__': 37 | test() 38 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def main(): 5 | D = [0, 0, 1, 1] 6 | X = [ 7 | [1, 2], 8 | [3, 4], 9 | [5, 6], 10 | [7, 8] 11 | ] 12 | C = [ 13 | [2, 3], 14 | [6, 7] 15 | ] 16 | D = np.mat(D).T 17 | X = np.mat(X) 18 | C = np.mat(C) 19 | print C[D] 20 | # print X[(D[:, 0].A == 1)] 21 | # print np.nonzero(D[:, 0].A == 0) 22 | # print X[np.nonzero(D[:, 0].A == 1)[0]] 23 | return 24 | 25 | 26 | 27 | if __name__ == '__main__': 28 | main() --------------------------------------------------------------------------------
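A usage note on TreesRegression/cart.py above: its __main__ block grows the tree on ex2test.txt and then post-prunes it with that same matrix, whereas post-pruning is normally driven by a held-out set. The sketch below is an illustration only, not a file in the repository; it assumes it is run from inside TreesRegression/ so that cart.py is importable and both data files are on disk.

# prune_demo.py -- hypothetical driver, not part of the repo
import json
from cart import loadData, createTree, prune

trainMat = loadData('ex2.txt')              # fit the tree on the training split
testMat = loadData('ex2test.txt')           # keep this split only for post-pruning
rawTree = createTree(trainMat, ops=(0, 1))  # tolS=0, tolN=1 grows a deliberately deep tree
prunedTree = prune(rawTree, testMat)        # merge sibling leaves whenever the held-out error drops
print json.dumps(prunedTree, indent=4)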