├── Base
    ├── books
    │   ├── JTZHBC
    │   │   ├── 2.recommendation
    │   │   │   └── recommendations.py
    │   │   ├── 3.discovery
    │   │   │   ├── 3test.py
    │   │   │   ├── clusters.py
    │   │   │   └── generatefeedvector.py
    │   │   ├── 4.searchengine
    │   │   │   ├── nn.py
    │   │   │   └── searchengine.py
    │   │   ├── 5.optimization
    │   │   │   ├── dorm.py
    │   │   │   ├── optimization.py
    │   │   │   ├── socialnetwork.py
    │   │   │   └── test.py
    │   │   └── readme.md
    │   └── ML_in_action
    │   │   ├── AdaBoost
    │   │       ├── adaboost.py
    │   │       └── test.py
    │   │   ├── Bayes
    │   │       ├── bayes.py
    │   │       └── test.py
    │   │   ├── DecisionTree
    │   │       ├── test.py
    │   │       ├── treePlotter.py
    │   │       └── trees.py
    │   │   ├── Logistic
    │   │       ├── logRegres.py
    │   │       ├── test.py
    │   │       └── testSet.txt
    │   │   ├── Regression
    │   │       ├── regression.py
    │   │       └── test.py
    │   │   ├── SVM
    │   │       ├── svm.py
    │   │       └── test.py
    │   │   ├── kMeans
    │   │       ├── kMeans.py
    │   │       └── test.py
    │   │   ├── kNN
    │   │       ├── kNN.py
    │   │       └── test.py
    │   │   └── readme.md
    ├── challenge
    │   ├── AIchallenge
    │   │   ├── AI_pic2text_pre.ipynb
    │   │   ├── AI_pic2text_pre2.ipynb
    │   │   └── AI_pic2text_pre3.ipynb
    │   ├── DataFountain
    │   │   └── writing_classify
    │   │   │   └── pre_1.ipynb
    │   ├── biendata
    │   │   └── mobike
    │   │   │   ├── example_learn_mobike.ipynb
    │   │   │   └── mobike_mine.ipynb
    │   ├── kaggle
    │   │   ├── HousePrices
    │   │   │   ├── House Prices.ipynb
    │   │   │   ├── predictions.csv
    │   │   │   ├── test.csv
    │   │   │   └── train.csv
    │   │   ├── Titanic
    │   │   │   ├── Titanic.ipynb
    │   │   │   ├── pre.csv
    │   │   │   ├── test.csv
    │   │   │   └── train.csv
    │   │   ├── readme.md
    │   │   └── searchrelevance.pdf
    │   └── tianchi
    │   │   └── shop_location
    │   │       ├── baseline1.ipynb
    │   │       ├── baseline2.ipynb
    │   │       └── shop_pre1.ipynb
    ├── courses
    │   ├── DL_AndrewNg
    │   │   ├── .DS_Store
    │   │   ├── README.md
    │   │   ├── course1
    │   │   │   ├── BuildingDNNv3.ipynb
    │   │   │   ├── DNNApplicationv3.ipynb
    │   │   │   ├── LRwithNN.ipynb
    │   │   │   └── week3NN.ipynb
    │   │   ├── course2
    │   │   │   ├── GradientChecking.ipynb
    │   │   │   ├── Initialization.ipynb
    │   │   │   ├── OptimizationMethods.ipynb
    │   │   │   ├── Regularization.ipynb
    │   │   │   └── TensorflowTutorial.ipynb
    │   │   ├── course4
    │   │   │   ├── .DS_Store
    │   │   │   ├── ArtGenerationwithNeuralStyleTransfer2.ipynb
    │   │   │   ├── AutonomousdrivingapplicationCardetectionv3.ipynb
    │   │   │   ├── ConvolutionmodelApplicationv1.ipynb
    │   │   │   ├── ConvolutionmodelStepbyStepv2.ipynb
    │   │   │   ├── FaceRecognitionfortheHappyHousev3.ipynb
    │   │   │   ├── KerasTutorialHappyHousev2.ipynb
    │   │   │   └── ResidualNetworksv2.ipynb
    │   │   └── course5
    │   │   │   ├── .DS_Store
    │   │   │   ├── BuildingaRecurrentNeuralNetworkStepbyStepv3.ipynb
    │   │   │   ├── DinosaurusIslandCharacterlevellanguagemodelfinalv3.ipynb
    │   │   │   ├── Emojifyv2.ipynb
    │   │   │   ├── ImproviseaJazzSolowithanLSTMNetworkv3.ipynb
    │   │   │   ├── Neuralmachinetranslationwithattentionv3.ipynb
    │   │   │   ├── Operationsonwordvectorsv2.ipynb
    │   │   │   ├── Triggerworddetectionv1.ipynb
    │   │   │   └── rnn_utils.py
    │   ├── coursera_ML
    │   │   ├── ex1_liner
    │   │   │   ├── computeCost.m
    │   │   │   ├── ex1.m
    │   │   │   └── gradientDescent.m
    │   │   ├── ex2_logistc
    │   │   │   ├── costFunction.m
    │   │   │   ├── costFunctionReg.m
    │   │   │   ├── ex2.m
    │   │   │   ├── ex2_reg.m
    │   │   │   ├── plotData.m
    │   │   │   ├── predict.m
    │   │   │   └── sigmoid.m
    │   │   ├── ex3_nn
    │   │   │   ├── lrCostFunction.m
    │   │   │   ├── oneVsAll.m
    │   │   │   ├── predict.m
    │   │   │   └── predictOneVsAll.m
    │   │   └── readme.md
    │   ├── cs231n
    │   │   ├── README.md
    │   │   ├── assignment1
    │   │   │   ├── .gitignore
    │   │   │   ├── .ipynb_checkpoints
    │   │   │   │   ├── features-checkpoint.ipynb
    │   │   │   │   ├── knn-checkpoint.ipynb
    │   │   │   │   ├── softmax-checkpoint.ipynb
    │   │   │   │   ├── svm-checkpoint.ipynb
    │   │   │   │   └── two_layer_net-checkpoint.ipynb
    │   │   │   ├── README.md
    │   │   │   ├── collectSubmission.sh
    │   │   │   ├── cs231n
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── classifiers
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── k_nearest_neighbor.py
    │   │   │   │   │   ├── linear_classifier.py
    │   │   │   │   │   ├── linear_svm.py
    │   │   │   │   │   ├── neural_net.py
    │   │   │   │   │   └── softmax.py
    │   │   │   │   ├── data_utils.py
    │   │   │   │   ├── datasets
    │   │   │   │   │   ├── .gitignore
    │   │   │   │   │   └── get_datasets.sh
    │   │   │   │   ├── features.py
    │   │   │   │   ├── gradient_check.py
    │   │   │   │   └── vis_utils.py
    │   │   │   ├── features.ipynb
    │   │   │   ├── frameworkpython
    │   │   │   ├── knn.ipynb
    │   │   │   ├── setup_googlecloud.sh
    │   │   │   ├── softmax.ipynb
    │   │   │   ├── start_ipython_osx.sh
    │   │   │   ├── svm.ipynb
    │   │   │   └── two_layer_net.ipynb
    │   │   └── note
    │   │   │   └── inverted_dropout.py
    │   └── qiyuezaixian
    │   │   ├── .DS_Store
    │   │   ├── README.md
    │   │   ├── course1
    │   │       ├── PythonRegEx.html
    │   │       ├── jieba.html
    │   │       └── string_operation.ipynb
    │   │   ├── course2
    │   │       ├── Ngram.html
    │   │       ├── bayes.html
    │   │       ├── bayesClassfierNews.html
    │   │       ├── bayesDetector.html
    │   │       └── stopwords_cn.txt
    │   │   ├── course3
    │   │       └── HillaryEmail.ipynb
    │   │   └── course5
    │   │       └── HMM_POS_TAG.html
    ├── frameworks
    │   ├── caffe
    │   │   ├── code
    │   │   │   ├── ssd_detection_output_layer.py
    │   │   │   └── test_ssd.py
    │   │   ├── docs
    │   │   │   └── ubuntu18_anaconda3_py27_cpu_COMPILE_CAFFE.md
    │   │   ├── project
    │   │   │   └── caffe_ssd_write_layer
    │   │   │   │   ├── 3.jpg
    │   │   │   │   ├── caffe_ssd_deploy.prototxt
    │   │   │   │   ├── caffe_ssd_deploy2.prototxt
    │   │   │   │   ├── dog_bike_car.jpg
    │   │   │   │   ├── img
    │   │   │   │       └── two_faces_300.jpg
    │   │   │   │   ├── nnie_ssd_deploy.prototxt
    │   │   │   │   ├── res.jpg
    │   │   │   │   ├── res222.jpg
    │   │   │   │   ├── test_caffe_ssd.py
    │   │   │   │   ├── test_img.jpg
    │   │   │   │   ├── test_img2.jpg
    │   │   │   │   ├── test_img3.jpg
    │   │   │   │   ├── test_ssd_concat.py
    │   │   │   │   ├── test_ssd_detection_output.py
    │   │   │   │   ├── test_ssd_priorbox.py
    │   │   │   │   ├── test_ssd_priorbox_originSSD.py
    │   │   │   │   ├── two_faces_300.jpg
    │   │   │   │   ├── yufacedetectnet-open-v1-concat.prototxt
    │   │   │   │   ├── yufacedetectnet-open-v1-detection_output.caffemodel
    │   │   │   │   ├── yufacedetectnet-open-v1-detection_output.prototxt
    │   │   │   │   ├── yufacedetectnet-open-v1-priorbox.prototxt
    │   │   │   │   ├── yufacedetectnet-open-v1.caffemodel
    │   │   │   │   ├── yufacedetectnet-open-v1.prototxt
    │   │   │   │   ├── yufacedetectnet-open-v1_my.caffemodel
    │   │   │   │   ├── yufacedetectnet-open-v1_my.prototxt
    │   │   │   │   ├── yufacedetectnet-open-v1_new.caffemodel
    │   │   │   │   └── yufacedetectnet-open-v1_new.prototxt
    │   │   └── readme.md
    │   ├── keras
    │   │   ├── .DS_Store
    │   │   ├── baseline
    │   │   │   ├── main.py
    │   │   │   ├── my_data.py
    │   │   │   └── my_model.py
    │   │   ├── data
    │   │   │   ├── 0_0.png
    │   │   │   └── 2_100.png
    │   │   ├── demo
    │   │   │   ├── .DS_Store
    │   │   │   ├── Keras_GAN.ipynb
    │   │   │   ├── RNN_classify.ipynb
    │   │   │   ├── Word_Language_Modelling_LSTM.ipynb
    │   │   │   ├── cam_heatmap.py
    │   │   │   ├── classify_focal_loss.py
    │   │   │   ├── clearData.py
    │   │   │   ├── data_aug.py
    │   │   │   ├── data_generator.py
    │   │   │   ├── fmeasure_metric.py
    │   │   │   ├── h5_customer_to_tflite.py
    │   │   │   ├── h5_to_ckpt.py
    │   │   │   ├── h5_to_pb.py
    │   │   │   ├── h5_to_tflite.py
    │   │   │   ├── keras_cifar10.ipynb
    │   │   │   ├── keras_mnist.ipynb
    │   │   │   ├── keras_net.py
    │   │   │   ├── layer_trainable.py
    │   │   │   ├── lstm_word_embedding.ipynb
    │   │   │   ├── multi_output_class_weight.py
    │   │   │   ├── pretrain.py
    │   │   │   ├── show_keras_data.py
    │   │   │   └── tflite_pre.py
    │   │   ├── keras_example.ipynb
    │   │   ├── note
    │   │   │   ├── .DS_Store
    │   │   │   └── keras_multiGPU.md
    │   │   ├── project
    │   │   │   ├── .DS_Store
    │   │   │   ├── 3D_predict.py
    │   │   │   ├── Caipiao_nn.ipynb
    │   │   │   ├── history3D.txt
    │   │   │   └── plate_color.ipynb
    │   │   └── readme.md
    │   ├── mxnet
    │   │   └── load_pre_demo.py
    │   ├── pytorch
    │   │   ├── IOU_balanced.py
    │   │   ├── IoU_loss.py
    │   │   ├── demo
    │   │   │   ├── CEloss.py
    │   │   │   ├── onnx_pre.py
    │   │   │   └── show_pth_data.py
    │   │   ├── practice
    │   │   │   ├── 60分钟入门PyTorch-0.目录.ipynb
    │   │   │   ├── 60分钟入门PyTorch-1.PyTorch是什么？.ipynb
    │   │   │   ├── 60分钟入门PyTorch-2.AUTOGRAD.ipynb
    │   │   │   ├── 60分钟入门PyTorch-3.神经网络.ipynb
    │   │   │   ├── 60分钟入门PyTorch-4.训练一个分类器.ipynb
    │   │   │   ├── 60分钟入门PyTorch-5.数据并行.ipynb
    │   │   │   ├── gan_pytorch.py
    │   │   │   ├── mnist_demo.py
    │   │   │   ├── pytorch_example.ipynb
    │   │   │   └── pytorch_lstm.ipynb
    │   │   └── readme.md
    │   └── tensorflow
    │   │   ├── .DS_Store
    │   │   ├── basic
    │   │       ├── .DS_Store
    │   │       ├── Learn_tf.ipynb
    │   │       ├── TFLiteModelMaker
    │   │       │   ├── README.md
    │   │       │   └── train.py
    │   │       ├── TensorFlowExample.ipynb
    │   │       ├── ckpt2pb.py
    │   │       ├── ckpt_pre.py
    │   │       ├── onnx_pre.py
    │   │       ├── pb2tflite.py
    │   │       ├── pruned_demo.py
    │   │       ├── read_pb.py
    │   │       ├── tf_pb_pre.py
    │   │       ├── tf_save_load.ipynb
    │   │       ├── tflite_pre.py
    │   │       └── tflite_show_middle_output.py
    │   │   ├── demo
    │   │       ├── .DS_Store
    │   │       ├── TF_logsitic.ipynb
    │   │       ├── basic_mnist_demo.py
    │   │       ├── mnist_cnn_demo.py
    │   │       └── ten_people_face_reconize
    │   │       │   ├── .DS_Store
    │   │       │   ├── main.py
    │   │       │   ├── model
    │   │       │       └── .DS_Store
    │   │       │   ├── olivettifaces.gif
    │   │       │   └── result.png
    │   │   └── readme.md
    └── tools
    │   ├── lightgbm
    │       ├── readme.md
    │       ├── simpleexample.py
    │       └── sklearnexample.py
    │   ├── scikit-learn
    │       ├── .DS_Store
    │       ├── README.md
    │       ├── choose.png
    │       ├── demo
    │       │   ├── kmeans_color.py
    │       │   └── tSNE.py
    │       ├── ex2data1.txt
    │       ├── pearsonr.ipynb
    │       ├── sklearn_LR.py
    │       └── useful.py
    │   ├── spark
    │       ├── .DS_Store
    │       ├── README.md
    │       ├── learnsparkLDA
    │       │   ├── .DS_Store
    │       │   ├── learn_sparkRDD.ipynb
    │       │   ├── spark_MLlib.ipynb
    │       │   ├── spark_pairRDD.ipynb
    │       │   ├── spark_saveload.ipynb
    │       │   └── spark_uplevel.ipynb
    │       └── start.py
    │   └── xgboost
    │       ├── readme.md
    │       ├── xgboost.ipynb
    │       └── xgboost_multi.ipynb
├── CV
    ├── codes
    │   ├── IOU.py
    │   ├── flickr_to_voc.py
    │   ├── label_smoothing.py
    │   ├── makeVOCDirs.py
    │   ├── nms.py
    │   ├── pascalVOC2csv.py
    │   ├── show_voc_box.py
    │   ├── simple_mixup.py
    │   ├── to_coco_person17.py
    │   ├── txt2xml.py
    │   └── updateTXT.py
    ├── knowledge.md
    ├── nets
    │   ├── .DS_Store
    │   ├── alexnet
    │   │   ├── .DS_Store
    │   │   ├── README.md
    │   │   ├── alexnet.jpg
    │   │   └── keras_alexnet.py
    │   ├── lenet5
    │   │   ├── .DS_Store
    │   │   ├── README.md
    │   │   ├── keras_lenet5.py
    │   │   └── lenet5.jpg
    │   └── vgg
    │   │   ├── .DS_Store
    │   │   ├── README.md
    │   │   ├── keras_vgg.py
    │   │   ├── vgg.jpg
    │   │   └── vgg16.jpg
    └── note
    │   ├── .DS_Store
    │   ├── DCNN_book_note.md
    │   ├── chineseocr-ctpn-densenet.md
    │   ├── cptn.jpg
    │   ├── ctc.jpg
    │   ├── denseblock.jpg
    │   ├── densenet.jpg
    │   ├── handwrite_ocr_note.md
    │   ├── nms.jpg
    │   ├── vgg1.jpg
    │   ├── vgg2.jpg
    │   ├── vgg3.jpg
    │   └── vgg4.jpg
├── DIY
    ├── .DS_Store
    ├── Adaboost.ipynb
    ├── CRF.ipynb
    ├── DecisionTree.ipynb
    ├── EM.ipynb
    ├── HMM.ipynb
    ├── IOU.py
    ├── LR.ipynb
    ├── NN.ipynb
    ├── NaiveBayes.ipynb
    ├── PCA.ipynb
    ├── ROC_AUC.ipynb
    ├── SVM.ipynb
    ├── Stacking.py
    ├── ex1_py_liner.ipynb
    ├── ex1data1.txt
    ├── kMeans.ipynb
    ├── kNN.ipynb
    ├── lenses.txt
    ├── perceptron.ipynb
    ├── tryStacking.ipynb
    └── yoloF1.py
├── DM
    ├── knowledge.md
    └── note
    │   ├── .DS_Store
    │   ├── FeatureEngneering.md
    │   └── img
    │       └── fe.jpg
├── NLP
    ├── .DS_Store
    ├── codes
    │   └── re.ipynb
    ├── knowledge.md
    └── tools
    │   ├── NLPIR
    │       └── Start.py
    │   ├── gensim
    │       ├── load_w2v_ch.py
    │       ├── process_wiki_data.py
    │       ├── readme.md
    │       ├── test_word2vec.ipynb
    │       └── train_word2vec_model.py
    │   ├── jieba
    │       ├── cixing.py
    │       ├── if-idf.py
    │       ├── jieba_cut.ipynb
    │       ├── jieba_cut_ngram.py
    │       ├── read_save.ipynb
    │       └── readme.md
    │   ├── lda
    │       └── lda.ipynb
    │   ├── nltk
    │       ├── func
    │       │   ├── nltk_FreqDist.ipynb
    │       │   └── tokenize_text_html.ipynb
    │       ├── practice
    │       │   ├── Sentiment_analysis.ipynb
    │       │   ├── TF-IDF.ipynb
    │       │   ├── Text_similarity.ipynb
    │       │   └── wordsNormalization.ipynb
    │       └── readme.md
    │   └── word2vec
    │       ├── .DS_Store
    │       ├── readme.md
    │       └── word2vec_start.ipynb
├── Others
    ├── .DS_Store
    └── infos
    │   └── README.md
└── README.md


/Base/books/JTZHBC/3.discovery/3test.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | import clusters
 3 | blognames,words,data = clusters.readfile('blogdata.txt')
 4 | clust = clusters.hcluster(data)
 5 | # NOTE(review): generatefeedvector.py in this directory writes 'blogdata1.txt', not 'blogdata.txt' - confirm which file is intended
 6 | # Print the cluster tree as text
 7 | #clusters.printclust(clust,labels=blognames)
 8 | 
 9 | # Render the cluster dendrogram to an image
10 | #clusters.drawdendrogram(clust,blognames,jpeg='blogclust.jpg')
11 | 
12 | # Multidimensional scaling: reduce the data to coordinates and draw them with the blog names
13 | coords = clusters.scaledown(data)
14 | clusters.draw2d(coords,blognames,jpeg='blogs2d.jpg')
15 | 


--------------------------------------------------------------------------------
/Base/books/JTZHBC/3.discovery/generatefeedvector.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | import feedparser
 3 | import re
 4 | 
 5 | # 返回一个RSS订阅源的标题和包含单词计数情况的字典
 6 | def getwordcounts(url):
 7 |   # Parse the feed 解析订阅源
 8 |   d=feedparser.parse(url)
 9 |   wc={}
10 | 
11 |   # Loop over all the entries循环遍历所有文章条目
12 |   for e in d.entries:
13 |     if 'summary' in e: summary=e.summary
14 |     else: summary=e.description
15 | 
16 |     # Extract a list of words提取一个单词列表
17 |     words=getwords(e.title+' '+summary)
18 |     for word in words:
19 |       wc.setdefault(word,0)
20 |       #setdefault() 函数和get() 方法类似, 如果键不存在于字典中，将会添加键并将值设为默认值。
21 |       wc[word]+=1
22 |   return d.feed.title,wc
23 | 
24 | def getwords(html):
25 |   # Remove all the HTML tags 去除所有HTML标记
26 |   txt=re.compile(r'<[^>]+>').sub('',html)
27 |   #re.sub(a,b,x)用作把x中的a替换为b，这里没有b，应该就是删除空格
28 |   
29 |   # Split words by all non-alpha characters利用所有非字母字符拆分出单词
30 |   words=re.compile(r'[^A-Z^a-z]+').split(txt)
31 | 
32 |   # Convert to lowercase转化小写
33 |   return [word.lower() for word in words if word!='']
34 | 
35 | 
36 | apcount={}
37 | wordcounts={}
38 | feedlist=[line for line in file('feedlist.txt')] #循环遍历订阅源
39 | #不用read直接读取txt文件
40 | for feedurl in feedlist:
41 |   try:
42 |     title,wc=getwordcounts(feedurl) #得到每篇文章的词和次数
43 |     wordcounts[title]=wc
44 |     for word,count in wc.items(): #items()方法返回字典的(键，值)元组对的列表 / 试了下，不加items会报错
45 |       apcount.setdefault(word,0)
46 |       if count>1:
47 |         apcount[word]+=1
48 |   except:
49 |     print 'Failed to parse feed %s' % feedurl
50 | 
51 | wordlist=[]
52 | for w,bc in apcount.items(): #items()方法返回字典的(键，值)元组对的列表
53 |   frac=float(bc)/len(feedlist)
54 |   if frac>0.1 and frac<0.5: #去掉出现频率太高太低的词
55 |     wordlist.append(w)
56 | 
57 | out=file('blogdata1.txt','w')
58 | out.write('Blog')
59 | for word in wordlist: out.write('\t%s' % word)
60 | out.write('\n')
61 | for blog,wc in wordcounts.items():
62 |   print blog
63 |   out.write(blog)
64 |   for word in wordlist:
65 |     if word in wc: out.write('\t%d' % wc[word])
66 |     else: out.write('\t0')
67 |   out.write('\n')
68 | 


--------------------------------------------------------------------------------
/Base/books/JTZHBC/5.optimization/dorm.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | #分配宿舍
 3 | 
 4 | import random
 5 | import math
 6 | 
 7 | # The dorms, each of which has two available spaces
 8 | dorms=['Zeus','Athena','Hercules','Bacchus','Pluto']
 9 | 
10 | # People, along with their first and second choices
11 | prefs=[('Toby', ('Bacchus', 'Hercules')),
12 |        ('Steve', ('Zeus', 'Pluto')),
13 |        ('Karen', ('Athena', 'Zeus')),
14 |        ('Sarah', ('Zeus', 'Pluto')),
15 |        ('Dave', ('Athena', 'Bacchus')), 
16 |        ('Jeff', ('Hercules', 'Pluto')), 
17 |        ('Fred', ('Pluto', 'Athena')), 
18 |        ('Suzie', ('Bacchus', 'Hercules')), 
19 |        ('Laura', ('Bacchus', 'Hercules')), 
20 |        ('James', ('Hercules', 'Athena'))]
21 | 
22 | # Search domain, one (low,high) pair per student: [(0,9),(0,8),(0,7),(0,6),...,(0,0)]
23 | domain=[(0,(len(dorms)*2)-i-1) for i in range(0,len(dorms)*2)] # upper bound drops by one per entry because each placed student removes a slot
24 | 
25 | def printsolution(vec): #把数字列表打印成选择宿舍的情况
26 |   slots=[]
27 |   # Create two slots for each dorm
28 |   for i in range(len(dorms)): slots+=[i,i]
29 | 
30 |   # Loop over each students assignment
31 |   for i in range(len(vec)):
32 |     x=int(vec[i])
33 | 
34 |     # Choose the slot from the remaining ones
35 |     dorm=dorms[slots[x]]
36 |     # Show the student and assigned dorm
37 |     print prefs[i][0],dorm
38 |     # Remove this slot
39 |     del slots[x]
40 | 
41 | def dormcost(vec):#成本函数
42 |   cost=0
43 |   # Create list a of slots
44 |   slots=[0,0,1,1,2,2,3,3,4,4]
45 | 
46 |   # Loop over each student
47 |   for i in range(len(vec)):
48 |     x=int(vec[i])
49 |     dorm=dorms[slots[x]]
50 |     pref=prefs[i][1]
51 |     # First choice costs 0, second choice costs 1
52 |     if pref[0]==dorm: cost+=0
53 |     elif pref[1]==dorm: cost+=1
54 |     else: cost+=3
55 |     # Not on the list costs 3
56 | 
57 |     # Remove selected slot
58 |     del slots[x]
59 |     
60 |   return cost
61 | 


--------------------------------------------------------------------------------
/Base/books/JTZHBC/5.optimization/socialnetwork.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | # Social network visualization
 3 | import math
 4 | # Node names; crosscount/drawnetwork read one (x,y) pair per person, in this order
 5 | people=['Charlie','Augustus','Veruca','Violet','Mike','Joe','Willy','Miranda']
 6 | # Connections between people; each pair is drawn as one line by drawnetwork
 7 | links=[('Augustus', 'Willy'), 
 8 |        ('Mike', 'Joe'), 
 9 |        ('Miranda', 'Mike'), 
10 |        ('Violet', 'Augustus'), 
11 |        ('Miranda', 'Willy'), 
12 |        ('Charlie', 'Mike'), 
13 |        ('Veruca', 'Joe'), 
14 |        ('Miranda', 'Augustus'), 
15 |        ('Willy', 'Augustus'), 
16 |        ('Joe', 'Charlie'), 
17 |        ('Veruca', 'Augustus'), 
18 |        ('Miranda', 'Joe')]
19 | 
20 | 
21 | def crosscount(v):  #计算交叉线
22 |   # Convert the number list into a dictionary of person:(x,y)
23 |   loc=dict([(people[i],(v[i*2],v[i*2+1])) for i in range(0,len(people))])
24 |   total=0
25 |   
26 |   # Loop through every pair of links
27 |   for i in range(len(links)):
28 |     for j in range(i+1,len(links)):
29 | 
30 |       # Get the locations 
31 |       (x1,y1),(x2,y2)=loc[links[i][0]],loc[links[i][1]]
32 |       (x3,y3),(x4,y4)=loc[links[j][0]],loc[links[j][1]]
33 |       
34 |       den=(y4-y3)*(x2-x1)-(x4-x3)*(y2-y1)
35 | 
36 |       # den==0 if the lines are parallel
37 |       if den==0: continue
38 | 
39 |       # Otherwise ua and ub are the fraction of the
40 |       # line where they cross
41 |       ua=((x4-x3)*(y1-y3)-(y4-y3)*(x1-x3))/den
42 |       ub=((x2-x1)*(y1-y3)-(y2-y1)*(x1-x3))/den
43 |       
44 |       # If the fraction is between 0 and 1 for both lines
45 |       # then they cross each other
46 |       if ua>0 and ua<1 and ub>0 and ub<1:
47 |         total+=1
48 |     for i in range(len(people)):
49 |       for j in range(i+1,len(people)):
50 |         # Get the locations of the two nodes
51 |         (x1,y1),(x2,y2)=loc[people[i]],loc[people[j]]
52 | 
53 |         # Find the distance between them
54 |         dist=math.sqrt(math.pow(x1-x2,2)+math.pow(y1-y2,2))
55 |         # Penalize any nodes closer than 50 pixels
56 |         if dist<50:
57 |           total+=(1.0-(dist/50.0))
58 |         
59 |   return total
60 | from PIL import Image,ImageDraw
61 | 
62 | def drawnetwork(sol):#绘制网络
63 |   # Create the image
64 |   img=Image.new('RGB',(400,400),(255,255,255))
65 |   draw=ImageDraw.Draw(img)
66 | 
67 |   # Create the position dict
68 |   pos=dict([(people[i],(sol[i*2],sol[i*2+1])) for i in range(0,len(people))])
69 | 
70 |   for (a,b) in links:
71 |     draw.line((pos[a],pos[b]),fill=(255,0,0))
72 | 
73 |   for n,p in pos.items():
74 |     draw.text(p,n,(0,0,0))
75 | 
76 |   img.show()
77 | 
78 | 
79 | domain=[(10,370)]*(len(people)*2) # one (10,370) range per x and per y coordinate; drawnetwork's canvas is 400x400
80 | 


--------------------------------------------------------------------------------
/Base/books/JTZHBC/5.optimization/test.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | 
 3 | import optimization
 4 | 
 5 | #s = [1,4,3,2,7,3,6,3,2,4,5,3]
 6 | 
 7 | #print optimization.printschedule(s)
 8 | 
 9 | #print optimization.schedulecost(s)
10 | # One (0,9) range per solution entry, two entries per person
11 | domain = [(0,9)]*len(optimization.people)*2
12 | #s = optimization.randomoptimize(domain,optimization.schedulecost)
13 | 
14 | #s = optimization.hillclimb(domain,optimization.schedulecost)
15 | 
16 | #s = optimization.annealingoptimize(domain,optimization.schedulecost)
17 | # Only this optimizer call is active; the alternatives above are commented out
18 | s = optimization.geneticoptimize(domain,optimization.schedulecost)
19 | # Show the cost of the result, then the schedule itself
20 | print optimization.schedulecost(s)
21 | print optimization.printschedule(s) # NOTE(review): also prints printschedule's return value - confirm that is intended
22 | 
23 | 
24 | '''
25 | 这两种及大多数优化方法都假设：大多数问题，最优解应该接近于其他的最优解。
26 | 但某些特殊情况不一定有效。比如存在陡峭的突变的最优解。
27 | '''
28 | 


--------------------------------------------------------------------------------
/Base/books/JTZHBC/readme.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### 第2章 提供推荐
 3 | * [影片推荐系统](2.recommendation/recommendations.py)
 4 | 
 5 | ### 第3章 发现群组
 6 | * [字词向量](3.discovery/generatefeedvector.py)
 7 | * [聚类](3.discovery/clusters.py)
 8 | 
 9 | ### 第4章 搜索与排名
10 | * [爬虫与搜索引擎](4.searchengine/searchengine.py)
11 | * [神经网络](4.searchengine/nn.py)
12 | 
13 | ### 第5章 优化
14 | * [优化](5.optimization/optimization.py)  `(爬山法，模拟退火算法，遗传算法)`
15 | * [宿舍分配问题](5.optimization/dorm.py)
16 | * [关系网络可视化](5.optimization/socialnetwork.py)
17 | 
18 | ### 第6章 文档过滤
19 | 
20 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/AdaBoost/adaboost.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | from numpy import *
 3 | 
 4 | #创建简单数据集
 5 | def loadSimpData():
 6 |     dataMat = matrix([[1.,2.1],
 7 |                     [2. ,1.1],
 8 |                     [1.3,1. ],
 9 |                     [1. ,1. ],
10 |                     [2. ,1. ]])
11 |     classLabels = [1.0,1.0,-1.0,-1.0,1.0]
12 |     return dataMat,classLabels
13 | 
14 | #####树桩（单层决策树）分类器
15 | #通过阈值比较对数据进行分类
16 | def stumpClassify(dataMatrix,dimen,threshVal,threshIneq):
17 |     #threshVal阈值    threshIneq不等的类型
18 |     retArray = ones((shape(dataMatrix)[0],1)) #先全部设成1
19 |     if threshIneq == 'lt': #gt 大于greater than/  lt小于 less than
20 |         retArray[dataMatrix[:,dimen] <= threshVal] = -1.0  #数组过滤 / 若是大于，则把第dimen维上小于阈值的设为-1
21 |     else:
22 |         retArray[dataMatrix[:,dimen] > threshVal] = -1.0   # / 若是小于，则把第dimen维上大于阈值的设为-1
23 |     return retArray
24 | 
25 | #遍历上面函数的所有可能输入值，找到数据集上最佳的单层决策树
26 | def buildStump(dataArr,classLabels,D):
27 |     #D权重向量
28 |     dataMatrix = mat(dataArr);labelMat = mat(classLabels).T
29 |     m,n = shape(dataMatrix)
30 |     numSteps = 10.0 #用于在特征的所有可能值上进行遍历
31 |     bestStump = {} # 存储给定权重向量D时所得到得最佳单层决策树的相关信息
32 |     bestClasEst = mat(zeros((m,1)))
33 |     minError = inf #先初始化为无穷大，用于寻找可能的最小错误率
34 |     for i in range(n): #在数据集的所有特征上遍历
35 |         rangeMin = dataMatrix[:,i].min();rangeMax = dataMatrix[:,i].max();  #这种极值求法应该是np中的，得到第i个特征(第i列)上的极值
36 |         stepSize = (rangeMax-rangeMin)/numSteps #得到步长
37 |         for j in range(-1,int(numSteps)+1): #在这些步上遍历
38 |             for inequal in ['lt','gt']:  #在大于小于中切换不等式   / 这两层循环其实就是遍历了所有阈值取值的情况，且每种阈值取值对应两种情况：大于它为1还是小于为1
39 |                 threshVal = (rangeMin + float(j)*stepSize)
40 |                 predictedVals = stumpClassify(dataMatrix,i,threshVal,inequal) #分类预测结果
41 |                 errArr = mat(ones((m,1)))
42 |                 errArr[predictedVals == labelMat] = 0 #数组过滤得到误差向量
43 |                 weightedError = D.T*errArr   #计算加权错误率
44 |                 print "split:dim %d,thresh %.2f,thresh ineqal: %s,the weighted error is %.3f" % (i,threshVal,inequal,weightedError)
45 |                 if weightedError < minError:  #如果误差向量变小了，则在bestStump字典中保存该单层决策树
46 |                     minError = weightedError
47 |                     bestClasEst = predictedVals.copy()
48 |                     bestStump['dim'] = i
49 |                     bestStump['thresh'] = threshVal
50 |                     bestStump['ineq'] = inequal
51 |     return bestStump,minError,bestClasEst
52 | 
53 | 
54 | #完整AdaBoost算法
55 | #基于单层决策树的AdaBoost训练过程
56 | def adaBoostTrainDS(dataArr,classLabels,numIt=40):
57 |     #numIt迭代次数
58 |     weakClassArr = []
59 |     m = shape(dataArr)[0]
60 |     D = mat(ones((m,1))/m)
61 |     aggClassEst = mat(zeros((m,1)))
62 |     for i in range(numIt):
63 |         bestStump,error,classEst = buildStump(dataArr,classLabels,D)
64 |         print "D:",D.T
65 |         alpha = float(0.5*log((1.0-error)/max(error,1e-16)))
66 |         bestStump['alpha'] = alpha
67 |         weakClassArr.append(bestStump)
68 |         print "classEst:",classEst.T
69 |         expon = multiply(-1*alpha*mat(classLabels).T,classEst) #为下一次迭代计算D
70 |         D = multiply(D,exp(expon))
71 |         D = D/D.sum()    #D包含了每个数据点的权重
72 |         aggClassEst += alpha*classEst     #错误率累加计算
73 |         print "aggClassEst: ",aggClassEst
74 |         aggErrors = multiply(sign(aggClassEst) != mat(classLabels).T,ones((m,1)))
75 |         errorRate = aggErrors.sum() / m
76 |         print "total error:",errorRate,'\n'
77 |         if errorRate == 0.0:break
78 |     return weakClassArr
79 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/AdaBoost/test.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | 
 3 | import adaboost
 4 | from numpy import *
 5 | 
 6 | datMat,classLabels=adaboost.loadSimpData()
 7 | 
 8 | #print datMat,classLabels
 9 | # Uniform weights over 5 samples; only the commented-out buildStump call below uses D
10 | D = mat(ones((5,1))/5)
11 | #print adaboost.buildStump(datMat,classLabels,D)
12 | # Train for at most 9 rounds and print the resulting list of weak classifiers
13 | classifierArray = adaboost.adaBoostTrainDS(datMat,classLabels,9)
14 | print classifierArray
15 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/Bayes/test.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | 
 3 | import bayes
 4 | 
 5 | '''
 6 | listOPosts,listClasses = bayes.loadDataSet()
 7 | myVocabList = bayes.createVocabList(listOPosts)  #构建一个包含所有词的词汇表
 8 | #print myVocabList
 9 | 
10 | #print bayes.setOfWords2Vec(myVocabList,listOPosts[0])
11 | #print bayes.setOfWords2Vec(myVocabList,listOPosts[3])
12 | 
13 | 
14 | trainMat = []
15 | for postinDoc in listOPosts:            #循环使用词向量来填充trainMat列表
16 |     trainMat.append(bayes.setOfWords2Vec(myVocabList,postinDoc))   # 把训练样本的每一项文档中的词在词汇表中出现的位置标识成1，然后把所有词向量构成一个矩阵
17 | p0V,p1V,pAb = bayes.trainNB0(trainMat,listClasses)   #概率向量
18 | 
19 | #print p0V,p1V
20 | '''
21 | # The steps in the string block above are now wrapped inside the test functions
22 | 
23 | #print bayes.testingNB()
24 | 
25 | # Spam e-mail test
26 | print bayes.spamTest()
27 | 
28 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/DecisionTree/test.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | import trees
 3 | import treePlotter
 4 | import sys
 5 | reload(sys)
 6 | sys.setdefaultencoding('utf8')
 7 | # NOTE(review): reload()/setdefaultencoding are Python 2-only; presumably here to force UTF-8 as the default encoding - confirm it is still needed
 8 | #myDat[0][-1]='maybe'
 9 | #print trees.calcShannonEnt(myDat)
10 | #print trees.chooseBestFeatureToSplit(myDat)
11 | #print trees.splitDataSet(myDat,0,0)
12 | #print trees.splitDataSet(myDat,0,1)
13 | # Data set and feature labels returned by trees.createDataSet
14 | myDat,labels = trees.createDataSet()
15 | #print myDat
16 | #print trees.createTree(myDat,labels) 
17 | 
18 | #treePlotter.createPlot()
19 | 
20 | #print treePlotter.retrieveTree(1)
21 | # retrieveTree(0): presumably a canned example tree - verify in treePlotter
22 | myTree = treePlotter.retrieveTree(0)
23 | #print treePlotter.getNumLeafs(myTree),treePlotter.getTreeDepth(myTree)
24 | 
25 | #myTree['no surfacing'][3]='maybe'
26 | #print myTree
27 | #treePlotter.createPlot(myTree)
28 | 
29 | '''
30 | print labels
31 | print myTree
32 | print trees.classify(myTree,labels,[1,0])
33 | print trees.classify(myTree,labels,[1,1])
34 | '''
35 | 
36 | #trees.storeTree(myTree,'classifierStorage.txt')
37 | #print trees.grabTree('classifierStorage.txt')
38 | 
39 | #预测隐形眼镜类型
40 | fr=open('lenses.txt')
41 | lenses = [inst.strip().split('\t') for inst in fr.readlines()]
42 | lensesLabels = ['age','prescript','astigmatic','tearRate']
43 | lensesTree = trees.createTree(lenses,lensesLabels)
44 | print lensesTree
45 | treePlotter.createPlot(lensesTree)
46 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/DecisionTree/treePlotter.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | #使用文本注解绘制树节点
 3 | 
 4 | import matplotlib.pyplot as plt
 5 | 
 6 | decisionNode = dict(boxstyle="sawtooth",fc="0.8")  # box style for decision nodes (dict built from keyword arguments)
 7 | leafNode = dict(boxstyle="round4",fc="0.8")  # box style for leaf nodes
 8 | arrow_args = dict(arrowstyle="<-")      # arrow style passed to every annotation
 9 | 
10 | # Draw nodeTxt in a box of style nodeType at centerPt with an arrow to parentPt (both in axes-fraction coordinates), on the axes stored in createPlot.axl.
11 | def plotNode(nodeTxt,centerPt,parentPt,nodeType):
12 |     createPlot.axl.annotate(nodeTxt,xy=parentPt,xycoords='axes fraction',\
13 |                             xytext=centerPt,textcoords='axes fraction',\
14 |                             va="center",ha="center",bbox=nodeType,arrowprops=arrow_args)
15 | 
16 | # Write txtString at the midpoint between a child node (cntrPt) and its parent (parentPt).
17 | def plotMidText(cntrPt,parentPt,txtString):
18 |     xMid = (parentPt[0]-cntrPt[0])/2.0 + cntrPt[0]  # halfway between the child's x and the parent's x
19 |     yMid = (parentPt[1]-cntrPt[1])/2.0 + cntrPt[1]  # halfway between the child's y and the parent's y
20 |     createPlot.axl.text(xMid,yMid,txtString)  # drawn on the axes that createPlot() stored in createPlot.axl
21 | 
22 | #
23 | def plotTree(myTree,parentPt,nodeTxt):
24 |     numLeafs = getNumLeafs(myTree)
25 |     depth = getTreeDepth(myTree)    #计算宽与高
26 |     firstStr = myTree.keys()[0]
27 |     cntrPt = (plotTree.xOff +(1.0+float(numLeafs))/2.0/plotTree.totalW,plotTree.yOff)  #全局变量plotTree.xOff/.yOff追踪已经绘制的节点位置 
28 |     plotMidText(cntrPt,parentPt,nodeTxt)   #标记子节点属性值
29 |     plotNode(firstStr,cntrPt,parentPt,decisionNode)
30 |     secondDict = myTree[firstStr]
31 |     plotTree.yOff = plotTree.yOff - 1.0/plotTree.totalD   #减少y偏移
32 |     for key in secondDict.keys():
33 |         if type(secondDict[key]).__name__=='dict':
34 |             plotTree(secondDict[key],cntrPt,str(key))
35 |         else:
36 |             plotTree.xOff = plotTree.xOff +1.0/plotTree.totalW
37 |             plotNode(secondDict[key],(plotTree.xOff,plotTree.yOff),cntrPt,leafNode)
38 |             plotMidText((plotTree.xOff,plotTree.yOff),cntrPt,str(key))
39 |     plotTree.yOff = plotTree.yOff + 1.0/plotTree.totalD
40 | 
41 | # Plot inTree with matplotlib: clear figure 1, size the layout from the tree, draw it and show the window.
42 | def createPlot(inTree):
43 |     fig = plt.figure(1,facecolor='white')
44 |     fig.clf()
45 |     axprops = dict(xticks=[],yticks=[])
46 |     createPlot.axl = plt.subplot(111,frameon=False,**axprops)
47 |     plotTree.totalW = float(getNumLeafs(inTree))
48 |     plotTree.totalD = float(getTreeDepth(inTree))
49 |     plotTree.xOff = -0.5/plotTree.totalW; plotTree.yOff = 1.0;  # start the cursor half a leaf slot left of x=0, at the top (y=1.0)
50 |     plotTree(inTree,(0.5,1.0),'')
51 |     plt.show()
52 | 
53 | #获取叶节点的数目
54 | def getNumLeafs(myTree):
55 |     numLeafs = 0
56 |     firstStr = myTree.keys()[0]  #dict.keys()返回字典中所有关键字组成的list
57 |     secondDict = myTree[firstStr] #因为子树下都是从0,1开始，所以这里可以当做关键字 参看49行
58 |     for key in secondDict.keys():
59 |         if type(secondDict[key]).__name__=='dict':    #测试节点数据类型是否为字典,可通过__name__访问
60 |             numLeafs += getNumLeafs(secondDict[key])  #递归
61 |         else: numLeafs+=1
62 |     return numLeafs
63 | 
64 | #获取树的层数
65 | def getTreeDepth(myTree):
66 |     maxDepth = 0
67 |     firstStr = myTree.keys()[0]   
68 |     secondDict = myTree[firstStr] 
69 |     for key in secondDict.keys():
70 |         if type(secondDict[key]).__name__=='dict':
71 |             thisDepth = 1 + getTreeDepth(secondDict[key])
72 |         else:thisDepth = 1
73 |         if thisDepth > maxDepth:maxDepth = thisDepth
74 |     return maxDepth
75 | 
76 | #节省时间，输出预先存储的树信息/ 主要用于测试
77 | def retrieveTree(i):
78 |     listOfTrees = [{'no surfacing':{0:'no',1:{'flippers':{0:'no',1:'yes'}}}},
79 |                     {'no surfacing':{0:'no',1:{'flippers':{0:{'head':{0:'no',1:'yes'}},1:'no'}}}}
80 |                     ]
81 |     return listOfTrees[i]
82 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/Logistic/test.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | import logRegres
 3 | from numpy import *
 4 | 
 5 | 
 6 | dataArr,labelMat=logRegres.loadDataSet()
 7 | #print logRegres.gradAscent(dataArr,labelMat)
 8 | 
 9 | #weights = logRegres.gradAscent(dataArr,labelMat)
10 | #print weights,weights.getA()
11 | #logRegres.plotBestFit(weights.getA())  # getA() returns the matrix as an n-dimensional array object;
12 |                             # otherwise weights[1] inside plotBestFit would be [ 0.48007329] instead of a plain number
13 | 
14 | weights = logRegres.stocGradAscent1(array(dataArr),labelMat)
15 | logRegres.plotBestFit(weights)
16 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/Logistic/testSet.txt:
--------------------------------------------------------------------------------
  1 | -0.017612	14.053064	0
  2 | -1.395634	4.662541	1
  3 | -0.752157	6.538620	0
  4 | -1.322371	7.152853	0
  5 | 0.423363	11.054677	0
  6 | 0.406704	7.067335	1
  7 | 0.667394	12.741452	0
  8 | -2.460150	6.866805	1
  9 | 0.569411	9.548755	0
 10 | -0.026632	10.427743	0
 11 | 0.850433	6.920334	1
 12 | 1.347183	13.175500	0
 13 | 1.176813	3.167020	1
 14 | -1.781871	9.097953	0
 15 | -0.566606	5.749003	1
 16 | 0.931635	1.589505	1
 17 | -0.024205	6.151823	1
 18 | -0.036453	2.690988	1
 19 | -0.196949	0.444165	1
 20 | 1.014459	5.754399	1
 21 | 1.985298	3.230619	1
 22 | -1.693453	-0.557540	1
 23 | -0.576525	11.778922	0
 24 | -0.346811	-1.678730	1
 25 | -2.124484	2.672471	1
 26 | 1.217916	9.597015	0
 27 | -0.733928	9.098687	0
 28 | -3.642001	-1.618087	1
 29 | 0.315985	3.523953	1
 30 | 1.416614	9.619232	0
 31 | -0.386323	3.989286	1
 32 | 0.556921	8.294984	1
 33 | 1.224863	11.587360	0
 34 | -1.347803	-2.406051	1
 35 | 1.196604	4.951851	1
 36 | 0.275221	9.543647	0
 37 | 0.470575	9.332488	0
 38 | -1.889567	9.542662	0
 39 | -1.527893	12.150579	0
 40 | -1.185247	11.309318	0
 41 | -0.445678	3.297303	1
 42 | 1.042222	6.105155	1
 43 | -0.618787	10.320986	0
 44 | 1.152083	0.548467	1
 45 | 0.828534	2.676045	1
 46 | -1.237728	10.549033	0
 47 | -0.683565	-2.166125	1
 48 | 0.229456	5.921938	1
 49 | -0.959885	11.555336	0
 50 | 0.492911	10.993324	0
 51 | 0.184992	8.721488	0
 52 | -0.355715	10.325976	0
 53 | -0.397822	8.058397	0
 54 | 0.824839	13.730343	0
 55 | 1.507278	5.027866	1
 56 | 0.099671	6.835839	1
 57 | -0.344008	10.717485	0
 58 | 1.785928	7.718645	1
 59 | -0.918801	11.560217	0
 60 | -0.364009	4.747300	1
 61 | -0.841722	4.119083	1
 62 | 0.490426	1.960539	1
 63 | -0.007194	9.075792	0
 64 | 0.356107	12.447863	0
 65 | 0.342578	12.281162	0
 66 | -0.810823	-1.466018	1
 67 | 2.530777	6.476801	1
 68 | 1.296683	11.607559	0
 69 | 0.475487	12.040035	0
 70 | -0.783277	11.009725	0
 71 | 0.074798	11.023650	0
 72 | -1.337472	0.468339	1
 73 | -0.102781	13.763651	0
 74 | -0.147324	2.874846	1
 75 | 0.518389	9.887035	0
 76 | 1.015399	7.571882	0
 77 | -1.658086	-0.027255	1
 78 | 1.319944	2.171228	1
 79 | 2.056216	5.019981	1
 80 | -0.851633	4.375691	1
 81 | -1.510047	6.061992	0
 82 | -1.076637	-3.181888	1
 83 | 1.821096	10.283990	0
 84 | 3.010150	8.401766	1
 85 | -1.099458	1.688274	1
 86 | -0.834872	-1.733869	1
 87 | -0.846637	3.849075	1
 88 | 1.400102	12.628781	0
 89 | 1.752842	5.468166	1
 90 | 0.078557	0.059736	1
 91 | 0.089392	-0.715300	1
 92 | 1.825662	12.693808	0
 93 | 0.197445	9.744638	0
 94 | 0.126117	0.922311	1
 95 | -0.679797	1.220530	1
 96 | 0.677983	2.556666	1
 97 | 0.761349	10.693862	0
 98 | -2.168791	0.143632	1
 99 | 1.388610	9.341997	0
100 | 0.317029	14.739025	0
101 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/Regression/regression.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | from numpy import *
 3 | 
 4 | def loadDataSet(fileName):
 5 |     numFeat = len(open(fileName).readline().split('\t')) - 1
 6 |     dataMat = []; labelMat = []
 7 |     fr = open(fileName)
 8 |     for line in fr.readlines():
 9 |         lineArr = []
10 |         curLine = line.strip().split('\t')
11 |         for i in range(numFeat):
12 |             lineArr.append(float(curLine[i]))
13 |         dataMat.append(lineArr)
14 |         labelMat.append(float(curLine[-1]))
15 |     return dataMat,labelMat
16 | 
17 | # Ordinary least squares: compute the weights of the best-fit line; prints a message and returns None when xTx is singular
18 | def standRegres(xArr,yArr):
19 |     xMat = mat(xArr);yMat =mat(yArr).T
20 |     xTx = xMat.T*xMat
21 |     if linalg.det(xTx) == 0.0:  # numpy's linalg.det(x) computes the determinant; zero means xTx has no inverse
22 |         print 'this matrix is singular,cannot do inverse'
23 |         return
24 |     ws = xTx.I * (xMat.T*yMat) # .T is the transpose and .I the inverse: ws = (X^T X)^-1 X^T y (the formula on p.138 of the book)
25 |     return ws # estimated weight vector
26 | 
27 | 
28 | ##### Locally weighted linear regression: for one query point testPoint, compute the predicted value yHat
29 | def lwlr(testPoint,xArr,yArr,k=1.0):
30 |     xMat = mat(xArr); yMat = mat(yArr).T
31 |     m = shape(xMat)[0]
32 |     weights = mat(eye((m)))  # numpy.eye() builds an m x m identity; its diagonal is overwritten with per-sample weights below
33 |     for j in range(m):
34 |         diffMat = testPoint - xMat[j,:]
35 |         weights[j,j] = exp(diffMat*diffMat.T/(-2.0*k**2))  # weight decays exponentially with squared distance; k controls how fast
36 |     xTx = xMat.T * (weights * xMat)
37 |     if linalg.det(xTx) == 0.0:  # a zero determinant means xTx cannot be inverted
38 |         print 'this matrix is singular, cannot do inverse'
39 |         return
40 |     ws = xTx.I * (xMat.T * (weights * yMat))   # the book's weighted formula: an estimate of the regression weights ws
41 |     return testPoint *ws
42 | 
43 | def lwlrTest(testArr,xArr,yArr,k=1.0):   #用于为数据集中每个点调用lwlr()，有助于求解k的大小
44 |     m = shape(testArr)[0]
45 |     yHat = zeros(m)
46 |     for i in range(m):
47 |         yHat[i] = lwlr(testArr[i],xArr,yArr,k)
48 |     return yHat
49 | 
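50 | # Shrinkage: ridge regression adds kI to X^T*X (I is the identity matrix) so that the matrix becomes non-singular and can be inverted; as a shrinkage method it effectively limits the size of the regression coefficients.
51 | # Lasso: also constrains the regression coefficients but is hard to solve; forward stagewise linear regression (a greedy algorithm that reduces the error as much as possible at every step) is computationally simple and gives an approximate result.
52 | 
53 | # Trading off variance against error (bias) can show which features are key and which are unimportant.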
54 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/Regression/test.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | from numpy import *
 3 | import regression
 4 | 
 5 | xArr,yArr=regression.loadDataSet('ex0.txt')
 6 | #print xArr[0:2]
 7 | 
 8 | 
 9 | ws = regression.standRegres(xArr,yArr)
10 | print ws
11 | 
12 | # Use the fitted ws to compute the predicted values yHat
13 | xMat = mat(xArr)
14 | yMat = mat(yArr)
15 | yHat = xMat*ws
16 | 
17 | # Plot the data set as a scatter plot together with the best-fit line
18 | import matplotlib.pyplot as plt
19 | fig = plt.figure()
20 | ax = fig.add_subplot(111)
21 | ax.scatter(xMat[:,1].flatten().A[0],yMat.T[:,0].flatten().A[0])
22 | 
23 | # Drawing the best-fit line requires the yHat values
24 | # If the points along the line are out of order the plot comes out wrong, so sort them in ascending order first
25 | xCopy = xMat.copy()
26 | xCopy.sort(0)  # sorts the numpy matrix in place along axis 0
27 | yHat = xCopy*ws
28 | ax.plot(xCopy[:,1],yHat)
29 | plt.show()
30 | 
31 | # Estimate a single point
32 | print yArr[0]
33 | print regression.lwlr(xArr[0],xArr,yArr,1.0)
34 | print  regression.lwlr(xArr[0],xArr,yArr,0.001)
35 | 
36 | # Estimate every point in the data set
37 | yHat = regression.lwlrTest(xArr,xArr,yArr,0.003)
38 | srtInd = xMat[:,1].argsort(0)
39 | xSort = xMat[srtInd][:,0,:]
40 | fig = plt.figure()
41 | ax = fig.add_subplot(111)
42 | ax.plot(xSort[:,1],yHat[srtInd])
43 | ax.scatter(xMat[:,1].flatten().A[0],mat(yArr).T.flatten().A[0], s=2,c='red')
44 | plt.show()
45 | 
46 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/SVM/test.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | 
 3 | import svm
 4 | 
 5 | dataArr,labelArr = svm.loadDataSet('testSet.txt')
 6 | 
 7 | #print labelArr
 8 | 
 9 | b,alphas = svm.smoSimple(dataArr,labelArr,0.6,0.001,40)  # NOTE(review): 0.6, 0.001, 40 look like C, tolerance and max iterations — confirm against svm.smoSimple
10 | 
11 | print b
12 | print alphas[alphas>0]  # show only the alphas greater than zero
13 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/kMeans/test.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | 
 3 | import kMeans
 4 | from numpy import *
 5 | 
 6 | datMat=mat(kMeans.loadDataSet('testSet.txt'))  # wrap the loaded data in a numpy matrix
 7 | '''
 8 | print datMat[:,0]
 9 | print min(datMat[:,0])
10 | print min(datMat[:,1])
11 | print max(datMat[:,0])
12 | print max(datMat[:,1])
13 | 
14 | print kMeans.randCent(datMat,2)
15 | print kMeans.distEclud(datMat[0],datMat[1])
16 | '''
17 | 
18 | #myCentroids,clusterAssing = kMeans.kMeans(datMat,4)
19 | 
20 | datMat3 = mat(kMeans.loadDataSet('testSet2.txt'))
21 | centList,myNewAssments = kMeans.biKmeans(datMat3,3)  # NOTE(review): presumably bisecting k-means with k=3 — confirm in kMeans.biKmeans
22 | 
23 | print centList
24 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/kNN/test.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | 
 3 | import kNN 
 4 | import matplotlib
 5 | import matplotlib.pyplot as plt
 6 | from numpy import *
 7 | 
 8 | #group,labels = kNN.createDataSet()
 9 | 
10 | #kNN.classify0([0,0], group,labels,3) 
11 | 
12 | datingDataMat,datingLabels = kNN.file2matrix('datingTestSet2.txt')
13 | """
14 | fig = plt.figure()   #表示绘制一个图
15 | ax = fig.add_subplot(111) #将画布分割成1行1列，图像画在从左到右从上到下的第1块
16 | ax.scatter(datingDataMat[:,1],datingDataMat[:,2],15.0*array(datingLabels),15.0*array(datingLabels)) #scatter生成散点图函数。使用datingDataMat矩阵的第二、三列数据
17 |                                                     #datingDataMat[:,1]意思是所有行的第2列(从0开始)
18 |                                                     #后面第一个数字参数对应左边两种颜色的点的半径大小，第二个数字试了下没什么变化
19 | plt.show()
20 | """
21 | 
22 | normMat,ranges,minVals = kNN.autoNorm(datingDataMat)  # NOTE(review): presumably rescales each feature column and also returns the ranges and minimums used — confirm in kNN.autoNorm
23 | 
24 | #print kNN.datingClassTest()
25 | 
26 | 
27 | #print kNN.classifyPerson()
28 | 
29 | #testVector = kNN.img2vector('testDigits/0_13.txt')
30 | #print testVector[0,32:63]
31 | 
32 | print kNN.handwritingClassTest()  # run the handwriting test and print whatever it returns
33 | 


--------------------------------------------------------------------------------
/Base/books/ML_in_action/readme.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## 机器学习实战
 3 | 
 4 | 记录随书敲的代码，同时自己添加了详细的注释。
 5 | 
 6 | ### 第一部分 分类
 7 | 
 8 | * [k-近邻算法/kNN](./kNN)
 9 | * [决策树](./DecisionTree)
10 | * [朴素贝叶斯](./Bayes)
11 | * [Logistic回归](./Logistic)
12 | * [支持向量机SVM](./SVM)
13 | * [AdaBoost元算法](./AdaBoost)
14 | 
15 | ### 第二部分 回归
16 | 
17 | 
18 | 
19 | ### 第三部分 无监督学习
20 | 
21 | * [K-均值聚类算法/kMeans](./kMeans)
22 | 


--------------------------------------------------------------------------------
/Base/challenge/DataFountain/writing_classify/pre_1.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import numpy as np\n"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "code",
14 |    "execution_count": 2,
15 |    "metadata": {},
16 |    "outputs": [],
17 |    "source": [
18 |     "# 读取数据"
19 |    ]
20 |   },
21 |   {
22 |    "cell_type": "code",
23 |    "execution_count": null,
24 |    "metadata": {},
25 |    "outputs": [],
26 |    "source": []
27 |   }
28 |  ],
29 |  "metadata": {
30 |   "kernelspec": {
31 |    "display_name": "Python 3",
32 |    "language": "python",
33 |    "name": "python3"
34 |   },
35 |   "language_info": {
36 |    "codemirror_mode": {
37 |     "name": "ipython",
38 |     "version": 3
39 |    },
40 |    "file_extension": ".py",
41 |    "mimetype": "text/x-python",
42 |    "name": "python",
43 |    "nbconvert_exporter": "python",
44 |    "pygments_lexer": "ipython3",
45 |    "version": "3.5.4"
46 |   }
47 |  },
48 |  "nbformat": 4,
49 |  "nbformat_minor": 2
50 | }
51 | 


--------------------------------------------------------------------------------
/Base/challenge/kaggle/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | ### 目录
3 | * [房价](./HousePrices)
4 | * [泰坦尼克](./Titanic)
5 | * [五金网站关键词搜索](./searchrelevance.pdf)


--------------------------------------------------------------------------------
/Base/challenge/kaggle/searchrelevance.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/challenge/kaggle/searchrelevance.pdf


--------------------------------------------------------------------------------
/Base/courses/DL_AndrewNg/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/courses/DL_AndrewNg/.DS_Store


--------------------------------------------------------------------------------
/Base/courses/DL_AndrewNg/README.md:
--------------------------------------------------------------------------------
 1 | ## Deep Learning Specialization
 2 | 
 3 | ### Course 1:Neural Networks and Deep Learning
 4 | * [Logistic Regression with a Neural Network mindset](./course1/LRwithNN.ipynb)
 5 | * [Planar data classification with a hidden layer](./course1/week3NN.ipynb)
 6 | * [Building your Deep Neural Network: Step by Step](./course1/BuildingDNNv3.ipynb)
 7 | * [Deep Neural Network for Image Classification: Application](./course1/DNNApplicationv3.ipynb)
 8 | 
 9 | ### Course 2:Improving Deep Neural Networks: Hyperparameter tuning, Regularization and Optimization
10 | * [Initialization](./course2/Initialization.ipynb)
11 | * [Regularization](./course2/Regularization.ipynb)
12 | * [Gradient Checking](./course2/GradientChecking.ipynb)
13 | * [Optimization](./course2/OptimizationMethods.ipynb)
14 | * [Tensorflow Tutorial](./course2/TensorflowTutorial.ipynb)
15 | 
16 | ### Course 3:Structuring Machine Learning Projects
17 | * No homework
18 | 
19 | ### Course 4:Convolutional Neural Networks
20 | * [Convolutional Model: step by step](./course4/ConvolutionmodelStepbyStepv2.ipynb)
21 | * [Convolutional Model: application (tensorflow)](./course4/ConvolutionmodelApplicationv1.ipynb)
22 | * [Keras Tutorial - The Happy House](./course4/KerasTutorialHappyHousev2.ipynb)
23 | * [Residual Networks](./course4/ResidualNetworksv2.ipynb)
24 | * [Car detection with YOLOv2](./course4/AutonomousdrivingapplicationCardetectionv3.ipynb)
25 | * [Art generation with Neural Style Transfer](./course4/ArtGenerationwithNeuralStyleTransfer2.ipynb)
26 | * [Face Recognition for the Happy House](./course4/FaceRecognitionfortheHappyHousev3.ipynb)
27 | 
28 | ### Course 5:Sequence Models
29 | * [rnn_utils](./course5/rnn_utils.py)
30 | * [Building a recurrent neural network - step by step](./course5/BuildingaRecurrentNeuralNetworkStepbyStepv3.ipynb)
31 | * [Dinosaur Island - Character-Level Language Modeling](./course5/DinosaurusIslandCharacterlevellanguagemodelfinalv3.ipynb)
32 | * [Jazz improvisation with LSTM](./course5/ImproviseaJazzSolowithanLSTMNetworkv3.ipynb)
33 | * [Operations on word vectors - Debiasing](./course5/Operationsonwordvectorsv2.ipynb)
34 | * [Emojify](./course5/Emojifyv2.ipynb)
35 | * [Neural Machine Translation with Attention](./course5/Neuralmachinetranslationwithattentionv3.ipynb)
36 | * [Trigger word detection](./course5/Triggerworddetectionv1.ipynb)
37 | 
38 | 
39 | 其它笔记资源：https://github.com/fengdu78/deeplearning_ai_books
40 | 


--------------------------------------------------------------------------------
/Base/courses/DL_AndrewNg/course4/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/courses/DL_AndrewNg/course4/.DS_Store


--------------------------------------------------------------------------------
/Base/courses/DL_AndrewNg/course5/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/courses/DL_AndrewNg/course5/.DS_Store


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex1_liner/computeCost.m:
--------------------------------------------------------------------------------
 1 | function J = computeCost(X, y, theta)
 2 | %COMPUTECOST Compute cost for linear regression
 3 | %   J = COMPUTECOST(X, y, theta) computes the cost of using theta as the
 4 | %   parameter for linear regression to fit the data points in X and y:
 5 | %   J = 1/(2m) * sum((X*theta - y).^2), with m = length(y).
 6 | 
 7 | % Initialize some useful values
 8 | m = length(y); % number of training examples
 9 | 
10 | % ====================== YOUR CODE HERE ======================
11 | % Instructions: Compute the cost of a particular choice of theta
12 | %               You should set J to the cost.
13 | 
14 | % Disabled experiment (mean normalization of X(:,2) and y), kept for reference:
15 | %X(:,2) = (X(:,2)-mean(X(:,2)))/(max(X(:,2))-min(X(:,2)))
16 | %y = (y-mean(y)/((max(y)-min(y))
17 | 
18 | % The trailing semicolon matters: gradientDescent calls this function once
19 | % per iteration, and without it J is echoed to the console every time.
20 | J = (1/(2*m))*sum((X*theta-y).^2);
21 | 
22 | % =========================================================================
23 | 
24 | end
25 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex1_liner/gradientDescent.m:
--------------------------------------------------------------------------------
 1 | function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters)
 2 | %GRADIENTDESCENT Performs gradient descent to learn theta
 3 | %   theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by
 4 | %   taking num_iters gradient steps with learning rate alpha.
 5 | %   J_history(k) is the cost returned by computeCost after step k.
 6 | 
 7 | % Initialize some useful values
 8 | m = length(y); % number of training examples
 9 | J_history = zeros(num_iters, 1);
10 | 
11 | for iter = 1:num_iters
12 | 
13 |     % ====================== YOUR CODE HERE ======================
14 |     % Instructions: Perform a single gradient step on the parameter vector
15 |     %               theta.
16 |     %
17 |     % Hint: While debugging, it can be useful to print out the values
18 |     %       of the cost function (computeCost) and gradient here.
19 |     %
20 | 
21 |     % Vectorized, simultaneous update of every parameter: X'*err holds
22 |     % sum_i err(i)*X(i,j) for each column j, so the step works for any
23 |     % number of features instead of exactly two. When X(:,1) is all ones
24 |     % the first component equals the previous sum(X*theta-y) form.
25 |     % The semicolons keep theta from being echoed on every iteration.
26 |     err = X*theta - y;
27 |     theta = theta - (alpha/m)*(X'*err);
28 | 
29 |     % Equivalent element-by-element version, kept for reference:
30 | %    n = length(theta);
31 | %    theta1  = theta; 
32 | %    for i = 1:n
33 | %      S  =  0;
34 | %      for j = 1:m
35 | %        S  =  S + (X(j,:)*theta-y(j)).*X(j,i);
36 | %      end
37 | %      S = S*alpha/m;
38 | %      theta1(i) = theta(i) - S;
39 | %    end
40 | %     theta = theta1;
41 | 
42 |     % ============================================================
43 | 
44 |     % Save the cost J in every iteration
45 |     J_history(iter) = computeCost(X, y, theta);
46 | 
47 | end
48 | 
49 | end
50 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex2_logistc/costFunction.m:
--------------------------------------------------------------------------------
 1 | function [J, grad] = costFunction(theta, X, y)
 2 | %COSTFUNCTION Compute cost and gradient for logistic regression
 3 | %   J = COSTFUNCTION(theta, X, y) computes the cost of using theta as the
 4 | %   parameter for logistic regression and the gradient of the cost
 5 | %   w.r.t. to the parameters.
 6 | 
 7 | % Initialize some useful values
 8 | m = length(y); % number of training examples
 9 | 
10 | % ====================== YOUR CODE HERE ======================
11 | % Instructions: Compute the cost of a particular choice of theta.
12 | %               You should set J to the cost.
13 | %               Compute the partial derivatives and set grad to the partial
14 | %               derivatives of the cost w.r.t. each parameter in theta
15 | %
16 | % Note: grad should have the same dimensions as theta
17 | %
18 | 
19 | % Evaluate the hypothesis once. Note that sigmoid takes X*theta, not theta.
20 | h = sigmoid(X*theta);
21 | 
22 | % Cross-entropy cost; the semicolon keeps J from being echoed on each call.
23 | J = -1/m*(y'*log(h)+(1-y)'*log(1-h));
24 | 
25 | % Vectorized gradient: row j is (1/m) * sum_i (h(i)-y(i)) * X(i,j).
26 | % This replaces a per-parameter loop that had no `end` of its own, so the
27 | % function's closing `end` was being consumed by the loop.
28 | grad = (X'*(h-y))/m;
29 | 
30 | % =============================================================
31 | 
32 | end
33 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex2_logistc/costFunctionReg.m:
--------------------------------------------------------------------------------
 1 | function [J, grad] = costFunctionReg(theta, X, y, lambda)
 2 | %COSTFUNCTIONREG Compute cost and gradient for logistic regression with regularization
 3 | %   J = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of using
 4 | %   theta as the parameter for regularized logistic regression and the
 5 | %   gradient of the cost w.r.t. to the parameters.
 6 | 
 7 | % Initialize some useful values
 8 | m = length(y); % number of training examples
 9 | 
10 | % ====================== YOUR CODE HERE ======================
11 | % Instructions: Compute the cost of a particular choice of theta.
12 | %               You should set J to the cost.
13 | %               Compute the partial derivatives and set grad to the partial
14 | %               derivatives of the cost w.r.t. each parameter in theta
15 | 
16 | % Vectorized implementation: the hypothesis is evaluated once instead of
17 | % once per (example, parameter) pair, the loop bound no longer relies on
18 | % the vector returned by size(theta), and no local variable named `sum`
19 | % shadows the built-in function.
20 | h = sigmoid(X*theta);
21 | 
22 | % theta(1) is not regularized, so zero it in the copy used for the
23 | % penalty terms.
24 | reg = theta;
25 | reg(1) = 0;
26 | 
27 | % Cross-entropy cost plus the penalty lambda/(2m) * sum(theta(2:end).^2)
28 | J = (-y'*log(h) - (1-y)'*log(1-h) + 0.5*lambda*(reg'*reg))/m;
29 | 
30 | % Gradient: X'*(h-y)/m, plus (lambda/m)*theta(j) for every j >= 2
31 | grad = (X'*(h-y) + lambda*reg)/m;
32 | 
33 | % =============================================================
34 | 
35 | end
36 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex2_logistc/plotData.m:
--------------------------------------------------------------------------------
 1 | function plotData(X, y)
 2 | %PLOTDATA Plots the data points X and y into a new figure 
 3 | %   PLOTDATA(x,y) plots the data points with + for the positive examples
 4 | %   and o for the negative examples. X is assumed to be a Mx2 matrix.
 5 | 
 6 | % Open a fresh figure and keep both series on the same axes
 7 | figure;
 8 | hold on;
 9 | 
10 | % ====================== YOUR CODE HERE ======================
11 | % Instructions: Plot the positive and negative examples on a
12 | %               2D plot, using the option 'k+' for the positive
13 | %               examples and 'ko' for the negative examples.
14 | %
15 | 
16 | % Logical masks selecting the rows labelled 1 and the rows labelled 0
17 | isPos = (y == 1);
18 | isNeg = (y == 0);
19 | 
20 | % Positives as black plus signs, negatives as yellow-filled black circles
21 | plot(X(isPos, 1), X(isPos, 2), 'k+', 'LineWidth', 2, 'MarkerSize', 7);
22 | plot(X(isNeg, 1), X(isNeg, 2), 'ko', 'MarkerFaceColor', 'y', 'MarkerSize', 7);
23 | 
24 | % =========================================================================
25 | 
26 | hold off;
27 | 
28 | end
29 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex2_logistc/predict.m:
--------------------------------------------------------------------------------
 1 | function p = predict(theta, X)
 2 | %PREDICT Predict whether the label is 0 or 1 using learned logistic 
 3 | %regression parameters theta
 4 | %   p = PREDICT(theta, X) computes the predictions for X using a 
 5 | %   threshold at 0.5 (i.e., if sigmoid(theta'*x) >= 0.5, predict 1)
 6 | 
 7 | % ====================== YOUR CODE HERE ======================
 8 | % Instructions: Complete the following code to make predictions using
 9 | %               your learned logistic regression parameters. 
10 | %               You should set p to a vector of 0's and 1's
11 | %
12 | 
13 | % sigmoid(X*theta) is a column vector with one probability per row of X.
14 | % Threshold it in a single vectorized step: the previous loop indexed the
15 | % call result directly, sigmoid(X*theta)(i), which only Octave accepts and
16 | % which recomputed the whole vector for every row.
17 | % double() keeps p numeric (0/1) rather than logical.
18 | p = double(sigmoid(X*theta) >= 0.5);
19 | 
20 | % =========================================================================
21 | 
22 | end
23 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex2_logistc/sigmoid.m:
--------------------------------------------------------------------------------
 1 | function g = sigmoid(z)
 2 | %SIGMOID Compute sigmoid function
 3 | %   g = SIGMOID(z) computes the sigmoid 1/(1+exp(-z)) of every element of
 4 | %   z, which may be a scalar, vector or matrix. g has the same size as z.
 5 | 
 6 | % ====================== YOUR CODE HERE ======================
 7 | % Instructions: Compute the sigmoid of each value of z (z can be a matrix,
 8 | %               vector or scalar).
 9 | 
10 | % An earlier attempt looped over 1:size(z). The test input is a row vector
11 | % rather than a column vector, so only the first element came out right;
12 | % the computation has to cover the whole array. The element-wise ./
13 | % operator does that without explicit loops.
14 | g = 1 ./ (1 + exp(-z));
15 | 
16 | % =============================================================
17 | 
18 | end
19 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex3_nn/lrCostFunction.m:
--------------------------------------------------------------------------------
 1 | function [J, grad] = lrCostFunction(theta, X, y, lambda)
 2 | %LRCOSTFUNCTION Compute cost and gradient for logistic regression with 
 3 | %regularization
 4 | %   J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using
 5 | %   theta as the parameter for regularized logistic regression and the
 6 | %   gradient of the cost w.r.t. to the parameters. 
 7 | 
 8 | % Initialize some useful values
 9 | m = length(y); % number of training examples
10 | 
11 | % ====================== YOUR CODE HERE ======================
12 | % Instructions: Compute the cost of a particular choice of theta.
13 | %               You should set J to the cost.
14 | %               Compute the partial derivatives and set grad to the partial
15 | %               derivatives of the cost w.r.t. each parameter in theta
16 | %
17 | % Hint: The computation of the cost function and gradients can be
18 | %       efficiently vectorized. For example, consider the computation
19 | %
20 | %           sigmoid(X * theta)
21 | %
22 | %       Each row of the resulting matrix will contain the value of the
23 | %       prediction for that example. You can make use of this to vectorize
24 | %       the cost function and gradient computations. 
25 | %
26 | % Hint: When computing the gradient of the regularized cost function, 
27 | %       there're many possible vectorized solutions, but one solution
28 | %       looks like:
29 | %           grad = (unregularized gradient for logistic regression)
30 | %           temp = theta; 
31 | %           temp(1) = 0;   % because we don't add anything for j = 0  
32 | %           grad = grad + YOUR_CODE_HERE (using the temp variable)
33 | %
34 | 
35 | % Reference answer, kept for comparison. It uses element-wise products and
36 | % sum() for the cost, whereas the code below uses matrix products directly.
37 | %temp = - y.*log(h_thetax) -(ones(m,1)-y).*log(1-h_thetax);
38 | %J = sum(temp)/m;
39 | %temp = theta;
40 | %temp(1) = 0;
41 | %J = J + 0.5*lambda*(temp'*temp)/m;
42 | %
43 | %grad = X'*(h_thetax -y)/m;
44 | %temp = theta;
45 | %temp(1) = 0;
46 | %temp = temp*lambda/m;
47 | %grad = grad + temp; 
48 | 
49 | % One prediction per example
50 | h_thetax = sigmoid(X*theta);
51 | 
52 | % Cross-entropy cost plus lambda/(2m) * (theta'*theta - theta(1)^2), i.e. the
53 | % penalty excludes theta(1). The semicolon keeps J from being echoed on
54 | % every call.
55 | J = -(y'*log(h_thetax)+(ones(size(y))-y)'*log(1-h_thetax))/m+lambda*(theta'*theta-theta(1)^2)*0.5/m;
56 | 
57 | % Unregularized gradient (output suppressed as well), then add
58 | % (lambda/m)*theta with the first entry zeroed.
59 | grad = ((h_thetax-y)'*X)'/m;
60 | temp = theta;
61 | temp(1) = 0;
62 | temp = temp*lambda/m;
63 | grad = grad + temp; 
64 | % =============================================================
65 | 
66 | grad = grad(:); % grad(:) reshapes grad into a column vector whatever its current shape
67 | 
68 | end
69 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex3_nn/oneVsAll.m:
--------------------------------------------------------------------------------
 1 | function [all_theta] = oneVsAll(X, y, num_labels, lambda)
 2 | %ONEVSALL Train one regularized logistic regression classifier per label.
 3 | %   [all_theta] = ONEVSALL(X, y, num_labels, lambda) fits num_labels
 4 | %   classifiers with regularization parameter lambda and returns them
 5 | %   stacked in all_theta, where the i-th row of all_theta holds the
 6 | %   parameters of the classifier for label i.
 7 | 
 8 | num_examples = size(X, 1);
 9 | num_features = size(X, 2);
10 | 
11 | % Prepend the column of ones to the data matrix.
12 | X = [ones(num_examples, 1) X];
13 | 
14 | % The optimizer settings and the all-zero starting point are identical for
15 | % every label, so they are built once up front.
16 | options = optimset('GradObj', 'on', 'MaxIter', 50);
17 | initial_theta = zeros(num_features + 1, 1);
18 | 
19 | all_theta = zeros(num_labels, num_features + 1);
20 | for c = 1:num_labels
21 |     % (y == c) is the 0/1 target vector for the "label c vs. the rest" problem.
22 |     cost_fn = @(t)(lrCostFunction(t, X, (y == c), lambda));
23 |     theta = fmincg(cost_fn, initial_theta, options);
24 |     all_theta(c, :) = theta(:);
25 | end
26 | 
27 | end
28 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex3_nn/predict.m:
--------------------------------------------------------------------------------
 1 | function p = predict(Theta1, Theta2, X)
 2 | %PREDICT Predict the label of an input given a trained neural network
 3 | %   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
 4 | %   trained weights of a neural network (Theta1, Theta2). X holds one
 5 | %   example per row; p holds one label index per example.
 6 | 
 7 | m = size(X, 1);
 8 | 
 9 | % First layer: prepend the column of ones, then apply the sigmoid.
10 | a1 = [ones(m, 1) X];
11 | a2 = sigmoid(a1 * Theta1');
12 | 
13 | % Second layer: prepend the column of ones again. Only the raw products are
14 | % computed here; no sigmoid is applied before taking the maximum.
15 | scores = [ones(m, 1) a2] * Theta2';
16 | 
17 | % Maximum along dimension 2, i.e. per row; its index is the prediction.
18 | [best_score, p] = max(scores, [], 2);
19 | 
20 | end
21 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/ex3_nn/predictOneVsAll.m:
--------------------------------------------------------------------------------
 1 | function p = predictOneVsAll(all_theta, X)
 2 | %PREDICTONEVSALL Predict the label for a trained one-vs-all classifier. The
 3 | %labels are in the range 1..K, where K = size(all_theta, 1). 
 4 | %  p = PREDICTONEVSALL(all_theta, X) will return a vector of predictions
 5 | %  for each example in the matrix X. Note that X contains the examples in
 6 | %  rows. all_theta is a matrix where the i-th row is a trained logistic
 7 | %  regression theta vector for the i-th class. p is a column vector of
 8 | %  values from 1..K (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2
 9 | %  for 4 examples) 
10 | 
11 | m = size(X, 1);
12 | 
13 | % Add ones to the X data matrix
14 | X = [ones(m, 1) X];
15 | 
16 | % scores(i, k) is the linear score of example i under classifier k.
17 | scores = X * all_theta';
18 | 
19 | % Reduce along dimension 2 explicitly: p comes out as an m x 1 column (one
20 | % label per example), and the maximum is still taken per example when
21 | % there is only one classifier or only one example.
22 | [best_score, p] = max(scores, [], 2);
23 | 
24 | end
25 | 


--------------------------------------------------------------------------------
/Base/courses/coursera_ML/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | ### coursera上机器学习课程作业代码
3 | 
4 | 
5 | 其它笔记资源：[斯坦福大学2014（吴恩达）机器学习教程中文笔记](https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes)
6 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/README.md:
--------------------------------------------------------------------------------
 1 | ## CS231n
 2 | [![Python](https://img.shields.io/badge/python-3.5-green.svg)](https://github.com/fire717/Machine-Learning/tree/master/cs231n)
 3 | 
 4 | ### 资源
 5 | * [官方Schedule and Syllabus](http://cs231n.stanford.edu/syllabus.html)
 6 | * [课程视频-网易云](http://study.163.com/course/courseMain.htm?courseId=1003223001)
 7 | * [课程视频-b站](http://www.bilibili.com/video/av13260183/index_1.html#page=1)
 8 | * [斯坦福CS231n Spring 2017开放全部课程视频](https://zhuanlan.zhihu.com/p/28488268?utm_medium=social&utm_source=wechat_session) 
 9 | 
10 | ### 作业
11 | * [官方作业说明](http://cs231n.github.io/)
12 | * [参考](https://github.com/lightaime/cs231n)
13 | 
14 | * [assignment1](./assignment1)
15 | 
16 | 
17 | ### 笔记
18 | [CS231n官方笔记授权翻译总集篇发布](https://zhuanlan.zhihu.com/p/21930884)
19 | 
20 | * [反向随机失活(inverted dropout)](./note/inverted_dropout.py)


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.pyc
3 | .env/*
4 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/README.md:
--------------------------------------------------------------------------------
1 | Details about this assignment can be found [on the course webpage](http://cs231n.github.io/), under Assignment #1 of Spring 2017.
2 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/collectSubmission.sh:
--------------------------------------------------------------------------------
1 | # Build a fresh assignment1.zip of the current directory, leaving out the
2 | # paths matched by the -x patterns below.
3 | archive=assignment1.zip
4 | rm -f "$archive"
5 | zip -r "$archive" . -x "*.git*" "*cs231n/datasets*" "*.ipynb_checkpoints*" "*README.md" "*collectSubmission.sh" "*requirements.txt" ".env/*"
6 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/cs231n/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/courses/cs231n/assignment1/cs231n/__init__.py


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/cs231n/classifiers/__init__.py:
--------------------------------------------------------------------------------
1 | from cs231n.classifiers.k_nearest_neighbor import *
2 | from cs231n.classifiers.linear_classifier import *
3 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/cs231n/datasets/.gitignore:
--------------------------------------------------------------------------------
1 | cifar-10-batches-py/*
2 | tiny-imagenet-100-A*
3 | tiny-imagenet-100-B*
4 | tiny-100-A-pretrained/*
5 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/cs231n/datasets/get_datasets.sh:
--------------------------------------------------------------------------------
1 | # Get CIFAR10
2 | # Each step runs only if the previous one succeeded, so a failed download
3 | # is never extracted and a failed extraction never deletes the archive.
4 | wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz &&
5 | tar -xzvf cifar-10-python.tar.gz &&
6 | rm cifar-10-python.tar.gz
7 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/cs231n/vis_utils.py:
--------------------------------------------------------------------------------
 1 | from past.builtins import xrange
 2 | 
 3 | from math import sqrt, ceil
 4 | import numpy as np
 5 | 
 6 | def visualize_grid(Xs, ubound=255.0, padding=1):
 7 |   """
 8 |   Reshape a 4D tensor of image data to a grid for easy visualization.
 9 | 
10 |   Inputs:
11 |   - Xs: Data of shape (N, H, W, C)
12 |   - ubound: Output grid will have values scaled to the range [0, ubound]
13 |   - padding: The number of blank pixels between elements of the grid
14 |   """
15 |   (N, H, W, C) = Xs.shape
16 |   grid_size = int(ceil(sqrt(N)))
17 |   grid_height = H * grid_size + padding * (grid_size - 1)
18 |   grid_width = W * grid_size + padding * (grid_size - 1)
19 |   grid = np.zeros((grid_height, grid_width, C))
20 |   next_idx = 0
21 |   y0, y1 = 0, H
22 |   for y in xrange(grid_size):
23 |     x0, x1 = 0, W
24 |     for x in xrange(grid_size):
25 |       if next_idx < N:
26 |         img = Xs[next_idx]
27 |         low, high = np.min(img), np.max(img)
28 |         grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
29 |         # grid[y0:y1, x0:x1] = Xs[next_idx]
30 |         next_idx += 1
31 |       x0 += W + padding
32 |       x1 += W + padding
33 |     y0 += H + padding
34 |     y1 += H + padding
35 |   # grid_max = np.max(grid)
36 |   # grid_min = np.min(grid)
37 |   # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
38 |   return grid
39 | 
40 | def vis_grid(Xs):
41 |   """ visualize a grid of images """
42 |   (N, H, W, C) = Xs.shape
43 |   A = int(ceil(sqrt(N)))
44 |   G = np.ones((A*H+A, A*W+A, C), Xs.dtype)
45 |   G *= np.min(Xs)
46 |   n = 0
47 |   for y in range(A):
48 |     for x in range(A):
49 |       if n < N:
50 |         G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n,:,:,:]
51 |         n += 1
52 |   # normalize to [0,1]
53 |   maxg = G.max()
54 |   ming = G.min()
55 |   G = (G - ming)/(maxg-ming)
56 |   return G
57 |   
58 | def vis_nn(rows):
59 |   """ visualize array of arrays of images """
60 |   N = len(rows)
61 |   D = len(rows[0])
62 |   H,W,C = rows[0][0].shape
63 |   Xs = rows[0][0]
64 |   G = np.ones((N*H+N, D*W+D, C), Xs.dtype)
65 |   for y in range(N):
66 |     for x in range(D):
67 |       G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x]
68 |   # normalize to [0,1]
69 |   maxg = G.max()
70 |   ming = G.min()
71 |   G = (G - ming)/(maxg-ming)
72 |   return G
73 | 
74 | 
75 | 
76 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/frameworkpython:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs the interpreter behind .env/bin/python with PYTHONHOME set (see below).
 3 | # what real Python executable to use
 4 | #PYVER=2.7
 5 | #PATHTOPYTHON=/usr/local/bin/
 6 | #PYTHON=${PATHTOPYTHON}python${PYVER}
 7 | 
 8 | PYTHON=$(which "$(readlink .env/bin/python)") # only works with python3
 9 | 
10 | # find the root of the virtualenv, it should be the parent of the dir this script is in
11 | ENV=$("$PYTHON" -c "import os, sys; print(os.path.abspath(os.path.join(os.path.dirname(sys.argv[1]), '..')))" "$0")
12 | 
13 | # now run Python with the virtualenv set as Python's HOME
14 | export PYTHONHOME="$ENV"
15 | exec "$PYTHON" "$@"
16 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/setup_googlecloud.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # This is the set-up script for Google Cloud.
 4 | sudo apt-get update  # refresh the package index before installing anything
 5 | sudo apt-get install libncurses5-dev
 6 | sudo apt-get install python-dev
 7 | sudo apt-get install python-pip
 8 | sudo apt-get install libjpeg8-dev
 9 | sudo ln -s /usr/lib/x86_64-linux-gnu/libjpeg.so /usr/lib  # presumably so the pillow build finds libjpeg -- confirm
10 | pip install pillow
11 | sudo apt-get build-dep python-imaging  # build dependencies of the python-imaging package
12 | sudo apt-get install libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev
13 | sudo pip install virtualenv  
14 | virtualenv .env                  # Create a virtual environment
15 | source .env/bin/activate         # Activate the virtual environment
16 | pip install -r requirements.txt  # Install dependencies
17 | deactivate  # leave the virtual environment again
18 | echo "**************************************************"
19 | echo "*****  End of Google Cloud Set-up Script  ********"
20 | echo "**************************************************"
21 | echo ""
22 | echo "If you had no errors, You can proceed to work with your virtualenv as normal."
23 | echo "(run 'source .env/bin/activate' in your assignment directory to load the venv,"
24 | echo " and run 'deactivate' to exit the venv. See assignment handout for details.)"
25 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/assignment1/start_ipython_osx.sh:
--------------------------------------------------------------------------------
1 | # Assume the virtualenv is called .env
2 | # Copy the frameworkpython wrapper into the virtualenv, then start the notebook through it.
3 | cp frameworkpython .env/bin
4 | .env/bin/frameworkpython -m IPython notebook
5 | 


--------------------------------------------------------------------------------
/Base/courses/cs231n/note/inverted_dropout.py:
--------------------------------------------------------------------------------
 1 | # Inverted dropout. NOTE(review): np, W1..W3 and b1..b3 are not defined in this file -- looks like an illustrative snippet; confirm.
 2 | """ 
 3 | Inverted dropout: the recommended implementation.
 4 | Drop units and rescale the activations at training time; do nothing extra at test time.
 5 | """
 6 | 
 7 | p = 0.5 # probability of keeping a unit active. higher p = weaker dropout
 8 | 
 9 | def train_step(X):
10 |   # forward pass of a 3-layer neural network
11 |   H1 = np.maximum(0, np.dot(W1, X) + b1)
12 |   U1 = (np.random.rand(*H1.shape) < p) / p # first dropout mask. note the /p!
13 |   H1 *= U1 # drop!
14 |   H2 = np.maximum(0, np.dot(W2, H1) + b2)
15 |   U2 = (np.random.rand(*H2.shape) < p) / p # second dropout mask. note the /p!
16 |   H2 *= U2 # drop!
17 |   out = np.dot(W3, H2) + b3
18 | 
19 |   # backward pass: compute gradients... (omitted)
20 |   # perform the parameter update... (omitted)
21 | 
22 | def predict(X):
23 |   # ensembled forward pass
24 |   H1 = np.maximum(0, np.dot(W1, X) + b1) # no rescaling needed any more
25 |   H2 = np.maximum(0, np.dot(W2, H1) + b2)
26 |   out = np.dot(W3, H2) + b3


--------------------------------------------------------------------------------
/Base/courses/qiyuezaixian/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/courses/qiyuezaixian/.DS_Store


--------------------------------------------------------------------------------
/Base/courses/qiyuezaixian/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/courses/qiyuezaixian/README.md


--------------------------------------------------------------------------------
/Base/frameworks/caffe/docs/ubuntu18_anaconda3_py27_cpu_COMPILE_CAFFE.md:
--------------------------------------------------------------------------------
 1 | 
 2 | 先随便搜一个教程照着装，比如[这个](https://blog.csdn.net/lukaslong/article/details/81390276)
 3 | 然后会遇到如下问题：
 4 | 
 5 | 
 6 | #### 1. recipe for target '.build_release/src/caffe/layers/detection_output_layer.o' failed
 7 | ```
 8 | 先protoc --version查看版本
 9 | 然后conda install protobuf=x.x.x 
10 | ```
11 | 
12 | #### 2.Makefile:621: recipe for target '.build_release/tools/convert_imageset.bin' failed
13 | ```
14 | conda install py-opencv=3.4.2
15 | ```
16 | 
17 | #### 3.fatal error: caffe/proto/caffe.pb.h: 没有那个文件或目录
18 | ```
19 | In the directory you installed Caffe to
20 | protoc src/caffe/proto/caffe.proto --cpp_out=.
21 | mkdir include/caffe/proto
22 | mv src/caffe/proto/caffe.pb.h include/caffe/proto
23 | ```
24 | 
25 | #### 4.libprotobuf.so.19: cannot open shared object file: No such file or directory
26 | ```
27 | sudo find / -name libprotobuf.so.19
28 | 发现确实存在libprotobuf.so.19（备注libprotobuf.so.19是一个软链接文件）
29 | 解决办法：
30 | sudo cp xx/xx/libprotobuf.so.19.0.0 /usr/local/lib/
31 | sudo ln -s /usr/local/lib/libprotobuf.so.19.0.0 /usr/local/lib/libprotobuf.so.19
32 | 
33 | export LD_LIBRARY_PATH=/usr/local/lib
34 | ```
35 | 
36 | #### 5.ImportError: libopencv_core.so.3.4: cannot open shared object file: No such file or directory
37 | ```
38 | sudo find / -name "libopencv_core.so.3.4*"
39 | Then got the result: /usr/local/lib/libopencv_core.so.3.2.
40 | Create a file called /etc/ld.so.conf.d/opencv.conf and
41 | write to it the path to the folder where the binary is stored.
42 | For example, I wrote /usr/local/lib/ to my opencv.conf file.
43 | Run the command line as follows.
44 | sudo ldconfig -v
45 | ```
46 | 
47 | #### 6.ImportError: 'No module named skimage.io'
48 | ```
49 | pip install scikit-image
50 | ```
51 | 
52 | #### 7.TypeError: __new__() got an unexpected keyword argument 'serialized_options'
53 | ```
54 | pip install -U protobuf
55 | ```
56 | 
57 | 
58 | 最后需要加入环境变量export PYTHONPATH=~/caffe-ssd/python:$PYTHONPATH
59 | 


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/3.jpg


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/dog_bike_car.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/dog_bike_car.jpg


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/img/two_faces_300.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/img/two_faces_300.jpg


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/res.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/res.jpg


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/res222.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/res222.jpg


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/test_img.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/test_img.jpg


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/test_img2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/test_img2.jpg


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/test_img3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/test_img3.jpg


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/two_faces_300.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/two_faces_300.jpg


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/yufacedetectnet-open-v1-detection_output.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/yufacedetectnet-open-v1-detection_output.caffemodel


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/yufacedetectnet-open-v1.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/yufacedetectnet-open-v1.caffemodel


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/yufacedetectnet-open-v1_my.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/yufacedetectnet-open-v1_my.caffemodel


--------------------------------------------------------------------------------
/Base/frameworks/caffe/project/caffe_ssd_write_layer/yufacedetectnet-open-v1_new.caffemodel:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/caffe/project/caffe_ssd_write_layer/yufacedetectnet-open-v1_new.caffemodel


--------------------------------------------------------------------------------
/Base/frameworks/caffe/readme.md:
--------------------------------------------------------------------------------
 1 | # Caffe
 2 | 
 3 | 
 4 | ### 文档资料
 5 | * [ssd版caffe](https://github.com/weiliu89/caffe/tree/ssd)
 6 | * [编译caffe源码](./docs/ubuntu18_anaconda3_py27_cpu_COMPILE_CAFFE.md)
 7 | 
 8 | 
 9 | ### 代码片段
10 | * [SSD predict](./code/test_ssd.py)
11 | * [SSD detection_output层](./code/ssd_detection_output_layer.py)
12 | 
13 | 
14 | ### 项目
15 | * [手写SSD后面几层](./project/caffe_ssd_write_layer/)
16 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/keras/.DS_Store


--------------------------------------------------------------------------------
/Base/frameworks/keras/baseline/main.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | # @fire
 3 | import cv2
 4 | import os,sys
 5 | import numpy as np
 6 | from PIL import Image
 7 | import random
 8 | 
 9 | 
10 | from my_data import myData
11 | from my_model import myModel
12 | 
13 | 
14 | def getAllName(file_dir): 
15 |     L=[] 
16 |     for root, dirs, files in os.walk(file_dir):
17 |         # root 所指的是当前正在遍历的这个文件夹的本身的地址
18 |         # dirs 是一个 list ，内容是该文件夹中所有的目录的名字(不包括子目录)
19 |         # files 同样是 list , 内容是该文件夹中所有的文件(不包括子目录)
20 |         for file in files:
21 |             if os.path.splitext(file)[1] == '.jpg' or os.path.splitext(file)[1] == '.png':
22 |                 L.append(os.path.join(root, file))
23 |     return L
24 | 
25 | 
26 | 
27 | 
28 | # Collect the image paths of the training split, one list per category.
29 | data_path_fake = "data/train/fake/"
30 | data_path_true = "data/train/true/"
31 | fake_imgs_train = getAllName(data_path_fake)
32 | true_imgs_train = getAllName(data_path_true)
33 | # The two path variables are reused below for the validation split.
34 | data_path_fake = "data/val/fake/"
35 | data_path_true = "data/val/true/"
36 | fake_imgs_val = getAllName(data_path_fake)
37 | true_imgs_val = getAllName(data_path_true)
38 | 
39 | 
40 | 
41 | # Training configuration; category 1 is the "true" images, category 2 the "fake" ones.
42 | batch_size = 16
43 | nb_epoch = 20
44 | img_name_list_train_cate1 = true_imgs_train
45 | img_name_list_train_cate2 = fake_imgs_train
46 | img_name_list_val_cate1 = true_imgs_val
47 | img_name_list_val_cate2 = fake_imgs_val
48 | my_data = myData(batch_size, nb_epoch, img_name_list_train_cate1, img_name_list_train_cate2,
49 |                      img_name_list_val_cate1, img_name_list_val_cate2)
50 | # NOTE(review): my_data.py as committed appears to define myModel, not myData -- confirm the import resolves.
51 | print(my_data.total_train, my_data.total_val)
52 | 
53 | # Build the network and train it on the data object created above.
54 | my_model = myModel()
55 | 
56 | my_model.train(my_data)
57 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/baseline/my_data.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cv2
 3 | import keras
 4 | from keras import Sequential
 5 | from keras.layers import Conv2D,Activation,MaxPooling2D,Flatten,Dense,Activation,Dropout
 6 | 
 7 | 
 8 | class myModel(object):
 9 | 
10 | 
11 | 
12 |     def __init__(self):
13 |         self.model = Sequential()
14 |         self.model.add(Conv2D(32, (3, 3), input_shape=(100,100,3)))
15 |         self.model.add(Activation('relu'))
16 |         self.model.add(MaxPooling2D(pool_size=(2, 2)))
17 | 
18 |         self.model.add(Conv2D(32, (3, 3)))
19 |         self.model.add(Activation('relu'))
20 |         self.model.add(MaxPooling2D(pool_size=(2, 2)))
21 | 
22 |         self.model.add(Conv2D(64, (3, 3)))
23 |         self.model.add(Activation('relu'))
24 |         self.model.add(MaxPooling2D(pool_size=(2, 2)))
25 | 
26 |         self.model.add(Conv2D(64, (3, 3)))
27 |         self.model.add(Activation('relu'))
28 |         self.model.add(MaxPooling2D(pool_size=(2, 2)))
29 | 
30 |         self.model.add(Flatten())
31 |         self.model.add(Dense(64))
32 |         self.model.add(Activation('relu'))
33 |         self.model.add(Dropout(0.85))
34 |         self.model.add(Dense(2))
35 |         self.model.add(Activation('sigmoid'))
36 | 
37 | 
38 |     def train(self, dataset):
39 |         batch_size = dataset.batch_size
40 |         nb_epoch = dataset.nb_epoch
41 |         self.model.compile(loss='binary_crossentropy',
42 |                       optimizer='adam',
43 |                       metrics=['accuracy'])
44 |         self.model.fit_generator(dataset.train_data_generate(),
45 |                                  steps_per_epoch=dataset.total_train // batch_size,
46 |                                  epochs=nb_epoch,
47 |                                  validation_data=dataset.val_data_generate(),
48 |                                  validation_steps=dataset.total_val//batch_size)
49 | 
50 | 
51 |     def save(self, file_path="model.h5"):
52 |         print('Model Saved.')
53 |         self.model.save_weights(file_path)
54 | 
55 |     def load(self, file_path="model.h5"):
56 |         print('Model Loaded.')
57 |         self.model.load_weights(file_path)
58 | 
59 |     def predict(self, image):
60 |         # 预测样本分类
61 |         img = image.resize((1, IMAGE_SIZE, IMAGE_SIZE, 3))
62 |         img = image.astype('float32')
63 |         img /= 255
64 | 
65 |         #归一化
66 |         result = self.model.predict(img)
67 |         print(result)
68 |         # 概率
69 |         result = self.model.predict_classes(img)
70 |         print(result)
71 |         # 0/1
72 | 
73 |         return result[0]
74 | 
75 |     def evaluate(self, dataset):
76 |         # 测试样本准确率
77 |         score = self.model.evaluate_generator(dataset.valid,steps=2)
78 |         print("样本准确率%s: %.2f%%" % (self.model.metrics_names[1], score[1] * 100))
79 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/baseline/my_model.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cv2
 3 | import keras
 4 | from keras import Sequential
 5 | from keras.layers import Conv2D,Activation,MaxPooling2D,Flatten,Dense,Activation,Dropout
 6 | 
 7 | 
 8 | class myModel(object):
 9 | 
10 | 
11 | 
12 |     def __init__(self):
13 |         self.model = Sequential()
14 |         self.model.add(Conv2D(32, (3, 3), input_shape=(100,100,3)))
15 |         self.model.add(Activation('relu'))
16 |         self.model.add(MaxPooling2D(pool_size=(2, 2)))
17 | 
18 |         self.model.add(Conv2D(32, (3, 3)))
19 |         self.model.add(Activation('relu'))
20 |         self.model.add(MaxPooling2D(pool_size=(2, 2)))
21 | 
22 |         self.model.add(Conv2D(64, (3, 3)))
23 |         self.model.add(Activation('relu'))
24 |         self.model.add(MaxPooling2D(pool_size=(2, 2)))
25 | 
26 |         self.model.add(Conv2D(64, (3, 3)))
27 |         self.model.add(Activation('relu'))
28 |         self.model.add(MaxPooling2D(pool_size=(2, 2)))
29 | 
30 |         self.model.add(Flatten())
31 |         self.model.add(Dense(64))
32 |         self.model.add(Activation('relu'))
33 |         self.model.add(Dropout(0.85))
34 |         self.model.add(Dense(2))
35 |         self.model.add(Activation('sigmoid'))
36 | 
37 | 
38 |     def train(self, dataset):
39 |         batch_size = dataset.batch_size
40 |         nb_epoch = dataset.nb_epoch
41 |         self.model.compile(loss='binary_crossentropy',
42 |                       optimizer='adam',
43 |                       metrics=['accuracy'])
44 |         self.model.fit_generator(dataset.train_data_generate(),
45 |                                  steps_per_epoch=dataset.total_train // batch_size,
46 |                                  epochs=nb_epoch,
47 |                                  validation_data=dataset.val_data_generate(),
48 |                                  validation_steps=dataset.total_val//batch_size)
49 | 
50 | 
51 |     def save(self, file_path="model.h5"):
52 |         print('Model Saved.')
53 |         self.model.save_weights(file_path)
54 | 
55 |     def load(self, file_path="model.h5"):
56 |         print('Model Loaded.')
57 |         self.model.load_weights(file_path)
58 | 
59 |     def predict(self, image):
60 |         # 预测样本分类
61 |         img = image.resize((1, IMAGE_SIZE, IMAGE_SIZE, 3))
62 |         img = image.astype('float32')
63 |         img /= 255
64 | 
65 |         #归一化
66 |         result = self.model.predict(img)
67 |         print(result)
68 |         # 概率
69 |         result = self.model.predict_classes(img)
70 |         print(result)
71 |         # 0/1
72 | 
73 |         return result[0]
74 | 
75 |     def evaluate(self, dataset):
76 |         # 测试样本准确率
77 |         score = self.model.evaluate_generator(dataset.valid,steps=2)
78 |         print("样本准确率%s: %.2f%%" % (self.model.metrics_names[1], score[1] * 100))
79 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/data/0_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/keras/data/0_0.png


--------------------------------------------------------------------------------
/Base/frameworks/keras/data/2_100.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/keras/data/2_100.png


--------------------------------------------------------------------------------
/Base/frameworks/keras/demo/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/keras/demo/.DS_Store


--------------------------------------------------------------------------------
/Base/frameworks/keras/demo/classify_focal_loss.py:
--------------------------------------------------------------------------------
 1 | """
 2 | @Fire
 3 | focal loss本身是用于检测网络的
 4 | 其中alpha因子用于控制不平衡样本比例，主要是降低背景类
 5 | gamma因子用于控制易分类样本权重
 6 | 
 7 | 而在分类任务中，没有背景类，且keras的fit可以直接设置class_weight，所以这里直接去掉了alpha
 8 | 
 9 | """
10 | # version 1
11 | def focal_loss(y_true,y_pred,gamma = 2):
12 |     '''
13 |     :param y_true: ont-hot encoding ,shape is [batch_size,nums_classes]
14 |     :param y_pred: shape is [batch_size,nums_classes],each example defined as probability for per class
15 |     :return:shape is [batch_size,], a list include cross_entropy for per example
16 |     '''
17 |     y_pred = K.clip(y_pred, K.epsilon(),1.0 - K.epsilon())
18 |     crossEntropyLoss = -((1-y_pred)**gamma)*y_true * tf.log(y_pred)#facal loss
19 |  
20 |     return tf.reduce_sum(crossEntropyLoss,-1)
21 | 
22 | # version 2 rec
23 | def focal_loss(target, output, gamma=2):
24 |     output /= K.sum(output, axis=-1, keepdims=True)
25 |     eps = K.epsilon()
26 |     output = K.clip(output, eps, 1. - eps)
27 |     return -K.sum(K.pow(1. - output, gamma) * target * K.log(output),
28 |                   axis=-1)
29 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/demo/fmeasure_metric.py:
--------------------------------------------------------------------------------
 1 | import keras.backend as K
 2 | 
 3 | def binary_accuracy(y_true, y_pred, threshold=0.5):
 4 |     if threshold != 0.5:
 5 |         threshold = K.cast(threshold, y_pred.dtype)
 6 |         y_pred = K.cast(y_pred > threshold, y_pred.dtype)
 7 |     return K.mean(K.equal(y_true, K.round(y_pred)), axis=-1)
 8 | 
 9 | def precision(y_true, y_pred):
10 |     # Calculates the precision
11 |     true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
12 |     predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
13 |     precision = true_positives / (predicted_positives + K.epsilon())
14 |     return precision
15 | 
16 | def recall(y_true, y_pred):
17 |     # Calculates the recall
18 |     true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
19 |     possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
20 |     recall = true_positives / (possible_positives + K.epsilon())
21 |     return recall
22 | 
23 | def fbeta_score(y_true, y_pred, beta=1):
24 |     # Calculates the F score, the weighted harmonic mean of precision and recall.
25 |     if beta < 0:
26 |         raise ValueError('The lowest choosable beta is zero (only precision).')
27 |     
28 |     # If there are no true positives, fix the F score at 0 like sklearn.
29 |     if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
30 |         return 0
31 |     p = precision(y_true, y_pred)
32 |     r = recall(y_true, y_pred)
33 |     bb = beta ** 2
34 |     fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
35 |     return fbeta_score
36 | 
37 | def fmeasure(y_true, y_pred):
38 |     # Calculates the f-measure, the harmonic mean of precision and recall.
39 |     return fbeta_score(y_true, y_pred, beta=1)
40 |     
41 | earlystop = EarlyStopping(monitor='val_fmeasure', patience=4, verbose=0, mode='max')   
42 |     
43 | model.compile(optimizer = 'adam',
44 |               loss='binary_crossentropy',
45 |               metrics=['accuracy',fmeasure,recall,precision])
46 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/demo/h5_to_ckpt.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import pandas as pd
 4 | 
 5 | 
 6 | from keras.preprocessing.image import ImageDataGenerator
 7 | from keras.callbacks import EarlyStopping,ModelCheckpoint,ReduceLROnPlateau
 8 | import tensorflow as tf
 9 | from keras.models import load_model
10 | import numpy as np
11 | import random
12 | 
13 | random.seed(2020)
14 | np.random.seed(2020)
15 | 
16 | import os
17 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
18 | #os.environ["TF_ENABLE_AUTO_MIXED_PRECISION"] = "1"
19 | 
20 | 
21 | 
22 | def train(cfg):
23 |     
24 |     save_dir = cfg['save_dir']
25 |     shape = (int(cfg['height']), int(cfg['width']), 3)
26 | 
27 |     n_class = int(cfg['class_number'])
28 |     batch = int(cfg['batch'])
29 | 
30 |     if not os.path.exists(save_dir):
31 |         os.mkdir(save_dir)
32 | 
33 |     # if cfg['model'] == 'large':
34 |     #     from model.mobilenet_v3_large import MobileNetV3_Large
35 |     #     model = MobileNetV3_Large(shape, n_class).build()
36 |     # if cfg['model'] == 'small':
37 |     #     from model.mobilenet_v3_small import MobileNetV3_Small
38 |     #     model = MobileNetV3_Small(shape, n_class).build()
39 | 
40 |     # if cfg['model'] == 'mymodel':
41 |     #     from model.my_model import MyModel
42 |     #     model = MyModel(shape, n_class).build()
43 | 
44 |     # if cfg['model'] == 'v2':
45 |     #     from model.mobilenet_v2 import MyModel
46 |     #     model = MyModel(shape, n_class).buildRaw()
47 | 
48 |     model_path = "save/v2"
49 |     loaded_model = load_model(os.path.join(model_path,'e_06_0.20_1.00.h5'))
50 |     from keras import backend as K
51 |     import tensorflow as tf
52 |     print(loaded_model.input.op.name)
53 |     print(loaded_model.output.op.name)
54 |     saver = tf.train.Saver()
55 |     saver.save(K.get_session(), 'save/ckpt/keras_model.ckpt')
56 | 
57 |     """
58 | 
59 |     python freeze_graph.py --input_meta_graph=./ckpt/keras_model.ckpt.meta --input_checkpoint=./ckpt/keras_model.ckpt --output_graph=./ckpt/keras_model.pb --output_node_names="dense/Softmax" --input_binary=true
60 |     """
61 | 
62 | if __name__ == '__main__':
63 |     # with open('config/config.json', 'r') as f:
64 |     #     cfg = json.load(f)
65 | 
66 |     cfg = {
67 |     "model": "v2",
68 |     "height": 224,
69 |     "width": 224,
70 |     "class_number": 2,
71 |     "batch": 16,
72 |     "epochs": 50,
73 |     "train_dir": "/home/AlgorithmicGroup/yw/workshop/antiface/data/test_position/level1/train",
74 |     "eval_dir": "/home/AlgorithmicGroup/yw/workshop/antiface/data/test_position/level1/val",
75 |     "save_dir": "save",
76 |     "weights": ""
77 |     }
78 |     train(cfg)
79 |     #nohup python -u train_cls.py > nohup.log 2>&1 &
80 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/demo/h5_to_pb.py:
--------------------------------------------------------------------------------
 1 | #*-coding:utf-8-*
 2 | 
 3 | """
 4 | 将keras的.h5的模型文件，转换成TensorFlow的pb文件
 5 | """
 6 | # ==========================================================
 7 | 
 8 | from keras.models import load_model
 9 | import tensorflow as tf
10 | import os
11 | from keras import backend
12 | from keras.applications.mobilenetv2 import MobileNetV2
13 | from keras.layers import Input
14 | from keras.preprocessing import image
15 | from keras.applications.mobilenetv2 import preprocess_input, decode_predictions
16 | from keras.applications.inception_resnet_v2 import InceptionResNetV2
17 | 
18 | from keras import backend as K
19 | K.set_learning_phase(0)
20 | 
21 | def h5_to_pb(h5_model, output_dir, model_name, out_prefix="output_", log_tensorboard=True):
22 |     """.h5模型文件转换成pb模型文件
23 |     Argument:
24 |         h5_model: str
25 |             .h5模型文件
26 |         output_dir: str
27 |             pb模型文件保存路径
28 |         model_name: str
29 |             pb模型文件名称
30 |         out_prefix: str
31 |             根据训练，需要修改
32 |         log_tensorboard: bool
33 |             是否生成日志文件
34 |     Return:
35 |         pb模型文件
36 |     """
37 |     if os.path.exists(output_dir) == False:
38 |         os.mkdir(output_dir)
39 |     out_nodes = []
40 |     for i in range(len(h5_model.outputs)):
41 |         out_nodes.append(out_prefix + str(i + 1))
42 |         #tf.identity(h5_model.output[i], out_prefix + str(i + 1))
43 |         tf.identity(h5_model.outputs[i],out_prefix + str(i + 1))
44 |     sess = backend.get_session()
45 | 
46 |     from tensorflow.python.framework import graph_util, graph_io
47 |     # 写入pb模型文件
48 |     init_graph = sess.graph.as_graph_def()
49 |     main_graph = graph_util.convert_variables_to_constants(sess, init_graph, out_nodes)
50 |     graph_io.write_graph(main_graph, output_dir, name=model_name, as_text=False)
51 |     # 输出日志文件
52 |     # if log_tensorboard:
53 |     #     from tensorflow.python.tools import import_pb_to_tensorboard
54 |     #     import_pb_to_tensorboard.import_to_tensorboard(os.path.join(output_dir, model_name), output_dir)
55 | 
56 | 
57 | if __name__ == '__main__':
58 |     #  .h模型文件路径参数
59 |     # input_path = './'
60 |     # weight_file = '224_1.0_epoch1_1.0.h5'
61 |     # #weight_file = 'mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.4_224.h5'
62 |     # weight_file_path = os.path.join(input_path, weight_file)
63 |     # output_graph_name = weight_file[:-3] + '.pb'
64 | 
65 |     # #  pb模型文件输出输出路径
66 |     # output_dir = input_path
67 | 
68 |     #  加载模型
69 |     # h5_model = 0
70 | 
71 |     # input_tensor = Input(shape=(224, 224, 3))  # or you could put (None, None, 3) for shape.
72 |     # h5_model = MobileNetV2(input_tensor=input_tensor, alpha=1.0, include_top=False,weights=input_path+weight_file)
73 |     h5_model = load_model('224_1.0_epoch1_1.0.h5', compile=False)
74 |     output_dir = "./"
75 |     output_graph_path = "224_1.0_epoch1_1.0_new.pb"
76 | 
77 |     #h5_model.summary()
78 |     h5_to_pb(h5_model, output_dir=output_dir, model_name=output_graph_path)
79 |     print('Finished')
80 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/demo/h5_to_tflite.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | #python rename.py "xx路径"  
 3 | import cv2
 4 | import os,sys
 5 | import numpy as np
 6 | from PIL import Image
 7 | import random
 8 | 
 9 | from keras import backend as K
10 | import tensorflow as tf
11 | 
12 | 
13 | 
14 | from keras.models import load_model,save_model
15 | my_model = load_model('model_all.h5', compile=False)
16 | #my_model.summary()
17 | 
18 | 
19 | my_model.save('model_tmp.h5')
20 | 
21 | # keras_file = './tmp/keras_model.ckpt'
22 | # saver = tf.train.Saver()
23 | # saver.save(K.get_session(), keras_file)
24 | #python freeze_graph.py --input_meta_graph=./tmp/keras_model.ckpt.meta --input_checkpoint=./tmp/keras_model.ckpt --output_graph=./tmp/keras_model.pb --output_node_names="activation_6/Sigmoid" --input_binary=false
25 | converter =  tf.lite.TocoConverter.from_keras_model_file("model_tmp.h5")
26 | #converter.post_training_quantize = True
27 | tflite_quantized_model=converter.convert()
28 |  
29 | open("model.tflite", "wb").write(tflite_quantized_model)
30 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/demo/layer_trainable.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | model = ...
 4 | 
 5 | 
 6 | opt = Adam(lr=float(0.001))
 7 | for layer in model.layers[:-8]:
 8 |     layer.trainable = False
 9 | print(model.summary())
10 | 
11 | model.compile(loss='binary_crossentropy',  
12 |             optimizer=opt, 
13 |             metrics=[binary_accuracy])#fmeasure
14 | 
15 | model.fit_generator(myGenerator(train_generator,cate_names_final,pre_to_label), 
16 |     validation_data=myGenerator(val_generator,cate_names_final,pre_to_label), 
17 |     steps_per_epoch=count_train // batch_size,
18 |     validation_steps=count_val // batch_size,
19 |     epochs=6,
20 |     class_weight='auto',
21 |     callbacks=[reduce_lr])
22 | 
23 | 
24 | 
25 | for layer in model.layers[:-8]:
26 |     layer.trainable = True
27 | print(model.summary())
28 | 
29 | opt = Adam(lr=float(0.0001))
30 | model.compile(loss='binary_crossentropy',  
31 |             optimizer=opt, 
32 |             metrics=[binary_accuracy])#fmeasure
33 | 
34 | model.fit_generator(myGenerator(train_generator,cate_names_final,pre_to_label), 
35 |     validation_data=myGenerator(val_generator,cate_names_final,pre_to_label), 
36 |     steps_per_epoch=count_train // batch_size,
37 |     validation_steps=count_val // batch_size,
38 |     epochs=cfg['epochs'],
39 |     class_weight='auto',
40 |     callbacks=[earlystop,checkpoint,reduce_lr])
41 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/demo/multi_output_class_weight.py:
--------------------------------------------------------------------------------
 1 | from sklearn.utils import class_weight
 2 | 
 3 | 
 4 | class_weights = class_weight.compute_class_weight('balanced',
 5 |                                                   np.unique(label_list),
 6 |                                                   label_list)
 7 |                                                   
 8 |                                                   
 9 |                                                   
10 | model.fit_generator(...,
11 |                     class_weight={'outputs':class_weights},
12 |                     )
13 | #'outputs' is the output (which u want to balance) layer name 
14 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/demo/show_keras_data.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | #import matplotlib.pyplot as plt
 3 | #from PIL import Image
 4 | from keras.preprocessing.image import ImageDataGenerator
 5 | #import glob
 6 | 
 7 | # 设置生成器参数
 8 | datagen = ImageDataGenerator(
 9 |         rescale=1. / 255,
10 |         horizontal_flip=True,
11 |         channel_shift_range=20)
12 | 
13 | 
14 | 
15 | ptrain = "/home/AlgorithmicGroup/yw/workshop/antiface/data/v3/val"
16 | SAVE_PATH = "images/gen/"
17 | 
18 | gen_data = datagen.flow_from_directory(
19 |         ptrain,
20 |         target_size=(224, 224),
21 |         batch_size=1,
22 |         class_mode='categorical',
23 |         shuffle=True,
24 |         save_to_dir=SAVE_PATH,
25 |         save_prefix='gen')
26 | 
27 | # 生成9张图
28 | for i in range(100):
29 |     gen_data.next()
30 | 
31 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/demo/tflite_pre.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | from __future__ import print_function
 3 | import numpy as np
 4 | import cv2
 5 | from cv2 import dnn
 6 | import sys
 7 |  
 8 | import tensorflow as tf
 9 | from tensorflow.python.framework import graph_util
10 | import os
11 | 
12 | 
13 | 
14 | # Load TFLite model and allocate tensors.
15 | interpreter = tf.lite.Interpreter(model_path="model.tflite")
16 | interpreter.allocate_tensors()
17 | 
18 | input_details = interpreter.get_input_details()
19 | output_details = interpreter.get_output_details()
20 | 
21 | print(input_details)
22 | print(output_details)
23 | 
24 | 
25 | img = cv2.imread( "D:/Data/clothes_style/data/TestSet/img_0.jpg")
26 | print("img shape: ", img.shape)
27 | rows = img.shape[ 0]
28 | cols = img.shape[ 1]
29 | input_data = cv2.resize(img, ( 224, 224))
30 | #input_data = np.array([input_data[:, :, [ 2, 1, 0]]]) # BGR2RGB
31 | 
32 | 
33 | input_data = cv2.resize(input_data, (224, 224), interpolation=cv2.INTER_CUBIC)
34 | input_data = np.array(input_data)
35 | input_data = np.reshape(input_data, (1, 224, 224, 3))
36 | input_data = input_data.astype('float32')
37 | input_data = np.multiply(input_data, 1.0 / 255)
38 | #input_data = np.multiply(input_data, 1.0 / 127.5) - 1
39 | 
40 | 
41 | 
42 | index = input_details[0]['index']
43 | interpreter.set_tensor(index, input_data)
44 | interpreter.invoke()
45 | output_data = interpreter.get_tensor(output_details[0]['index'])
46 | print('output_data :',output_data)
47 | 
48 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/note/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/keras/note/.DS_Store


--------------------------------------------------------------------------------
/Base/frameworks/keras/note/keras_multiGPU.md:
--------------------------------------------------------------------------------
 1 | # Keras 多GPU训练
 2 | > Fire 2018.12.05
 3 | 
 4 | ### 1.指定GPU训练
 5 | ```python
 6 | os.environ["CUDA_VISIBLE_DEVICES"] = "1"
 7 | os.environ["CUDA_VISIBLE_DEVICES"] = "0,2"
 8 | ```
 9 | 
10 | ### 2.多块GPU训练
11 | 多卡机器上代码可以直接运行，但通过 nvidia-smi 观察 GPU 占用率会发现：只有第一块显卡在工作，其余显卡都处于闲置状态。
12 | 
13 | 设置：
14 | 
15 | ```python
16 | 
17 | from keras.utils import multi_gpu_model
18 | # 最多支持8块GPU
19 | 
20 | model = Model(input=.., output=..)  #这里同单卡，声明好模型
21 | 
22 | parallel_model = multi_gpu_model(model, gpus=4) #这里假设有4块
23 | 
24 | parallel_model.compile(...) #这里同单卡compile
25 | parallel_model.fit(x, y, epochs=40, batch_size=128)
26 | 
27 | ```
28 | 
29 | 注意：用多卡跑的时候，batchsize要乘以对应的块数，因为会把总的batchsize分到几块上面。
30 | 
31 | 但是还是会报错，说
32 | > could not satisfy explicit device specification '/device:GPU:3' because no supported kernel for GPU device is available
33 | 
34 | 解决方法是，在加载模型之前添加：
35 | 
36 | ``` python
37 | import tensorflow as tf
38 | from keras.backend.tensorflow_backend import set_session
39 | 
40 | config = tf.ConfigProto(allow_soft_placement=True)
41 | set_session(tf.Session(config=config))
42 | ```
43 | 
44 | 另外如果遇到如下报错：
45 | ```shell
46 | AttributeError: '_TfDeviceCaptureOp' object has no attribute '_set_device_from_string'
47 | ```
48 | 是由于keras2.2+tensorflow1.14+的一个bug，升级keras到2.3或者降级tensorflow到1.13可以解决。
49 | 
50 | P.S.网上还有很多人遇到Modelcheckpoint callback报错的问题，我没遇到过，贴一个供参考:
51 | 
52 | [Keras 多GPU下模型和参数保存Modelcheckpoint callback报错](https://blog.csdn.net/Umi_you/article/details/81301002)
53 | 
54 | 
55 | ### 3.多块GPU训练模型用多块GPU预测
56 | ```python
57 | from keras.utils import multi_gpu_model
58 | 
59 | basemodel = Model(inputs=input, outputs=y_pred) ##这里同单卡，声明好模型
60 | 
61 | multi_model=multi_gpu_model(basemodel,gpus=4)
62 | multi_model.load_weights("multi_model.h5") #加载多卡训练的模型
63 | 
64 | multi_model.predict(...) #预测
65 | ```
66 | 
67 | ### 4.多块GPU训练模型用单块GPU预测
68 | 多卡训练得到的网络，每一层都是按 GPU 来命名的，因此导入参数时必须指定与训练时相同数量的 GPU。直接把 model 切换到单 GPU 环境中加载会报错，此时必须先将参数保存为单 GPU 的形式。
69 | 
70 | ```python
71 | from keras.utils import multi_gpu_model
72 | 
73 | basemodel = Model(inputs=input, outputs=y_pred) ##这里同单卡，声明好模型
74 | 
75 | multi_model=multi_gpu_model(basemodel,gpus=4)
76 | multi_model.load_weights("multi_model.h5") # 此时basemodel也自动载入了权重，
77 | 
78 | basemodel.save('basemodel.h5')
79 | ```
80 | 这里保存的basemodel.h5就是对应单卡的模型，直接在单卡机器上加载就可以使用了。
81 | 


--------------------------------------------------------------------------------
/Base/frameworks/keras/project/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/keras/project/.DS_Store


--------------------------------------------------------------------------------
/Base/frameworks/mxnet/load_pre_demo.py:
--------------------------------------------------------------------------------
 1 | # load model and predicate
 2 | import mxnet as mx
 3 | import numpy as np
 4 | import cv2
 5 | from collections import namedtuple
 6 | Batch = namedtuple('Batch', ['data'])
 7 | 
 8 | def load_model(prefix, epoch, ctx, height, width):
 9 |     print(prefix, epoch)
10 |     sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
11 |     mod = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
12 |     mod.bind(for_training=False,
13 |              data_shapes=[('data', (1, 3, int(height), int(width)))])
14 |     mod.set_params(arg_params=arg_params, aux_params=aux_params, allow_missing=True)
15 |     return sym, mod
16 | 
17 | height, width = 112,112
18 | load_epoch = 0
19 | model_prefix = "mynet"
20 | sym, mod = load_model(model_prefix, load_epoch, mx.cpu(), height, width)  # ctx = mx.cpu()  mx.gpu(0)
21 | 
22 | img=cv2.imread('./h.jpg')
23 | img=cv2.resize(img,(width, height))
24 | img = np.reshape(img, (3, height, width))
25 | img = np.array([img])
26 | 
27 | print(img.shape)
28 | img = mx.nd.array(img)
29 | mod.forward(Batch([img]))     
30 | print('height', height, 'width', width)
31 | print('img',img[0,2,0])
32 | prob = mod.get_outputs()[0].asnumpy()
33 | 
34 | print(prob.shape)
35 | 


--------------------------------------------------------------------------------
/Base/frameworks/pytorch/IoU_loss.py:
--------------------------------------------------------------------------------
 1 | #参考https://blog.csdn.net/weixin_38241876/article/details/110041645
 2 | #但是它计算有问题，这里是修改后的
 3 | 
 4 | def myIOULoss(self,predicted_locations, gt_locations, labels,
 5 |                 GIoU=False, DIoU=False, CIoU=False):
 6 |     #torch.Size([691, 4]) torch.Size([691, 4])
 7 | 
 8 |     #    
 9 |     ### 1. to conner type box
10 |     pos_mask = labels > 0
11 |     pre_boxes = box_utils.convert_locations_to_boxes(
12 |                         predicted_locations, self.priors, 0.1, 0.2)
13 |     pre_boxes = box_utils.center_form_to_corner_form(pre_boxes)
14 |     pre_boxes = pre_boxes[pos_mask, :].reshape(-1, 4)
15 |     #print(pre_boxes[:5])#[0.3799, 0.2177, 0.4424, 0.2723]
16 | 
17 |     gt_boxes = box_utils.convert_locations_to_boxes(
18 |                         gt_locations, self.priors, 0.1, 0.2)
19 |     gt_boxes = box_utils.center_form_to_corner_form(gt_boxes)
20 |     gt_boxes = gt_boxes[pos_mask, :].reshape(-1, 4)
21 |     #print(gt_boxes[:5])
22 |     #print(pre_boxes.shape, gt_boxes.shape)
23 |     num_pos = gt_boxes.size(0)
24 | 
25 | 
26 |     ### 2.compute IOU
27 |     b1_x1, b1_y1, b1_x2, b1_y2 = pre_boxes[:,0], pre_boxes[:,1], pre_boxes[:,2], pre_boxes[:,3]
28 |     b2_x1, b2_y1, b2_x2, b2_y2 = gt_boxes[:,0], gt_boxes[:,1], gt_boxes[:,2], gt_boxes[:,3]
29 | 
30 |     # Intersection area
31 |     inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \
32 |             (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0)
33 | 
34 |     # Union Area
35 |     w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1
36 |     w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1
37 |     union = (w1 * h1 + 1e-16) + w2 * h2 - inter
38 | 
39 |     #print(inter.shape, union.shape)
40 |     iou = inter / union  # iou
41 |     # print(iou.shape) #[691]
42 |     # b
43 |     if GIoU or DIoU or CIoU:
44 |         cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (smallest enclosing box) width
45 |         ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
46 |         if GIoU:  # Generalized IoU https://arxiv.org/pdf/1902.09630.pdf
47 |             c_area = cw * ch + 1e-16  # convex area
48 |             loss = iou - (c_area - union) / c_area  # GIoU
49 |             loss = 1-loss
50 |         else:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
51 |             # convex diagonal squared
52 |             c2 = cw ** 2 + ch ** 2 + 1e-16
53 |             # centerpoint distance squared
54 |             rho2 = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2)) ** 2 / 4 + ((b2_y1 + b2_y2) - (b1_y1 + b1_y2)) ** 2 / 4
55 |             if DIoU:
56 |                 loss = iou - rho2 / c2  # DIoU
57 |                 loss = 1-loss
58 |             elif CIoU:  # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47
59 |                 v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
60 |                 with torch.no_grad():
61 |                     alpha = v / (1 - iou + v)
62 |                 loss = iou - (rho2 / c2 + v * alpha)  # CIoU
63 |                 loss = 1-loss
64 | 
65 |         loss = loss.sum()
66 |     else:
67 |         iou = -torch.log(iou + 1e-16) #防止为0
68 |         loss = iou.sum()
69 | 
70 |     #print(loss,num_pos)
71 |     # b
72 |     return loss, num_pos
73 | 


--------------------------------------------------------------------------------
/Base/frameworks/pytorch/demo/CEloss.py:
--------------------------------------------------------------------------------
 1 | def myCELoss(self, pre, label):
 2 |         #print(pre.shape, label.shape)#torch.Size([2764, 3]) torch.Size([2764]
 3 | 
 4 |         ### 原始CE loss
 5 |         #loss = F.cross_entropy(pre, label, reduction='sum') #e0 loss 7.9068
 6 | 
 7 |         ### CE loss等价实现1
 8 |         # log_soft_out = F.log_softmax(pre, dim=-1)
 9 |         # loss = F.nll_loss(log_soft_out, label, reduction='sum')
10 | 
11 |         ### CE loss等价实现2
12 |         # soft_out = F.softmax(pre, dim=-1)
13 |         # log_soft_out = torch.log(soft_out)
14 |         # loss = F.nll_loss(log_soft_out, label, reduction='sum')
15 | 
16 |         ### CE loss等价实现3
17 |         # log_soft_out = F.log_softmax(pre, dim=-1)
18 |         # one_hot = F.one_hot(label, pre.shape[1]).float().to(self.device)
19 |         # loss = torch.sum(-one_hot * log_soft_out)
20 | 
21 |         ### label smooth
22 |         log_soft_out = F.log_softmax(pre, dim=-1)
23 |         one_hot = F.one_hot(label, pre.shape[1]).float().to(pre.device)
24 |         one_hot = one_hot * (1-self.labelsmooth)+self.labelsmooth/pre.shape[1]
25 |         loss = torch.sum(-one_hot * log_soft_out)
26 | 
27 |         ### label smooth, 加强face when==facemask
28 |         # log_soft_out = F.log_softmax(pre, dim=-1)
29 |         # one_hot = F.one_hot(label, pre.shape[1]).float().to(self.device)
30 |         # one_hot = one_hot * (1-self.labelsmooth)+self.labelsmooth/pre.shape[1]
31 |         # facemask_index = label==2
32 |         # one_hot[facemask_index,1] = one_hot[facemask_index,0]+one_hot[facemask_index,1]
33 |         # one_hot[facemask_index,2] = one_hot[facemask_index,2]-one_hot[facemask_index,0]
34 |         # loss = torch.sum(-one_hot * log_soft_out)
35 | 
36 | 
37 |         # print(loss) #4388.9595/1.5879
38 |         # #b
39 |         return loss
40 | 


--------------------------------------------------------------------------------
/Base/frameworks/pytorch/demo/onnx_pre.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | # from __future__ import print_function
 3 | import numpy as np
 4 | import cv2
 5 | # from cv2 import dnn
 6 | # import sys
 7 |  
 8 | # import tensorflow as tf
 9 | # from tensorflow.python.framework import graph_util
10 | # import os
11 | 
12 | # import time
13 | 
14 | 
15 | import time
16 | 
17 | import onnxruntime as rt
18 | 
19 | model_path = 'mymodel.onnx'
20 | sess=rt.InferenceSession(model_path)#model_path就是模型的地址
21 | input_name=sess.get_inputs()[0].name
22 | 
23 | 
24 | img = cv2.imread( 'tmp/face0_0.8583003.jpg')
25 | print("img shape: ", img.shape)
26 | inp = cv2.resize(img, ( 112, 112))
27 | inp = inp[:, :, [ 2, 1, 0]] # BGR2RGB
28 | 
29 | data = inp.reshape( 1, inp.shape[ 0], inp.shape[ 1], 3)
30 | #print(data.shape)
31 | data = np.transpose(data,(0,3,1,2))
32 | data = data/255.0
33 | data = (data-0.5)/0.5
34 | #print(data.shape)
35 | data = data.astype(np.float32)
36 | 
37 | for _ in range(5):
38 |     t = time.time()
39 |     res=sess.run(None,{input_name:data})[0]
40 |     print(time.time() - t)
41 | 
42 | print("res: ", res[0][:20])
43 | print("res: ", np.array(res).shape)
44 | 


--------------------------------------------------------------------------------
/Base/frameworks/pytorch/demo/show_pth_data.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import argparse
 3 | import torch
 4 | import torch.nn as nn
 5 | import torch.nn.functional as F
 6 | import torch.optim as optim
 7 | from torchvision import datasets, transforms
 8 | import os
 9 | import numpy as np
10 | import random
11 | from torch.utils.data.dataset import Dataset
12 | import cv2
13 | import torchvision.transforms as transforms
14 | # import imagehash
15 | from PIL import Image
16 | from torchsummary import summary
17 | import torchvision.models as models
18 | import pretrainedmodels
19 | #from pretrainedmodels.models.xception import Xception,xception
20 | 
21 | #print(pretrainedmodels.pretrained_settings['xception'])
22 | #{'imagenet': {'url': 'http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth', 'input_space': 'RGB', 'input_size': [3, 299, 299], 'input_range': [0, 1], 'mean': [0.5, 0.5, 0.5], 'std': [0.5, 0.5, 0.5], 'num_classes': 1000, 'scale': 0.8975}}
23 | 
24 | #b
25 | 
26 | import os
27 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
28 | 
29 | 
30 | my_seed = 42
31 | random.seed(my_seed)
32 | np.random.seed(my_seed)
33 | torch.manual_seed(my_seed)
34 | 
35 | def getAllName(file_dir, tail_list = ['.png','.jpg']): 
36 |     L=[] 
37 |     for root, dirs, files in os.walk(file_dir):
38 |         for file in files:
39 |             if os.path.splitext(file)[1] in tail_list:
40 |                 L.append(os.path.join(root, file))
41 |     return L
42 | 
43 | 
44 | 
45 | img_path_list = getAllName("../../mywork/data/datasets/raw/train_clean/train_pad")
46 | transform = transforms.Compose([
47 |                             # transforms.Resize((224, 224)),
48 |                             # transforms.CenterCrop(size=(210, 180)),
49 |                             transforms.Resize((224, 224)),
50 |                             #transforms.RandomAffine(20, translate=(0.2,0.1), scale=(0.9,1.1),shear=(10,10), fillcolor=(0,0,0)),
51 |                             #transforms.RandomHorizontalFlip(),
52 |                             # transforms.RandomRotation(20),
53 |                             #transforms.ColorJitter(brightness=0.5, contrast=0.5, hue=0.2),
54 |                             #transforms.ToTensor(),
55 |                              #transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
56 |                              ])
57 | 
58 | 
59 | for i,img_path in enumerate(img_path_list):
60 |     img = Image.open(img_path).convert('RGB')
61 |     img = transform(img)
62 |     img.save("tmp/"+str(i)+".jpg", quality=100)
63 | 
64 |     if i>100:
65 |         break
66 | 
67 | 


--------------------------------------------------------------------------------
/Base/frameworks/pytorch/practice/60分钟入门PyTorch-0.目录.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# 60分钟入门深度学习工具-PyTorch(目录)"
 8 |    ]
 9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "**作者**：Soumith Chintala\n",
15 |     "\n",
16 |     "原文翻译自：https://pytorch.org/tutorials/beginner/deep_learning_60min_blitz.html\n",
17 |     "    \n",
18 |     "中文翻译、注释制作：黄海广\n",
19 |     "\n",
20 |     "github：https://github.com/fengdu78\n",
21 |     "\n",
22 |     "代码全部测试通过。\n",
23 |     "\n",
24 |     "配置环境：PyTorch 1.0，Python 3.6\n",
25 |     "\n",
26 |     "主机：显卡：一块1080ti；内存：32g（注：绝大部分代码不需要GPU）\n",
27 |     "![公众号](images/gongzhong.jpg)\n",
28 |     "    "
29 |    ]
30 |   },
31 |   {
32 |    "cell_type": "markdown",
33 |    "metadata": {},
34 |    "source": [
35 |     "## 本教程的目标：\n",
36 |     "\n",
37 |     "* 在高层次上理解PyTorch的张量(Tensor)库和神经网络\n",
38 |     "* 训练一个小型神经网络对图像进行分类\n",
39 |     "* 本教程假设您对numpy有基本的了解\n",
40 |     "\n",
41 |     "**注意**： 务必确认您已经安装了 torch 和 torchvision 两个包。"
42 |    ]
43 |   },
44 |   {
45 |    "cell_type": "markdown",
46 |    "metadata": {},
47 |    "source": [
48 |     "## 目录"
49 |    ]
50 |   },
51 |   {
52 |    "cell_type": "markdown",
53 |    "metadata": {},
54 |    "source": [
55 |     "* 1.[Pytorch是什么？](60分钟入门PyTorch-1.PyTorch是什么？.ipynb)\n",
56 |     "* 2.[AUTOGRAD](60分钟入门PyTorch-2.AUTOGRAD.ipynb)\n",
57 |     "* 3.[神经网络](60分钟入门PyTorch-3.神经网络.ipynb)\n",
58 |     "* 4.[训练一个分类器](60分钟入门PyTorch-4.训练一个分类器.ipynb)\n",
59 |     "* 5.[数据并行](60分钟入门PyTorch-5.数据并行.ipynb)"
60 |    ]
61 |   }
62 |  ],
63 |  "metadata": {
64 |   "kernelspec": {
65 |    "display_name": "pth",
66 |    "language": "python",
67 |    "name": "pth"
68 |   },
69 |   "language_info": {
70 |    "codemirror_mode": {
71 |     "name": "ipython",
72 |     "version": 3
73 |    },
74 |    "file_extension": ".py",
75 |    "mimetype": "text/x-python",
76 |    "name": "python",
77 |    "nbconvert_exporter": "python",
78 |    "pygments_lexer": "ipython3",
79 |    "version": "3.6.10"
80 |   }
81 |  },
82 |  "nbformat": 4,
83 |  "nbformat_minor": 2
84 | }
85 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/tensorflow/.DS_Store


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/tensorflow/basic/.DS_Store


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/TFLiteModelMaker/README.md:
--------------------------------------------------------------------------------
 1 | # awesome_train_tflite
 2 | 
 3 | 利用TF的官方工具TFLite Model Maker，几行代码使用流行的预训练模型训练，直接生成tflite模型。
 4 | 
 5 | ## 使用方法
 6 | ### 1. 环境配置
 7 | 
 8 | * 需要tensorflow 2.0以上，我是安装的2.1.1
 9 | * 按照examples/tensorflow_examples/lite/model_maker/requirements.txt安装其余库
10 | 
11 | ### 2.工具代码
12 | 
13 | * 从[官方github](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)下载。
14 | 
15 | 
16 | ### 3.开始训练
17 | 
18 | 参考代码：[train.py](train.py)
19 | 
20 | 代码不到十行，有详细注释。
21 | 
22 | 
23 | ## 其他
24 | 
25 | ### 相关链接
26 | * [TFLite Model Maker 官方Github](https://github.com/tensorflow/examples/tree/master/tensorflow_examples/lite/model_maker)
27 | * [TensorFlow Hub：官方预训练模型下载](https://tfhub.dev/)
28 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/TFLiteModelMaker/train.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import tensorflow as tf
 3 | 
 4 | from tensorflow_examples.lite.model_maker.core.data_util.image_dataloader import ImageClassifierDataLoader
 5 | from tensorflow_examples.lite.model_maker.core.task import image_classifier
 6 | from tensorflow_examples.lite.model_maker.core.task import model_spec as ms
 7 | 
 8 | 
 9 | data_path = r"/home/AlgorithmicGroup/yw/workshop/antiface/data/clean_data"
10 | # 这个path指图像数据文件夹路径，其下面按类别分为多个子文件夹
11 | data = ImageClassifierDataLoader.from_folder(data_path)
12 | train_data, test_data = data.split(0.92)
13 | 
14 | print("done data load.")
15 | 
16 | model = image_classifier.create(train_data, 
17 |   
18 |                                 model_spec=ms.efficientnet_lite0_spec,
19 |                                 shuffle=True,
20 |                                 validation_data=test_data,
21 |                                 batch_size=32,
22 |                                 epochs=20,
23 |                                 train_whole_model=False,
24 |                                 dropout_rate=0.2,
25 |                                 learning_rate=0.005,
26 |                                 momentum=0.9)
27 | #指定模型为efficientnet_lite0，可以换成其他的
28 | """
29 | def get_default_hparams():
30 |   return HParams(
31 |       train_epochs=5,
32 |       do_fine_tuning=False,(train_whole_model)
33 |       batch_size=32,
34 |       learning_rate=0.005,
35 |       momentum=0.9,
36 |       dropout_rate=0.2)
37 | """
38 | 
39 | 
40 | #loss, accuracy = model.evaluate(test_data)
41 | #训练过程也会打印相关信息，类似keras
42 | 
43 | 
44 | model.export('image_classifier.tflite', 'image_labels.txt')
45 | #导出tflite模型，image_labels即对应的类别
46 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/TensorFlowExample.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import tensorflow as tf"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "markdown",
 17 |    "metadata": {},
 18 |    "source": [
 19 |     "Writing and running programs in TensorFlow has the following steps:\n",
 20 |     "\n",
 21 |     "1. Create Tensors (variables) that are not yet executed/evaluated. \n",
 22 |     "2. Write operations between those Tensors.\n",
 23 |     "3. Initialize your Tensors. \n",
 24 |     "4. Create a Session. \n",
 25 |     "5. Run the Session. This will run the operations you'd written above. "
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 12,
 31 |    "metadata": {},
 32 |    "outputs": [
 33 |     {
 34 |      "name": "stdout",
 35 |      "output_type": "stream",
 36 |      "text": [
 37 |       "0.0\n"
 38 |      ]
 39 |     }
 40 |    ],
 41 |    "source": [
 42 |     "coefficients = np.array([[1.],[-10],[25.]])\n",
 43 |     "\n",
 44 |     "w = tf.Variable(0,dtype=tf.float32)\n",
 45 |     "x = tf.placeholder(tf.float32,[3,1])\n",
 46 |     "\n",
 47 |     "cost = x[0][0]*w**2 + x[1][0]*w + x[2][0]\n",
 48 |     "train = tf.train.GradientDescentOptimizer(0.01).minimize(cost)\n",
 49 |     "\n",
 50 |     "init = tf.global_variables_initializer()\n",
 51 |     "\n",
 52 |     "session = tf.Session()\n",
 53 |     "session.run(init)\n",
 54 |     "\n",
 55 |     "print(session.run(w))"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 13,
 61 |    "metadata": {},
 62 |    "outputs": [
 63 |     {
 64 |      "name": "stdout",
 65 |      "output_type": "stream",
 66 |      "text": [
 67 |       "4.99999\n"
 68 |      ]
 69 |     }
 70 |    ],
 71 |    "source": [
 72 |     "for i in range(1000):\n",
 73 |     "    session.run(train,feed_dict={x:coefficients})\n",
 74 |     "print(session.run(w))"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {
 81 |     "collapsed": true
 82 |    },
 83 |    "outputs": [],
 84 |    "source": []
 85 |   }
 86 |  ],
 87 |  "metadata": {
 88 |   "kernelspec": {
 89 |    "display_name": "Python 3",
 90 |    "language": "python",
 91 |    "name": "python3"
 92 |   },
 93 |   "language_info": {
 94 |    "codemirror_mode": {
 95 |     "name": "ipython",
 96 |     "version": 3
 97 |    },
 98 |    "file_extension": ".py",
 99 |    "mimetype": "text/x-python",
100 |    "name": "python",
101 |    "nbconvert_exporter": "python",
102 |    "pygments_lexer": "ipython3",
103 |    "version": "3.5.4"
104 |   }
105 |  },
106 |  "nbformat": 4,
107 |  "nbformat_minor": 2
108 | }
109 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/ckpt2pb.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import tensorflow as tf
 3 | from tensorflow.python import pywrap_tensorflow
 4 | import os
 5 | 
 6 | 
 7 | def getTensorName(checkpoint_path):
 8 |     reader=pywrap_tensorflow.NewCheckpointReader(checkpoint_path)
 9 |     var_to_shape_map=reader.get_variable_to_shape_map()
10 | 
11 |     with open("tensorname.txt","w", encoding="utf-8") as f:
12 |         for key in var_to_shape_map:
13 |             f.write('tensor_name: '+key+'\n')
14 | 
15 | 
16 | def freeze_graph(input_checkpoint,output_graph):
17 |     '''
18 |     :param input_checkpoint:
19 |     :param output_graph: PB模型保存路径
20 |     :return:
21 |     '''
22 |     # checkpoint = tf.train.get_checkpoint_state(model_folder) #检查目录下ckpt文件状态是否可用
23 |     # input_checkpoint = checkpoint.model_checkpoint_path #得ckpt文件路径
24 |  
25 |     # 指定输出的节点名称,该节点名称必须是原模型中存在的节点
26 |     output_node_names = "logits/age/BiasAdd,logits/gender/BiasAdd"#,logits/gender/biases,logits/age/biases,logits/age/weights"
27 |     saver = tf.compat.v1.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=True)
28 |     graph = tf.compat.v1.get_default_graph() # 获得默认的图
29 |     input_graph_def = graph.as_graph_def()  # 返回一个序列化的图代表当前的图
30 |  
31 |     with tf.compat.v1.Session() as sess:
32 |         saver.restore(sess, input_checkpoint) #恢复图并得到数据
33 |         output_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(  # 模型持久化，将变量值固定
34 |             sess=sess,
35 |             input_graph_def=input_graph_def,# 等于:sess.graph_def
36 |             output_node_names=output_node_names.split(","))# 如果有多个输出节点，以逗号隔开
37 |  
38 |         with tf.io.gfile.GFile(output_graph, "wb") as f: #保存模型
39 |             f.write(output_graph_def.SerializeToString()) #序列化输出
40 |         print("%d ops in the final graph." % len(output_graph_def.node)) #得到当前图有几个操作节点
41 |  
42 |         # for op in graph.get_operations():
43 |         #     print(op.name, op.values())
44 | 
45 | 
46 | 
47 | if __name__ == "__main__":
48 |     # 输入ckpt模型路径
49 |     input_checkpoint='./savedmodel.ckpt'
50 |     # 输出pb模型的路径
51 |     out_pb_path="./frozen_model.pb"
52 |     # 调用freeze_graph将ckpt转为pb
53 | 
54 |     #getTensorName(input_checkpoint)
55 |     freeze_graph(input_checkpoint,out_pb_path)
56 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/ckpt_pre.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | from __future__ import print_function
 3 | import numpy as np
 4 | import cv2
 5 | from cv2 import dnn
 6 | import sys
 7 |  
 8 | import tensorflow as tf
 9 | from tensorflow.python.framework import graph_util
10 | import os
11 | 
12 | 
13 | import os
14 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
15 | 
16 | 
17 | 
18 | #### data
19 | img = cv2.imread( '1593250301105_1f43f6a0e8.png')
20 | print("img shape: ", img.shape)
21 | rows = img.shape[ 0]
22 | cols = img.shape[ 1]
23 | img = cv2.resize(img, ( 224, 224))
24 | #img = img[:, :, [ 2, 1, 0]] # BGR2RGB
25 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
26 | img = np.multiply(img, 1.0 / 255.0)
27 | 
28 | 
29 | 
30 | 
31 | 
32 | #### model
33 | sess = tf.Session()
34 | saver = tf.train.import_meta_graph('./keras_model.ckpt.meta') # 加载模型结构
35 | saver.restore(sess, tf.train.latest_checkpoint('./')) # 只需要指定目录就可以恢复所有变量信息
36 | 
37 | 
38 | # 获取placeholder变量
39 | input_x = sess.graph.get_tensor_by_name('input_1:0')
40 | 
41 | # 获取需要进行计算的operator
42 | op = sess.graph.get_tensor_by_name('dense_1/Softmax:0')
43 | 
44 | ret = sess.run(op, 
45 |             feed_dict={ input_x: np.array([img],dtype = np.float32)})
46 | print("ret: ",ret)
47 | 
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/onnx_pre.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | # from __future__ import print_function
 3 | import numpy as np
 4 | import cv2
 5 | # from cv2 import dnn
 6 | # import sys
 7 |  
 8 | # import tensorflow as tf
 9 | # from tensorflow.python.framework import graph_util
10 | # import os
11 | 
12 | # import time
13 | 
14 | 
15 | 
16 | 
17 | import onnxruntime as rt
18 | 
19 | model_path = 'keras_model.onnx'
20 | sess=rt.InferenceSession(model_path)#model_path就是模型的地址
21 | input_name=sess.get_inputs()[0].name
22 | 
23 | 
24 | img = cv2.imread( './1593250301105_1f43f6a0e8.png')
25 | print("img shape: ", img.shape)
26 | rows = img.shape[ 0]
27 | cols = img.shape[ 1]
28 | inp = cv2.resize(img, ( 224, 224))
29 | inp = inp[:, :, [ 2, 1, 0]] # BGR2RGB
30 | inp = inp/255.0
31 | data = inp.reshape( 1, inp.shape[ 0], inp.shape[ 1], 3)
32 | print(data.shape)
33 | data = np.transpose(data,(0,3,1,2))
34 | print(data.shape)
35 | data = data.astype(np.float32)
36 | 
37 | res=sess.run(None,{input_name:data})
38 | 
39 | print("res: ", res)
40 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/pb2tflite.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 |  
 3 | import pathlib2 as pathlib
 4 |  
 5 |  
 6 | # 1.伪量化
 7 | # converter = tf.contrib.lite.TocoConverter.from_frozen_graph('model.pb',["input_image"],["result"], input_shapes={"input_image":[1,626,361,3]})   #Python 2.7.6版本,但测试量化后模型大小不会变小
 8 | converter = tf.lite.TFLiteConverter.from_frozen_graph('frozen_insightface_r50.pb',["data"],["output"], input_shapes={"data":[1,112,112,3]})   #python3.4.3--nightly版本,测试量化后模型大小会变小
 9 |  
10 | converter.post_training_quantize = True
11 |  
12 | tflite_quantized_model=converter.convert()
13 |  
14 | open("quantized_model.tflite", "wb").write(tflite_quantized_model)
15 | 
16 | 
17 | 
18 | # 2 量化
19 | # converter = tf.lite.TFLiteConverter.from_frozen_graph('frozen_insightface_r50.pb',["data"],["output"], input_shapes={"data":[1,112,112,3]})   #python3.4.3--nightly版本,测试量化后模型大小会变小
20 |  
21 | # converter.inference_type = tf.contrib.lite.constants.QUANTIZED_UINT8
22 |  
23 | # converter.quantized_input_stats = {"data" : (127, 2.)}
24 |  
25 | # converter.default_ranges_stats=(0, 6)
26 |  
27 | # tflite_quantized_model=converter.convert()
28 |  
29 | # open("true_quantized_model.tflite", "wb").write(tflite_quantized_model)
30 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/read_pb.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | from __future__ import print_function
 3 | import numpy as np
 4 | import cv2
 5 | from cv2 import dnn
 6 | import sys
 7 |  
 8 | import tensorflow as tf
 9 |  
10 | 
11 | 
12 | # Read the graph.
13 | model_dir = 'frozen_inference_graph.pb'
14 | with tf.gfile.FastGFile(model_dir, 'rb') as f:
15 |     graph_def = tf.GraphDef()
16 |     graph_def.ParseFromString(f.read())
17 | with tf.Session() as sess:
18 | # Restore session
19 |     sess.graph.as_default()
20 |     tf.import_graph_def(graph_def, name= '')
21 |     # Read and preprocess an image.
22 |     img = cv2.imread( 'coco.png')
23 |     rows = img.shape[ 0]
24 |     cols = img.shape[ 1]
25 |     inp = cv2.resize(img, ( 300, 300))
26 |     inp = inp[:, :, [ 2, 1, 0]] # BGR2RGB
27 |     # Run the model
28 |     out = sess.run([sess.graph.get_tensor_by_name( 'num_detections:0'),
29 |     sess.graph.get_tensor_by_name( 'detection_scores:0'),
30 |     sess.graph.get_tensor_by_name( 'detection_boxes:0'),
31 |     sess.graph.get_tensor_by_name( 'detection_classes:0')],
32 |     feed_dict={ 'image_tensor:0': inp.reshape( 1, inp.shape[ 0], inp.shape[ 1], 3)})
33 |     # Visualize detected bounding boxes.
34 |     num_detections = int(out[ 0][ 0])
35 |     print(num_detections)
36 |     for i in range(num_detections):
37 |         classId = int(out[ 3][ 0][i])
38 |         score = float(out[ 1][ 0][i])
39 |         bbox = [float(v) for v in out[ 2][ 0][i]]
40 |         if score > 0.3:
41 |             x = bbox[ 1] * cols
42 |             y = bbox[ 0] * rows
43 |             right = bbox[ 3] * cols
44 |             bottom = bbox[ 2] * rows
45 |         cv2.rectangle(img, (int(x), int(y)), (int(right), int(bottom)), ( 125, 255, 51), thickness= 2)
46 |     cv2.imshow( 'TensorFlow MobileNet-SSD', img)
47 |     cv2.waitKey()
48 |     
49 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/tf_pb_pre.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | from __future__ import print_function
 3 | import numpy as np
 4 | import cv2
 5 | from cv2 import dnn
 6 | import sys
 7 |  
 8 | import tensorflow as tf
 9 | from tensorflow.python.framework import graph_util
10 | import os
11 | 
12 | 
13 | 
14 | 
15 | model_dir = './'
16 | model_name = 'frozen_insightface_r50.pb'
17 | 
18 | # def create_graph():
19 | #     with tf.gfile.FastGFile(os.path.join(model_dir, model_name), 'rb') as f:
20 | #         graph_def = tf.GraphDef()
21 | #         graph_def.ParseFromString(f.read())
22 | #         tf.import_graph_def(graph_def, name='')
23 | 
24 | # create_graph()
25 | # tensor_name_list = [tensor.name for tensor in tf.get_default_graph().as_graph_def().node]
26 | # for tensor_name in tensor_name_list:
27 | #     print(tensor_name,'\n')
28 | 
29 | # print("00000")
30 | 
31 | 
32 | #Read the graph.
33 | with tf.gfile.FastGFile(os.path.join(model_dir, model_name), 'rb') as f:
34 |     graph_def = tf.GraphDef()
35 |     graph_def.ParseFromString(f.read())
36 | print("11111")
37 | with tf.Session() as sess:
38 | # Restore session
39 |     sess.graph.as_default()
40 |     print("22222")
41 |     tf.import_graph_def(graph_def, name= '')
42 |     # Read and preprocess an image.
43 |     img = cv2.imread( '../t4.png')
44 |     print("img shape: ", img.shape)
45 |     rows = img.shape[ 0]
46 |     cols = img.shape[ 1]
47 |     inp = cv2.resize(img, ( 112, 112))
48 |     inp = inp[:, :, [ 2, 1, 0]] # BGR2RGB
49 |     # Run the model
50 |     out = sess.run([sess.graph.get_tensor_by_name( 'output:0'),],
51 |         feed_dict={ 'data:0': inp.reshape( 1, inp.shape[ 0], inp.shape[ 1], 3)})
52 |     # Visualize detected bounding boxes.
53 |     print("out: ", out)
54 |     # detections = int(out[ 0][ 0])
55 |     # print(detections)
56 | 
57 | def get_ga(data):
58 | 
59 |     ret = data[0]
60 |     
61 |     print("ret length: ", len(ret[0]))
62 | 
63 |     #ret = ret1
64 | 
65 |     g = ret[:,0:2].flatten()
66 |     gender = np.argmax(g)
67 |     a = ret[:,2:202].reshape( (100,2) )
68 |     a = np.argmax(a, axis=1)
69 |     age = int(sum(a))
70 | 
71 |     return gender, age
72 | 
73 | # Decode and print the prediction fetched for the image above.
74 | gender, age = get_ga(out)
75 | print(gender, age)
76 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/tflite_pre.py:
--------------------------------------------------------------------------------
  1 | #coding:utf-8
  2 | from __future__ import print_function
  3 | import numpy as np
  4 | import cv2
  5 | from cv2 import dnn
  6 | import sys
  7 |  
  8 | import tensorflow as tf
  9 | from tensorflow.python.framework import graph_util
 10 | import os
 11 | 
 12 | 
 13 | model_name = 'frozen_insightface_r50.pb'
 14 | 
 15 | # #Read the graph.
 16 | # with tf.gfile.FastGFile(os.path.join(model_dir, model_name), 'rb') as f:
 17 | #     graph_def = tf.GraphDef()
 18 | #     graph_def.ParseFromString(f.read())
 19 | # print("11111")
 20 | # with tf.Session() as sess:
 21 | # # Restore session
 22 | #     sess.graph.as_default()
 23 | #     print("22222")
 24 | #     tf.import_graph_def(graph_def, name= '')
 25 | #     # Read and preprocess an image.
 26 | #     img = cv2.imread( '../t7.png')
 27 | #     print("img shape: ", img.shape)
 28 | #     rows = img.shape[ 0]
 29 | #     cols = img.shape[ 1]
 30 | #     inp = cv2.resize(img, ( 112, 112))
 31 | #     inp = inp[:, :, [ 2, 1, 0]] # BGR2RGB
 32 | #     # Run the model
 33 | #     out = sess.run([sess.graph.get_tensor_by_name( 'output:0'),],
 34 | #         feed_dict={ 'data:0': inp.reshape( 1, inp.shape[ 0], inp.shape[ 1], 3)})
 35 | #     # Visualize detected bounding boxes.
 36 | #     print("out: ", out)
 37 | #     # detections = int(out[ 0][ 0])
 38 | #     # print(detections)
 39 | 
 40 | # def get_ga(data):
 41 | 
 42 | #     ret = data[0]
 43 |     
 44 | #     print("ret length: ", len(ret[0]))
 45 | 
 46 | #     #ret = ret1
 47 | 
 48 | #     g = ret[:,0:2].flatten()
 49 | #     gender = np.argmax(g)
 50 | #     a = ret[:,2:202].reshape( (100,2) )
 51 | #     a = np.argmax(a, axis=1)
 52 | #     age = int(sum(a))
 53 | 
 54 | #     return gender, age
 55 | 
 56 | # gender, age = get_ga(out)
 57 | # print(gender, age)
 58 | 
 59 | 
 60 | 
 61 | 
 62 | # Load TFLite model and allocate tensors.
 63 | interpreter = tf.lite.Interpreter(model_path="quantized_model.tflite")
 64 | interpreter.allocate_tensors()
 65 | 
 66 | input_details = interpreter.get_input_details()
 67 | output_details = interpreter.get_output_details()
 68 | 
 69 | print(input_details)
 70 | print(output_details)
 71 | 
 72 | 
 73 | img = cv2.imread( '../head112.jpg')
 74 | print("img shape: ", img.shape)
 75 | rows = img.shape[ 0]
 76 | cols = img.shape[ 1]
 77 | input_data = cv2.resize(img, ( 112, 112))
 78 | input_data = np.array([input_data[:, :, [ 2, 1, 0]]]) # BGR2RGB
 79 | print(input_data.shape)
 80 | input_data = input_data.astype(np.float32)
 81 | index = input_details[0]['index']
 82 | interpreter.set_tensor(index, input_data)
 83 | interpreter.invoke()
 84 | output_data = interpreter.get_tensor(output_details[0]['index'])
 85 | print('output_data shape:',output_data.shape)
 86 | 
 87 | def get_ga(data):
 88 |     print("ret length: ", len(data))
 89 |     g = data[:,0:2].flatten()
 90 |     gender = np.argmax(g)
 91 |     a = data[:,2:202].reshape( (100,2) )
 92 |     a = np.argmax(a, axis=1)
 93 |     age = int(sum(a))
 94 |     return gender, age
 95 | 
 96 | # Flat index of the largest value in the output.
 97 | print("output_data max : ", np.argmax(output_data))
 98 | 
 99 | # Debug print of a few individual entries of row 0.
100 | print(output_data[0,0],output_data[0,1], output_data[0,163],output_data[0,164] )
101 | # Entry 164 of row 0 as a fraction of the sum of that row.
102 | print(output_data[0,164]/np.sum(output_data[0,:]))
103 | # Decode and print the prediction.
104 | gender, age = get_ga(output_data)
105 | print(gender, age)
106 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/basic/tflite_show_middle_output.py:
--------------------------------------------------------------------------------
 1 | from tensorflow.lite.python import schema_py_generated as schema_fb
 2 | import flatbuffers
 3 | import tensorflow as tf
 4 | import time
 5 | import os
 6 | import cv2
 7 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 8 | 
 9 | def OutputsOffset(subgraph, j):
10 |     o = flatbuffers.number_types.UOffsetTFlags.py_type(subgraph._tab.Offset(8))
11 |     if o != 0:
12 |         a = subgraph._tab.Vector(o)
13 |         return a + flatbuffers.number_types.UOffsetTFlags.py_type(j * 4)
14 |     return 0
15 |  
16 | #参考了https://github.com/raymond-li/tflite_tensor_outputter/blob/master/tflite_tensor_outputter.py
17 | #调整output到指定idx
18 | def buffer_change_output_tensor_to(model_buffer, new_tensor_i):
19 |     
20 |     root = schema_fb.Model.GetRootAsModel(model_buffer, 0)
21 |     output_tensor_index_offset = OutputsOffset(root.Subgraphs(0), 0)
22 |     
23 |     # Flatbuffer scalars are stored in little-endian.
24 |     new_tensor_i_bytes = bytes([
25 |     new_tensor_i & 0x000000FF, \
26 |     (new_tensor_i & 0x0000FF00) >> 8, \
27 |     (new_tensor_i & 0x00FF0000) >> 16, \
28 |     (new_tensor_i & 0xFF000000) >> 24 \
29 |     ])
30 |     # Replace the 4 bytes corresponding to the first output tensor index
31 |     return model_buffer[:output_tensor_index_offset] + new_tensor_i_bytes + model_buffer[output_tensor_index_offset + 4:]
32 | 
33 | 
34 | # Read the model.
35 | with open('lite-model_movenet_singlepose_lightning_3.tflite', 'rb') as f:
36 |     model_buffer = f.read()
37 |  
38 | # 修改输出idx
39 | idx = 95  #可以通过interpreter.get_tensor_details()，查各层的idx值
40 | model_buffer = buffer_change_output_tensor_to(model_buffer, idx)
41 |  
42 |  
43 | # 推理
44 | interpreter = tf.lite.Interpreter(model_content=model_buffer)
45 | interpreter.allocate_tensors()
46 | 
47 | print(interpreter.get_tensor_details())
48 |  
49 | input_index = interpreter.get_input_details()[0]["index"]
50 | output_index = interpreter.get_output_details()[0]["index"]
51 | 
52 | 
53 | image_path = '320240.jpg'
54 | image = tf.io.read_file(image_path)
55 | image = tf.compat.v1.image.decode_jpeg(image)
56 | image = tf.expand_dims(image, axis=0)
57 | # Resize and pad the image to keep the aspect ratio and fit the expected size.
58 | image = tf.image.resize_with_pad(image, 192, 192)
59 | input_data = tf.cast(image, dtype=tf.float32)
60 | 
61 | 
62 | interpreter.set_tensor(input_index, input_data)
63 | interpreter.invoke()
64 |  
65 | # 中间层的output值
66 | out_val = interpreter.get_tensor(output_index)
67 | print(out_val.shape)
68 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/demo/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/tensorflow/demo/.DS_Store


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/demo/basic_mnist_demo.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | import cv2
 4 | import tensorflow as tf 
 5 | 
 6 | 
 7 | from tensorflow.examples.tutorials.mnist import input_data
 8 | 
 9 | def inverse_color(image):
10 | 
11 |     height,width = image.shape
12 |     img2 = image.copy()
13 | 
14 |     for i in range(height):
15 |         for j in range(width):
16 |             img2[i,j] = (255-image[i,j]) 
17 |     return img2
18 | 
# Load MNIST from the "MNIST_data/" directory with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Softmax regression on flattened images: y = softmax(x.W + b).
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))

y = tf.nn.softmax(tf.matmul(x,W) + b)

# Ground-truth one-hot labels fed alongside each batch.
y_ = tf.placeholder("float", [None,10])

# Cross-entropy summed over the batch and over the 10 classes.
cross_entropy = -tf.reduce_sum(y_*tf.log(y))

train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

# tf.initialize_all_variables() is deprecated; this is its replacement.
init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)

# 1000 gradient steps on mini-batches of 100 examples.
for i in range(1000):
  batch_xs, batch_ys = mnist.train.next_batch(100)
  sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

# A prediction is correct when the arg-max class matches the label's.
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))

accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# print() with one argument behaves the same on Python 2 and Python 3.
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
47 | 
48 | 
# Read a picture of any size as grayscale and shrink it to 28x28.
z = cv2.imread("2.png",0)
if z is None:
    # cv2.imread returns None instead of raising when the file is unreadable.
    raise IOError("could not read image '2.png'")
z = cv2.resize(z,(28,28),interpolation = cv2.INTER_CUBIC)
z = inverse_color(z)

# Flatten with NumPy: tf.reshape() would return a tensor, which cannot be fed.
# Scale 0..255 down to 0..1 because the model was trained on mnist.train
# images, which input_data.read_data_sets scales to [0, 1] by default
# (NOTE(review): confirm against the installed TensorFlow version).
image = np.reshape(z,[1,784],order='C').astype(np.float32) / 255.0

# Single-image inference head sharing the trained W and b.
x2 = tf.placeholder(tf.float32, [1, 784])
y2 = tf.nn.softmax(tf.matmul(x2,W) + b)
ans = tf.argmax(y2,1)
print(sess.run(ans,feed_dict={x2:image,}))
61 | 


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/demo/ten_people_face_reconize/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/tensorflow/demo/ten_people_face_reconize/.DS_Store


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/demo/ten_people_face_reconize/model/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/tensorflow/demo/ten_people_face_reconize/model/.DS_Store


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/demo/ten_people_face_reconize/olivettifaces.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/tensorflow/demo/ten_people_face_reconize/olivettifaces.gif


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/demo/ten_people_face_reconize/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/frameworks/tensorflow/demo/ten_people_face_reconize/result.png


--------------------------------------------------------------------------------
/Base/frameworks/tensorflow/readme.md:
--------------------------------------------------------------------------------
 1 | ## 基于TF的一些东西
 2 | 
 3 | #### 使用inspect_checkpoint来查看ckpt里的内容 打印节点信息
 4 | ~~~
 5 | from tensorflow.python.tools import inspect_checkpoint as chkp
 6 | from tensorflow.python.framework import meta_graph
 7 | input_graph_def = meta_graph.read_meta_graph_file("model2.ckpt.meta").graph_def
 8 | for node in input_graph_def.node:
 9 |     print(node.name)
10 | ~~~
11 | 
12 | #### Tensorboard
13 | > tensorboard --logdir=/Users/fire/A
14 | 
15 | #### pb2onnx
16 | https://github.com/onnx/tensorflow-onnx
17 | 
18 | 
19 | #### 剪枝
20 | * [官方示例](https://tensorflow.google.cn/model_optimization/guide/pruning/pruning_with_keras)
21 | * [博客示例](https://www.cnblogs.com/purple5252/p/11812207.html)
22 | * [我的示例(基于mobilenetv2)](./basic/pruned_demo.py)
23 | 
24 | ### Basic
25 | 
26 | * [TensorFlow Example](./basic/TensorFlowExample.ipynb)
27 | * [graph/ placeholder/ TensorBoard](./basic/Learn_tf.ipynb)
28 | * [模型保存读取](./basic/tf_save_load.ipynb)
29 | * [ckpt转pb](./basic/ckpt2pb.py) | [ckpt模型加载预测](./basic/ckpt_pre.py)
30 | * [pb转tflite](./basic/pb2tflite.py)| [pb模型测试](./basic/tf_pb_pre.py) |  [tf加载pb模型](./basic/read_pb.py)
31 | * [tflite模型测试](./basic/tflite_pre.py) | [转onnx后模型测试](./basic/onnx_pre.py)
32 | * [TFLiteModelMaker轻松利用预训练模型训练tflite(支持efficientnetlite等)](./basic/TFLiteModelMaker)
33 | * [TFLite打印中间节点输出](./basic/tflite_show_middle_output.py)
34 | 
35 | ### Demo
36 | 
37 | * [逻辑回归](./demo/TF_logsitic.ipynb)
38 | * [mnist手写数字识别(NN)](./demo/basic_mnist_demo.py)
39 | * [mnist手写数字识别(CNN)](./demo/mnist_cnn_demo.py)
40 | * [10人版人脸识别](./demo/ten_people_face_reconize)
41 | 
42 | 
43 | ### Resource
44 | * [TensorRT安装及使用教程](https://blog.csdn.net/zong596568821xp/article/details/86077553)
45 | 


--------------------------------------------------------------------------------
/Base/tools/lightgbm/readme.md:
--------------------------------------------------------------------------------
 1 | From [官方文档](https://github.com/Microsoft/LightGBM)
 2 | 
 3 | * simpleexample.py
 4 | 	* Construct Dataset
 5 | 	* Basic train and predict
 6 | 	* Eval during training
 7 | 	* Early stopping
 8 | 	* Save model to file
 9 | 
10 | * sklearnexample.py
11 | 	* Basic train and predict with sklearn interface
12 | 	* Feature importances with sklearn interface


--------------------------------------------------------------------------------
/Base/tools/lightgbm/simpleexample.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | # pylint: disable = invalid-name, C0111
 3 | import json
 4 | import lightgbm as lgb
 5 | import pandas as pd
 6 | from sklearn.metrics import mean_squared_error
 7 | 
 8 | 
 9 | # load or create your dataset
10 | print('Load data...')
11 | df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')
12 | df_test = pd.read_csv('../regression/regression.test', header=None, sep='\t')
13 | 
14 | y_train = df_train[0].values
15 | y_test = df_test[0].values
16 | X_train = df_train.drop(0, axis=1).values
17 | X_test = df_test.drop(0, axis=1).values
18 | 
19 | # create dataset for lightgbm
20 | lgb_train = lgb.Dataset(X_train, y_train)
21 | lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
22 | 
23 | # specify your configurations as a dict
24 | params = {
25 |     'task': 'train',
26 |     'boosting_type': 'gbdt',
27 |     'objective': 'regression',
28 |     'metric': {'l2', 'auc'},
29 |     'num_leaves': 31,
30 |     'learning_rate': 0.05,
31 |     'feature_fraction': 0.9,
32 |     'bagging_fraction': 0.8,
33 |     'bagging_freq': 5,
34 |     'verbose': 0
35 | }
36 | 
37 | print('Start training...')
38 | # train
39 | gbm = lgb.train(params,
40 |                 lgb_train,
41 |                 num_boost_round=20,
42 |                 valid_sets=lgb_eval,
43 |                 early_stopping_rounds=5)
44 | 
45 | print('Save model...')
46 | # save model to file
47 | gbm.save_model('model.txt')
48 | 
49 | print('Start predicting...')
50 | # predict
51 | y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
52 | # eval
53 | print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)


--------------------------------------------------------------------------------
/Base/tools/lightgbm/sklearnexample.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | # pylint: disable = invalid-name, C0111
 3 | import lightgbm as lgb
 4 | import pandas as pd
 5 | from sklearn.metrics import mean_squared_error
 6 | from sklearn.model_selection import GridSearchCV
 7 | 
 8 | # load or create your dataset
 9 | print('Load data...')
10 | df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')
11 | df_test = pd.read_csv('../regression/regression.test', header=None, sep='\t')
12 | 
13 | y_train = df_train[0].values
14 | y_test = df_test[0].values
15 | X_train = df_train.drop(0, axis=1).values
16 | X_test = df_test.drop(0, axis=1).values
17 | 
18 | print('Start training...')
19 | # train
20 | gbm = lgb.LGBMRegressor(objective='regression',
21 |                         num_leaves=31,
22 |                         learning_rate=0.05,
23 |                         n_estimators=20)
24 | gbm.fit(X_train, y_train,
25 |         eval_set=[(X_test, y_test)],
26 |         eval_metric='l1',
27 |         early_stopping_rounds=5)
28 | 
29 | print('Start predicting...')
30 | # predict
31 | y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)
32 | # eval
33 | print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5)
34 | 
35 | # feature importances
36 | print('Feature importances:', list(gbm.feature_importances_))
37 | 
38 | # other scikit-learn modules
39 | estimator = lgb.LGBMRegressor(num_leaves=31)
40 | 
41 | param_grid = {
42 |     'learning_rate': [0.01, 0.1, 1],
43 |     'n_estimators': [20, 40]
44 | }
45 | 
46 | gbm = GridSearchCV(estimator, param_grid)
47 | 
48 | gbm.fit(X_train, y_train)
49 | 
50 | print('Best parameters found by grid search are:', gbm.best_params_)


--------------------------------------------------------------------------------
/Base/tools/scikit-learn/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/tools/scikit-learn/.DS_Store


--------------------------------------------------------------------------------
/Base/tools/scikit-learn/README.md:
--------------------------------------------------------------------------------
 1 | # scikit-learn
 2 | 
 3 | ### 常用
 4 | * 划分验证集
 5 | 
 6 | ```python
 7 | from sklearn.cross_validation import train_test_split
 8 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
 9 | ```
10 | * Grid Search 
11 | ```python
12 | param_grid = {'n_estimators': [300, 500], 'max_features': [10, 12, 14]}
13 | model = grid_search.GridSearchCV(estimator=rfr, param_grid=param_grid, n_jobs=1, cv=10, verbose=20, scoring=RMSE)
14 | model.fit(X_train, y_train)
15 | ```
16 | * LabelEncoder
17 | ```python
18 | from sklearn.preprocessing import LabelEncoder
19 | le = LabelEncoder()
20 | le.fit([1,5,67,100])
21 | le.transform([1,1,100,67,5])
22 | #输出： array([0,0,3,2,1])
23 | #可通过le.inverse_transform(x)转换回去
24 | ```
25 | * sklearn.utils.shuffle(多个数组按同样顺序打乱)
26 | ```python
27 | def fill_feed_dict(data_X, data_Y, batch_size):
28 |     """Generator datasets to yield batches"""
29 |     # Shuffle data first.
30 |     shuffled_X, shuffled_Y = shuffle(data_X, data_Y)
31 |     for idx in range(data_X.shape[0] // batch_size):
32 |         x_batch = shuffled_X[batch_size * idx: batch_size * (idx + 1)]
33 |         y_batch = shuffled_Y[batch_size * idx: batch_size * (idx + 1)]
34 |         yield x_batch, y_batch
35 | ```
36 | 
37 | ### 算法
38 | * [GBDT（MART）迭代决策树入门教程 | 简介](http://blog.csdn.net/suranxu007/article/details/49910323)
39 | 
40 | ### 实现
41 | * [常用算法调用(LR/ RF/ GBDT/ knn/ SVM)](./useful.py)
42 | * [logistic回归](./sklearn_LR.py)
43 | * [皮尔逊相关度](./pearsonr.ipynb)
44 | * [利用kmeans对图片颜色聚类并可视化](./demo/kmeans_color.py)
45 | * [高维数据可视化tSNE](./demo/tSNE.py)
46 | 
47 | ### Choosing the right estimator
48 | 
49 | ![Choosing the right estimator](./choose.png)
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 


--------------------------------------------------------------------------------
/Base/tools/scikit-learn/choose.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/tools/scikit-learn/choose.png


--------------------------------------------------------------------------------
/Base/tools/scikit-learn/demo/kmeans_color.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import numpy as np
 4 | import os,shutil
 5 | import random
 6 | 
 7 | import cv2
 8 | from sklearn.cluster import KMeans
 9 | from sklearn.externals import joblib
10 | 
11 | import time
12 | 
13 | 
def getKmeansColor(img, n_cluster):
    """Cluster the colours in the centre of an image with k-means.

    The image is resized to 300x300, cropped to its central region
    (20%..80% along both axes) and its pixels are clustered as 3-vectors.
    The cluster centres and the per-cluster ratios are also printed.

    Args:
        img: 3-channel image array (it is unpacked as h, w, c).
        n_cluster: number of k-means clusters.

    Returns:
        (cluster_centers, color_count_ratio): the fitted centres and, for
        cluster indices 0..n_cluster-1, the fraction of pixels assigned
        to each cluster.
    """
    from collections import Counter

    resized = cv2.resize(img, (300, 300))

    # Keep only the central window of the resized image.
    rows, cols, _ = resized.shape
    center = resized[int(rows*0.2):int(rows*0.8), int(cols*0.2):int(cols*0.8)]

    # One row per pixel, one column per channel.
    data = np.reshape(center, (-1, 3))

    # Fit k-means on the pixel colours.
    clf = KMeans(n_clusters=n_cluster)
    clf.fit(data)

    # Cluster centres.
    print(clf.cluster_centers_)

    # clf.labels_ holds the cluster index of every pixel; turn the counts
    # into fractions of the total pixel count.
    color_count_dict = Counter(clf.labels_)
    total = len(data)
    color_count_ratio = [color_count_dict[k]*1.0/total for k in range(n_cluster)]
    print(color_count_ratio)

    # Notes kept from the original author:
    #  - clf.inertia_ helps judge whether the cluster count is suitable
    #    (smaller distance means tighter clusters; pick the elbow point),
    #    e.g. by refitting for each k in range(5, 30) and printing it.
    #  - clf.predict(feature) assigns new samples to clusters.
    #  - joblib.dump(clf, 'km.pkl') / joblib.load('c:/km.pkl') save and
    #    restore the model.

    return clf.cluster_centers_, color_count_ratio
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
if '__main__' == __name__:
    n_cluster = 5
    img = cv2.imread("11.jpg")
    if img is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        raise IOError("could not read image '11.jpg'")

    cluster_centers, color_count_ratio = getKmeansColor(img, n_cluster)

    # Visualise the result as a vertical bar: one band per cluster, with
    # height proportional to the share of pixels in that cluster.
    res_img_h = n_cluster*2*10*2
    res_img_w = 100
    # 8-bit white canvas (the JPEG written below is 8-bit anyway).
    res = np.full((res_img_h, res_img_w, 3), 255, dtype=np.uint8)

    # Band edges are taken from the cumulative ratio so that truncation
    # cannot leave an uncoloured strip at the bottom of the bar.
    cumulative = 0.0
    y_start = 0
    for i in range(n_cluster):
        cumulative += color_count_ratio[i]
        y_end = int(round(res_img_h*cumulative))
        cv2.rectangle(res, (0, y_start), (res_img_w, y_end), [int(x) for x in cluster_centers[i]], -1)
        y_start = y_end

    # Black frame around the whole bar.
    cv2.rectangle(res, (0, 0), (res_img_w-1, res_img_h-1), (0,0,0),2)

    cv2.imwrite("res.jpg", res)
96 | 


--------------------------------------------------------------------------------
/Base/tools/scikit-learn/demo/tSNE.py:
--------------------------------------------------------------------------------
 1 | # coding='utf-8'
 2 | import numpy as np
 3 | import cv2
 4 | from sklearn.manifold import TSNE
 5 | 
 6 | 
 7 | X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
 8 | tsne = TSNE(n_components=2)
 9 | tsne.fit_transform(X)
10 | print(tsne.embedding_)
11 | 
12 | 
13 | 
14 | # 一个对S曲线数据集上进行各种降维的说明。
15 | from time import time
16 | 
17 | import matplotlib.pyplot as plt
18 | from mpl_toolkits.mplot3d import Axes3D
19 | from matplotlib.ticker import NullFormatter
20 | 
21 | from sklearn import manifold, datasets
22 | 
23 | # # Next line to silence pyflakes. This import is needed.
24 | # Axes3D
25 | 
n_points = 1000
# X is a 2-D array of shape (n_points, 3); color is a 1-D array of length n_points.
# make_s_curve is reached through sklearn.datasets directly because the
# sklearn.datasets.samples_generator module was removed from scikit-learn.
X, color = datasets.make_s_curve(n_points, random_state=0)
n_neighbors = 10
n_components = 2

fig = plt.figure(figsize=(8, 8))
# Figure title; it uses n_points so it stays correct if the value is changed.
plt.suptitle("Manifold Learning with %i points, %i neighbors"
             % (n_points, n_neighbors), fontsize=14)


# Draw the S-curve as a 3-D scatter plot.
ax = fig.add_subplot(211, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
ax.view_init(4, -72)  # initial viewing angle

# t-SNE embedding, timed.
t0 = time()
tsne = manifold.TSNE(n_components=n_components, init='pca', random_state=0)
Y = tsne.fit_transform(X)  # embedded coordinates
t1 = time()
print("t-SNE: %.2g sec" % (t1 - t0))  # elapsed time
ax = fig.add_subplot(2, 1, 2)
plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
plt.title("t-SNE (%.2g sec)" % (t1 - t0))
ax.xaxis.set_major_formatter(NullFormatter())  # hide tick labels
ax.yaxis.set_major_formatter(NullFormatter())
# plt.axis('tight')

plt.show()
57 | 


--------------------------------------------------------------------------------
/Base/tools/scikit-learn/pearsonr.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 7,
 6 |    "metadata": {
 7 |     "collapsed": true
 8 |    },
 9 |    "outputs": [],
10 |    "source": [
11 |     "from scipy.stats import pearsonr\n",
12 |     "#doc:https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.pearsonr.html\n",
13 |     "\n",
14 |     "#Calculates a Pearson correlation coefficient \n",
15 |     "#and the p-value for testing non-correlation. \n",
16 |     "# 原假设：无相关性\n",
17 |     "#Calculates a Pearson correlation coefficient and the p-value for testing non-correlation.\n",
18 |     "\n",
19 |     "## Pearson’s correlation requires that each dataset be normally distributed\n",
20 |     "\n",
21 |     "#p值反应了相关系数的显著性。\n",
22 |     "#The p-value roughly indicates the probability of an uncorrelated system \n",
23 |     "#producing datasets that have a Pearson correlation at least as extreme as \n",
24 |     "#the one computed from these datasets. The p-values are not entirely reliable \n",
25 |     "#but are probably reasonable for datasets larger than 500 or so."
26 |    ]
27 |   },
28 |   {
29 |    "cell_type": "code",
30 |    "execution_count": 8,
31 |    "metadata": {},
32 |    "outputs": [
33 |     {
34 |      "name": "stdout",
35 |      "output_type": "stream",
36 |      "text": [
37 |       "(1.0, 0.0)\n",
38 |       "(0.9450110410366913, 0.0549889589633087)\n",
39 |       "(-0.4465937565388721, 0.5534062434611278)\n",
40 |       "(0.9450110410366913, 0.0549889589633087)\n",
41 |       "(-0.4465937565388721, 0.5534062434611278)\n"
42 |      ]
43 |     }
44 |    ],
45 |    "source": [
46 |     "x1 = [1,2,2,4]\n",
47 |     "x2 = [2,3,3,5]\n",
48 |     "x3 = [4,9,16,25]\n",
49 |     "x4 = [4,2,9,1]\n",
50 |     "\n",
51 |     "print(pearsonr(x1,x2))\n",
52 |     "print(pearsonr(x1,x3))\n",
53 |     "print(pearsonr(x1,x4))\n",
54 |     "print(pearsonr(x2,x3))\n",
55 |     "print(pearsonr(x2,x4))"
56 |    ]
57 |   },
58 |   {
59 |    "cell_type": "code",
60 |    "execution_count": null,
61 |    "metadata": {
62 |     "collapsed": true
63 |    },
64 |    "outputs": [],
65 |    "source": []
66 |   }
67 |  ],
68 |  "metadata": {
69 |   "kernelspec": {
70 |    "display_name": "Python 3",
71 |    "language": "python",
72 |    "name": "python3"
73 |   },
74 |   "language_info": {
75 |    "codemirror_mode": {
76 |     "name": "ipython",
77 |     "version": 3
78 |    },
79 |    "file_extension": ".py",
80 |    "mimetype": "text/x-python",
81 |    "name": "python",
82 |    "nbconvert_exporter": "python",
83 |    "pygments_lexer": "ipython3",
84 |    "version": "3.6.2"
85 |   }
86 |  },
87 |  "nbformat": 4,
88 |  "nbformat_minor": 2
89 | }
90 | 


--------------------------------------------------------------------------------
/Base/tools/scikit-learn/sklearn_LR.py:
--------------------------------------------------------------------------------
  1 | 
  2 | # coding: utf-8
  3 | 
  4 | # In[1]:
  5 | 
  6 | 
  7 | 
  8 | #==================== Part 0: Basic Function ====================
  9 | with open('D:\ex2data1.txt', 'r') as f:  
 10 |     data = f.readlines()  #txt中所有字符串读入data  
 11 | 
 12 | datamat = []
 13 | for line in data:  
 14 |     odom = line.strip().split(',')        #将单个数据分隔开存好  
 15 |     numbers_float = map(float, odom) #转化为浮点数  
 16 |     datamat.append(numbers_float)
 17 | 
 18 | #print datamat
 19 | import numpy as np
 20 | 
 21 | datanp = np.array(datamat)
 22 | #print datanp
 23 | x = datanp[:,(0,1)].reshape((100,2))  
 24 | y = datanp[:,2].reshape((100,1))
 25 | lenY = len(y)
 26 | X = np.hstack((np.ones((lenY,1)),x)) 
 27 | 
 28 | 
 29 | # In[2]:
 30 | 
 31 | 
 32 | #%% ==================== Part 1: Plotting ====================
 33 | #We start the exercise by first plotting the data to understand the the problem we are working with.
 34 | import matplotlib.pyplot as plt
 35 | 
 36 | def plotData(x,y):
 37 |     fig = plt.figure()  
 38 |     ax= fig.add_subplot(111) #使画在一个图上
 39 |     
 40 |     pos = np.where(y[:,0]==0) #y为类似矩阵形式，所以要再取第一列
 41 |     neg = np.where(y[:,0]==1)
 42 |     ax1 = plt.scatter(x[pos,0], x[pos,1], marker = 'x', color = 'm')  
 43 |     ax2 = plt.scatter(x[neg,0], x[neg,1], marker = 'o', color = 'r')  
 44 |     plt.xlabel('exam1 score') 
 45 |     plt.ylabel('exam2 score')
 46 |     
 47 |     plt.legend([ax1, ax2], ['Admitted', 'Not admitted'])
 48 |     plt.show()
 49 | 
 50 | plotData(x,y)
 51 | 
 52 | 
 53 | # In[3]:
 54 | 
 55 | 
 56 | from sklearn import datasets
 57 | from sklearn.cross_validation import train_test_split
 58 | 
 59 | from sklearn.linear_model import LogisticRegression
 60 | lr = LogisticRegression(C=1e9)
 61 | lr.fit(x, y)   #这里要用的x是原始的！不带1(x0)的！
 62 | print lr.coef_,lr.intercept_
 63 | final_theta = np.zeros((3,1))
 64 | final_theta[0] = lr.intercept_
 65 | final_theta[1],final_theta[2]= lr.coef_[0]
 66 | print final_theta
 67 | 
 68 | 
 69 | # In[4]:
 70 | 
 71 | 
 72 | #%% ============= Part 4: Visualizing J(theta_0, theta_1) =============
 73 | fig = plt.figure()  
 74 | ax= fig.add_subplot(111) #使画在一个图上
 75 |     
 76 | pos = np.where(y[:,0]==1) #y为类似矩阵形式，所以要再取第一列
 77 | neg = np.where(y[:,0]==0)
 78 |     
 79 | ax1 = plt.scatter(x[pos,0], x[pos,1], marker = 'x', color = 'm')  
 80 | ax2 = plt.scatter(x[neg,0], x[neg,1], marker = 'o', color = 'r')  
 81 |     
 82 | plt.xlabel('exam1 score') 
 83 | plt.ylabel('exam2 score')
 84 |     
 85 | plt.legend([ax1, ax2], ['Admitted', 'Not admitted'])
 86 | 
 87 | 
 88 | #plotX = [30,100] #范围a[2]
 89 | plotX = np.arange(30,100,1)
 90 | plotY = (-final_theta[0]-final_theta[1]*plotX)/final_theta[2] #由0=w0x0+w1x1+w2x2推导，这里的y就是x2，x0=1 
 91 | #注意等于0！！！因为这是分类问题
 92 | plt.plot(plotX,plotY)#调用plot函数绘制得到由点生成的线条
 93 | 
 94 |     
 95 | plt.show()
 96 | 
 97 | 
 98 | # In[ ]:
 99 | 
100 | 
101 | 
102 | 
103 | 


--------------------------------------------------------------------------------
/Base/tools/scikit-learn/useful.py:
--------------------------------------------------------------------------------
 1 | ### 1. LR
 2 | from sklearn.linear_model import LogisticRegression
 3 | lr = LogisticRegression(C=1000.0, random_state=0)
 4 | lr.fit(train_x, train_y)
 5 | y_pre = lr.predict(val_x)
 6 | 
 7 | 
 8 | ### 2.RF
 9 | #Random Forest 一般在 max_features 设为 Feature 数量的平方根附近得到最佳结果。
10 | from sklearn.ensemble import RandomForestClassifier
11 | from sklearn.datasets import make_classification
12 | 
13 | rf = RandomForestClassifier(max_depth=2, random_state=0)
14 | rf.fit(train_x, train_y)
15 | 
16 | y_pre = rf.predict(val_x)
17 | y_pre[y_pre>0.5] = 1
18 | y_pre[y_pre<0.5] = 0
19 | 
20 | 
21 | ### 3.GBDT
22 | from sklearn.ensemble import GradientBoostingRegressor
23 | gbdt=GradientBoostingRegressor(loss='ls', learning_rate=0.1, n_estimators=100, max_depth=3)
24 | gbdt.fit(train_x, train_y)
25 | 
26 | y_pre=gbdt.predict(val_x)
27 | y_pre[y_pre>0.5] = 1
28 | y_pre[y_pre<0.5] = 0
29 | 
30 | ### 4.knn
31 | from sklearn import neighbors 
32 | 
33 | knn = neighbors.KNeighborsClassifier(n_neighbors=8,leaf_size=30,p=3)
34 | knn.fit(x,y)  
35 | 
36 | 
37 | ### 5.svm
38 | #http://blog.csdn.net/u013709270/article/details/53365744 (d多分类)
39 | from sklearn import svm
40 | X = [[0, 0], [1, 1]]
41 | y = [0, 1]
42 | clf = svm.SVC()
43 | clf.fit(X, y)  
44 | clf.predict([[2., 2.]])
45 | 


--------------------------------------------------------------------------------
/Base/tools/spark/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/tools/spark/.DS_Store


--------------------------------------------------------------------------------
/Base/tools/spark/README.md:
--------------------------------------------------------------------------------
 1 | # Spark
 2 | 
 3 | ### 文章
 4 | [在windows安装部署spark(python版)](https://blog.csdn.net/hjxinkkl/article/details/57083549?winzoom=1)
 5 | 
 6 | ### 代码
 7 | [最简单的示例](start.py)
 8 | 
 9 | ### Book
10 | * 《Spark快速大数据分析》
11 | 	1. [配套代码](https://github.com/databricks/learning-spark) 
12 | 	2. 分章笔记：[RDD编程](./learnsparkLDA/learn_sparkRDD.ipynb) | [pair RDD](./learnsparkLDA/spark_pairRDD.ipynb) | [读存数据](./learnsparkLDA/spark_saveload.ipynb) | [累加器&广播变量&分区操作&数值RDD](./learnsparkLDA/spark_uplevel.ipynb) | [MLlib](./learnsparkLDA/spark_MLlib.ipynb)
13 | 
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/Base/tools/spark/learnsparkLDA/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Base/tools/spark/learnsparkLDA/.DS_Store


--------------------------------------------------------------------------------
/Base/tools/spark/start.py:
--------------------------------------------------------------------------------
 1 | from pyspark import SparkConf, SparkContext
 2 | conf = SparkConf().setMaster("local[*]").setAppName("First_App")
 3 | sc = SparkContext(conf=conf)
 4 | 
 5 | data = sc.parallelize(range(10))
 6 | ans = data.reduce(lambda x, y: x + y)
 7 | print (ans)
 8 | 
 9 | '''
10 | output:
11 | 
12 | Setting default log level to "WARN".
13 | To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
14 | 2018-05-16 17:08:22 WARN  Utils:66 - Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
15 | 
16 | [Stage 0:>                                                          (0 + 4) / 4]
17 | [Stage 0:==============>                                            (1 + 3) / 4]
18 | [Stage 0:=============================>                             (2 + 2) / 4]
19 | [Stage 0:============================================>              (3 + 1) / 4]
20 |                                                                                 
21 | 45
22 | [Decode error - output not utf-8]
23 | [Decode error - output not utf-8]
24 | [Decode error - output not utf-8]
25 | '''
26 | 


--------------------------------------------------------------------------------
/Base/tools/xgboost/readme.md:
--------------------------------------------------------------------------------
 1 | ### 资料
 2 | * [官网](http://xgboost.readthedocs.io/en/latest/)
 3 | * [Python API](http://xgboost.readthedocs.io/en/latest/python/python_api.html)
 4 | * [安装教程](http://m.blog.csdn.net/huangdunxian/article/details/53432432)
 5 | 
 6 | ### 应用
 7 | * [demo](./xgboost.ipynb)
 8 | * [多分类](./xgboost_multi.ipynb)
 9 | 
10 | 
11 | ### 调参
12 | Xgboost 的调参。通常认为对它性能影响较大的参数有：
13 | * eta：每次迭代完成后更新权重时的步长。越小训练越慢。
14 | * num_round：总共迭代的次数。
15 | * subsample：训练每棵树时用来训练的数据占全部的比例。用于防止 Overfitting。
16 | * colsample_bytree：训练每棵树时用来训练的特征的比例，类似 RandomForestClassifier 的 max_features。
17 | * max_depth：每棵树的最大深度限制。与 Random Forest 不同，Gradient Boosting 如果不对深度加以限制，最终是会 Overfit 的。
18 | * early_stopping_rounds：用于控制在 Out Of Sample 的验证集上连续多少个迭代的分数都没有提高后就提前终止训练。用于防止 Overfitting。
19 | 
20 | #### 一般的调参步骤是：
21 | 1. 将训练数据的一部分划出来作为验证集。
22 | 2. 先将 eta 设得比较高（比如 0.1），num_round 设为 300 ~ 500。
23 | 3. 用 Grid Search 对其他参数进行搜索
24 | 4. 逐步将 eta 降低，找到最佳值。
25 | 5. 以验证集为 watchlist，用找到的最佳参数组合重新在训练集上训练。注意观察算法的输出，看每次迭代后在验证集上分数的变化情况，从而得到最佳的 early_stopping_rounds。
26 | 
27 | ```
28 | X_dtrain, X_deval, y_dtrain, y_deval = cross_validation.train_test_split(X_train, y_train, random_state=1026, test_size=0.3)
29 | dtrain = xgb.DMatrix(X_dtrain, y_dtrain)
30 | deval = xgb.DMatrix(X_deval, y_deval)
31 | watchlist = [(deval, 'eval')]
32 | params = {
33 |     'booster': 'gbtree',
34 |     'objective': 'reg:linear',
35 |     'subsample': 0.8,
36 |     'colsample_bytree': 0.85,
37 |     'eta': 0.05,
38 |     'max_depth': 7,
39 |     'seed': 2016,
40 |     'silent': 0,
41 |     'eval_metric': 'rmse'
42 | }
43 | clf = xgb.train(params, dtrain, 500, watchlist, early_stopping_rounds=50)
44 | pred = clf.predict(xgb.DMatrix(df_test))
45 | ```
46 | 所有具有随机性的 Model 一般都会有一个 seed 或是 random_state 参数用于控制随机种子。得到一个好的 Model 后，在记录参数时务必也记录下这个值，从而能够在之后重现 Model。
47 | 


--------------------------------------------------------------------------------
/Base/tools/xgboost/xgboost_multi.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "[参数说明](http://xgboost.readthedocs.io/en/latest//parameter.html)"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": 4,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import xgboost as xgb\n",
 17 |     "import numpy as np"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 10,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "### make data\n",
 27 |     "x1 = [[0,0.1,0.2],\n",
 28 |     "     [1.1,1.2,1.3],\n",
 29 |     "     [2.1,2.2,2.3],\n",
 30 |     "     [0.1,0.2,0.3],\n",
 31 |     "     [1.4,1.2,1.3],\n",
 32 |     "     [2.1,2.2,2.1],\n",
 33 |     "     [0.1,0.2,0.2],\n",
 34 |     "     [1.1,1.2,1.3],\n",
 35 |     "    [2.1,2.2,2.3]]\n",
 36 |     "\n",
 37 |     "y1 = [0,1,2,0,1,2,0,1,2]\n",
 38 |     "\n",
 39 |     "x2 = [[0,0.1,0.2],\n",
 40 |     "     [1.1,1.2,1.3],\n",
 41 |     "     [2.0,2.2,2.3],\n",
 42 |     "     [0.2,0.2,0.3]]"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 12,
 48 |    "metadata": {},
 49 |    "outputs": [
 50 |     {
 51 |      "name": "stdout",
 52 |      "output_type": "stream",
 53 |      "text": [
 54 |       "[ 0.  1.  2.  0.]\n"
 55 |      ]
 56 |     }
 57 |    ],
 58 |    "source": [
 59 |     "\n",
 60 |     "# read in data\n",
 61 |     "#dtrain = xgb.DMatrix('demo/data/agaricus.txt.train')\n",
 62 |     "#dtest = xgb.DMatrix('demo/data/agaricus.txt.test')\n",
 63 |     "# specify parameters via map\n",
 64 |     "dtrain = xgb.DMatrix(x1,y1)\n",
 65 |     "dtest = xgb.DMatrix(x2)\n",
 66 |     "\n",
 67 |     "param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'multi:softmax', 'num_class' : 3}\n",
 68 |     "\n",
 69 |     "#“multi:softmax” –set XGBoost to do multiclass classification using the softmax objective,\n",
 70 |     "#you also need to set num_class(number of classes)\n",
 71 |     "num_round = 2\n",
 72 |     "bst = xgb.train(param, dtrain, num_round)\n",
 73 |     "# make prediction\n",
 74 |     "preds = bst.predict(dtest)\n",
 75 |     "print(preds)"
 76 |    ]
 77 |   },
 78 |   {
 79 |    "cell_type": "code",
 80 |    "execution_count": null,
 81 |    "metadata": {},
 82 |    "outputs": [],
 83 |    "source": []
 84 |   }
 85 |  ],
 86 |  "metadata": {
 87 |   "kernelspec": {
 88 |    "display_name": "Python 3",
 89 |    "language": "python",
 90 |    "name": "python3"
 91 |   },
 92 |   "language_info": {
 93 |    "codemirror_mode": {
 94 |     "name": "ipython",
 95 |     "version": 3
 96 |    },
 97 |    "file_extension": ".py",
 98 |    "mimetype": "text/x-python",
 99 |    "name": "python",
100 |    "nbconvert_exporter": "python",
101 |    "pygments_lexer": "ipython3",
102 |    "version": "3.5.4"
103 |   }
104 |  },
105 |  "nbformat": 4,
106 |  "nbformat_minor": 2
107 | }
108 | 


--------------------------------------------------------------------------------
/CV/codes/IOU.py:
--------------------------------------------------------------------------------
 1 | def IOU(box1,box2):
 2 |     xmin1, ymin1, xmax1, ymax1 = box1
 3 |     xmin2, ymin2, xmax2, ymax2 = box2
 4 |     # 求交集部分左上角的点
 5 |     xmin = max(xmin1,xmin2)
 6 |     ymin = max(ymin1,ymin2)
 7 |     # 求交集部分右下角的点
 8 |     xmax = min(xmax1,xmax2)
 9 |     ymax = min(ymax1,ymax2)
10 |     # 计算输入的两个矩形的面积
11 |     s1 = (xmax1-xmin1) * (ymax1 - ymin1)
12 |     s2 = (xmax2-xmin2) * (ymax2 - ymin2)
13 | 
14 |     #计算总面积
15 |     s = s1 + s2 
16 |     # 计算交集
17 |     inter_area = max(0,(xmax - xmin)) * max(0,(ymax - ymin))
18 | 
19 |     iou = inter_area / (s - inter_area)
20 |     return iou
21 | 


--------------------------------------------------------------------------------
/CV/codes/label_smoothing.py:
--------------------------------------------------------------------------------
1 | 
# Label-smoothing snippet.
# y: one-hot numpy array
# e.g. [[1,0,0],[0,1,0]]
# NOTE(review): `y` and `num_classes` are not defined in this file; they must
# already exist when this snippet runs.
label_smoothing = 0.01
# Scale the one-hot targets by (1 - label_smoothing) and add
# label_smoothing / num_classes to every entry.
y = y * (1 - label_smoothing) + label_smoothing / num_classes
6 | 


--------------------------------------------------------------------------------
/CV/codes/makeVOCDirs.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | 
 4 | 
 5 | BASE_PATH = "./"
 6 | 
 7 | voc_dir = os.path.join(BASE_PATH, "VOC2007")
 8 | 
 9 | anno_dir = os.path.join(voc_dir, "Annotations")
10 | set_dir = os.path.join(voc_dir, "ImageSets")
11 | img_dir = os.path.join(voc_dir, "JPEGImages")
12 | 
13 | main_dir = os.path.join(set_dir, "Main")
14 | 
15 | 
16 | if not os.path.exists(voc_dir):
17 |     os.makedirs(voc_dir)
18 | 
19 | if not os.path.exists(anno_dir):
20 |     os.makedirs(anno_dir)
21 | 
22 | if not os.path.exists(set_dir):
23 |     os.makedirs(set_dir)
24 | 
25 | if not os.path.exists(img_dir):
26 |     os.makedirs(img_dir)
27 | 
28 | if not os.path.exists(main_dir):
29 |     os.makedirs(main_dir)
30 | 


--------------------------------------------------------------------------------
/CV/codes/pascalVOC2csv.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 |  
 3 | import csv
 4 | import os
 5 | import glob
 6 | import sys
 7 |  
 8 | class PascalVOC2CSV(object):
 9 |     def __init__(self,xml=[], 
10 |                     ann_path='./annotations.csv',
11 |                     classes_path='./classes.csv'):
12 |         '''
13 |         :param xml: 所有Pascal VOC的xml文件路径组成的列表
14 |         :param ann_path: ann_path
15 |         :param classes_path: classes_path
16 |         '''
17 |         self.xml = xml
18 |         self.ann_path = ann_path
19 |         self.classes_path=classes_path
20 |         self.label=[]
21 |         self.annotations=[]
22 |  
23 |         self.data_transfer()
24 |         print(len(self.annotations))
25 |         self.write_file()
26 |  
27 |  
28 |     def data_transfer(self):
29 |         for num, xml_file in enumerate(self.xml):
30 |                 #print(xml_file)
31 |                 # 进度输出
32 |             sys.stdout.write('\r>> Converting image %d/%d' % (
33 |                 num + 1, len(self.xml)))
34 |             sys.stdout.flush()
35 | 
36 |             with open(xml_file, 'r') as fp:
37 |                 #print(len(fp.readlines()))
38 |                 for p in fp:
39 |                     if '<filename>' in p:
40 |                         self.filen_ame = p.split('>')[1].split('<')[0]
41 | 
42 |                     if '<object>' in p:
43 |                         # 类别
44 |                         d = [next(fp).split('>')[1].split('<')[0] for _ in range(9)]
45 |                         d = d[:-1]
46 |                         self.supercategory = d[0]
47 |                         if self.supercategory not in self.label:
48 |                             self.label.append(self.supercategory)
49 | 
50 |                         # 边界框
51 |                         x1 = int(d[-4])
52 |                         y1 = int(d[-3])
53 |                         x2 = int(d[-2])
54 |                         y2 = int(d[-1])
55 | 
56 |                         self.annotations.append([os.path.join('JPEGImages',self.filen_ame),x1,y1,x2,y2,self.supercategory])
57 |                         
58 |  
59 |         sys.stdout.write('\n')
60 |         sys.stdout.flush()
61 |  
62 |     def write_file(self,):
63 |         with open(self.ann_path, 'w', newline='') as fp:
64 |             csv_writer = csv.writer(fp, dialect='excel')
65 |             csv_writer.writerows(self.annotations)
66 |  
67 |         class_name=sorted(self.label)
68 |         class_=[]
69 |         for num,name in enumerate(class_name):
70 |             class_.append([name,num])
71 |         with open(self.classes_path, 'w', newline='') as fp:
72 |             csv_writer = csv.writer(fp, dialect='excel')
73 |             csv_writer.writerows(class_)
74 |  
75 |  
# Collect every XML annotation file directly under ./Annotations.
xml_file = glob.glob('./Annotations/*.xml')

# Constructing the converter runs the conversion immediately; with no explicit
# paths it writes ./annotations.csv and ./classes.csv.
PascalVOC2CSV(xml_file)
79 | 


--------------------------------------------------------------------------------
/CV/codes/show_voc_box.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import os.path
 3 | import numpy as np
 4 | import xml.etree.ElementTree as xmlET
 5 | from PIL import Image, ImageDraw
 6 | 
 7 | classes = ('__background__', # always index 0
 8 |            'Adidas', 'Nike', 'Puma')
 9 | 
10 | file_path_img = 'VOC2007/JPEGImages'
11 | file_path_xml = 'VOC2007/Annotations'
12 | save_file_path = 'VOC2007/Vis_boxes_VOC2007'
13 | 
14 | if not os.path.exists(save_file_path):
15 |     os.makedirs(save_file_path)
16 | 
17 | pathDir = os.listdir(file_path_xml)
18 | for idx in range(len(pathDir)):  
19 |     filename = pathDir[idx]
20 |     tree = xmlET.parse(os.path.join(file_path_xml, filename))
21 |     objs = tree.findall('object')        
22 |     num_objs = len(objs)
23 |     boxes = np.zeros((num_objs, 5), dtype=np.uint16)
24 | 
25 |     for ix, obj in enumerate(objs):
26 |         bbox = obj.find('bndbox')
27 |         # Make pixel indexes 0-based
28 |         x1 = float(bbox.find('xmin').text) - 1
29 |         y1 = float(bbox.find('ymin').text) - 1
30 |         x2 = float(bbox.find('xmax').text) - 1
31 |         y2 = float(bbox.find('ymax').text) - 1
32 | 
33 |         cla = obj.find('name').text 
34 |         label = classes.index(cla)
35 | 
36 |         boxes[ix, 0:4] = [x1, y1, x2, y2]
37 |         boxes[ix, 4] = label
38 | 
39 |     image_name = os.path.splitext(filename)[0]
40 |     img = Image.open(os.path.join(file_path_img, image_name + '.jpg')) 
41 | 
42 |     draw = ImageDraw.Draw(img)
43 |     for ix in range(len(boxes)):
44 |         xmin = int(boxes[ix, 0])
45 |         ymin = int(boxes[ix, 1])
46 |         xmax = int(boxes[ix, 2])
47 |         ymax = int(boxes[ix, 3])
48 |         draw.rectangle([xmin, ymin, xmax, ymax], outline=(255, 0, 0))
49 |         draw.text([xmin, ymin], classes[boxes[ix, 4]], (255, 0, 0))
50 | 
51 |     img.save(os.path.join(save_file_path, image_name + '.jpg'))
52 | 


--------------------------------------------------------------------------------
/CV/codes/simple_mixup.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cv2
 3 | 
 4 | """
 5 | 说明：
 6 | 这里是前期看到公式后一个粗略的实现
 7 | 参考了更多资料后其实有些地方还是有点问题
 8 | 
 9 | 比如weight应该是一个beta分布而不是均匀分布，如
10 | weight = np.random.beta(alpha,alpha)
11 |     
12 | 然后是输入每个batch的x，y即可，统一和一个从数据集中随机选取的图片做mixup
13 | 
14 | 最后，最关键的是，最后计算loss并不是修改标签y，
15 | 而是如下计算loss：
16 | loss = weight * criterion(outputs, targets_a) + (1 - weight) * criterion(outputs, targets_b)；
17 | 
18 | 下一次用到mixup的时候修改了再更新到这里吧。
19 | 5.24： 已更新beta分布
20 | """
21 | 
def mixup_batch(x1,y1,x2,y2,alpha=0.4):
    """
    Blend two samples and their labels with one random mixup coefficient.

    The coefficient is drawn from Beta(alpha, alpha) and printed to stdout;
    the same coefficient weights both the inputs and the labels.

    :param x1: first training image (ndarray, same shape as x2)
    :param y1: one-hot label of x1 (ndarray, same shape as y2)
    :param x2: second training image
    :param y2: one-hot label of x2
    :param alpha: Beta-distribution hyper-parameter, default 0.4
    :return: (new_x, new_y), the blended image and the blended label
    """
    lam = np.random.beta(alpha, alpha)
    print(lam)

    rest = 1 - lam
    blended_x = x1 * lam + x2 * rest
    blended_y = y1 * lam + y2 * rest
    return blended_x, blended_y
37 | 
38 | 
# Demo: read two images from the working directory and resize both to
# 224x224 so they have the same shape when blended.
img1 = cv2.imread("1.jpg")
img1 = cv2.resize(img1,(224,224))
img2 = cv2.imread("2.jpg")
img2 = cv2.resize(img2,(224,224))
# One-hot labels of the two images (three classes).
y1 = np.array([0,0,1])
y2 = np.array([0,1,0])

# Blend the pair, save the mixed image as 12.jpg and print the mixed label.
x,y = mixup_batch(img1,y1,img2,y2)
cv2.imwrite("12.jpg", x)
print(y)
49 | 


--------------------------------------------------------------------------------
/CV/codes/txt2xml.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import glob
  3 | import cv2
  4 | 
  5 | xml_head = '''<annotation>
  6 |     <folder>VOC2007</folder>
  7 |     <filename>{}</filename>.
  8 |     <source>
  9 |         <database>The VOC2007 Database</database>
 10 |         <annotation>PASCAL VOC2007</annotation>
 11 |         <image>flickr</image>
 12 |     </source>   
 13 |     <size>
 14 |         <width>{}</width>
 15 |         <height>{}</height>
 16 |         <depth>{}</depth>
 17 |     </size>
 18 |     <segmented>0</segmented>
 19 |     '''
 20 | xml_obj = '''
 21 |     <object>        
 22 |         <name>{}</name>
 23 |         <pose>Unspecified</pose>
 24 |         <truncated>0</truncated>
 25 |         <difficult>0</difficult>
 26 |         <bndbox>
 27 |             <xmin>{}</xmin>
 28 |             <ymin>{}</ymin>
 29 |             <xmax>{}</xmax>
 30 |             <ymax>{}</ymax>
 31 |         </bndbox>
 32 |     </object>
 33 |     '''
 34 | xml_end = '''
 35 | </annotation>'''
 36 | 
 37 | #--data
 38 | #----train 训练集图片
 39 | #----train_txt 对应的txt标签
 40 | #----train_xml 对应的xml标签
 41 | 
 42 | root='./'
 43 | 
 44 | 
 45 | labels = {0: 'person'}
 46 | 
 47 | txt_Lists = glob.glob(root +'labels_abs'+ '/*.txt')
 48 | print(len(txt_Lists))
 49 | # print(txt_Lists)
 50 | cnt=0
 51 | 
 52 | for txt_path in txt_Lists:
 53 |     filename=txt_path.split('\\')
 54 |     filename=filename[-1]
 55 |     filename=filename.split('.')
 56 |     filename=filename[0]
 57 | 
 58 |     txt = root+'labels_abs/'+filename+'.txt'
 59 |     # jpg=root+'train/'+filename+'.jpg' #jpg path
 60 |     xml=root+'labels_xml/'+filename+'.xml'
 61 | 
 62 |     print(txt)
 63 |     print(xml)
 64 | 
 65 |     obj = ''
 66 | 
 67 |     # img = cv2.imread(jpg)
 68 |     img_h, img_w = 1080, 1920
 69 | 
 70 |     print('h_factor:',img_h,'  w_factor:',img_w)
 71 |     # cv2.imshow("img", img)  #显示图片
 72 |     # cv2.waitKey(0)
 73 |     # cv2.destroyWindow("img")
 74 | 
 75 |     head = xml_head.format(str(filename), str(img_w), str(img_h), "3")
 76 | 
 77 |     with open(txt, 'r') as f:
 78 |         for line in f.readlines():
 79 |             yolo_datas = line.strip().split(' ')
 80 |             label = int(float(yolo_datas[0].strip()))
 81 |             # center_x = round(float(str(yolo_datas[1]).strip()) * img_w)
 82 |             # center_y = round(float(str(yolo_datas[2]).strip()) * img_h)
 83 |             # bbox_width = round(float(str(yolo_datas[3]).strip()) * img_w)
 84 |             # bbox_height = round(float(str(yolo_datas[4]).strip()) * img_h)
 85 | 
 86 |             # xmin = str(int(center_x - bbox_width / 2))
 87 |             # ymin = str(int(center_y - bbox_height / 2))
 88 |             # xmax = str(int(center_x + bbox_width / 2))
 89 |             # ymax = str(int(center_y + bbox_height / 2))
 90 | 
 91 |             xmin = str(int(float(yolo_datas[2].strip())))
 92 |             ymin = str(int(float(yolo_datas[3].strip())))
 93 |             xmax = str(int(float(yolo_datas[4].strip())))
 94 |             ymax = str(int(float(yolo_datas[5].strip())))
 95 | 
 96 |             obj += xml_obj.format(labels[label], xmin, ymin, xmax, ymax)
 97 | 
 98 |     with open(xml, 'w') as f_xml:
 99 |         f_xml.write(head + obj + xml_end)
100 |     cnt += 1
101 |     print(cnt)
102 | 


--------------------------------------------------------------------------------
/CV/codes/updateTXT.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | import os
 5 | import random
 6 | 
 7 | def getAllName(file_dir, tail_list = ['.jpg']): 
 8 |     L=[] 
 9 |     for root, dirs, files in os.walk(file_dir):
10 |         for file in files:
11 |             if os.path.splitext(file)[1] in tail_list:
12 |                 L.append(os.path.join(root, file))
13 |     return L
14 | 
15 | 
16 | 
17 | 
18 | # make all
19 | img_names = getAllName("VOC2007/JPEGImages")
20 | with open("VOC2007/ImageSets/Main/trainval.txt", "w", encoding="utf-8") as f:
21 |     for img_name in img_names:
22 |         f.write(os.path.basename(img_name)[:-4]+"\n")
23 | 
24 | 
25 | batch_size = 16
26 | # split
27 | split_ratio = 0.1
28 | with open("VOC2007/ImageSets/Main/trainval.txt", "r", encoding="utf-8") as f:
29 |     lines = f.readlines()
30 | print("total label: ", len(lines))
31 | print("batch  size: ", batch_size)
32 | print("train steps: ", int(len(lines)*(1-split_ratio)*1.0/batch_size))
33 | 
34 | f_train = open("VOC2007/ImageSets/Main/train.txt", "w", encoding="utf-8")
35 | f_val  = open("VOC2007/ImageSets/Main/val.txt", "w", encoding="utf-8")
36 | 
37 | for line in lines:
38 |     if random.random() < split_ratio:
39 |         f_val.write(line)
40 | 
41 |     else:
42 |         f_train.write(line)
43 | 
44 | f_train.close()
45 | f_val.close()
46 | 


--------------------------------------------------------------------------------
/CV/nets/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/nets/.DS_Store


--------------------------------------------------------------------------------
/CV/nets/alexnet/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/nets/alexnet/.DS_Store


--------------------------------------------------------------------------------
/CV/nets/alexnet/README.md:
--------------------------------------------------------------------------------
 1 | # Alexnet
 2 | > @Fire 2019.7.1
 3 | 
 4 | * Intro: 2012年，Alex等人提出的AlexNet网络在ImageNet大赛上以远超第二名的成绩夺冠，卷积神经网络乃至深度学习重新引起了广泛的关注。
 5 | 
 6 | * Year: 2012
 7 | * Paper: [ImageNet Classification with Deep Convolutional Neural Networks](http://xueshu.baidu.com/usercenter/paper/show?paperid=bfdf67dfdf8cea0c47038f63e91b9df1&site=xueshu_se)
 8 | * Code: [keras_alexnet](keras_alexnet.py)
 9 | * Info: 224 * 224 * 3的输入，6的输出，参数量为7千万。
10 | 
11 | ![net](./alexnet.jpg)
12 | 
13 | * Note: 
14 | 
15 | 	1. 数据增强：图像裁剪（crop），水平翻转；颜色、光照变换（使用PCA对每个像素点RGB分别加一个数）。
16 | 	2. Dropout：以一定概率使神经元的输出为0，减少过拟合。
17 | 	3. ReLU：方便计算，求导容易，使网络变得稀疏（类似L1正则），能够更快的学习。
18 | 	4. Local Response Normalization：局部响应归一化，利用临近的数据做归一化。贡献了1.2%的Top-5正确率。
19 | 	5. Overlapping Pooling：即Pooling的步长比Pooling Kernel小。贡献了0.3%的Top-5正确率。
20 | 	6. 多GPU学习。
21 | 
22 | 


--------------------------------------------------------------------------------
/CV/nets/alexnet/alexnet.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/nets/alexnet/alexnet.jpg


--------------------------------------------------------------------------------
/CV/nets/alexnet/keras_alexnet.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | A Keras port of the original Caffe SSD300 network.
 3 | 
 4 | Copyright (C) 2018 Pierluigi Ferrari
 5 | 
 6 | Licensed under the Apache License, Version 2.0 (the "License");
 7 | you may not use this file except in compliance with the License.
 8 | You may obtain a copy of the License at
 9 | 
10 |    http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | '''
18 | 
19 | from __future__ import division
20 | import numpy as np
21 | from keras.models import Model
22 | from keras.layers import Input, Lambda, Dropout, Activation, Dense, Flatten, Conv2D, MaxPooling2D, ZeroPadding2D, Reshape
23 | from keras.regularizers import l2
24 | import keras.backend as K
25 | 
26 | from keras.layers.normalization import BatchNormalization
27 | 
28 | 
def alexnet(image_size, n_classes):
    """Build an AlexNet-style Keras classification model.

    Five ReLU convolutions (with max-pooling after conv1, conv2 and conv5 and
    batch normalization after the first two pooling layers) are followed by
    two 4096-unit dense layers with 0.5 dropout and a softmax output.

    :param image_size: (height, width, channels) of the input images
    :param n_classes: number of units of the softmax output layer
    :return: an uncompiled keras ``Model``
    """
    height, width, channels = image_size
    inputs = Input(shape=(height, width, channels))

    def conv(filters, kernel, name, strides=(1, 1), padding='same'):
        # ReLU convolution; only conv1 overrides stride and padding.
        return Conv2D(filters, kernel, strides=strides, activation='relu',
                      padding=padding, name=name)

    def pool(name):
        # Overlapping 3x3 max-pooling with stride 2.
        return MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same', name=name)

    # Stage 1: conv -> pool -> batch norm.
    net = conv(96, (11, 11), 'conv1', strides=(4, 4), padding='valid')(inputs)
    net = pool('pool1')(net)
    net = BatchNormalization(axis=-1)(net)

    # Stage 2: conv -> pool -> batch norm.
    net = conv(256, (5, 5), 'conv2')(net)
    net = pool('pool2')(net)
    net = BatchNormalization(axis=-1)(net)

    # Stage 3: three stacked 3x3 convolutions, then pooling.
    net = conv(384, (3, 3), 'conv3')(net)
    net = conv(384, (3, 3), 'conv4')(net)
    net = conv(256, (3, 3), 'conv5')(net)
    net = pool('pool3')(net)

    # Classifier head.
    net = Flatten()(net)
    net = Dense(4096, activation='relu', trainable=True, name='fc6')(net)
    net = Dropout(0.5)(net)
    net = Dense(4096, activation='relu', trainable=True, name='fc7')(net)
    net = Dropout(0.5)(net)
    outputs = Dense(n_classes, activation='softmax', name='fc8')(net)

    return Model(inputs=inputs, outputs=outputs)
57 | 


--------------------------------------------------------------------------------
/CV/nets/lenet5/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/nets/lenet5/.DS_Store


--------------------------------------------------------------------------------
/CV/nets/lenet5/README.md:
--------------------------------------------------------------------------------
 1 | # LeNet-5
 2 | > @Fire 2019.7.1
 3 | 
 4 | * Intro: Yann LeCun在1998年设计的用于手写数字识别的卷积神经网络,算是第一个比较经典的CNN网络。
 5 | 
 6 | * Year: 1998
 7 | * Paper: [Gradient-Based Learning Applied to Document Recognition](http://xueshu.baidu.com/usercenter/paper/show?paperid=80fd293244903d8233327d0e5ba6de62&site=xueshu_se)
 8 | * Code: [keras_lenet](keras_lenet5.py)
 9 | * Info: 32 * 32 * 1的输入，10的输出，参数量为6万。
10 | 
11 | ![net](./lenet5.jpg)
12 | 


--------------------------------------------------------------------------------
/CV/nets/lenet5/keras_lenet5.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | A Keras port of the original Caffe SSD300 network.
 3 | 
 4 | Copyright (C) 2018 Pierluigi Ferrari
 5 | 
 6 | Licensed under the Apache License, Version 2.0 (the "License");
 7 | you may not use this file except in compliance with the License.
 8 | You may obtain a copy of the License at
 9 | 
10 |    http://www.apache.org/licenses/LICENSE-2.0
11 | 
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | '''
18 | 
19 | from __future__ import division
20 | import numpy as np
21 | from keras.models import Model
22 | from keras.layers import Input, Lambda, Dropout, Activation, Dense, Flatten, Conv2D, MaxPooling2D, ZeroPadding2D, Reshape
23 | from keras.regularizers import l2
24 | import keras.backend as K
25 | 
26 | from keras.layers.normalization import BatchNormalization
27 | 
28 | 
def lenet5(image_size, n_classes):
    """Build a LeNet-5-style Keras classification model.

    Two 5x5 ReLU convolutions, each followed by 2x2 max-pooling, feed dense
    layers of 120 and 84 units and a softmax output.

    :param image_size: (height, width, channels) of the input images
    :param n_classes: number of units of the softmax output layer
    :return: an uncompiled keras ``Model``
    """
    height, width, channels = image_size
    inputs = Input(shape=(height, width, channels))

    # Convolutional feature extractor: (filters, conv name, pool name).
    net = inputs
    for filters, conv_name, pool_name in ((6, 'conv1', 'pool1'),
                                          (16, 'conv2', 'pool2')):
        net = Conv2D(filters, (5, 5), strides=(1, 1), activation='relu',
                     padding='valid', name=conv_name)(net)
        net = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                           padding='same', name=pool_name)(net)

    # Classifier head; the layer names 'fc6'/'fc7'/'fc8' are kept as they are.
    net = Flatten()(net)
    net = Dense(120, activation='relu', trainable=True, name='fc6')(net)
    net = Dense(84, activation='relu', trainable=True, name='fc7')(net)
    outputs = Dense(n_classes, activation='softmax', name='fc8')(net)

    return Model(inputs=inputs, outputs=outputs)
50 | 
51 | 
# When run as a script, build the network for 32x32 single-channel input
# with 10 output classes and print its layer summary.
if __name__ == '__main__':
    image_size = (32,32,1)
    n_classes = 10
    net = lenet5(image_size, n_classes)
    net.summary()


--------------------------------------------------------------------------------
/CV/nets/lenet5/lenet5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/nets/lenet5/lenet5.jpg


--------------------------------------------------------------------------------
/CV/nets/vgg/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/nets/vgg/.DS_Store


--------------------------------------------------------------------------------
/CV/nets/vgg/README.md:
--------------------------------------------------------------------------------
 1 | # VGGNet
 2 | > @Fire 2019.7.2
 3 | 
 4 | * Intro: 2014年新的一届ILSVRC大赛中Googlenet与VGG的身影分外亮眼。Googlenet相对VGG而言在网络结构上有了更新的突破，不过其复杂度也大大增加了。VGG相对Googlenet虽然精度略逊些，但其整体网络框架还是延续了Alexnet及更早的Lenet等的一贯思路，此外还更深入的探讨了ConvNet深度对模型性能可能的影响。
 5 | 
 6 | * Year: 2014
 7 | * Paper: [Very Deep Convolutional Networks for Large-Scale Image Recognition](http://xueshu.baidu.com/usercenter/paper/show?paperid=2801f41808e377a1897a3887b6758c59&site=xueshu_se)
 8 | * Code: [keras_vgg](keras_vgg.py)
 9 | * Info: 224 * 224 * 3的输入，10的输出，参数量为1.3亿。
10 | 
11 | ![net](./vgg.jpg)
12 | 
13 | ![net](./vgg16.jpg)
14 | 
15 | * Note: 
16 | 
17 | 	1. 相比于AlexNet最大的改进是用小size的Filter代替大size的Filter。两个3 * 3的卷积核代替5 * 5的卷积核，三个3 * 3代替7 * 7。多个小尺度的卷积核比大尺度的卷积核非线性更强，同时参数减少，不容易过拟合。
18 | 
19 | 


--------------------------------------------------------------------------------
/CV/nets/vgg/vgg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/nets/vgg/vgg.jpg


--------------------------------------------------------------------------------
/CV/nets/vgg/vgg16.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/nets/vgg/vgg16.jpg


--------------------------------------------------------------------------------
/CV/note/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/note/.DS_Store


--------------------------------------------------------------------------------
/CV/note/DCNN_book_note.md:
--------------------------------------------------------------------------------
 1 | # 《深度卷积网络：原理与实践》笔记
 2 | > Fire 2019.01.12
 3 | 
 4 | ### 前言
 5 | 1. 本书选择MXNet框架的原因：训练速度快、占用资源少、使用方便、架构清晰、易于二次开发；
 6 | 
 7 | ### 第1章 走进深度学习的世界
 8 | 2. 预测学习（Predictive Learning）：输入1张图像，预测图像后续的发展；
 9 | 
### 第2章 深度卷积网络：第一课
11 | 3. 为何深度神经网络拥有如此强大的威力？这仍然是学术界的研究课题。目前大致的认知是，深度神经网络的逐层结构可以实现对于概念的不断抽象，这恰好与世界的运行规律吻合。
12 | 4. 经验法则：如果一项工作中所需要思考和决策的问题，人能在5秒内解决，它就很有可能被目前的深度神经网络实现。 
13 | 5. 2011年，谷歌用传统浅层神经网络用了16000台机器，计算3天，才构建出一个足以识别猫的网络；在2012年，著名的深度神经网络AlexNet面世，1台机器就可以完成这个任务；
6. 图像分割（segmentation），能进一步将图像自动划分为各个物体，并标记出每个物体的具体区域。目前主流是Mask R-CNN网络，还可进一步实现包括人体姿态识别的图像分割；
15 | 7. Tensorflow游乐场：playground.tensorflow.org
16 | 8. MNIST识别经典模型是1998年的LeNet-5网络，是DCNN的雏形，在正常情况下可达到99.05%的识别率。可访问scs.ryerson.ca/~aharley/vis/conv/，看到该网络每一层输出的图像；
17 | 9. 策略网络实例（围棋）：withablink.coding.me/goPolicyNet/
18 | 
19 | 
20 | ### 第3章 深度卷积网络：第二课
21 | 10. Excel实现神经网络：2.3.5-P297
22 | 11. 如果发现网络的训练性能很差，值得做的事情就是观察网络内部梯度的流动情况；改善的技巧：BN、残差网络（ResNet），梯度截断，梯度惩罚；
23 | 12. 从几何观点理解神经网络：colah.github.io/posts/2014-03-NN-Manifolds-Topology/;
13. 根据拓扑学定理，所有n维流形都可以在2n+2维空间中划分开。神经网络在隐层使用大量神经元，就是在做升维，以便划分样本，这称为disentangling，即将纠缠在一起的特征或概念分开；
25 | 14. 很多时候我们还是会根据测试集调参，因此，很多研究中已不使用验证集；
15. 半监督学习（semi-supervised learning），即数据中只有部分样本带有标签，然后希望给所有样本和未来的样本找到标签；一种有趣的方法是，先人工标记少量标签，然后从少量标签训练网络，然后让网络预测所有样本的标签，再人工筛选和修改其中的标签，重复这个过程。由于网络的预测会越来越准，因此可节省许多人工标注的时间；
27 | 16. 根据近年的研究，比如《The Loss Surfaces of Multilayer Networks》，对于大规模的神经网络，这实际影响不大。如果神经网络的规模够大够深，使用足够多的神经元，往往最后会得到相当靠近全局最优值的解；
28 | 17. 根据经验，如果数据集很复杂，那么普通的SGD虽然速度更慢，但有可能会得到更好的准确率。
29 | 18. L2和L1正则化的基本思想，最简单的网络没有连接，因此希望网络的连接越少越好，如果连接的权重为0就相当于没有连接，因此希望网络中连接的权重越小越好；
30 | 19. 两种类似dropout思想的正则化方法：随机深度和Shake-Shake正则化；
31 | 20. 多分类设置目标类别为100%不好，因为softmax的特性导致网络权重越来越大（需要输入无穷大才能输出100%），不利于网络稳定性，因此可尝试设置为95%；
32 | 21. 进一步的预处理包括白化，常用方法包括PCA和ZCA白化。对于图像还可以进行直方图均衡；
22. batch大小经验：对于常见问题，最优的往往在16-256之间，太小训练过慢，太大则性能不佳；facebook2017年论文《Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour》指出，设置合理的学习率，在批大小很大（比如8192）的时候也能取得较好的性能；
34 | 23. 可通过CPU-Z和GPU-Z软件，观察CPU和GPU是否在满负荷工作；
35 | 24. 2017年提出Fashion-MNIST，比MNIST难度更大更有代表性；
36 | 
37 | 
38 | ### 第4章 深度卷积网络：第三课
39 | 25. 卷积操作后，图像中的值往往会有正有负，正表示与特征匹配，负表示相反。如果再进行ReLU操作，就会只留下正值，因此ReLU很适合CNN；
26. 使用奇数卷积核的好处，可以通过设置合适的padding使得图像在卷积后大小不变；
41 | 27. 转置卷积4.3.5；
42 | 
43 | ### 第5章 深度卷积网络：第四课
44 | 28. 5.1.1 AlexNet的特点总结；
45 | 29. 5.1.2 VGG的特点；
46 | 30. 5.1.3 DarkNet的特点；
47 | 31. 2017年9月发布的SmoothGrad技术，找到图像特征关键区域；
48 | 32. 1*1卷积核的应用场景 5.4.2；
49 | 33. batch normalization 5.4.3；
50 | 34. 残差网络：ResNet的思想 5.5.1 、 5.5.2 残差网络架构细节；
51 | 35. 5.6.1 残差网络进展：ResNet、Pyramid Net、 DenseNet；
52 | 36. 压缩网络：SqueezeNet、MobileNet、ShuffleNet（可在AlexNet的二十分之一的运算量下实现相近性能）；
53 | 37. 5.6.3 卷积核的变形：扩张卷积（dilated convolution）、可变形卷积（deformable convolution）；
54 | 38. 5.7.1 yolo v1；5.7.3 Faster R-CNN；5.7.4 Mask-RCNN
55 | 39. 5.8 图像风格迁移；
56 | 
57 | ### 第6章 AlphaGo架构综述
58 | * 6.1.1 v13和v18；
59 | * 6.2 对弈过程；
60 | * 6.2.3 蒙特卡洛树搜索与估值问题；
61 | 
62 | ### 第7章 训练策略网络与实战
63 | * zero.sjeng.org
64 | 
65 | ### 第8章 生成式对抗网络：GAN
* github.com/hindupuravinash/the-gan-zoo 列举了上百种不同的GAN设计；
67 | * 8.5.1 自编码器：从AE到VAE；
68 | * 8.5.2 逐点生成：PixelRNN和PixelCNN系列；
69 | 
70 | ### 第9章 通向智能之秘
71 | * 9.3.3 目前研究人员认为，NLP中的问题，从易到难，顺序如下：
72 | 	1. 文本搜索
73 | 	2. 文本分类、情感分析
74 | 	3. 翻译（到这里为止AI已经接近人类）
75 | 	4. 文本摘要
76 | 	5. 垂直领域问答
77 | 	6. 泛领域问答
78 | * 9.4 深度学习理论发展、前沿研究
79 | * 9.4.2 超越神经网络：Capsule与gcForest
80 | * 9.4.3 深度学习为什么泛化能力好
* 研究人员发现，网络越大，越难通过训练到达全局极小值点，但是同时发现，网络越大，局部极小值点和全局极小值点的差距会越小；因此，网络越大，训练的过程会越简单，越稳定，因为到达任意一个局部极小值点就足够了；


--------------------------------------------------------------------------------
/CV/note/chineseocr-ctpn-densenet.md:
--------------------------------------------------------------------------------
 1 | ## [chinese-ocr模型](https://github.com/YCG09/chinese_ocr)说明文档
 2 | > Fire 2018.10.18
 3 | 
 4 | ### 基本架构
 5 | * 文本检测：CTPN （输出包含文字的图片框）
 6 | * 文本识别：DenseNet + CTC (输出识别字符)
 7 | 
 8 | ### CTPN
 9 | ![cptn](http://5b0988e595225.cdn.sohucs.com/images/20171130/5466184cc9504f62adcf602a899aca83.jpeg)
10 | 
11 | 
12 | ![](./vgg1.jpg) ![](./vgg2.jpg) ![](./vgg3.jpg) ![](./vgg4.jpg)
13 | 
14 | 1. 输入图片 （1 * 3 * 224 * 224）
15 | 2. vgg提取空间特征，conv5输出：N * C * H * W （1 * 512 * 14 * 14）
16 | 3. rpn_conv层：每个点取周围3 * 3区域做滑窗，输出：N * 9C * H * W （1 * 4608 * 14 * 14）
17 | 4. 双向LSTM层提取序列特征（正反各128）：对3中的每一行当作一个数据输入，然后reshape成 N * 256 * H * W（1 * 256 * 14 * 14）
18 | 5. FC层：N * 512 * H * W（1 * 512 * 14 * 14）
19 | 6. RPN网络：
20 | 	* 回归：1 * 20 * 14 * 14  （20 = 2 * 10；x,y偏移）
21 | 	* 分类：1 * 20 * 14 * 14  （20 = 2 * 10；前景或者背景）
22 | 7. NMS非极大值抑制
23 | 
24 | 	![](./nms.jpg)
25 | 
26 | ### DenseNet+CTC
27 | 
28 | 1. densenet: 
29 | 	
30 | 	![](./densenet.jpg)
31 | 	
32 | 	* 每个denseblock中都连接每层的残差
33 | 	* block基本结构: BN+Relu+Conv+dropout
34 | 	* flatten成一维
35 | 	* 输出层 dense(5000)
36 | 
37 | 	![](./denseblock.jpg)
38 | 	
39 | 2. CTC-loss：损失函数可以解释为：给定样本后输出正确label的概率的乘积，最优化负对数。
40 | 
41 | 	![](./ctc.jpg)
42 | 	
43 | 	e.g. [1 * 8] -> -AA--B-C -> ABC
44 | 


--------------------------------------------------------------------------------
/CV/note/cptn.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/note/cptn.jpg


--------------------------------------------------------------------------------
/CV/note/ctc.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/note/ctc.jpg


--------------------------------------------------------------------------------
/CV/note/denseblock.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/note/denseblock.jpg


--------------------------------------------------------------------------------
/CV/note/densenet.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/note/densenet.jpg


--------------------------------------------------------------------------------
/CV/note/nms.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/note/nms.jpg


--------------------------------------------------------------------------------
/CV/note/vgg1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/note/vgg1.jpg


--------------------------------------------------------------------------------
/CV/note/vgg2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/note/vgg2.jpg


--------------------------------------------------------------------------------
/CV/note/vgg3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/note/vgg3.jpg


--------------------------------------------------------------------------------
/CV/note/vgg4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/CV/note/vgg4.jpg


--------------------------------------------------------------------------------
/DIY/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/DIY/.DS_Store


--------------------------------------------------------------------------------
/DIY/EM.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 12,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "######### 1.DATA ##########\n",
 12 |     "#例子：统计学习方法三硬币 A,B,C 正面出现概率分别为 a，p，q。\n",
 13 |     "#先扔A，正面选B，反面C。再扔选的硬币，正面为1，反面为0\n",
 14 |     "#已知观测结果，求a，p，q。\n",
 15 |     "import numpy as np\n",
 16 |     "\n",
 17 |     "y = [1,1,0,1,0,0,1,0,1,1]\n",
 18 |     "y = np.array(y)"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": 15,
 24 |    "metadata": {
 25 |     "collapsed": true
 26 |    },
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "######### 2.EM ###########\n",
 30 |     "def stepE(valuesOld,y):\n",
 31 |     "    a = valuesOld[0]\n",
 32 |     "    p = valuesOld[1]\n",
 33 |     "    q = valuesOld[2]\n",
 34 |     "    miu = (a*p**y*(1-p)**(1-y)) / ((a*p**y*(1-p)**(1-y)) + (1-a)*q**y*(1-q)**(1-y))\n",
 35 |     "    return miu\n",
 36 |     "    \n",
 37 |     "def setpM(miu,y):\n",
 38 |     "    a = np.mean(miu)\n",
 39 |     "    p = np.sum(miu*y) / np.sum(miu)\n",
 40 |     "    q = np.sum((1-miu)*y) / np.sum(1-miu)\n",
 41 |     "    valuesNew = np.array([a,p,q])\n",
 42 |     "    return valuesNew\n",
 43 |     "    \n",
 44 |     "def EM(init_values,y,tol = 0.0001,iterations = 1000):\n",
 45 |     "    valuesOld = np.array(init_values)\n",
 46 |     "    for i in range(iterations):\n",
 47 |     "        miu = stepE(valuesOld,y)\n",
 48 |     "        valuesNew = setpM(miu,y)\n",
 49 |     "        if np.sum(valuesNew-valuesOld) < tol:\n",
 50 |     "            break\n",
 51 |     "        else:\n",
 52 |     "            valuesOld = valuesNew\n",
 53 |     "    return valuesNew"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 17,
 59 |    "metadata": {},
 60 |    "outputs": [
 61 |     {
 62 |      "name": "stdout",
 63 |      "output_type": "stream",
 64 |      "text": [
 65 |       "[ 0.5  0.6  0.6]\n",
 66 |       "[ 0.40641711  0.53684211  0.64324324]\n"
 67 |      ]
 68 |     }
 69 |    ],
 70 |    "source": [
 71 |     "######### 3.test #########\n",
 72 |     "\n",
 73 |     "init_values1 = [0.5,0.5,0.5]\n",
 74 |     "output1 = EM(init_values1,y)\n",
 75 |     "print(output1)\n",
 76 |     "init_values2 = [0.4,0.6,0.7]\n",
 77 |     "output2 = EM(init_values2,y)\n",
 78 |     "print(output2)"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "metadata": {
 85 |     "collapsed": true
 86 |    },
 87 |    "outputs": [],
 88 |    "source": []
 89 |   }
 90 |  ],
 91 |  "metadata": {
 92 |   "kernelspec": {
 93 |    "display_name": "Python 3",
 94 |    "language": "python",
 95 |    "name": "python3"
 96 |   },
 97 |   "language_info": {
 98 |    "codemirror_mode": {
 99 |     "name": "ipython",
100 |     "version": 3
101 |    },
102 |    "file_extension": ".py",
103 |    "mimetype": "text/x-python",
104 |    "name": "python",
105 |    "nbconvert_exporter": "python",
106 |    "pygments_lexer": "ipython3",
107 |    "version": "3.5.4"
108 |   }
109 |  },
110 |  "nbformat": 4,
111 |  "nbformat_minor": 2
112 | }
113 | 


--------------------------------------------------------------------------------
/DIY/IOU.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | _IOU_threshold = 0.6
 4 | 
 5 | def IOU(Reframe,GTframe):
 6 |     """
 7 |     自定义函数，计算两矩形 IOU，传入为均为矩形对角线，（x,y）  坐标。
 8 |     """
 9 |     x1 = Reframe[0]
10 |     y1 = Reframe[1]
11 |     width1 = Reframe[2]-Reframe[0]
12 |     height1 = Reframe[3]-Reframe[1]
13 | 
14 |     x2 = GTframe[0]
15 |     y2 = GTframe[1]
16 |     width2 = GTframe[2]-GTframe[0]
17 |     height2 = GTframe[3]-GTframe[1]
18 | 
19 |     endx = max(x1+width1,x2+width2)
20 |     startx = min(x1,x2)
21 |     width = width1+width2-(endx-startx)
22 | 
23 |     endy = max(y1+height1,y2+height2)
24 |     starty = min(y1,y2)
25 |     height = height1+height2-(endy-starty)
26 | 
27 |     if width <=0 or height <= 0:
28 |         ratio = 0 # 重叠率为 0 
29 |     else:
30 |         Area = width*height # 两矩形相交面积
31 |         Area1 = width1*height1
32 |         Area2 = width2*height2
33 |         ratio = Area*1./(Area1+Area2-Area)
34 |     # return IOU
35 |     return ratio
36 | 
def computeLoss(pre_box_list, label_box_list, R_weight = 1):
    """
    Score predicted boxes against labelled boxes.

    A prediction counts as correct when its IOU with at least one label
    exceeds ``_IOU_threshold``; a label counts as found when at least one
    prediction exceeds the threshold against it.

    Args:
        pre_box_list: sequence of predicted boxes, each indexable as
            (x_min, y_min, x_max, y_max).
        label_box_list: sequence of labelled boxes in the same layout.
        R_weight: multiplier applied to recall in the combined score.

    Returns:
        Tuple ``(P, R, F)``: precision, recall and
        ``2*P*R*R_weight / (P + R*R_weight)``.  Any of the three whose
        denominator is zero (no predictions, no labels, or no match at all)
        is reported as 0.0 instead of raising ZeroDivisionError.
    """
    pre_box_list = np.array(pre_box_list)
    label_box_list = np.array(label_box_list)
    total_pre = len(pre_box_list)
    total_label = len(label_box_list)

    def _matches(box_pre, box_label):
        return IOU(box_pre, box_label) > _IOU_threshold

    # Precision: share of predictions that hit some label.  any() stops at the
    # first hit, so each prediction is counted at most once.
    p_count = float(sum(
        1 for box_pre in pre_box_list
        if any(_matches(box_pre, box_label) for box_label in label_box_list)
    ))
    P = p_count / total_pre if total_pre else 0.0

    # Recall: share of labels that are hit by some prediction.
    r_count = float(sum(
        1 for box_label in label_box_list
        if any(_matches(box_pre, box_label) for box_pre in pre_box_list)
    ))
    R = r_count / total_label if total_label else 0.0

    # Combined score with recall scaled by R_weight.
    denominator = P + R * R_weight
    F = 2 * P * R * R_weight / denominator if denominator else 0.0

    return P,R,F
67 | 
68 | 
if __name__ == '__main__':

    # 1. IOU spot checks (disabled): each entry pairs two boxes with the IOU
    #    expected for them.
    #    NOTE(review): IOU returns a single ratio, so the three-way unpacking
    #    below has to become `v = IOU(...)` before this is re-enabled.
    # tests_iou = [
    #                 [ [[10,40,30,80],[10,40,30,80]], 1],
    #                 [ [[10,40,30,80],[30,80,60,120]], 0]
    #             ]

    # for t in tests_iou:
    #     v,_,_ = IOU(t[0][0],t[0][1])
    #     print(v, t[1])

    # 2. Precision / recall / F demo: two labels, five predictions of which
    #    the first two coincide with the labels.
    label_box_list = [ [10,40,30,80], [30,80,60,120] ]
    pre_box_list = [
        [10,40,30,80],
        [30,80,60,120],
        [40,100,80,140],
        [42,100,80,140],
        [44,100,80,140],
    ]
    print(computeLoss(pre_box_list, label_box_list))
85 | 


--------------------------------------------------------------------------------
/DIY/Stacking.py:
--------------------------------------------------------------------------------
 1 | # code from https://dnc1994.com/2016/04/rank-10-percent-in-first-kaggle-competition/
 2 | # 自己加了点注释帮助理解，也方便自己以后使用
 3 | class Ensemble(object):
 4 |     def __init__(self, n_folds, stacker, base_models):
 5 |         self.n_folds = n_folds            #交叉验证集划分的折数
 6 |         self.stacker = stacker            #第二层stacking时使用的分类器
 7 |         self.base_models = base_models    #第一层的基本模型 们
 8 |     def fit_predict(self, X, y, T):
 9 |         X = np.array(X)   #train_x
10 |         y = np.array(y)   #train_y
11 |         T = np.array(T)   #test_x
12 |         folds = list(KFold(len(y), n_folds=self.n_folds, shuffle=True, random_state=2016))
13 |         #sklearn.cross_validation.KFold(n, n_folds=3, shuffle=False, random_state=None)
14 |         #http://scikit-learn.org/stable/modules/generated/sklearn.cross_validation.KFold.html
15 |         #这里只是生成了index 的迭代器，根据index取数据在后面进行
16 |         
17 |         S_train = np.zeros((X.shape[0], len(self.base_models))) #第二层的训练数据
18 |         S_test = np.zeros((T.shape[0], len(self.base_models)))
19 |         #数据条数不变，特征数变为模型数，因为每个模型产生一列
20 |         
21 |         for i, clf in enumerate(self.base_models):    #clf  Classification  
22 |             S_test_i = np.zeros((T.shape[0], len(folds)))
23 |             for j, (train_idx, test_idx) in enumerate(folds):
24 |                 X_train = X[train_idx]
25 |                 y_train = y[train_idx]
26 |                 X_holdout = X[test_idx]
27 |                 # y_holdout = y[test_idx]
28 |                 clf.fit(X_train, y_train)
29 |                 y_pred = clf.predict(X_holdout)[:]
30 |                 S_train[test_idx, i] = y_pred
31 |                 S_test_i[:, j] = clf.predict(T)[:]  #整个T的预测
32 |             S_test[:, i] = S_test_i.mean(1)    #按行求平均值 即axis=1. 矩阵变成一列后加入S_test中
33 |             
34 |         self.stacker.fit(S_train, y)
35 |         y_pred = self.stacker.predict(S_test)[:]
36 |         return y_pred
37 |         
38 |  '''
39 |  据说获奖选手往往会使用比这复杂得多的 Ensemble，会出现三层、四层甚至五层，不同的层数之间有各种交互，
40 |  还有将经过不同的 Preprocessing 和不同的 Feature Engineering 的数据用 Ensemble 组合起来的做法。
41 |  '''
42 | 


--------------------------------------------------------------------------------
/DIY/ex1data1.txt:
--------------------------------------------------------------------------------
 1 | 6.1101,17.592
 2 | 5.5277,9.1302
 3 | 8.5186,13.662
 4 | 7.0032,11.854
 5 | 5.8598,6.8233
 6 | 8.3829,11.886
 7 | 7.4764,4.3483
 8 | 8.5781,12
 9 | 6.4862,6.5987
10 | 5.0546,3.8166
11 | 5.7107,3.2522
12 | 14.164,15.505
13 | 5.734,3.1551
14 | 8.4084,7.2258
15 | 5.6407,0.71618
16 | 5.3794,3.5129
17 | 6.3654,5.3048
18 | 5.1301,0.56077
19 | 6.4296,3.6518
20 | 7.0708,5.3893
21 | 6.1891,3.1386
22 | 20.27,21.767
23 | 5.4901,4.263
24 | 6.3261,5.1875
25 | 5.5649,3.0825
26 | 18.945,22.638
27 | 12.828,13.501
28 | 10.957,7.0467
29 | 13.176,14.692
30 | 22.203,24.147
31 | 5.2524,-1.22
32 | 6.5894,5.9966
33 | 9.2482,12.134
34 | 5.8918,1.8495
35 | 8.2111,6.5426
36 | 7.9334,4.5623
37 | 8.0959,4.1164
38 | 5.6063,3.3928
39 | 12.836,10.117
40 | 6.3534,5.4974
41 | 5.4069,0.55657
42 | 6.8825,3.9115
43 | 11.708,5.3854
44 | 5.7737,2.4406
45 | 7.8247,6.7318
46 | 7.0931,1.0463
47 | 5.0702,5.1337
48 | 5.8014,1.844
49 | 11.7,8.0043
50 | 5.5416,1.0179
51 | 7.5402,6.7504
52 | 5.3077,1.8396
53 | 7.4239,4.2885
54 | 7.6031,4.9981
55 | 6.3328,1.4233
56 | 6.3589,-1.4211
57 | 6.2742,2.4756
58 | 5.6397,4.6042
59 | 9.3102,3.9624
60 | 9.4536,5.4141
61 | 8.8254,5.1694
62 | 5.1793,-0.74279
63 | 21.279,17.929
64 | 14.908,12.054
65 | 18.959,17.054
66 | 7.2182,4.8852
67 | 8.2951,5.7442
68 | 10.236,7.7754
69 | 5.4994,1.0173
70 | 20.341,20.992
71 | 10.136,6.6799
72 | 7.3345,4.0259
73 | 6.0062,1.2784
74 | 7.2259,3.3411
75 | 5.0269,-2.6807
76 | 6.5479,0.29678
77 | 7.5386,3.8845
78 | 5.0365,5.7014
79 | 10.274,6.7526
80 | 5.1077,2.0576
81 | 5.7292,0.47953
82 | 5.1884,0.20421
83 | 6.3557,0.67861
84 | 9.7687,7.5435
85 | 6.5159,5.3436
86 | 8.5172,4.2415
87 | 9.1802,6.7981
88 | 6.002,0.92695
89 | 5.5204,0.152
90 | 5.0594,2.8214
91 | 5.7077,1.8451
92 | 7.6366,4.2959
93 | 5.8707,7.2029
94 | 5.3054,1.9869
95 | 8.2934,0.14454
96 | 13.394,9.0551
97 | 5.4369,0.61705
98 | 


--------------------------------------------------------------------------------
/DIY/lenses.txt:
--------------------------------------------------------------------------------
 1 | young	myope	no	reduced	no lenses
 2 | young	myope	no	normal	soft
 3 | young	myope	yes	reduced	no lenses
 4 | young	myope	yes	normal	hard
 5 | young	hyper	no	reduced	no lenses
 6 | young	hyper	no	normal	soft
 7 | young	hyper	yes	reduced	no lenses
 8 | young	hyper	yes	normal	hard
 9 | pre	myope	no	reduced	no lenses
10 | pre	myope	no	normal	soft
11 | pre	myope	yes	reduced	no lenses
12 | pre	myope	yes	normal	hard
13 | pre	hyper	no	reduced	no lenses
14 | pre	hyper	no	normal	soft
15 | pre	hyper	yes	reduced	no lenses
16 | pre	hyper	yes	normal	no lenses
17 | presbyopic	myope	no	reduced	no lenses
18 | presbyopic	myope	no	normal	no lenses
19 | presbyopic	myope	yes	reduced	no lenses
20 | presbyopic	myope	yes	normal	hard
21 | presbyopic	hyper	no	reduced	no lenses
22 | presbyopic	hyper	no	normal	soft
23 | presbyopic	hyper	yes	reduced	no lenses
24 | presbyopic	hyper	yes	normal	no lenses
25 | 


--------------------------------------------------------------------------------
/DM/note/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/DM/note/.DS_Store


--------------------------------------------------------------------------------
/DM/note/img/fe.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/DM/note/img/fe.jpg


--------------------------------------------------------------------------------
/NLP/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/NLP/.DS_Store


--------------------------------------------------------------------------------
/NLP/codes/re.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 10,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import re\n",
10 |     "emoticons_str = r\"\"\"\n",
11 |     "    (?:\n",
12 |     "    [:=;] # 眼睛\n",
13 |     "    [oO\\-]? # 鼻子\n",
14 |     "    [D\\)\\]\\(\\]/\\\\OpP] # 嘴\n",
15 |     "    )\"\"\"\n",
16 |     "regex_str = [\n",
17 |     "    emoticons_str,\n",
18 |     "    r'<[^>]+>', # HTML tags\n",
19 |     "    r'(?:@[\\w_]+)', # @某人\n",
20 |     "    r\"(?:\\#+[\\w_]+[\\w\\'_\\-]*[\\w_]+)\", # 话题标签\n",
21 |     "    r'http[s]?://(?:[a-z]|[0-9]|[$-_@.&amp;+]|[!*\\(\\),]|(?:%[0-9a-f][0-9a-f]))+',\n",
22 |     "    # URLs\n",
23 |     "    r'(?:(?:\\d+,?)+(?:\\.?\\d+)?)', # 数字\n",
24 |     "    r\"(?:[a-z][a-z'\\-_]+[a-z])\", # 含有 - 和 ‘ 的单词\n",
25 |     "    r'(?:[\\w_]+)', # 其他\n",
26 |     "    r'(?:\\S)' # 其他\n",
27 |     "    ]"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "code",
32 |    "execution_count": 13,
33 |    "metadata": {},
34 |    "outputs": [
35 |     {
36 |      "name": "stdout",
37 |      "output_type": "stream",
38 |      "text": [
39 |       "['RT', '@angelababy', ':', 'love', 'you', 'baby', '!', ':D', 'http://ah.love', '#168cm']\n"
40 |      ]
41 |     }
42 |    ],
43 |    "source": [
44 |     "tokens_re = re.compile(r'('+'|'.join(regex_str)+')', re.VERBOSE | re.IGNORECASE)\n",
45 |     "emoticon_re = re.compile(r'^'+emoticons_str+'$', re.VERBOSE | re.IGNORECASE)\n",
46 |     "\n",
47 |     "def tokenize(s):\n",
48 |     "    return tokens_re.findall(s)\n",
49 |     "def preprocess(s, lowercase=False):\n",
50 |     "    tokens = tokenize(s)\n",
51 |     "    if lowercase:\n",
52 |     "        tokens = [token if emoticon_re.search(token) else token.lower() for token in tokens]\n",
53 |     "    return tokens\n",
54 |     "tweet = 'RT @angelababy: love you baby! :D http://ah.love #168cm'\n",
55 |     "print(preprocess(tweet))\n",
56 |     "# ['RT', '@angelababy', ':', 'love', 'you', 'baby',\n",
 57 |     "# '!', ':D', 'http://ah.love', '#168cm']"
58 |    ]
59 |   },
60 |   {
61 |    "cell_type": "code",
62 |    "execution_count": null,
63 |    "metadata": {
64 |     "collapsed": true
65 |    },
66 |    "outputs": [],
67 |    "source": []
68 |   }
69 |  ],
70 |  "metadata": {
71 |   "kernelspec": {
72 |    "display_name": "Python 3",
73 |    "language": "python",
74 |    "name": "python3"
75 |   },
76 |   "language_info": {
77 |    "codemirror_mode": {
78 |     "name": "ipython",
79 |     "version": 3
80 |    },
81 |    "file_extension": ".py",
82 |    "mimetype": "text/x-python",
83 |    "name": "python",
84 |    "nbconvert_exporter": "python",
85 |    "pygments_lexer": "ipython3",
86 |    "version": "3.5.2rc1"
87 |   }
88 |  },
89 |  "nbformat": 4,
90 |  "nbformat_minor": 2
91 | }
92 | 


--------------------------------------------------------------------------------
/NLP/knowledge.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## NLP知识点
 3 | 
 4 | 
 5 | 
 6 | ### 1.word2vec
  7 | * CBOW: 训练输入是某一个特征词的上下文相关的词对应的词向量，而输出就是这特定的一个词的词向量。由于CBOW使用的是词袋模型，因此上下文中的这些词（例如窗口大小为4时，前后共8个词）都是平等的，也就是不考虑它们和我们关注的词之间的距离大小，只要在我们上下文之内即可。
 8 | * Skip-Gram: 输入是特定的一个词的词向量，而输出是特定词对应的上下文词向量。
 9 | 
10 | ### 2.分词
11 | 	中文分词的基本方法可以分为基于语法规则的方法、基于词典的方法和基于统计的方法。
12 |     基于语法规则的分词法基本思想是在分词的同时进行句法、语义分析, 利用句法信息和语义信息来进行词性标注, 以解决分词歧义现象。因为现有的语法知识、句法规则十分笼统、复杂, 基于语法和规则的分词法所能达到的精确度远远还不能令人满意, 目前这种分词系统应用较少。
13 |     在基于词典的方法中，可以进一步分为最大匹配法，最大概率法，最短路径法等。最大匹配法指的是按照一定顺序选取字符串中的若干个字当做一个词，去词典中查找。根据扫描方式可细分为：正向最大匹配，反向最大匹配，双向最大匹配，最小切分。最大概率法指的是一个待切分的汉字串可能包含多种分词结果，将其中概率最大的那个作为该字串的分词结果。最短路径法指的是在词图上选择一条词数最少的路径。
 14 |     基于统计的分词法的基本原理是根据字符串在语料库中出现的统计频率来决定其是否构成词。词是字的组合，相邻的字同时出现的次数越多, 就越有可能构成一个词。因此字与字相邻共现的频率或概率能够较好的反映它们成为词的可信度。常用的方法有HMM（隐马尔科夫模型），MAXENT（最大熵模型），MEMM（最大熵马尔科夫模型），CRF（条件随机场）。
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/NLP/tools/NLPIR/Start.py:
--------------------------------------------------------------------------------
 1 | import pynlpir
 2 | 
 3 | pynlpir.open()
 4 | s = '今天天气真是好呀'
 5 | segments = pynlpir.segment(s)
 6 | stop_cx =['modal particle', 'punctuation mark', 'noun of locality','particle','numeral']
 7 | for segment in segments:
 8 |     #if segment[1] not in stop_cx:
 9 |         print(segment[0], '\t', segment[1])
10 | print('---')
11 | key_words = pynlpir.get_key_words(s, weighted=True)
12 | for key_word in key_words:
13 |     print(key_word[0], '\t', key_word[1])
14 |     
15 | pynlpir.close()
16 | 
17 | '''
18 | 今天 	 time word
19 | 天气 	 noun
20 | 真是 	 adverb
21 | 好 	 adjective
22 | 呀 	 modal particle
23 | ---
24 | 今天 	 2.2
25 | 天气 	 2.0
26 | '''
27 | 


--------------------------------------------------------------------------------
/NLP/tools/gensim/load_w2v_ch.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | 
 3 | #py3 
 4 | #https://ai.tencent.com/ailab/nlp/embedding.html
 5 | 
 6 | from gensim.models.keyedvectors import KeyedVectors
 7 | 
 8 | 
 9 | file = "Tencent_AILab_ChineseEmbedding.txt"
10 | 
11 | 
12 | # with open(file,"r",encoding="utf-8") as f:
13 | #     print(f.readline()) # 8824330 200
14 | #     # print(f.readline())
15 | #     # print(f.readline())
16 | 
17 | 
18 | wv_from_text = KeyedVectors.load_word2vec_format(file, binary=False)
19 | 
20 | wv_from_text.most_similar(u"足球")
21 | """
22 | [('足球运动', 0.8081263303756714), ('足球文化', 0.7661516070365906), ('足球发展', 0.7645934820175171), ('职业足球', 0.7609031200408936), ('足球教育', 0.7551054954528809), ('热爱足球', 0.7491205930709839), ('足球技术', 0.7459214925765991), ('踢球', 0.7441200017929077), ('世界足球', 0.7434529066085815), ('足球项目', 0.7409517765045166)]
23 | 
24 | """
25 | 


--------------------------------------------------------------------------------
/NLP/tools/gensim/process_wiki_data.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # process_wiki_data.py 用于解析XML，将XML的wiki数据转换为text格式
 4 | #执行命令：python process_wiki_data.py zhwiki-latest-pages-articles.xml.bz2 wiki.zh.text
 5 | import logging
 6 | import os.path
 7 | import sys
 8 | from gensim.corpora import WikiCorpus
 9 | if __name__ == '__main__':
10 |     program = os.path.basename(sys.argv[0])
11 |     logger = logging.getLogger(program)
12 |     logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
13 |     logging.root.setLevel(level=logging.INFO)
14 |     logger.info("running %s" % ' '.join(sys.argv))
15 |     # check and process input arguments
16 |     if len(sys.argv) < 3:
17 |         print(globals()['__doc__'] % locals())
18 |         sys.exit(1)
19 |     inp, outp = sys.argv[1:3]
20 |     space = " "
21 |     i = 0
22 |     output = open(outp, 'w',encoding='utf-8')
23 |     wiki = WikiCorpus(inp, lemmatize=False, dictionary={})
24 |     for text in wiki.get_texts():
25 |         output.write(space.join(text) + "\n")
26 |         i = i + 1
27 |         if (i % 10000 == 0):
28 |             logger.info("Saved " + str(i) + " articles")
29 |     output.close()
30 |     logger.info("Finished Saved " + str(i) + " articles")


--------------------------------------------------------------------------------
/NLP/tools/gensim/readme.md:
--------------------------------------------------------------------------------
1 | ### 资料
2 | * [官方文档](https://radimrehurek.com/gensim/apiref.html)
3 | * [word2vec API](https://radimrehurek.com/gensim/models/word2vec.html)
4 | 
5 | ### 实践
6 | * [word2vec训练中文模型](https://www.zybuluo.com/hanxiaoyang/note/472184) （ [将XML的wiki数据转text](./process_wiki_data.py) [jieba完成分词](../jieba/read_save.ipynb) [用word2vec工具训练](./train_word2vec_model.py) [测试模型效果](./test_word2vec.ipynb) ）
7 | * [加载腾讯开源的中文word2vec词向量语料库](./load_w2v_ch.py)
8 | 


--------------------------------------------------------------------------------
/NLP/tools/gensim/test_word2vec.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "data": {
 10 |       "text/plain": [
 11 |        "[('足球运动', 0.6127325296401978),\n",
 12 |        " ('排球', 0.5376268625259399),\n",
 13 |        " ('冰球', 0.5342495441436768),\n",
 14 |        " ('板球', 0.5301790833473206),\n",
 15 |        " ('手球', 0.5166541337966919),\n",
 16 |        " ('籃球', 0.5052165389060974),\n",
 17 |        " ('英超球', 0.499561607837677),\n",
 18 |        " ('女足', 0.4948025941848755),\n",
 19 |        " ('足球联赛', 0.491238534450531),\n",
 20 |        " ('美式足球', 0.49103665351867676)]"
 21 |       ]
 22 |      },
 23 |      "execution_count": 2,
 24 |      "metadata": {},
 25 |      "output_type": "execute_result"
 26 |     }
 27 |    ],
 28 |    "source": [
 29 |     "import gensim\n",
 30 |     "model = gensim.models.Word2Vec.load(r\"F:\\data\\wiki.zh.text.model\")\n",
 31 |     "model.most_similar(u\"足球\")"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 4,
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "data": {
 41 |       "text/plain": [
 42 |        "[('女人', 0.7703686952590942),\n",
 43 |        " ('傻瓜', 0.5394862294197083),\n",
 44 |        " ('家伙', 0.5176622271537781),\n",
 45 |        " ('女孩', 0.5025584101676941),\n",
 46 |        " ('撒嬌', 0.4929904341697693),\n",
 47 |        " ('小伙子', 0.4917035698890686),\n",
 48 |        " ('女明星', 0.4843180179595947),\n",
 49 |        " ('爸爸', 0.4842095673084259),\n",
 50 |        " ('女孩子', 0.48044753074645996),\n",
 51 |        " ('老公', 0.4802494943141937)]"
 52 |       ]
 53 |      },
 54 |      "execution_count": 4,
 55 |      "metadata": {},
 56 |      "output_type": "execute_result"
 57 |     }
 58 |    ],
 59 |    "source": [
 60 |     "model.most_similar(u\"男人\")"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": 5,
 66 |    "metadata": {},
 67 |    "outputs": [
 68 |     {
 69 |      "data": {
 70 |       "text/plain": [
 71 |        "[('鞋子', 0.7696906924247742),\n",
 72 |        " ('衣物', 0.7572050094604492),\n",
 73 |        " ('裙子', 0.7095688581466675),\n",
 74 |        " ('大衣', 0.7061837911605835),\n",
 75 |        " ('外套', 0.7023261785507202),\n",
 76 |        " ('外衣', 0.6756951808929443),\n",
 77 |        " ('內褲', 0.6667477488517761),\n",
 78 |        " ('褲子', 0.6629331707954407),\n",
 79 |        " ('上衣', 0.6550877690315247),\n",
 80 |        " ('西装', 0.6357579231262207)]"
 81 |       ]
 82 |      },
 83 |      "execution_count": 5,
 84 |      "metadata": {},
 85 |      "output_type": "execute_result"
 86 |     }
 87 |    ],
 88 |    "source": [
 89 |     "model.most_similar(u\"衣服\")"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "metadata": {
 96 |     "collapsed": true
 97 |    },
 98 |    "outputs": [],
 99 |    "source": []
100 |   }
101 |  ],
102 |  "metadata": {
103 |   "kernelspec": {
104 |    "display_name": "Python 3",
105 |    "language": "python",
106 |    "name": "python3"
107 |   },
108 |   "language_info": {
109 |    "codemirror_mode": {
110 |     "name": "ipython",
111 |     "version": 3
112 |    },
113 |    "file_extension": ".py",
114 |    "mimetype": "text/x-python",
115 |    "name": "python",
116 |    "nbconvert_exporter": "python",
117 |    "pygments_lexer": "ipython3",
118 |    "version": "3.5.4"
119 |   }
120 |  },
121 |  "nbformat": 4,
122 |  "nbformat_minor": 2
123 | }
124 | 


--------------------------------------------------------------------------------
/NLP/tools/gensim/train_word2vec_model.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # train_word2vec_model.py用于训练模型
 4 | import logging
 5 | import os.path
 6 | import sys
 7 | import multiprocessing
 8 | from gensim.corpora import WikiCorpus
 9 | from gensim.models import Word2Vec
10 | from gensim.models.word2vec import LineSentence
if __name__ == '__main__':
    program = os.path.basename(sys.argv[0])
    logger = logging.getLogger(program)
    logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
    logging.root.setLevel(level=logging.INFO)
    logger.info("running %s" % ' '.join(sys.argv))
    # check and process input arguments
    if len(sys.argv) < 4:
        # This module has no docstring, so formatting __doc__ would fail with
        # a TypeError; print an explicit usage line instead.  A single
        # parenthesised argument prints the same under Python 2 and Python 3.
        print("Usage: python %s <corpus.txt> <model-output> <vectors-output.txt>" % program)
        sys.exit(1)
    # inp: text corpus read line by line; outp1: saved model;
    # outp2: vectors exported in word2vec text format.
    inp, outp1, outp2 = sys.argv[1:4]
    # NOTE(review): `size` is the keyword used by older gensim releases --
    # confirm the installed version before running.
    model = Word2Vec(LineSentence(inp), size=400, window=5, min_count=5,
            workers=multiprocessing.cpu_count())
    # trim unneeded model memory = use(much) less RAM
    #model.init_sims(replace=True)
    model.save(outp1)
    # Export through model.wv when the attribute exists; fall back to the model
    # itself for releases that still expose the method directly on Word2Vec.
    getattr(model, 'wv', model).save_word2vec_format(outp2, binary=False)


--------------------------------------------------------------------------------
/NLP/tools/jieba/cixing.py:
--------------------------------------------------------------------------------
 1 | import jieba.posseg as pseg
 2 | words = pseg.cut("迅速落实整改，报道称 河南省2017年护士执业资格考试已于2017年5月8日结束 模块消防站全景器材室多功能室图书室厨房")
 3 | for w in words:
 4 |     print("%s %s" %(w.word, w.flag))
 5 |     
 6 |     
 7 | '''
 8 | #output:
 9 | 迅速 ad
10 | 落实 a
11 | 整改 v
12 | ， x
13 | 报道 v
14 | 称 v
15 | ...
16 | '''
17 | 


--------------------------------------------------------------------------------
/NLP/tools/jieba/jieba_cut.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 9,
 6 |    "metadata": {},
 7 |    "outputs": [
 8 |     {
 9 |      "name": "stdout",
10 |      "output_type": "stream",
11 |      "text": [
12 |       "<generator object Tokenizer.cut at 0x000000000475CBF8>\n",
13 |       "Full Mode: 我/ 来到/ 北京/ 清华/ 清华大学/ 华大/ 大学\n",
14 |       "Default Mode: 我/ 来到/ 北/ 京/ 清华大学\n",
15 |       "他, 来到, 了, 网易, 杭研, 大厦\n",
16 |       "小明, 硕士, 毕业, 于, 中国, 科学, 学院, 科学院, 中国科学院, 计算, 计算所, ，, 后, 在, 日本, 京都, 大学, 日本京都大学, 深造\n"
17 |      ]
18 |     }
19 |    ],
20 |    "source": [
21 |     "##################### 分词的不同模式 #######################\n",
22 |     "import jieba\n",
23 |     "seg_list = jieba.cut(\"我来到北京清华大学\", cut_all=True)\n",
 24 |     "print(seg_list) #直接输出不行 因为不是list 而是一个类似于迭代器的 generator\n",
25 |     "print(\"Full Mode:\", \"/ \".join(seg_list)) # 全模式\n",
26 |     "\n",
27 |     "seg_list = jieba.cut(\"我来到北京清华大学\", cut_all=False)\n",
28 |     "print(\"Default Mode:\", \"/ \".join(seg_list)) # 精确模式\n",
29 |     "\n",
30 |     "seg_list = jieba.cut(\"他来到了网易杭研大厦\") # 默认是精确模式\n",
31 |     "print(\", \".join(seg_list))\n",
32 |     "\n",
33 |     "seg_list = jieba.cut_for_search(\"小明硕士毕业于中国科学院计算所，后在日本京都大学深造\")# 搜索引擎模式\n",
34 |     "print(\", \".join(seg_list))"
35 |    ]
36 |   },
37 |   {
38 |    "cell_type": "code",
39 |    "execution_count": null,
40 |    "metadata": {
41 |     "collapsed": true
42 |    },
43 |    "outputs": [],
44 |    "source": []
45 |   }
46 |  ],
47 |  "metadata": {
48 |   "kernelspec": {
49 |    "display_name": "Python 3",
50 |    "language": "python",
51 |    "name": "python3"
52 |   },
53 |   "language_info": {
54 |    "codemirror_mode": {
55 |     "name": "ipython",
56 |     "version": 3
57 |    },
58 |    "file_extension": ".py",
59 |    "mimetype": "text/x-python",
60 |    "name": "python",
61 |    "nbconvert_exporter": "python",
62 |    "pygments_lexer": "ipython3",
63 |    "version": "3.5.2rc1"
64 |   }
65 |  },
66 |  "nbformat": 4,
67 |  "nbformat_minor": 2
68 | }
69 | 


--------------------------------------------------------------------------------
/NLP/tools/jieba/jieba_cut_ngram.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | import jieba
 3 | 
# Sample text that the bigram counts further down are collected from.
ori_data = u'刘超是一个喜欢学习的好学生，你看，他正在学习呢。除了学习刘超还喜欢打游戏。'
print ori_data
 6 | 
 7 | def reform(sentence):
 8 |     #如果是以“。”结束的则将“。”删掉
 9 |     if sentence[-1] == u"。":
10 |         sentence=sentence[:-1]
11 |     #添加起始符BOS和终止符EOS 
12 |     tmp = [u'、' ,u'，',u'：',u'。']
13 |     for i in xrange(len(sentence)):
14 |         if sentence[i] in tmp:
15 |             sentence=sentence[:i]+'EEEBBB'+sentence[i+1:]
16 |     sentence2="BBB"+sentence+"EEE"
17 |     return sentence2
18 | 
# Corpus with the BBB/EEE boundary markers inserted.
data1 = reform(ori_data)
print data1
21 | 
22 | #分词并统计词频
def segmentation(sentence,lists=None,dicts=None):
    """Tokenise ``sentence`` with jieba and count the tokens.

    Args:
        sentence: text to tokenise.
        lists: ignored; kept only so existing calls that pass it still work.
        dicts: mapping that receives the token counts (updated in place).
            When omitted, a fresh dict is used for this call only, so counts
            never leak from one call into the next.

    Returns:
        The list of tokens in order of appearance.
    """
    if dicts is None:
        dicts = {}
    # Ask jieba to treat the boundary markers and the name as whole words.
    for whole_word in (u"BBB", u"EEE", u"刘超"):
        jieba.suggest_freq(whole_word, True)
    # Take the tokens straight from the generator: joining them with "," and
    # splitting again would turn a "," token into empty strings.
    lists = list(jieba.cut(sentence,cut_all=False,HMM=False))
    for word in lists:
        dicts[word] = dicts.get(word, 0) + 1
    return lists
38 | 
39 | 
40 | dict1 = {}
41 | t = segmentation(data1,lists=[],dicts=dict1)
42 | for x in t:
43 |     print x.encode('utf-8')
44 | #输出词频，同时去除一些杂词  
45 | badwords = []
46 | #badwords = [u'的',u'是',u'呢',u'还',u'BBB',u'EEE']
47 | for key in dict1.keys():
48 |     if key not in badwords:
49 |         print key.encode('utf-8'),':',dict1[key]
50 |         
51 |         
# Two test sentences that are scored against the corpus below.
test1 = u'刘超喜欢学习'
test2 = u'学习喜欢刘超'
54 | 
def segmentation(sentence,lists=None):
    """Tokenise ``sentence`` with jieba, without counting anything.

    This definition rebinds the module-level name ``segmentation``, so code
    below this point uses this variant.

    Args:
        sentence: text to tokenise.
        lists: ignored; kept only so existing calls that pass it still work.

    Returns:
        The list of tokens in order of appearance.
    """
    # Ask jieba to treat the name as a whole word.
    jieba.suggest_freq(u"刘超", True)
    # Take the tokens straight from the generator: joining them with "," and
    # splitting again would turn a "," token into empty strings.
    return list(jieba.cut(sentence,cut_all=False,HMM=False))
63 | # Rebind test1/test2 from raw text to their token lists.
64 | test1 = segmentation(test1)
65 | test2 = segmentation(test2)
66 | 
67 | #比较两个数列，二元语法
68 | def compareList(ori_list,test_list):
69 |     #申请空间
70 |     count_list=[0]*(len(test_list))
71 |     #遍历测试的字符串
72 |     for i in range(0,len(test_list)-1):
73 |         #遍历语料字符串，且因为是二元语法，不用比较语料字符串的最后一个字符
74 |         for j in range(0,len(ori_list)-2):                
75 |             #如果测试的第一个词和语料的第一个词相等则比较第二个词
76 |             if test_list[i]==ori_list[j]:
77 |                 if test_list[i+1]==ori_list[j+1]:
78 |                     count_list[i]+=1
79 |     return count_list
80 | # Show the per-position bigram counts of each test sentence in the corpus tokens.
81 | print compareList(t,test1)
82 | print compareList(t,test2)
83 | 
84 | #计算概率    
85 | def probability(test_list,count_list,ori_dict):
86 |     flag=0
87 |     #概率值为p
88 |     p=1
89 |     for key in test_list: 
90 |         #数据平滑处理：加1法
91 |         p*=(float(count_list[flag]+1)/float(ori_dict[key]+1))
92 |         flag+=1
93 |     return p
94 | print probability(test1,compareList(t,test1),dict1)  # score of the first test sentence
95 | print probability(test2,compareList(t,test2),dict1)  # score of the second test sentence
96 | 


--------------------------------------------------------------------------------
/NLP/tools/jieba/read_save.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stderr",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Building prefix dict from the default dictionary ...\n",
 13 |       "Loading model from cache C:\\Users\\Fire\\AppData\\Local\\Temp\\jieba.cache\n"
 14 |      ]
 15 |     },
 16 |     {
 17 |      "name": "stdout",
 18 |      "output_type": "stream",
 19 |      "text": [
 20 |       "start cut.\n"
 21 |      ]
 22 |     },
 23 |     {
 24 |      "name": "stderr",
 25 |      "output_type": "stream",
 26 |      "text": [
 27 |       "Loading model cost 0.684 seconds.\n",
 28 |       "Prefix dict has been built succesfully.\n"
 29 |      ]
 30 |     },
 31 |     {
 32 |      "name": "stdout",
 33 |      "output_type": "stream",
 34 |      "text": [
 35 |       "End file.\n"
 36 |      ]
 37 |     },
 38 |     {
 39 |      "data": {
 40 |       "text/plain": [
 41 |        "0"
 42 |       ]
 43 |      },
 44 |      "execution_count": 1,
 45 |      "metadata": {},
 46 |      "output_type": "execute_result"
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "import codecs\n",
 51 |     "import jieba  \n",
 52 |     "import jieba.analyse  \n",
 53 |     "  \n",
 54 |     "#Read file and cut  \n",
 55 |     "def read_file_cut():  \n",
 56 |     "    fileName = r\"F:\\data\\wiki.zh.text\"  \n",
 57 |     "    source = open(fileName, 'r',encoding='utf-8') \n",
 58 |     "    line = source.readline()  \n",
 59 |     "    line = line.rstrip('\\n')  \n",
 60 |     "    \n",
 61 |     "    result = codecs.open('wiki.zh.text.seg', 'w', 'utf-8')  \n",
 62 |     "    \n",
 63 |     "    print('start cut.')\n",
 64 |     "    while line!=\"\":  \n",
 65 |     "        seglist = jieba.cut(line,cut_all=False)  #精确模式  \n",
 66 |     "        output = ' '.join(list(seglist))         #空格拼接  \n",
 67 |     "        result.write(output + '\\r\\n')  \n",
 68 |     "        line = source.readline()   \n",
 69 |     "    print('End file.') \n",
 70 |     "    source.close()  \n",
 71 |     "    result.close() \n",
 72 |     "    return 0\n",
 73 |     "        \n",
 74 |     "read_file_cut()        "
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": null,
 80 |    "metadata": {
 81 |     "collapsed": true
 82 |    },
 83 |    "outputs": [],
 84 |    "source": []
 85 |   }
 86 |  ],
 87 |  "metadata": {
 88 |   "kernelspec": {
 89 |    "display_name": "Python 3",
 90 |    "language": "python",
 91 |    "name": "python3"
 92 |   },
 93 |   "language_info": {
 94 |    "codemirror_mode": {
 95 |     "name": "ipython",
 96 |     "version": 3
 97 |    },
 98 |    "file_extension": ".py",
 99 |    "mimetype": "text/x-python",
100 |    "name": "python",
101 |    "nbconvert_exporter": "python",
102 |    "pygments_lexer": "ipython3",
103 |    "version": "3.5.4"
104 |   }
105 |  },
106 |  "nbformat": 4,
107 |  "nbformat_minor": 2
108 | }
109 | 


--------------------------------------------------------------------------------
/NLP/tools/jieba/readme.md:
--------------------------------------------------------------------------------
 1 | ### 常用
 2 | * 读取自定义词典
 3 | 
 4 | ```
 5 | jieba.load_userdict(r'./data/user_dict.txt') # 参数为自定义词典的路径
 6 | # 词典格式：每行一个词条，依次为“词 [词频] [词性]”，中括号内为可选项
 7 | ```
 8 | 
 9 | ### 实践
10 | * [二元语法模型](./jieba_cut_ngram.py)
11 | * [jieba分词不同模式](./jieba_cut.ipynb)
12 | * [读取文本分词并存储](./read_save.ipynb)
13 | * [词性标注](./cixing.py)
14 | * [TF-IDF](./if-idf.py)
15 | 


--------------------------------------------------------------------------------
/NLP/tools/nltk/readme.md:
--------------------------------------------------------------------------------
 1 | ### 资料
 2 | * [官方文档](http://www.nltk.org/api/nltk.html)
 3 | 
 4 | ### 常用功能
 5 | * [处理频率问题 .FreqDist](./func/nltk_FreqDist.ipynb)
 6 | * [分词tokenize & Text & 处理HTML](./func/tokenize_text_html.ipynb)
 7 | 
 8 | ### 实践
 9 | * [词形归一化 & 词性标注](./practice/wordsNormalization.ipynb)
10 | * [情感分析](./practice/Sentiment_analysis.ipynb)
11 | * [文本相似性](./practice/Text_similarity.ipynb)
12 | * [TF-IDF](./practice/TF-IDF.ipynb)


--------------------------------------------------------------------------------
/NLP/tools/word2vec/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/NLP/tools/word2vec/.DS_Store


--------------------------------------------------------------------------------
/NLP/tools/word2vec/readme.md:
--------------------------------------------------------------------------------
 1 | ### 常用
 2 | * 使用自定义语料：
 3 | 	 1. 分词，去除停用词
 4 | 	 2. 所有词以空格或 tab 隔开，写入同一个文件中
 5 | 
 6 | 
 7 | 
 8 | ### 实践
 9 | * [基本示例](word2vec_start.ipynb)
10 | 


--------------------------------------------------------------------------------
/Others/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fire717/Machine-Learning/46d417e4c872052857899331ff7f526f79110896/Others/.DS_Store


--------------------------------------------------------------------------------