├── .gitignore ├── README.md ├── computer_buy.csv ├── decision_example.py ├── hierarchical_example.py ├── kmeans_example.py ├── kmeanspp.py ├── knn_example.py ├── linear_example.py ├── logistic_example.py ├── logistic_set.txt ├── network_sgd.py ├── nn_example.py ├── regression_relativity.py ├── result.dot ├── svm_example.py └── tf_demos ├── activation.py ├── board.py ├── board_simple.py ├── classification.py ├── cnn.py ├── demo.py ├── input.py ├── layer.py ├── optimizer.py ├── overfitting.py ├── simple.py ├── variable.py └── viewable.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # IPython Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | ### Example user template template 92 | ### Example user template 93 | 94 | # IntelliJ project files 95 | .idea 96 | *.iml 97 | out 98 | gen 99 | test.py 100 | logs/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 说明 2 | 本项目用于学习人工智能,提供各个算法的应用实例,有的算法甚至提供手动实现的方式。 -------------------------------------------------------------------------------- /computer_buy.csv: -------------------------------------------------------------------------------- 1 | RID,age,income,student,credit_rating,class_buys_computer 2 | 1,youth,hight,no,fair,no 3 | 2,youth,hight,no,excellent,no 4 | 3,middle_aged,hight,no,fair,yes 5 | 4,senior,medium,no,fair,yes 6 | 5,senior,low,yes,fair,yes 7 | 6,senior,low,yes,excellent,no 8 | 7,middle_aged,low,yes,excellent,yes 9 | 8,youth,medium,no,fair,no 10 | 9,youth,low,yes,fair,yes 11 | 10,senior,medium,yes,fair,yes 12 | 11,youth,medium,yes,excellent,yes 13 | 12,middle_aged,medium,no,excellent,yes 14 | 13,middle_aged,hight,yes,fair,yes 15 | 14,senior,medium,no,excellent,no 16 | -------------------------------------------------------------------------------- /decision_example.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import math 3 | import collections 4 | 5 | 
def entropy(rows: list) -> float: 6 | """ 7 | 计算数组的熵 8 | :param rows: 9 | :return: 10 | """ 11 | result = collections.Counter() 12 | result.update(rows) 13 | rows_len = len(rows) 14 | assert rows_len # 数组长度不能为0 15 | # 开始计算熵值 16 | ent = 0.0 17 | for r in result.values(): 18 | p = float(r) / rows_len 19 | ent -= p * math.log2(p) 20 | return ent 21 | 22 | def condition_entropy(future_list: list, result_list: list) -> float: 23 | """ 24 | 计算条件熵 25 | """ 26 | entropy_dict = collections.defaultdict(list) # {0:[], 1:[]} 27 | for future, value in zip(future_list, result_list): 28 | entropy_dict[future].append(value) 29 | # 计算条件熵 30 | ent = 0.0 31 | future_len = len(future_list) 32 | for value in entropy_dict.values(): 33 | p = len(value) / future_len * entropy(value) 34 | ent += p 35 | 36 | return ent 37 | 38 | def gain(future_list: list, result_list: list) -> float: 39 | """ 40 | 获取某特征的信息增益 41 | """ 42 | info = entropy(result_list) 43 | info_condition = condition_entropy(future_list, result_list) 44 | return info - info_condition 45 | 46 | class DecisionNode(object): 47 | """ 48 | 决策树的节点 49 | """ 50 | def __init__(self, col=-1, data_set=None, labels=None, results=None, tb=None, fb=None): 51 | self.has_calc_index = [] # 已经计算过的特征索引 52 | self.col = col # col 是待检验的判断条件,对应列索引值 53 | self.data_set = data_set # 节点的 待检测数据 54 | self.labels = labels # 对应当前列必须匹配的值 55 | self.results = results # 保存的是针对当前分支的结果,有值则表示该点是叶子节点 56 | self.tb = tb # 当信息增益最高的特征为True时的子树 57 | self.fb = fb # 当信息增益最高的特征为False时的子树 58 | 59 | def if_split_end(result_list: list) -> bool: 60 | """ 61 | 递归的结束条件,每个分支的结果集都是相同的分类 62 | :param result_list: 63 | :return: 64 | """ 65 | result = collections.Counter() 66 | result.update(result_list) 67 | return len(result) == 1 68 | 69 | def choose_best_future(data_set: list, labels: list, ignore_index: list) -> int: 70 | """ 71 | 从特征向量中筛选出最好的特征,返回它的特征索引 72 | """ 73 | result_dict = {} # { 索引: 信息增益值 } 74 | future_num = len(data_set[0]) 75 | for i in range(future_num): 76 | if i in ignore_index: # 如果已经计算过了 77 | continue 78 | future_list = [x[i] for x in data_set] 79 | result_dict[i] = gain(future_list, labels) # 获取信息增益 80 | # 排序后选择第一个 81 | ret = sorted(result_dict.items(), key=lambda x: x[1], reverse=True) 82 | return ret[0][0] 83 | 84 | class DecisionTreeClass(): 85 | def __init__(self): 86 | self.future_num = 0 # 特征 87 | self.tree_root = None # 决策树根节点 88 | 89 | def build_tree(self, node: DecisionNode): 90 | # 递归条件结束 91 | if if_split_end(node.labels): 92 | node.results = node.labels[0] # 表明是叶子节点 93 | return 94 | #print(node.data_set) 95 | # 不是叶子节点,开始创建分支 96 | best_index = choose_best_future(node.data_set, node.labels, node.has_calc_index) 97 | node.col = best_index 98 | 99 | # 根据信息增益最大进行划分 100 | # 左子树 101 | tb_index = [i for i, value in enumerate(node.data_set) if value[best_index]] 102 | tb_data_set = [node.data_set[x] for x in tb_index] 103 | tb_data_labels = [node.labels[x] for x in tb_index] 104 | tb_node = DecisionNode(data_set=tb_data_set, labels=tb_data_labels) 105 | tb_node.has_calc_index = list(node.has_calc_index) 106 | tb_node.has_calc_index.append(best_index) 107 | node.tb = tb_node 108 | 109 | # 右子树 110 | fb_index = [i for i, value in enumerate(node.data_set) if not value[best_index]] 111 | fb_data_set = [node.data_set[x] for x in fb_index] 112 | fb_data_labels = [node.labels[x] for x in fb_index] 113 | fb_node = DecisionNode(data_set=fb_data_set, labels=fb_data_labels) 114 | fb_node.has_calc_index = list(node.has_calc_index) 115 | fb_node.has_calc_index.append(best_index) 116 | node.fb = 
fb_node 117 | 118 | # 递归创建子树 119 | if tb_index: 120 | self.build_tree(node.tb) 121 | if fb_index: 122 | self.build_tree(node.fb) 123 | 124 | def clear_tree_example_data(self, node: DecisionNode): 125 | """ 126 | 清理tree的训练数据 127 | :return: 128 | """ 129 | del node.has_calc_index 130 | del node.labels 131 | del node.data_set 132 | if node.tb: 133 | self.clear_tree_example_data(node.tb) 134 | if node.fb: 135 | self.clear_tree_example_data(node.fb) 136 | 137 | def fit(self, x: list, y: list): 138 | """ 139 | x是训练集,二维数组。y是结果集,一维数组 140 | """ 141 | self.future_num = len(x[0]) 142 | self.tree_root = DecisionNode(data_set=x, labels=y) 143 | self.build_tree(self.tree_root) 144 | self.clear_tree_example_data(self.tree_root) 145 | 146 | def _predict(self, data_test: list, node: DecisionNode): 147 | if node.results: 148 | return node.results 149 | col = node.col 150 | if data_test[col]: 151 | return self._predict(data_test, node.tb) 152 | else: 153 | return self._predict(data_test, node.fb) 154 | 155 | def predict(self, data_test): 156 | """ 157 | 预测 158 | """ 159 | return self._predict(data_test, self.tree_root) 160 | 161 | if __name__ == "__main__": 162 | dummy_x = [ 163 | [0, 0, 1, 0, 1, 1, 0, 0, 1, 0, ], 164 | [0, 0, 1, 1, 0, 1, 0, 0, 1, 0, ], 165 | [1, 0, 0, 0, 1, 1, 0, 0, 1, 0, ], 166 | [0, 1, 0, 0, 1, 0, 0, 1, 1, 0, ], 167 | [0, 1, 0, 0, 1, 0, 1, 0, 0, 1, ], 168 | [0, 1, 0, 1, 0, 0, 1, 0, 0, 1, ], 169 | [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, ], 170 | [0, 0, 1, 0, 1, 0, 0, 1, 1, 0, ], 171 | [0, 0, 1, 0, 1, 0, 1, 0, 0, 1, ], 172 | [0, 1, 0, 0, 1, 0, 0, 1, 0, 1, ], 173 | [0, 0, 1, 1, 0, 0, 0, 1, 0, 1, ], 174 | [1, 0, 0, 1, 0, 0, 0, 1, 1, 0, ], 175 | [1, 0, 0, 0, 1, 1, 0, 0, 0, 1, ], 176 | [0, 1, 0, 1, 0, 0, 0, 1, 1, 0, ], 177 | ] 178 | dummy_y = [0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0] 179 | 180 | tree = DecisionTreeClass() 181 | tree.fit(dummy_x, dummy_y) 182 | 183 | test_row = [1, 0, 0, 0, 1, 1, 0, 0, 1, 0, ] 184 | print(tree.predict(test_row)) -------------------------------------------------------------------------------- /hierarchical_example.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # 层次聚类 3 | import math 4 | import numpy as np 5 | from sklearn import datasets 6 | from sklearn import cluster 7 | 8 | def euler_distance(point1: np.ndarray, point2: list) -> float: 9 | """ 10 | 计算两点之间的欧拉距离,支持多维 11 | """ 12 | distance = 0.0 13 | for a, b in zip(point1, point2): 14 | distance += math.pow(a - b, 2) 15 | return math.sqrt(distance) 16 | 17 | class ClusterNode(object): 18 | def __init__(self, vec, left=None, right=None, distance=-1, id=None, count=1): 19 | """ 20 | :param vec: 保存两个数据聚类后形成新的中心 21 | :param left: 左节点 22 | :param right: 右节点 23 | :param distance: 两个节点的距离 24 | :param id: 用来标记哪些节点是计算过的 25 | :param count: 这个节点的叶子节点个数 26 | """ 27 | self.vec = vec 28 | self.left = left 29 | self.right = right 30 | self.distance = distance 31 | self.id = id 32 | self.count = count 33 | 34 | class Hierarchical(object): 35 | def __init__(self, k = 1): 36 | assert k > 0 37 | self.k = k 38 | self.labels = None 39 | def fit(self, x): 40 | nodes = [ClusterNode(vec=v, id=i) for i,v in enumerate(x)] 41 | distances = {} 42 | point_num, future_num = np.shape(x) # 特征的维度 43 | self.labels = [ -1 ] * point_num 44 | currentclustid = -1 45 | while len(nodes) > self.k: 46 | min_dist = math.inf 47 | nodes_len = len(nodes) 48 | closest_part = None # 表示最相似的两个聚类 49 | for i in range(nodes_len - 1): 50 | for j in range(i + 1, nodes_len): 51 | # 为了不重复计算距离,保存在字典内 52 | d_key = 
(nodes[i].id, nodes[j].id) 53 | if d_key not in distances: 54 | distances[d_key] = euler_distance(nodes[i].vec, nodes[j].vec) 55 | d = distances[d_key] 56 | if d < min_dist: 57 | min_dist = d 58 | closest_part = (i, j) 59 | # 合并两个聚类 60 | part1, part2 = closest_part 61 | node1, node2 = nodes[part1], nodes[part2] 62 | new_vec = [ (node1.vec[i] * node1.count + node2.vec[i] * node2.count ) / (node1.count + node2.count) 63 | for i in range(future_num)] 64 | new_node = ClusterNode(vec=new_vec, 65 | left=node1, 66 | right=node2, 67 | distance=min_dist, 68 | id=currentclustid, 69 | count=node1.count + node2.count) 70 | currentclustid -= 1 71 | del nodes[part2], nodes[part1] # 一定要先del索引较大的 72 | nodes.append(new_node) 73 | self.nodes = nodes 74 | self.calc_label() 75 | 76 | def calc_label(self): 77 | """ 78 | 调取聚类的结果 79 | """ 80 | for i, node in enumerate(self.nodes): 81 | # 将节点的所有叶子节点都分类 82 | self.leaf_traversal(node, i) 83 | 84 | def leaf_traversal(self, node: ClusterNode, label): 85 | """ 86 | 递归遍历叶子节点 87 | """ 88 | if node.left == None and node.right == None: 89 | self.labels[node.id] = label 90 | if node.left: 91 | self.leaf_traversal(node.left, label) 92 | if node.right: 93 | self.leaf_traversal(node.right, label) 94 | 95 | 96 | 97 | iris = datasets.load_iris() 98 | 99 | my = Hierarchical(4) 100 | my.fit(iris.data) 101 | print(np.array(my.labels)) 102 | 103 | sk = cluster.AgglomerativeClustering(4) 104 | sk.fit(iris.data) 105 | print(sk.labels_) 106 | -------------------------------------------------------------------------------- /kmeans_example.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import math 3 | import random 4 | import numpy as np 5 | import collections 6 | from sklearn import cluster, datasets 7 | 8 | 9 | def euler_distance(point1: list, point2: list) -> float: 10 | """ 11 | 计算两点之间的欧拉距离,支持多维 12 | """ 13 | distance = 0.0 14 | for a, b in zip(point1, point2): 15 | distance += math.pow(a - b, 2) 16 | return math.sqrt(distance) 17 | 18 | class K_means(object): 19 | def __init__(self, k: int, max_iter=10): 20 | self.k = k 21 | self.max_iter = 10 # 最大迭代次数 22 | self.data_set = None # 训练集 23 | self.labels = None # 结果集 24 | 25 | def init_centroids(self) -> list: 26 | """ 27 | 从训练集中随机选择 k 个点作为质点 28 | """ 29 | point_num = np.shape(self.data_set)[0] 30 | random_index = random.sample(list(range(point_num)), self.k) 31 | centroids = [self.data_set[i] for i in random_index] 32 | return centroids 33 | 34 | def fit(self, data_set): 35 | self.data_set = data_set 36 | point_num = np.shape(data_set)[0] 37 | self.labels = [ -1 ] * point_num # 初始化结果集 38 | centroids = self.init_centroids() # 初始化随机质点 39 | old_centroids = [] # 上一次迭代的质点 40 | step = 0 # 当前迭代次数 41 | while not self.should_stop(old_centroids, centroids, step): 42 | old_centroids = np.copy(centroids) 43 | step += 1 44 | for i, point in enumerate(data_set): 45 | self.labels[i] = self.get_closest_index(point, centroids) 46 | centroids = self.update_centroids() 47 | 48 | def get_closest_index(self, point, centroids): 49 | min_dist = math.inf # 初始设为无穷大 50 | label = -1 51 | for i, centroid in enumerate(centroids): 52 | dist = euler_distance(centroid, point) 53 | if dist < min_dist: 54 | min_dist = dist 55 | label = i 56 | return label 57 | 58 | def update_centroids(self): 59 | """ 60 | 取各类的中心设为新的质点 61 | """ 62 | collect = collections.defaultdict(list) 63 | for i, label in enumerate(self.labels): 64 | collect[label].append(self.data_set[i]) 65 | 66 | centroids = [] 67 | for i in 
range(self.k): 68 | centroids.append(np.mean(collect[i], axis=0)) 69 | return centroids 70 | 71 | def should_stop(self, old_centroids, centroids, step) -> bool: 72 | """ 73 | 判断是否停止迭代,停止的条件是 新分类结果与原来一致或者已达到设置的迭代次数 74 | """ 75 | if step > self.max_iter: 76 | return True 77 | return np.array_equal(old_centroids, centroids) 78 | 79 | 80 | if __name__ == "__main__": 81 | iris = datasets.load_iris() 82 | 83 | my_k = K_means(4) 84 | my_k.fit(iris.data) 85 | print(np.array(my_k.labels)) 86 | 87 | sk = cluster.KMeans(4) 88 | sk.fit(iris.data) 89 | print(sk.labels_) 90 | -------------------------------------------------------------------------------- /kmeanspp.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import math 3 | import random 4 | from sklearn import datasets 5 | 6 | def euler_distance(point1: list, point2: list) -> float: 7 | """ 8 | 计算两点之间的欧拉距离,支持多维 9 | """ 10 | distance = 0.0 11 | for a, b in zip(point1, point2): 12 | distance += math.pow(a - b, 2) 13 | return math.sqrt(distance) 14 | 15 | def get_closest_dist(point, centroids): 16 | min_dist = math.inf # 初始设为无穷大 17 | for i, centroid in enumerate(centroids): 18 | dist = euler_distance(centroid, point) 19 | if dist < min_dist: 20 | min_dist = dist 21 | return min_dist 22 | 23 | def kpp_centers(data_set: list, k: int) -> list: 24 | """ 25 | 从数据集中返回 k 个对象可作为质心 26 | """ 27 | cluster_centers = [] 28 | cluster_centers.append(random.choice(data_set)) 29 | d = [0 for _ in range(len(data_set))] 30 | for _ in range(1, k): 31 | total = 0.0 32 | for i, point in enumerate(data_set): 33 | d[i] = get_closest_dist(point, cluster_centers) # 与最近一个聚类中心的距离 34 | total += d[i] 35 | total *= random.random() 36 | for i, di in enumerate(d): # 轮盘法选出下一个聚类中心; 37 | total -= di 38 | if total > 0: 39 | continue 40 | cluster_centers.append(data_set[i]) 41 | break 42 | return cluster_centers 43 | 44 | if __name__ == "__main__": 45 | iris = datasets.load_iris() 46 | print(kpp_centers(iris.data, 4)) -------------------------------------------------------------------------------- /knn_example.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import math 3 | import collections 4 | import numpy as np 5 | 6 | def euler_distance(point1: list, point2: list) -> float: 7 | """ 8 | 计算两点之间的欧拉距离,支持多维 9 | """ 10 | distance = 0.0 11 | for a, b in zip(point1, point2): 12 | distance += math.pow(a - b, 2) 13 | return math.sqrt(distance) 14 | 15 | 16 | class KNeighborsClass(object): 17 | def __init__(self, n_neighbors=5): 18 | self.n_neighbors = n_neighbors 19 | 20 | def fit(self, data_set, labels): 21 | self.data_set = data_set 22 | self.labels = labels 23 | 24 | def predict(self, test_row): 25 | dist = [] 26 | for v in self.data_set: 27 | dist.append(euler_distance(v, test_row)) 28 | dist = np.array(dist) 29 | sorted_dist_index = np.argsort(dist) # 根据元素的值从大到小对元素进行排序,返回下标 30 | 31 | # 根据K值选出分类结果, ['A', 'B', 'B', 'A', ...] 
32 | class_list = [ self.labels[ sorted_dist_index[i] ] for i in range(self.n_neighbors)] 33 | result_dict = collections.Counter(class_list) # 计算各个分类出现的次数 34 | ret = sorted(result_dict.items(), key=lambda x: x[1], reverse=True) # 采用多数表决,即排序后的第一个分类 35 | return ret[0][0] 36 | 37 | if __name__ == "__main__": 38 | from sklearn import datasets 39 | iris = datasets.load_iris() 40 | knn = KNeighborsClass(n_neighbors=5) 41 | knn.fit(iris.data, iris.target) 42 | predict = knn.predict([0.1, 0.2, 0.3, 0.4]) 43 | print(predict) # output: 1 -------------------------------------------------------------------------------- /linear_example.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import numpy as np 3 | from sklearn import linear_model 4 | 5 | # 一元线性回归 6 | class SimpleLinearRegression(object): 7 | """ 8 | 简单线性回归方程,即一元线性回归,只有一个自变量,估值函数为: y = b0 + b1 * x 9 | """ 10 | def __init__(self): 11 | self.b0 = 0 12 | self.b1 = 0 13 | def fit(self, x: list, y: list): 14 | n = len(x) 15 | x_mean = sum(x) / n 16 | y_mean = sum(y) / n 17 | dinominator = 0 18 | numerator = 0 19 | for xi, yi in zip(x, y): 20 | numerator += (xi - x_mean) * (yi - y_mean) 21 | dinominator += (xi - x_mean) ** 2 22 | self.b1 = numerator / dinominator 23 | self.b0 = y_mean - self.b1 * x_mean 24 | 25 | def pridict(self, x): 26 | return self.b0 + self.b1 * x 27 | 28 | 29 | class MyLinearRegression(object): 30 | def __init__(self): 31 | self.b = [] 32 | 33 | def fit(self, x: list, y: list): 34 | # 为每条数据添加 1 35 | point_num, future_num = np.shape(x) 36 | tmpx = np.ones(shape=(point_num, future_num + 1)) 37 | tmpx[:,1 :] = x 38 | x_mat = np.mat(tmpx) 39 | y_mat = np.mat(y).T 40 | xT = x_mat.T 41 | self.b = (xT * x_mat).I * xT * y_mat 42 | 43 | def predict(self, x): 44 | return np.mat([1] + x) * self.b 45 | 46 | if __name__ == "__main__": 47 | x = [ 48 | [100.0, 4.0], 49 | [50.0, 3.0], 50 | [100.0, 4.0], 51 | [100.0, 2.0], 52 | [50.0, 2.0], 53 | [80.0, 2.0], 54 | [75.0, 3.0], 55 | [65.0, 4.0], 56 | [90.0, 3.0], 57 | [90.0, 2.0] 58 | ] 59 | 60 | y = [9.3, 4.8, 8.9, 6.5, 4.2, 6.2, 7.4, 6.0, 7.6, 6.1] 61 | 62 | test_row = [50, 3] 63 | linear = MyLinearRegression() 64 | linear.fit(x, y) 65 | print(linear.predict(test_row)) # [[ 4.95830457]] 66 | 67 | sk = linear_model.LinearRegression() 68 | sk.fit(x, y) 69 | print(sk.predict([test_row])) # [ 4.95830457] 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /logistic_example.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from sklearn.linear_model import LogisticRegression 6 | 7 | 8 | def genData(): 9 | train_x = [] 10 | train_y = [] 11 | with open("logistic_set.txt") as f: 12 | for line in f.readlines(): 13 | line = line.strip().split() 14 | num = len(line) 15 | train_x.append([float(line[x]) for x in range(num - 1)]) 16 | train_y.append(float(line[-1])) 17 | return train_x, train_y 18 | 19 | def sigmoid(x): 20 | return 1.0 / (1 + np.exp(-x)) 21 | 22 | class LogisticReg(object): 23 | def __init__(self): 24 | pass 25 | def fit(self, x, y, learn_rate=0.0005): 26 | point_num, future_num = np.shape(x) 27 | new_x = np.ones(shape=(point_num, future_num + 1)) # 多一列x0,全部设为1 28 | new_x[:, 1:] = x 29 | self.theta = np.ones(shape=(future_num + 1, 1)) 30 | 31 | x_mat = np.mat(new_x) 32 | y_mat = np.mat(y).T 33 | J = [] 34 | for i in range(800): 35 | h = sigmoid(np.dot(x_mat, 
self.theta)) 36 | # 打印损失函数 37 | cost = np.sum([ a * -np.log(b) + (1 - a) * -np.log(1 - b) for a, b in zip(y_mat, h)]) 38 | J.append(cost) 39 | self.theta -= learn_rate * x_mat.T * (h - y_mat) 40 | plt.plot(J) 41 | plt.show() 42 | 43 | def predict(self, row): 44 | row = np.array([1] + row) 45 | result = sigmoid(np.dot(row, self.theta)) 46 | return 1 if result > 0.5 else 0 47 | 48 | if __name__ == "__main__": 49 | mylog = LogisticReg() 50 | x, y = genData() 51 | test_row = [0.6, 12] 52 | mylog.fit(x, y) 53 | print(mylog.theta) 54 | print("LogisticReg predict:", mylog.predict(test_row)) 55 | 56 | sk = LogisticRegression() 57 | sk.fit(x, y) 58 | print(sk.intercept_) 59 | print(sk.coef_) 60 | print("sklearn LogisticRegression predict:", sk.predict([test_row])) -------------------------------------------------------------------------------- /logistic_set.txt: -------------------------------------------------------------------------------- 1 | -0.017612 14.053064 0 2 | -1.395634 4.662541 1 3 | -0.752157 6.538620 0 4 | -1.322371 7.152853 0 5 | 0.423363 11.054677 0 6 | 0.406704 7.067335 1 7 | 0.667394 12.741452 0 8 | -2.460150 6.866805 1 9 | 0.569411 9.548755 0 10 | -0.026632 10.427743 0 11 | 0.850433 6.920334 1 12 | 1.347183 13.175500 0 13 | 1.176813 3.167020 1 14 | -1.781871 9.097953 0 15 | -0.566606 5.749003 1 16 | 0.931635 1.589505 1 17 | -0.024205 6.151823 1 18 | -0.036453 2.690988 1 19 | -0.196949 0.444165 1 20 | 1.014459 5.754399 1 21 | 1.985298 3.230619 1 22 | -1.693453 -0.557540 1 23 | -0.576525 11.778922 0 24 | -0.346811 -1.678730 1 25 | -2.124484 2.672471 1 26 | 1.217916 9.597015 0 27 | -0.733928 9.098687 0 28 | -3.642001 -1.618087 1 29 | 0.315985 3.523953 1 30 | 1.416614 9.619232 0 31 | -0.386323 3.989286 1 32 | 0.556921 8.294984 1 33 | 1.224863 11.587360 0 34 | -1.347803 -2.406051 1 35 | 1.196604 4.951851 1 36 | 0.275221 9.543647 0 37 | 0.470575 9.332488 0 38 | -1.889567 9.542662 0 39 | -1.527893 12.150579 0 40 | -1.185247 11.309318 0 41 | -0.445678 3.297303 1 42 | 1.042222 6.105155 1 43 | -0.618787 10.320986 0 44 | 1.152083 0.548467 1 45 | 0.828534 2.676045 1 46 | -1.237728 10.549033 0 47 | -0.683565 -2.166125 1 48 | 0.229456 5.921938 1 49 | -0.959885 11.555336 0 50 | 0.492911 10.993324 0 51 | 0.184992 8.721488 0 52 | -0.355715 10.325976 0 53 | -0.397822 8.058397 0 54 | 0.824839 13.730343 0 55 | 1.507278 5.027866 1 56 | 0.099671 6.835839 1 57 | -0.344008 10.717485 0 58 | 1.785928 7.718645 1 59 | -0.918801 11.560217 0 60 | -0.364009 4.747300 1 61 | -0.841722 4.119083 1 62 | 0.490426 1.960539 1 63 | -0.007194 9.075792 0 64 | 0.356107 12.447863 0 65 | 0.342578 12.281162 0 66 | -0.810823 -1.466018 1 67 | 2.530777 6.476801 1 68 | 1.296683 11.607559 0 69 | 0.475487 12.040035 0 70 | -0.783277 11.009725 0 71 | 0.074798 11.023650 0 72 | -1.337472 0.468339 1 73 | -0.102781 13.763651 0 74 | -0.147324 2.874846 1 75 | 0.518389 9.887035 0 76 | 1.015399 7.571882 0 77 | -1.658086 -0.027255 1 78 | 1.319944 2.171228 1 79 | 2.056216 5.019981 1 80 | -0.851633 4.375691 1 81 | -1.510047 6.061992 0 82 | -1.076637 -3.181888 1 83 | 1.821096 10.283990 0 84 | 3.010150 8.401766 1 85 | -1.099458 1.688274 1 86 | -0.834872 -1.733869 1 87 | -0.846637 3.849075 1 88 | 1.400102 12.628781 0 89 | 1.752842 5.468166 1 90 | 0.078557 0.059736 1 91 | 0.089392 -0.715300 1 92 | 1.825662 12.693808 0 93 | 0.197445 9.744638 0 94 | 0.126117 0.922311 1 95 | -0.679797 1.220530 1 96 | 0.677983 2.556666 1 97 | 0.761349 10.693862 0 98 | -2.168791 0.143632 1 99 | 1.388610 9.341997 0 100 | 0.317029 14.739025 0 
-------------------------------------------------------------------------------- /network_sgd.py: --------------------------------------------------------------------------------
1 | #coding: utf-8
2 | import random
3 | import collections
4 | import numpy as np
5 | from sklearn import datasets
6 | from sklearn.model_selection import train_test_split
7 | from sklearn.preprocessing import LabelBinarizer
8 | 
9 | def logistic(x):
10 |     return 1.0 / (1 + np.exp(-x))
11 | 
12 | def logistic_deriv(x):
13 |     """
14 |     Derivative of the logistic function
15 |     """
16 |     fx = logistic(x)
17 |     return fx * (1 - fx)
18 | 
19 | class Network(object):
20 |     def __init__(self, layers: list):
21 |         self.num_layers = len(layers)  # number of layers in the network
22 |         self.activation = logistic
23 |         self.activation_deriv = logistic_deriv
24 |         # initialize random weights
25 |         self.weights = []
26 |         for i in range(self.num_layers - 1):
27 |             self.weights.append(np.random.randn(layers[i], layers[i + 1]))
28 | 
29 |         # biases
30 |         self.bias = []
31 |         for i in range(1, self.num_layers):
32 |             self.bias.append(np.random.randn(layers[i]))
33 | 
34 |     def feedforward(self, a):
35 |         for w, b in zip(self.weights, self.bias):
36 |             a = self.activation(np.dot(a, w) + b)
37 |         return a
38 | 
39 |     def SGD(self, train_data: list, epochs: int, mini_batch_size=100, eta=0.5, test_data=None):
40 |         n = len(train_data)
41 |         for i in range(epochs):
42 |             random.shuffle(train_data)
43 |             mini_batches = [ train_data[k:k + mini_batch_size]
44 |                              for k in range(0, n, mini_batch_size)]
45 |             for mini_batch in mini_batches:
46 |                 self.update_mini_batch(mini_batch, eta)
47 |             if test_data:
48 |                 print("Epoch {0}: {1} / {2}".format(i, self.evaluate(test_data), len(test_data)))
49 | 
50 |     def update_mini_batch(self, mini_batch, eta):
51 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
52 |         nabla_b = [np.zeros(b.shape) for b in self.bias]
53 |         for x, y in mini_batch:
54 |             delta_nabla_b, delta_nabla_w = self.backprop(x, y)
55 |             nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
56 |             nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
57 |         self.weights = [w - (eta / len(mini_batch)) * nw
58 |                         for w, nw in zip(self.weights, nabla_w)]
59 |         self.bias = [b - (eta / len(mini_batch)) * nb
60 |                      for b, nb in zip(self.bias, nabla_b)]
61 | 
62 | 
63 |     def backprop(self, x, y):
64 |         # forward pass
65 |         activations = [x, ]
66 |         zs = []  # layer outputs before the activation function (pre-activations)
67 |         for i in range(len(self.weights)):
68 |             z = np.dot(activations[i], self.weights[i]) + self.bias[i]
69 |             zs.append(z)
70 |             activations.append(self.activation(z))
71 | 
72 |         # backward pass
73 |         delta = self.cost_derivative(activations[-1], y) * self.activation_deriv(zs[-1])
74 |         deltas = [delta, ]
75 |         for i in range(len(activations) - 2, 0, -1):
76 |             deltas.append(np.dot(deltas[-1], self.weights[i].T) * self.activation_deriv(zs[i - 1]))
77 |         deltas.reverse()
78 |         weights = []
79 |         bias = []
80 |         for i in range(len(self.weights)):
81 |             bias.append(deltas[i])
82 |             layer = np.atleast_2d(activations[i])
83 |             delta = np.atleast_2d(deltas[i])
84 |             weights.append(np.dot(layer.T, delta))
85 |         return bias, weights
86 | 
87 |     def cost_derivative(self, output, y):
88 |         return output - y
89 |     def evaluate(self, test_data):
90 |         predictions = []
91 |         for x, y in test_data:
92 |             o = self.feedforward(x)
93 |             predictions.append(np.argmax(o) == np.argmax(y))
94 |         counter = collections.Counter(predictions)
95 |         return counter[True]
96 | 
97 | if __name__ == "__main__":
98 |     nn = Network(layers=[784, 30, 10])
99 |     digits = datasets.fetch_mldata('mnist-original')
100 |     X = digits.data
101 |     y 
= digits.target 102 | y = LabelBinarizer().fit_transform(y)# 分类结果离散化 103 | # 拆分为训练集和测试集 104 | X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=60000) 105 | 106 | # 分类结果离散化 107 | #labels_train = LabelBinarizer().fit_transform(y_train) 108 | #labels_test = LabelBinarizer().fit_transform(y_test) 109 | 110 | train_data = [(a, b) for a, b in zip(X_train, y_train)] 111 | test_data = [(a, b) for a, b in zip(X_test, y_test)] 112 | train_data = random.sample(train_data, 50000) 113 | nn.SGD(train_data, 30, mini_batch_size=30, eta=3.0, test_data=test_data) 114 | -------------------------------------------------------------------------------- /nn_example.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import numpy as np 3 | from sklearn import datasets 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.preprocessing import LabelBinarizer 6 | from sklearn.metrics import confusion_matrix, classification_report 7 | 8 | def tanh(x): 9 | return np.tanh(x) 10 | 11 | def tanh_deriv(x): 12 | """ 13 | tanh的导数 14 | """ 15 | return 1.0 - np.tanh(x) * np.tanh(x) 16 | 17 | def logistic(x): 18 | return 1.0 / (1 + np.exp(-x)) 19 | 20 | def logistic_deriv(x): 21 | """ 22 | 逻辑函数的导数 23 | """ 24 | fx = logistic(x) 25 | return fx * (1 - fx) 26 | 27 | class NeuralNetwork(object): 28 | def __init__(self, layers, activation='logistic'): 29 | """ 30 | :param layers: 层数,如[4, 3, 2] 表示两层len(list)-1,(因为第一层是输入层,,有4个单元), 31 | 第一层有3个单元,第二层有2个单元 32 | :param activation: 33 | """ 34 | if activation == 'tanh': 35 | self.activation = tanh 36 | self.activation_deriv = tanh_deriv 37 | elif activation == 'logistic': 38 | self.activation = logistic 39 | self.activation_deriv = logistic_deriv 40 | 41 | # 初始化随即权重 42 | self.weights = [] 43 | for i in range(len(layers) - 1): 44 | #tmp = (np.random.random([layers[i], layers[i + 1]]) * 2 - 1) * 0.25 45 | tmp = (np.random.random([layers[i], layers[i + 1]]) * 2 - 1) * 0.25 46 | self.weights.append(tmp) 47 | 48 | # 偏向 49 | self.bias = [] 50 | for i in range(1, len(layers)): 51 | self.bias.append((np.random.random(layers[i]) * 2 - 1) * 0.25) 52 | 53 | def fit(self, X, y, learning_rate=0.2, epochs=10000): 54 | X = np.atleast_2d(X) 55 | y = np.array(y) 56 | # 随即梯度 57 | for k in range(epochs): 58 | i = np.random.randint(X.shape[0]) 59 | a = [X[i]] # 随即取某一条实例 60 | for j in range(len(self.weights)): 61 | a.append(self.activation(np.dot(a[j], self.weights[j]) + self.bias[j] )) 62 | errors = y[i] - a[-1] 63 | deltas = [errors * self.activation_deriv(a[-1]) ,] # 输出层的误差 64 | # 反向传播,对于隐藏层的误差 65 | for j in range(len(a) - 2, 0, -1): 66 | tmp = np.dot(deltas[-1], self.weights[j].T) * self.activation_deriv(a[j]) 67 | deltas.append(tmp) 68 | deltas.reverse() 69 | 70 | # 更新权重 71 | for j in range(len(self.weights)): 72 | layer = np.atleast_2d(a[j]) 73 | delta = np.atleast_2d(deltas[j]) 74 | self.weights[j] += learning_rate * np.dot(layer.T, delta) 75 | 76 | # 更新偏向 77 | for j in range(len(self.bias)): 78 | self.bias[j] += learning_rate * deltas[j] 79 | 80 | def predict(self, row): 81 | a = np.array(row) # 确保是 ndarray 对象 82 | for i in range(len(self.weights)): 83 | a = self.activation(np.dot(a, self.weights[i]) + self.bias[i]) 84 | return a 85 | 86 | 87 | if __name__ == "__main__": 88 | nn = NeuralNetwork(layers=[64, 100, 10]) 89 | digits = datasets.load_digits() 90 | X = digits.data 91 | y = digits.target 92 | 93 | # 拆分为训练集和测试集 94 | X_train, X_test, y_train, y_test = train_test_split(X, y) 95 | 96 | # 分类结果离散化 97 | 
labels_train = LabelBinarizer().fit_transform(y_train) 98 | labels_test = LabelBinarizer().fit_transform(y_test) 99 | 100 | nn.fit(X_train, labels_train) 101 | 102 | # 收集测试结果 103 | predictions = [] 104 | for i in range(X_test.shape[0]): 105 | o = nn.predict(X_test[i] ) 106 | predictions.append(np.argmax(o)) 107 | 108 | # 打印对比结果 109 | print (confusion_matrix(y_test, predictions) ) 110 | print (classification_report(y_test, predictions)) -------------------------------------------------------------------------------- /regression_relativity.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | import math 3 | import numpy as np 4 | from sklearn import linear_model 5 | 6 | def computeCorrelation(x: list, y: list) -> float: 7 | x_mean = np.mean(x) 8 | y_mean = np.mean(y) 9 | SSR = 0 10 | var_x = 0 # x的方差 11 | var_y = 0 # y的方差 12 | for xi, yi in zip(x, y): 13 | diff_x = xi - x_mean 14 | diff_y = yi - y_mean 15 | SSR += diff_x * diff_y 16 | var_x += diff_x ** 2 17 | var_y += diff_y ** 2 18 | SST = math.sqrt(var_x * var_y) 19 | return SSR / SST 20 | 21 | def polyfit(x, y): 22 | linear = linear_model.LinearRegression() 23 | linear.fit(x, y) 24 | y_hat = linear.predict(x) 25 | y_mean = np.mean(y) 26 | SSR = 0 27 | SST = 0 28 | for i in range(len(y)): 29 | SSR += (y_hat[i] - y_mean) ** 2 30 | SST += (y[i] - y_mean) ** 2 31 | return SSR / SST 32 | 33 | if __name__ == "__main__": 34 | train_x = [1, 3, 8, 7, 9] 35 | train_y = [10, 12, 24, 21, 34] 36 | 37 | print(computeCorrelation(train_x, train_y)) 38 | 39 | train_x_2d = [[x] for x in train_x] # 通用的方式,训练集至少是二维的 40 | print(polyfit(train_x_2d, train_y)) 41 | 42 | -------------------------------------------------------------------------------- /result.dot: -------------------------------------------------------------------------------- 1 | digraph Tree { 2 | node [shape=box] ; 3 | 0 [label="age=middle_aged <= 0.5\nentropy = 0.94\nsamples = 14\nvalue = [5, 9]"] ; 4 | 1 [label="student=no <= 0.5\nentropy = 1.0\nsamples = 10\nvalue = [5, 5]"] ; 5 | 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ; 6 | 2 [label="credit_rating=fair <= 0.5\nentropy = 0.722\nsamples = 5\nvalue = [1, 4]"] ; 7 | 1 -> 2 ; 8 | 3 [label="income=low <= 0.5\nentropy = 1.0\nsamples = 2\nvalue = [1, 1]"] ; 9 | 2 -> 3 ; 10 | 4 [label="entropy = 0.0\nsamples = 1\nvalue = [0, 1]"] ; 11 | 3 -> 4 ; 12 | 5 [label="entropy = 0.0\nsamples = 1\nvalue = [1, 0]"] ; 13 | 3 -> 5 ; 14 | 6 [label="entropy = 0.0\nsamples = 3\nvalue = [0, 3]"] ; 15 | 2 -> 6 ; 16 | 7 [label="age=youth <= 0.5\nentropy = 0.722\nsamples = 5\nvalue = [4, 1]"] ; 17 | 1 -> 7 ; 18 | 8 [label="credit_rating=excellent <= 0.5\nentropy = 1.0\nsamples = 2\nvalue = [1, 1]"] ; 19 | 7 -> 8 ; 20 | 9 [label="entropy = 0.0\nsamples = 1\nvalue = [0, 1]"] ; 21 | 8 -> 9 ; 22 | 10 [label="entropy = 0.0\nsamples = 1\nvalue = [1, 0]"] ; 23 | 8 -> 10 ; 24 | 11 [label="entropy = 0.0\nsamples = 3\nvalue = [3, 0]"] ; 25 | 7 -> 11 ; 26 | 12 [label="entropy = 0.0\nsamples = 4\nvalue = [0, 4]"] ; 27 | 0 -> 12 [labeldistance=2.5, labelangle=-45, headlabel="False"] ; 28 | } -------------------------------------------------------------------------------- /svm_example.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import time 3 | import logging 4 | import matplotlib.pyplot as plt 5 | 6 | from sklearn.model_selection import train_test_split, GridSearchCV 7 | from sklearn.datasets import fetch_lfw_people 8 | from sklearn.metrics 
import classification_report 9 | from sklearn.metrics import confusion_matrix 10 | from sklearn.decomposition import PCA 11 | from sklearn import svm 12 | 13 | logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') 14 | 15 | lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4) 16 | 17 | n_samples, h, w = lfw_people.images.shape # 获取图像数据集的形状,绘图使用 18 | 19 | # 获取特征数据集和结果集 20 | X = lfw_people.data 21 | Y = lfw_people.target 22 | 23 | n_features = X.shape[1] # 特征的个数,或称为特征的维数 24 | target_names = lfw_people.target_names # 数据集中有多少个人,以人名组成列表返回 25 | n_classes = target_names.shape[0] 26 | print("===== 数据集中信息 =====") 27 | print("数据个数(n_samples):", n_samples) 28 | print("特征个数,维度(n_features):", n_features) 29 | print("结果集类别个数(n_classes):", n_classes) 30 | 31 | # 拆分训练集和测试集 32 | X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25) 33 | 34 | # 降维处理 35 | n_components = 150 36 | t0 = time.time() 37 | pca = PCA(n_components=n_components, whiten=True).fit(X_train) 38 | print("pca done %0.3fs" % (time.time() - t0)) 39 | 40 | # 从人脸中提取特征值 41 | eigenfaces = pca.components_.reshape((n_components, h, w)) 42 | 43 | print("Projecting the input data on the eigenfaces orthonormal basis") 44 | t0 = time.time() 45 | X_train_pca = pca.transform(X_train) 46 | X_test_pca = pca.transform(X_test) 47 | print("data set to pca done %0.3fs" % (time.time() - t0)) 48 | 49 | # 构造分类器 50 | t0 = time.time() 51 | param_grid = { 52 | "C": [1e3, 5e3, 1e4, 1e5], 53 | "gamma": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1] 54 | } 55 | 56 | clf = GridSearchCV(svm.SVC(kernel='rbf', class_weight='balanced'), param_grid=param_grid) 57 | clf.fit(X_train_pca, Y_train) 58 | print("fit done %0.3fs" % (time.time() - t0)) 59 | print(clf.best_estimator_) 60 | 61 | # 预测 62 | t0 = time.time() 63 | y_pred = clf.predict(X_test_pca) 64 | 65 | print(classification_report(Y_test, y_pred, target_names=target_names)) 66 | print(confusion_matrix(Y_test, y_pred, labels=range(n_classes))) 67 | 68 | 69 | # 测试结果可视化 70 | 71 | def plot_gallery(images, titles, h, w, n_row=3, n_col=4): 72 | """Helper function to plot a gallery of portraits""" 73 | plt.figure(figsize=(1.8 * n_col, 2.4 * n_row)) 74 | plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35) 75 | for i in range(n_row * n_col): 76 | plt.subplot(n_row, n_col, i + 1) 77 | plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray) 78 | plt.title(titles[i], size=12) 79 | plt.xticks(()) 80 | plt.yticks(()) 81 | 82 | def title(y_pred, y_test, target_names, i): 83 | pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1] 84 | true_name = target_names[y_test[i]].rsplit(' ', 1)[-1] 85 | return 'predicted: %s\ntrue: %s' % (pred_name, true_name) 86 | 87 | prediction_titles = [title(y_pred, Y_test, target_names, i) 88 | for i in range(y_pred.shape[0])] 89 | 90 | plot_gallery(X_test, prediction_titles, h, w) 91 | 92 | # plot the gallery of the most significative eigenfaces 93 | 94 | eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])] 95 | plot_gallery(eigenfaces, eigenface_titles, h, w) 96 | 97 | plt.show() -------------------------------------------------------------------------------- /tf_demos/activation.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | 激活函数 4 | """ 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /tf_demos/board.py: -------------------------------------------------------------------------------- 1 | 
#coding: utf-8 2 | import numpy as np 3 | import tensorflow as tf 4 | import matplotlib.pyplot as plt 5 | 6 | layer_num = 0 7 | # 添加层 8 | def add_layer(inputs, in_size, out_size, activation=None): 9 | global layer_num 10 | layer_num += 0 11 | with tf.name_scope('layer'): 12 | with tf.name_scope('weights'): 13 | weights = tf.Variable(tf.random_normal(shape=[in_size, out_size]), name='W') 14 | tf.summary.histogram("layer%s-weights" % layer_num, weights) 15 | with tf.name_scope('biases'): 16 | biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b') 17 | tf.summary.histogram("layer%s-biases" % layer_num, biases) 18 | with tf.name_scope('y'): 19 | y = tf.matmul(inputs, weights) + biases 20 | if activation: 21 | outputs = activation(y) 22 | else: 23 | outputs = y 24 | tf.summary.histogram("layer%s-outputs" % layer_num, outputs) 25 | 26 | return outputs 27 | 28 | 29 | 30 | # print(np.random.randn(10)) 31 | 32 | x_data = np.linspace(-1, 1, 300)[:, np.newaxis] 33 | 34 | noise = np.random.normal(0.0, 0.05, x_data.shape) # 添加噪点 35 | y_data = np.square(x_data) - 0.5 + noise 36 | 37 | with tf.name_scope('inputs'): 38 | xs = tf.placeholder(tf.float32, [None, 1], name='x_input') 39 | ys = tf.placeholder(tf.float32, [None, 1], name='y_input') 40 | 41 | 42 | layer1 = add_layer(xs, 1, 10, activation=tf.nn.relu) 43 | layer2 = add_layer(layer1, 10, 1, activation=None) 44 | 45 | with tf.name_scope('loss'): 46 | loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - layer2), reduction_indices=[1]), name='loss') 47 | tf.summary.scalar("loss", loss) 48 | 49 | optimizer = tf.train.GradientDescentOptimizer(0.1) 50 | 51 | with tf.name_scope('train'): 52 | train_step = optimizer.minimize(loss) 53 | 54 | init = tf.global_variables_initializer() 55 | merged = tf.summary.merge_all() 56 | with tf.Session() as sess: 57 | writer = tf.summary.FileWriter("logs/", sess.graph) 58 | sess.run(init) 59 | for i in range(1000): 60 | sess.run(train_step, feed_dict={ 61 | xs: x_data, 62 | ys: y_data, 63 | }) 64 | if i % 50 == 0: 65 | sess.run(loss, feed_dict={xs:x_data, ys: y_data}) 66 | result = sess.run(merged, feed_dict={xs:x_data, ys: y_data}) 67 | writer.add_summary(result, i) 68 | # 在命令行运行 tensorboard --logdir=logs 69 | 70 | -------------------------------------------------------------------------------- /tf_demos/board_simple.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | import numpy as np 3 | import tensorflow as tf 4 | import matplotlib.pyplot as plt 5 | 6 | layer_num = 0 7 | # 添加层 8 | def add_layer(inputs, in_size, out_size, activation=None): 9 | global layer_num 10 | layer_num += 0 11 | with tf.name_scope('layer'): 12 | with tf.name_scope('weights'): 13 | weights = tf.Variable(tf.random_normal(shape=[in_size, out_size]), name='W') 14 | with tf.name_scope('biases'): 15 | biases = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b') 16 | with tf.name_scope('y'): 17 | y = tf.matmul(inputs, weights) + biases 18 | if activation: 19 | outputs = activation(y) 20 | else: 21 | outputs = y 22 | 23 | return outputs 24 | 25 | 26 | 27 | # print(np.random.randn(10)) 28 | 29 | x_data = np.linspace(-1, 1, 300)[:, np.newaxis] 30 | 31 | noise = np.random.normal(0.0, 0.05, x_data.shape) # 添加噪点 32 | y_data = np.square(x_data) - 0.5 + noise 33 | 34 | with tf.name_scope('inputs'): 35 | xs = tf.placeholder(tf.float32, [None, 1], name='x_input') 36 | ys = tf.placeholder(tf.float32, [None, 1], name='y_input') 37 | 38 | 39 | layer1 = add_layer(xs, 1, 10, activation=tf.nn.relu) 40 | 
layer2 = add_layer(layer1, 10, 1, activation=None) 41 | 42 | with tf.name_scope('loss'): 43 | loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - layer2), reduction_indices=[1]), name='loss') 44 | 45 | optimizer = tf.train.GradientDescentOptimizer(0.1) 46 | 47 | with tf.name_scope('train'): 48 | train_step = optimizer.minimize(loss) 49 | 50 | init = tf.global_variables_initializer() 51 | with tf.Session() as sess: 52 | writer = tf.summary.FileWriter("logs/", sess.graph) 53 | sess.run(init) 54 | for i in range(1000): 55 | sess.run(train_step, feed_dict={ 56 | xs: x_data, 57 | ys: y_data, 58 | }) 59 | if i % 50 == 0: 60 | sess.run(loss, feed_dict={xs:x_data, ys: y_data}) 61 | result = sess.run(loss, feed_dict={xs:x_data, ys: y_data}) 62 | #writer.add_summary(result, i) 63 | # 在命令行运行 tensorboard --logdir=logs 64 | 65 | -------------------------------------------------------------------------------- /tf_demos/classification.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | import collections 3 | import tensorflow as tf 4 | from tensorflow.examples.tutorials import mnist 5 | """ 6 | 分类问题 7 | """ 8 | # 添加层 9 | def add_layer(inputs, in_size, out_size, activation=None): 10 | weights = tf.Variable(tf.random_normal(shape=[in_size, out_size])) 11 | biases = tf.Variable(tf.zeros([1, out_size]) + 0.1) 12 | 13 | y = tf.matmul(inputs, weights) + biases 14 | if activation: 15 | outputs = activation(y) 16 | else: 17 | outputs = y 18 | return outputs 19 | 20 | def compute_accuracy(v_xs, v_ys, sess: tf.Session): 21 | global prediction 22 | y_pre = sess.run(prediction, feed_dict={xs: v_xs}) 23 | correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(v_ys, 1)) 24 | result = sess.run(correct_prediction) # [False False False True False] 25 | counter = collections.Counter(result) 26 | return counter[True] / len(result) 27 | #accuarcy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 28 | #result = sess.run(accuarcy, feed_dict={xs: v_xs, ys: v_ys}) 29 | #return result 30 | 31 | 32 | # 数据集 33 | digist = mnist.input_data.read_data_sets('MNIST_data', one_hot=True) 34 | 35 | # 定义输入 36 | xs = tf.placeholder(tf.float32, [None, 784]) 37 | ys = tf.placeholder(tf.float32, [None, 10]) 38 | 39 | # 神经网络层 40 | prediction = add_layer(xs, 784, 10, activation=tf.nn.softmax) 41 | 42 | # 损失函数 43 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1])) 44 | 45 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 46 | 47 | init = tf.global_variables_initializer() 48 | with tf.Session() as sess: 49 | sess.run(init) 50 | # 随机梯度 51 | for i in range(1001): 52 | batch_xs, batch_ys = digist.train.next_batch(100) 53 | sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys}) 54 | if i % 50 == 0: 55 | print(compute_accuracy(batch_xs, batch_ys, sess)) 56 | 57 | 58 | -------------------------------------------------------------------------------- /tf_demos/cnn.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | import collections 3 | import tensorflow as tf 4 | from tensorflow.examples.tutorials import mnist 5 | """ 6 | 分类问题 7 | """ 8 | # 添加层 9 | def add_layer(inputs, in_size, out_size, activation=None): 10 | weights = tf.Variable(tf.random_normal(shape=[in_size, out_size])) 11 | biases = tf.Variable(tf.zeros([1, out_size]) + 0.1) 12 | 13 | y = tf.matmul(inputs, weights) + biases 14 | if activation: 15 | outputs = activation(y) 16 | else: 17 | outputs 
= y 18 | return outputs 19 | 20 | def compute_accuracy(v_xs, v_ys, sess: tf.Session): 21 | global prediction 22 | y_pre = sess.run(prediction, feed_dict={xs: v_xs}) 23 | correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(v_ys, 1)) 24 | result = sess.run(correct_prediction) # [False False False True False] 25 | counter = collections.Counter(result) 26 | return counter[True] / len(result) 27 | #accuarcy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 28 | #result = sess.run(accuarcy, feed_dict={xs: v_xs, ys: v_ys}) 29 | #return result 30 | 31 | 32 | # 数据集 33 | digist = mnist.input_data.read_data_sets('MNIST_data', one_hot=True) 34 | 35 | # 定义输入 36 | xs = tf.placeholder(tf.float32, [None, 784]) 37 | ys = tf.placeholder(tf.float32, [None, 10]) 38 | 39 | def no_cnn(): 40 | # 神经网络层 41 | prediction = add_layer(xs, 784, 10, activation=tf.nn.softmax) 42 | 43 | # 损失函数 44 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1])) 45 | 46 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 47 | 48 | init = tf.global_variables_initializer() 49 | with tf.Session() as sess: 50 | sess.run(init) 51 | # 随机梯度 52 | for i in range(501): 53 | batch_xs, batch_ys = digist.train.next_batch(100) 54 | sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys}) 55 | if i % 50 == 0: 56 | print(compute_accuracy(batch_xs, batch_ys, sess)) 57 | 58 | 59 | def weight_variable(shape): 60 | return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.1)) 61 | 62 | def bias_variable(shape): 63 | return tf.Variable(tf.constant(0.1, shape=shape)) 64 | 65 | def conv2d(x, weights): 66 | """ 67 | 设置卷积神经网络 68 | strides [1, x_movement, y_movement, 1] 69 | strides[0] 必须与 strides[3] 相等 70 | """ 71 | return tf.nn.conv2d(x, weights, strides=[1, 1, 1, 1], padding='SAME') 72 | 73 | def max_pool_2x2(x): 74 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') -------------------------------------------------------------------------------- /tf_demos/demo.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | # 使用 NumPy 生成假数据(phony data), 总共 100 个点. 
6 | x_data = np.float32(np.random.rand(2, 100)) # 随机输入 7 | y_data = np.dot([0.100, 0.200], x_data) + 0.300 8 | 9 | # 构造一个线性模型 10 | # 11 | b = tf.Variable(tf.zeros([1])) 12 | W = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0)) 13 | y = tf.matmul(W, x_data) + b 14 | # 最小化方差 15 | loss = tf.reduce_mean(tf.square(y - y_data)) 16 | optimizer = tf.train.GradientDescentOptimizer(0.5) 17 | train = optimizer.minimize(loss) 18 | # 初始化变量 19 | init = tf.global_variables_initializer() 20 | # 启动图 (graph) 21 | sess = tf.Session() 22 | sess.run(init) 23 | # 拟合平面 24 | for step in range(0, 201): 25 | sess.run(train) 26 | if step % 20 == 0: 27 | print (step, sess.run(W), sess.run(b)) 28 | # 得到最佳拟合结果 W: [[0.100 0.200]], b: [0.300] -------------------------------------------------------------------------------- /tf_demos/input.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import tensorflow as tf 3 | 4 | """ 5 | placeholder 6 | """ 7 | input1 = tf.placeholder(tf.float32) 8 | input2 = tf.placeholder(tf.float32) 9 | 10 | output = tf.multiply(input1, input2) 11 | 12 | with tf.Session() as sess: 13 | # 运算,需要传入值给placeholder 14 | print(sess.run(output, feed_dict={ 15 | input1: [7.0], 16 | input2: [2.0], 17 | })) 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /tf_demos/layer.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | import numpy as np 3 | import tensorflow as tf 4 | import matplotlib.pyplot as plt 5 | 6 | # 添加层 7 | def add_layer(inputs, in_size, out_size, activation=None): 8 | weights = tf.Variable(tf.random_normal(shape=[in_size, out_size])) 9 | biases = tf.Variable(tf.zeros([1, out_size]) + 0.1) 10 | 11 | y = tf.matmul(inputs, weights) + biases 12 | if activation: 13 | outputs = activation(y) 14 | else: 15 | outputs = y 16 | return outputs 17 | 18 | 19 | 20 | # print(np.random.randn(10)) 21 | 22 | x_data = np.linspace(-1, 1, 300)[:, np.newaxis] 23 | 24 | noise = np.random.normal(0.0, 0.05, x_data.shape) # 添加噪点 25 | y_data = np.square(x_data) - 0.5 + noise 26 | 27 | xs = tf.placeholder(tf.float32, [None, 1]) 28 | ys = tf.placeholder(tf.float32, [None, 1]) 29 | 30 | 31 | layer1 = add_layer(xs, 1, 10, activation=tf.nn.relu) 32 | layer2 = add_layer(layer1, 10, 1, activation=None) 33 | 34 | loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - layer2), reduction_indices=[1])) 35 | 36 | optimizer = tf.train.GradientDescentOptimizer(0.1) 37 | train_step = optimizer.minimize(loss) 38 | 39 | init = tf.global_variables_initializer() 40 | 41 | with tf.Session() as sess: 42 | sess.run(init) 43 | for i in range(1000): 44 | sess.run(train_step, feed_dict={ 45 | xs: x_data, 46 | ys: y_data, 47 | }) 48 | if i % 50 == 0: 49 | print(sess.run(loss, feed_dict={xs:x_data, ys: y_data})) 50 | -------------------------------------------------------------------------------- /tf_demos/optimizer.py: -------------------------------------------------------------------------------- 1 | # codign: utf-8 2 | """ 3 | 优化器 4 | """ 5 | 6 | import tensorflow as tf 7 | 8 | # 梯度下降 9 | tf.train.GradientDescentOptimizer 10 | 11 | 12 | -------------------------------------------------------------------------------- /tf_demos/overfitting.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | 过拟合问题, 使用 dropout 解决 4 | """ 5 | import tensorflow as tf 6 | from sklearn import datasets 7 | from sklearn.model_selection 
import train_test_split 8 | from sklearn.preprocessing import LabelBinarizer 9 | 10 | layer_num = 0 11 | # 添加层 12 | def add_layer(inputs, in_size, out_size, activation=None): 13 | global layer_num, keep_prob 14 | layer_num += 0 15 | weights = tf.Variable(tf.random_normal(shape=[in_size, out_size])) 16 | biases = tf.Variable(tf.zeros([1, out_size]) + 0.1) 17 | 18 | y = tf.matmul(inputs, weights) + biases 19 | y = tf.nn.dropout(y, keep_prob) # 使用 dropout 防止过拟合 20 | if activation: 21 | outputs = activation(y) 22 | else: 23 | outputs = y 24 | tf.summary.histogram("layer%s-outputs" % layer_num, outputs) 25 | return outputs 26 | 27 | 28 | digits = datasets.load_digits() 29 | X = digits.data 30 | 31 | y = digits.target 32 | y = LabelBinarizer().fit_transform(y) 33 | 34 | # 拆分为训练集和测试集 35 | X_train, X_test, y_train, y_test = train_test_split(X, y) 36 | 37 | # 定义输入 38 | keep_prob = tf.placeholder(tf.float32) 39 | xs = tf.placeholder(tf.float32, [None, 64]) # 8x8 40 | ys = tf.placeholder(tf.float32, [None, 10]) 41 | 42 | # 设计神经网络层 43 | l1 = add_layer(xs, 64, 50, activation=tf.nn.tanh) 44 | prediction = add_layer(l1, 50, 10, activation=tf.nn.softmax) 45 | 46 | # 损失函数和优化器 47 | cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), 48 | reduction_indices=[1])) # loss 49 | tf.summary.scalar('loss', cross_entropy) 50 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 51 | 52 | merged = tf.summary.merge_all() 53 | 54 | with tf.Session() as sess: 55 | sess.run(tf.global_variables_initializer()) 56 | train_writer = tf.summary.FileWriter("logs/train", sess.graph) 57 | test_writer = tf.summary.FileWriter("logs/test", sess.graph) 58 | 59 | for i in range(501): 60 | sess.run(train_step, feed_dict={xs: X_train, ys: y_train, keep_prob: 0.5}) 61 | if i % 50 == 0: 62 | train_result = sess.run(merged, feed_dict={xs: X_train, ys: y_train, keep_prob: 1}) 63 | test_result = sess.run(merged, feed_dict={xs: X_test, ys: y_test, keep_prob: 1}) 64 | train_writer.add_summary(train_result, i) 65 | test_writer.add_summary(test_result, i) 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /tf_demos/simple.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | x_data = np.random.rand(100) 6 | y_data = x_data * 0.1 + 0.3 7 | 8 | weights = tf.Variable(tf.random_uniform([1], -1.0, 1.0)) 9 | biases = tf.Variable(tf.zeros([1])) 10 | 11 | y = weights * x_data + biases 12 | 13 | # 损失函数 14 | loss = tf.reduce_mean(tf.square(y - y_data)) 15 | 16 | # 创建优化器 使用梯度下降 17 | optimizer = tf.train.GradientDescentOptimizer(0.5) # 参数表示学习率 18 | train = optimizer.minimize(loss) 19 | 20 | init = tf.global_variables_initializer() 21 | with tf.Session() as sess: 22 | sess.run(init) # 这句很容易被忽略 23 | for step in range(0, 201): 24 | sess.run(train) 25 | if step % 20 == 0: 26 | print(step, sess.run(weights), sess.run(biases)) -------------------------------------------------------------------------------- /tf_demos/variable.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import tensorflow as tf 3 | """ 4 | 在 tf 的结构中,只有通过 Variable 函数声明的,才能算是tf中的变量 5 | """ 6 | # 设置变量 7 | state = tf.Variable(0, name='counter') 8 | 9 | # 设置常量 10 | one = tf.constant(1) 11 | 12 | new_value = tf.add(state, one) # 设置运算 13 | update = tf.assign(state, new_value) # 变量赋值 14 | 15 | init = tf.global_variables_initializer() 16 | 
with tf.Session() as sess: 17 | sess.run(init) 18 | for _ in range(3): 19 | sess.run(update) 20 | print(sess.run(state)) 21 | 22 | 23 | -------------------------------------------------------------------------------- /tf_demos/viewable.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | import numpy as np 3 | import tensorflow as tf 4 | import matplotlib.pyplot as plt 5 | 6 | # 添加层 7 | def add_layer(inputs, in_size, out_size, activation=None): 8 | weights = tf.Variable(tf.random_normal(shape=[in_size, out_size])) 9 | biases = tf.Variable(tf.zeros([1, out_size]) + 0.1) 10 | 11 | y = tf.matmul(inputs, weights) + biases 12 | if activation: 13 | outputs = activation(y) 14 | else: 15 | outputs = y 16 | return outputs 17 | 18 | 19 | 20 | # print(np.random.randn(10)) 21 | 22 | x_data = np.linspace(-1, 1, 300)[:, np.newaxis] 23 | 24 | noise = np.random.normal(0.0, 0.05, x_data.shape) # 添加噪点 25 | y_data = np.square(x_data) - 0.5 + noise 26 | 27 | xs = tf.placeholder(tf.float32, [None, 1]) 28 | ys = tf.placeholder(tf.float32, [None, 1]) 29 | 30 | 31 | layer1 = add_layer(xs, 1, 10, activation=tf.nn.relu) 32 | layer2 = add_layer(layer1, 10, 1, activation=None) 33 | 34 | loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - layer2), reduction_indices=[1])) 35 | 36 | optimizer = tf.train.GradientDescentOptimizer(0.1) 37 | train_step = optimizer.minimize(loss) 38 | 39 | init = tf.global_variables_initializer() 40 | lines = None 41 | # 可视化 42 | fig = plt.figure() 43 | ax = fig.add_subplot(1, 1, 1) 44 | ax.scatter(x_data, y_data) 45 | plt.ion() 46 | plt.show() 47 | with tf.Session() as sess: 48 | sess.run(init) 49 | for i in range(1000): 50 | sess.run(train_step, feed_dict={ 51 | xs: x_data, 52 | ys: y_data, 53 | }) 54 | if i % 50 == 0: 55 | # print(sess.run(loss, feed_dict={xs:x_data, ys: y_data})) 56 | if lines: 57 | ax.lines.remove(lines[0]) 58 | prediction = sess.run(layer2, feed_dict={xs: x_data}) 59 | lines = ax.plot(x_data, prediction, 'r-', lw=3) 60 | #ax.lines.remove(lines[0]) 61 | plt.pause(0.5) 62 | --------------------------------------------------------------------------------
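Note: tf_demos/cnn.py defines the convolution helpers (weight_variable, bias_variable, conv2d, max_pool_2x2) but never assembles them into a model. Below is a minimal sketch of how those helpers could be wired into a two-layer convolutional network for the same MNIST data, written in the same TF1 style as the rest of tf_demos. The filter counts, fully connected width and the 1e-4 Adam learning rate are illustrative assumptions, not values from the original file.

# coding: utf-8
# Sketch only: one possible way to assemble the helpers from tf_demos/cnn.py.
import tensorflow as tf
from tensorflow.examples.tutorials import mnist

def weight_variable(shape):
    return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.1))

def bias_variable(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, weights):
    # stride 1 in both directions, zero-padded so the spatial size is preserved
    return tf.nn.conv2d(x, weights, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # 2x2 max pooling halves the spatial size
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

digist = mnist.input_data.read_data_sets('MNIST_data', one_hot=True)

xs = tf.placeholder(tf.float32, [None, 784])
ys = tf.placeholder(tf.float32, [None, 10])
x_image = tf.reshape(xs, [-1, 28, 28, 1])  # 28x28 grayscale images

# conv layer 1: 5x5 kernels, 1 -> 32 feature maps, pooled to 14x14
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_pool1 = max_pool_2x2(tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1))

# conv layer 2: 5x5 kernels, 32 -> 64 feature maps, pooled to 7x7
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_pool2 = max_pool_2x2(tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2))

# fully connected layer + softmax output
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_flat, W_fc1) + b_fc1)

W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
prediction = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2)

# same cross-entropy loss as no_cnn() in cnn.py, optimized with Adam
cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.log(prediction), reduction_indices=[1]))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(501):
        batch_xs, batch_ys = digist.train.next_batch(100)
        sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys})

Accuracy on a held-out batch could be checked with the same compute_accuracy pattern already used in cnn.py and classification.py.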