├── First lesson
    ├── homework0.ipynb
    ├── 【姚超】工具下载&作业说明.pdf
    └── 【王明哲】青年AI自强项目第一讲-AI鸟瞰.pptx
├── Homework-01
    ├── daizhen1.png
    ├── daizhen2.png
    ├── dongxu1.png
    ├── dongxu2.png
    ├── fanshengxu1.png.png
    ├── linxinhui1.png
    ├── linxinhui2.png
    ├── liuyi1.png
    ├── liuyi2.png
    ├── lixiaoyu1.png
    ├── lixiaoyu2.png
    ├── mashiyao1.png
    ├── mashiyao2.png
    ├── niexin_01.png
    ├── niexin_02.png
    ├── pengziye.png
    ├── pengziye1.png.png
    ├── weizhuoqi1.png
    ├── weizhuoqi2.png
    ├── yaoyu1.jpg
    ├── yaoyu2.jpg
    ├── yuanmanxue1.png
    └── yuanmanxue2.png
├── Lesson-02
    ├── data.xls
    ├── homework02.py
    ├── 【王明哲】机器学习入门公式汇总.pdf
    └── 【王明哲】青年AI自强计划第二讲——机器学习入门.pdf
├── Lesson-03
    ├── 青年自强计划第三章-神经网络.pdf
    └── 青年自强计划第三章-神经网络.pptx
├── README.md
├── fitbit-analyzer-master.zip
├── homework-02
    ├── dongxu.ipynb
    ├── dongxu1.png
    ├── dongxu2.png
    ├── fengshuai.ipynb
    ├── fengshuai1.png
    ├── fengshuai2.png
    ├── fengshuai3.png
    ├── guanluotong(2)1.png
    ├── guanluotong(2)2.png
    ├── linxinhui1.png.png
    ├── linxinhui2.png.png
    ├── linxinhui3.png.png
    ├── lixiaoyu1.png
    ├── lixiaoyu2.png
    ├── lixiaoyu3.png
    ├── mashiyao 1.png
    ├── mashiyao 2.png
    ├── mashiyao 3.png
    ├── niexin_01.png
    ├── niexin_02.png
    ├── pengziye.ipynb
    ├── pengziye3.jpg
    ├── weizhuoqi1.png
    ├── weizhuoqi2.png
    ├── weizhuoqi3.png
    ├── 刘毅-homework-02.pdf
    ├── 姚宇-homework-02.pdf
    ├── 戴振-homework-02.pdf
    ├── 李校宇-homework-02.pdf
    ├── 袁曼雪-homework-02.pdf
    └── 说明.md
├── homework-03
    ├── Fitbit_Data_Analysis_1.ipynb
    ├── MINST
    │   ├── t10k-images-idx3-ubyte.gz
    │   ├── t10k-images.idx3-ubyte
    │   ├── t10k-labels-idx1-ubyte.gz
    │   ├── t10k-labels.idx1-ubyte
    │   ├── train-images-idx3-ubyte.gz
    │   ├── train-labels-idx1-ubyte.gz
    │   └── train-labels.idx1-ubyte
    ├── ex3-part1
    │   └── tensorflow-MNIST-logistRes.py
    ├── ex3-part2
    │   ├── ann_classification_two_hidden_layers.py
    │   └── data.xls
    ├── ex3-part3
    │   └── tensorflow-MNIST-nn.py
    ├── 作业说明.txt
    └── 马士尧 homework 03.pdf
├── homework-04
    ├── requirement.md
    ├── 余欣灿.ipynb
    ├── 刘广升.ipynb
    ├── 史一阳.ipynb
    ├── 吴洁茹.ipynb
    ├── 吴玉隆.ipynb
    ├── 周小梅.ipynb
    ├── 姚宇.ipynb
    ├── 张博.ipynb
    ├── 张晏铭.ipynb
    ├── 张泷玲.ipynb
    ├── 戴振.ipynb
    ├── 房增林.ipynb
    ├── 曲礼阳.ipynb
    ├── 机器学习第一课.pptx
    ├── 李校宇.ipynb
    ├── 胡明玥.ipynb
    ├── 苏峥.ipynb
    ├── 范升旭.ipynb
    ├── 谢易凡.ipynb
    ├── 闫泳寰.ipynb
    ├── 阳治玖.ipynb
    ├── 陈宝旭.ipynb
    ├── 陈瑞.ipynb
    ├── 韩依格.ipynb
    ├── 马士尧.ipynb
    ├── 高一淇.ipynb
    ├── 魏卓其.ipynb
    └── 黄禹霏.ipynb
├── homework-05
    ├── fangzenglin.ipynb
    ├── lixiaoyu.py
    └── zhangbo.ipynb
└── homework-06
    └── requirement.md


/First lesson/homework0.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## 示例脚本"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "本脚本目的在于验证工作环境是否安装配置成功，在完成PPT展示的安装教程之后，需要在命令行窗口运行如下命令，使得本脚本工作。"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "pip install pygame"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 1,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "### 黑客帝国代码雨"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 3,
 36 |    "metadata": {},
 37 |    "outputs": [
 38 |     {
 39 |      "name": "stdout",
 40 |      "output_type": "stream",
 41 |      "text": [
 42 |       "pygame 1.9.4\n",
 43 |       "Hello from the pygame community. https://www.pygame.org/contribute.html\n"
 44 |      ]
 45 |     },
 46 |     {
 47 |      "ename": "KeyboardInterrupt",
 48 |      "evalue": "",
 49 |      "output_type": "error",
 50 |      "traceback": [
 51 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
 52 |       "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
 53 |       "\u001b[1;32m<ipython-input-3-6379d4a9c84f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     35\u001b[0m         \u001b[0mx\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     36\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0mc\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtext\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 37\u001b[1;33m             \u001b[0mscreen\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mblit\u001b[0m\u001b[1;33m(\u001b[0m \u001b[0mfont\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m255\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     38\u001b[0m             \u001b[0mx\u001b[0m\u001b[1;33m+=\u001b[0m\u001b[1;36m20\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     39\u001b[0m         \u001b[0my\u001b[0m\u001b[1;33m-=\u001b[0m\u001b[0mfont_height\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 54 |       "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
 55 |      ]
 56 |     }
 57 |    ],
 58 |    "source": [
 59 |     "import pygame\n",
 60 |     "import sys\n",
 61 |     "from pygame.locals import *\n",
 62 |     "from sys import exit\n",
 63 |     "import random\n",
 64 |     " \n",
 65 |     "pygame.init()\n",
 66 |     "SCREEN_SIZE = (960, 640)\n",
 67 |     "screen = pygame.display.set_mode(SCREEN_SIZE, 0, 32)\n",
 68 |     " \n",
 69 |     "font = pygame.font.SysFont(\"arial\", 16);\n",
 70 |     "font_height = font.get_linesize()\n",
 71 |     "#event_text = []\n",
 72 |     "texts=[['0']*80]\n",
 73 |     " \n",
 74 |     "while True:\n",
 75 |     "    event= pygame.event.poll()\n",
 76 |     "    i=0\n",
 77 |     "    t=80\n",
 78 |     "    tx=[]\n",
 79 |     "    while i<t:\n",
 80 |     "        tx.append(chr(random.randint(33, 126)))\n",
 81 |     "        i+=1\n",
 82 |     "    texts.append(tx)\n",
 83 |     "    texts=texts[-SCREEN_SIZE[1]//font_height:]\n",
 84 |     "    #这个切片操作保证了event_text里面只保留一个屏幕的文字\n",
 85 |     " \n",
 86 |     "    if event.type == QUIT:\n",
 87 |     "        sys.exit()\n",
 88 |     " \n",
 89 |     "    screen.fill((0, 0, 0))#屏幕填充黑色\n",
 90 |     "    y = SCREEN_SIZE[1]-font_height\n",
 91 |     "    #找一个合适的起笔位置，最下面开始但是要留一行的空\n",
 92 |     "    for text in texts:\n",
 93 |     "        x=0\n",
 94 |     "        for c in text:\n",
 95 |     "            screen.blit( font.render(c, True, (0, 255, 0)), (x, y) )\n",
 96 |     "            x+=20       \n",
 97 |     "        y-=font_height\n",
 98 |     "        #把笔提一行\n",
 99 |     "        i=i+1\n",
100 |     " \n",
101 |     "    pygame.display.update()\n"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "markdown",
106 |    "metadata": {},
107 |    "source": [
108 |     "### 黑客帝国代码雨2"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": 1,
114 |    "metadata": {},
115 |    "outputs": [
116 |     {
117 |      "name": "stdout",
118 |      "output_type": "stream",
119 |      "text": [
120 |       "pygame 1.9.4\n",
121 |       "Hello from the pygame community. https://www.pygame.org/contribute.html\n"
122 |      ]
123 |     },
124 |     {
125 |      "ename": "error",
126 |      "evalue": "display Surface quit",
127 |      "output_type": "error",
128 |      "traceback": [
129 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
130 |       "\u001b[1;31merror\u001b[0m                                     Traceback (most recent call last)",
131 |       "\u001b[1;32m<ipython-input-1-8af801fecd57>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     64\u001b[0m             \u001b[0mexit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     65\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 66\u001b[1;33m     \u001b[0mscreen\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfill\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     67\u001b[0m     \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mgroup_count\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     68\u001b[0m         \u001b[0mgroup\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mWord\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mi\u001b[0m \u001b[1;33m*\u001b[0m \u001b[0mFONT_SIZE\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mFONT_SIZE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
132 |       "\u001b[1;31merror\u001b[0m: display Surface quit"
133 |      ]
134 |     }
135 |    ],
136 |    "source": [
137 |     "# imports\n",
138 |     "import pygame\n",
139 |     "import random\n",
140 |     "from pygame.locals import *\n",
141 |     "from random import randint\n",
142 |     "\n",
143 |     "\n",
144 |     "# define\n",
145 |     "SCREEN_WIDTH  = 900\n",
146 |     "SCREEN_HEIGHT = 600\n",
147 |     "LOW_SPEED  = 10\n",
148 |     "HIGH_SPEED = 20\n",
149 |     "LOW_SIZE  = 5\n",
150 |     "HIGH_SIZE = 50\n",
151 |     "FONT_SIZE = 10\n",
152 |     "FONT_NAME = \"myfont.ttf\"\n",
153 |     "FREQUENCE = 10\n",
154 |     "times = 0\n",
155 |     "\n",
156 |     "# def func\n",
157 |     "def randomcolor() :\n",
158 |     "    return (randint(0,255),randint(0,255),randint(0,255))\n",
159 |     "def randomspeed() :\n",
160 |     "    return randint(LOW_SPEED,HIGH_SPEED)\n",
161 |     "def randomposition() :\n",
162 |     "    return (randint(0,SCREEN_WIDTH),randint(0,SCREEN_HEIGHT))\n",
163 |     "def randomsize() :\n",
164 |     "    return randint(LOW_SIZE,HIGH_SIZE)\n",
165 |     "def randomoname() :\n",
166 |     "    return randint(0,100000)\n",
167 |     "def randomvalue() :\n",
168 |     "    return randint(0,100)                               #this is your own display number range\n",
169 |     "\n",
170 |     "# class of sprite\n",
171 |     "class Word(pygame.sprite.Sprite) :\n",
172 |     "    def __init__(self,bornposition) :\n",
173 |     "        pygame.sprite.Sprite.__init__(self)\n",
174 |     "        self.value = chr(random.randint(33, 126))\n",
175 |     "        self.font = pygame.font.Font(FONT_NAME,FONT_SIZE)\n",
176 |     "        self.image = self.font.render(str(self.value),True,randomcolor())\n",
177 |     "        self.speed = randomspeed()\n",
178 |     "        self.rect = self.image.get_rect()\n",
179 |     "        self.rect.topleft = bornposition\n",
180 |     "\n",
181 |     "    def update(self) :\n",
182 |     "        self.rect = self.rect.move(0,self.speed)\n",
183 |     "        if self.rect.top > SCREEN_HEIGHT :\n",
184 |     "            self.kill()\n",
185 |     "\n",
186 |     "# init the available modules\n",
187 |     "pygame.init()\n",
188 |     "screen = pygame.display.set_mode((SCREEN_WIDTH,SCREEN_HEIGHT))\n",
189 |     "pygame.display.set_caption(\"ViatorSun HACKER EMPIRE CodeRain\")\n",
190 |     "clock = pygame.time.Clock()\n",
191 |     "group = pygame.sprite.Group()\n",
192 |     "group_count = int(SCREEN_WIDTH / FONT_SIZE)\n",
193 |     "\n",
194 |     "# mainloop\n",
195 |     "while True :\n",
196 |     "    time = clock.tick(FREQUENCE)\n",
197 |     "    for event in pygame.event.get() :\n",
198 |     "        if event.type == QUIT :\n",
199 |     "            pygame.quit()\n",
200 |     "            exit()\n",
201 |     "\n",
202 |     "    screen.fill((0,0,0))\n",
203 |     "    for i in range(0,group_count) :\n",
204 |     "        group.add(Word((i * FONT_SIZE,-FONT_SIZE)))\n",
205 |     "\n",
206 |     "    group.update()\n",
207 |     "    group.draw(screen)\n",
208 |     "    pygame.display.update()\n"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "markdown",
213 |    "metadata": {},
214 |    "source": [
215 |     "<font color=gray> 本脚本程序来源于CSDN，教学使用，非盈利目的 </font>"
216 |    ]
217 |   }
218 |  ],
219 |  "metadata": {
220 |   "kernelspec": {
221 |    "display_name": "Python 3",
222 |    "language": "python",
223 |    "name": "python3"
224 |   },
225 |   "language_info": {
226 |    "codemirror_mode": {
227 |     "name": "ipython",
228 |     "version": 3
229 |    },
230 |    "file_extension": ".py",
231 |    "mimetype": "text/x-python",
232 |    "name": "python",
233 |    "nbconvert_exporter": "python",
234 |    "pygments_lexer": "ipython3",
235 |    "version": "3.5.6"
236 |   }
237 |  },
238 |  "nbformat": 4,
239 |  "nbformat_minor": 2
240 | }
241 | 


--------------------------------------------------------------------------------
/First lesson/【姚超】工具下载&作业说明.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/First lesson/【姚超】工具下载&作业说明.pdf


--------------------------------------------------------------------------------
/First lesson/【王明哲】青年AI自强项目第一讲-AI鸟瞰.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/First lesson/【王明哲】青年AI自强项目第一讲-AI鸟瞰.pptx


--------------------------------------------------------------------------------
/Homework-01/daizhen1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/daizhen1.png


--------------------------------------------------------------------------------
/Homework-01/daizhen2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/daizhen2.png


--------------------------------------------------------------------------------
/Homework-01/dongxu1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/dongxu1.png


--------------------------------------------------------------------------------
/Homework-01/dongxu2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/dongxu2.png


--------------------------------------------------------------------------------
/Homework-01/fanshengxu1.png.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/fanshengxu1.png.png


--------------------------------------------------------------------------------
/Homework-01/linxinhui1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/linxinhui1.png


--------------------------------------------------------------------------------
/Homework-01/linxinhui2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/linxinhui2.png


--------------------------------------------------------------------------------
/Homework-01/liuyi1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/liuyi1.png


--------------------------------------------------------------------------------
/Homework-01/liuyi2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/liuyi2.png


--------------------------------------------------------------------------------
/Homework-01/lixiaoyu1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/lixiaoyu1.png


--------------------------------------------------------------------------------
/Homework-01/lixiaoyu2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/lixiaoyu2.png


--------------------------------------------------------------------------------
/Homework-01/mashiyao1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/mashiyao1.png


--------------------------------------------------------------------------------
/Homework-01/mashiyao2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/mashiyao2.png


--------------------------------------------------------------------------------
/Homework-01/niexin_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/niexin_01.png


--------------------------------------------------------------------------------
/Homework-01/niexin_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/niexin_02.png


--------------------------------------------------------------------------------
/Homework-01/pengziye.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/pengziye.png


--------------------------------------------------------------------------------
/Homework-01/pengziye1.png.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/pengziye1.png.png


--------------------------------------------------------------------------------
/Homework-01/weizhuoqi1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/weizhuoqi1.png


--------------------------------------------------------------------------------
/Homework-01/weizhuoqi2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/weizhuoqi2.png


--------------------------------------------------------------------------------
/Homework-01/yaoyu1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/yaoyu1.jpg


--------------------------------------------------------------------------------
/Homework-01/yaoyu2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/yaoyu2.jpg


--------------------------------------------------------------------------------
/Homework-01/yuanmanxue1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/yuanmanxue1.png


--------------------------------------------------------------------------------
/Homework-01/yuanmanxue2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/yuanmanxue2.png


--------------------------------------------------------------------------------
/Lesson-02/data.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Lesson-02/data.xls


--------------------------------------------------------------------------------
/Lesson-02/【王明哲】机器学习入门公式汇总.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Lesson-02/【王明哲】机器学习入门公式汇总.pdf


--------------------------------------------------------------------------------
/Lesson-02/【王明哲】青年AI自强计划第二讲——机器学习入门.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Lesson-02/【王明哲】青年AI自强计划第二讲——机器学习入门.pdf


--------------------------------------------------------------------------------
/Lesson-03/青年自强计划第三章-神经网络.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Lesson-03/青年自强计划第三章-神经网络.pdf


--------------------------------------------------------------------------------
/Lesson-03/青年自强计划第三章-神经网络.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Lesson-03/青年自强计划第三章-神经网络.pptx


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AI-Machine-learning-via-HeilongjiangUniversity
2 | ## http://www.xuetangx.com/courses/course-v1:Tsinghua+20181011X+2018_T2/about 希望大家关注此课程！
3 | 


--------------------------------------------------------------------------------
/fitbit-analyzer-master.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/fitbit-analyzer-master.zip


--------------------------------------------------------------------------------
/homework-02/dongxu1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/dongxu1.png


--------------------------------------------------------------------------------
/homework-02/dongxu2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/dongxu2.png


--------------------------------------------------------------------------------
/homework-02/fengshuai1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/fengshuai1.png


--------------------------------------------------------------------------------
/homework-02/fengshuai2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/fengshuai2.png


--------------------------------------------------------------------------------
/homework-02/fengshuai3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/fengshuai3.png


--------------------------------------------------------------------------------
/homework-02/guanluotong(2)1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/guanluotong(2)1.png


--------------------------------------------------------------------------------
/homework-02/guanluotong(2)2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/guanluotong(2)2.png


--------------------------------------------------------------------------------
/homework-02/linxinhui1.png.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/linxinhui1.png.png


--------------------------------------------------------------------------------
/homework-02/linxinhui2.png.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/linxinhui2.png.png


--------------------------------------------------------------------------------
/homework-02/linxinhui3.png.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/linxinhui3.png.png


--------------------------------------------------------------------------------
/homework-02/lixiaoyu1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/lixiaoyu1.png


--------------------------------------------------------------------------------
/homework-02/lixiaoyu2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/lixiaoyu2.png


--------------------------------------------------------------------------------
/homework-02/lixiaoyu3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/lixiaoyu3.png


--------------------------------------------------------------------------------
/homework-02/mashiyao 1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/mashiyao 1.png


--------------------------------------------------------------------------------
/homework-02/mashiyao 2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/mashiyao 2.png


--------------------------------------------------------------------------------
/homework-02/mashiyao 3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/mashiyao 3.png


--------------------------------------------------------------------------------
/homework-02/niexin_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/niexin_01.png


--------------------------------------------------------------------------------
/homework-02/niexin_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/niexin_02.png


--------------------------------------------------------------------------------
/homework-02/pengziye3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/pengziye3.jpg


--------------------------------------------------------------------------------
/homework-02/weizhuoqi1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/weizhuoqi1.png


--------------------------------------------------------------------------------
/homework-02/weizhuoqi2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/weizhuoqi2.png


--------------------------------------------------------------------------------
/homework-02/weizhuoqi3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/weizhuoqi3.png


--------------------------------------------------------------------------------
/homework-02/刘毅-homework-02.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/刘毅-homework-02.pdf


--------------------------------------------------------------------------------
/homework-02/姚宇-homework-02.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/姚宇-homework-02.pdf


--------------------------------------------------------------------------------
/homework-02/戴振-homework-02.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/戴振-homework-02.pdf


--------------------------------------------------------------------------------
/homework-02/李校宇-homework-02.pdf:
--------------------------------------------------------------------------------
  1 | # -*- coding: UTF-8 -*-
  2 |  #numpy库的使用，请参考：https://www.jianshu.com/p/a260a8c43e44
  3 |  #matplotlib库请参考：https://matplotlib.org/api/index.html
  4 | 
  5 | import xlrd     # 导入必备的 xlrd 库，目的是为了调用 xlrd.open_workbook 函数打开 excel 文件，读取数据
  6 | import matplotlib.pyplot as plt     # 导入必备的 matplotlib 库，并且将其中的 matplotlib.pyplot 重名为 plt, 目的是为了后续的绘图需要，也是为了编程方便，简写为 plt
  7 | import numpy as np      # 导入必备的 numpy 库，并且将其重名为 np, 目的是为了后续的矩阵的定义、运算、操作等，也是为了编程方便，简写为 np
  8 | 
  9 | 
 10 | # 定义函数loadData函数，输入参数是 filename 指代文件名，返回数据data，目的是从.xls文件中加载数据，并存储为numpy中的array格式
 11 | def loadData(filename):  #filename 其实就是个形参
 12 |     workbook = xlrd.open_workbook(filename)   # 通过调用 xlrd.open_workbook 函数打开 excel 文件，读取数据，并返回给 workbook 变量
 13 |     boyinfo = workbook.sheet_by_index(0) # 通过使用属性 sheet_by_index 得到  excel 文件 中的工作簿，其中 sheet_by_index(0) 表示是第一个工作簿，在 python 中，下标从 0 开始
 14 |     col_num = boyinfo.ncols # 通过使用属性 ncols 得到 excel 文件 中第一个工作簿的 列数，并赋值给 col_num
 15 |     row_num = boyinfo.nrows   # 通过使用属性 nrows 得到 excel 文件 中第一个工作簿的 行数，并赋值给 row_num
 16 |     col0 = boyinfo.col_values(0)[1:] # 通过使用属性 col_values(0)[1:] 得到 excel 文件 中第一列数据中，从第2行到最后一行的所有数据，并赋值给 col0
 17 |     data = np.array(col0)   # 通过使用 np.array 函数， 将 col0 转换成数组，并赋值给 data
 18 |     if col_num == 1:   # 条件判断语句： 如果列数 col_num 为1， 只有一列，那么直接返回数据 data
 19 |         return data     # 返回data
 20 |     else:                                           # 否则，如果不止一列数据，需要遍历所有列的数据
 21 |         for i in range(col_num-1):                      # 通过使用for循环达到遍历的目的
 22 |             coltemp = boyinfo.col_values(i+1)[1:]           # 从第二行开始，表头不算，遍历从 第二列 开始到最后一列的数据
 23 |             data = np.c_[data, coltemp]   # 通过使用 np.c_ 函数将 第一列的数据 和后面 所有列的数据组合起来，并赋值给 data
 24 |     return data                                     # 返回data
 25 | 
 26 | # 定义一个 plotData 函数，输入参数是 数据 X 和标志 flag: y，返回作图操作 plt, p1, p2 ， 目的是为了画图
 27 | def plotData(X, y):
 28 |     pos = np.where(y==1)  # 通过使用 np.where 函数查找所有满足条件的数据，查找所有满足标志 y == 1 的数据，并赋值给 pos
 29 |     neg = np.where(y==0)  # 通过使用 np.where 函数查找所有满足条件的数据，查找所有满足标志 y == 0 的数据，并赋值给 neg
 30 |     # 通过使用 plt.plot 函数作图，对所有满足标志 y == 1 的数据作图，点采用 s (正方形)，代表 square, 点的大小为 7 单位，颜色为 红色 red
 31 |     p1 = plt.plot(X[pos, 0], X[pos, 1], marker='s', markersize=7, color='red')[0]
 32 |     # 请补全 通过使用 plt.plot 函数作图，对所有满足标志 y == 0 的数据作图，点采用 o (圆形)，代表 circle, 点的大小为 7 单位，颜色为 绿色 green 
 33 |     p2 =  plt.plot(X[neg,0],X[neg,1],marker='o',markersize=7,color='green')[0]  #请补全 
 34 | 
 35 |     return p1, p2                                   # 返回作图操作plt, p1, p2
 36 | 
 37 | # normalization： 定义一个 normalization 函数，输入参数是原始数据 X ，返回归一化后的数据 X_norm ， 目的是为了数据预处理，得到归一化后的数据 X_norm
 38 | def normalization(X):
 39 |    Xmin =np.min(X,axis=0)     # 请补全 通过使用 np.min 函数，计算原始数据沿着 axis=0 方向的最小值，即：求每一列的最小值，并赋值给 Xmin
 40 |    Xmax = np.max(X,axis=0)    # 请补全 通过使用 np.max 函数，计算原始数据沿着 axis=0 方向的最大值，即：求每一列的最大值，并赋值给 Xmax
 41 |    Xmu  = np.mean ( X,axis=0)   # 请补全 通过使用 np.mean 函数，计算原始数据均值，并赋值给 Xmu                 
 42 |    X_norm = (X-Xmu)/(Xmax-Xmin)    # 请补全 计算归一化后的数据，归一化公式为：(X-Xmu)/(Xmax-Xmin)，归一化后数据范围为 [-1,1]
 43 |    return X_norm  # 返回数据预处理，归一化后的数据 X_norm
 44 | 
 45 | # plot decision boundary：定义一个 plotDecisionBoundaryn 函数，输入参数是 训练集 trainX, 训练集 trainY, 直线斜率截距相关参数 w, 迭代次数 iter_num ，目的是为了画出决策的判断边界
 46 | def plotDecisionBoundary(trainX, trainY, w, iter_num = 0):
 47 |     # prepare data
 48 |     xcord1 = [];ycord1 = [];xcord2 = [];ycord2 = [] # 准备数据，定义四个空的列表，并分别赋值给 xcord1、ycord1、xcord2、ycord2，进行初始化
 49 |     m, n = np.shape(trainX)  # 通过使用 np.shape 函数，得到训练集 trainX 的形状大小，其中，m 为训练集 trainX 的行数，n 为训练集 trainX 的列数
 50 |     for i in range(m):     # 通过使用 for 循环语句，遍历训练集 trainX 所有的行，其中，i 可以取得值分别是 0，1，2，...，m-1，总共是 m 行
 51 |         if trainY[i] == 1:   # 通过使用 if 条件判断语句，如果训练集 trainY（标志）中的元素为 1，那么将训练集 trainX中的 trainX[i,1] 和 trainX[i,2] 分别添加到 xcord1 和 ycord1 列表中
 52 |             xcord1.append(trainX[i,1])    # 通过 append 的方法，将训练集 trainX中 的 trainX[i,1] 添加到 xcord1 列表中，保存的是 pos 的横坐标, 代表 positive 的数据
 53 |             ycord1.append(trainX[i,2])    # 通过 append 的方法，将训练集 trainX中 的 trainX[i,2] 添加到 ycord1 列表中，保存的是 pos 的纵坐标, 代表 positive 的数据
 54 |         else:                                       # 否则，如果训练集 trainY（标志）中的元素不为 1，那么将训练集 trainX中的 trainX[i,1] 和 trainX[i,2] 分别添加到 xcord2 和 ycord2 列表中     
 55 |             xcord2.append(trainX[i,1])   # 通过 append 的方法，将训练集 trainX中 的 trainX[i,1] 添加到 xcord2 列表中，保存的是 neg 的横坐标, 代表 negative 的数据
 56 |             ycord2.append(trainX[i,2])    # 通过 append 的方法，将训练集 trainX中 的 trainX[i,2] 添加到 ycord2 列表中，保存的是 neg 的纵坐标, 代表 negative 的数据   
 57 |     x_min = min(trainX[:,1])  # 通过使用 min 函数，计算出 trainX[:,1] ，即 trainX 第2列的最小值，并赋值给 x_min   
 58 |     y_min = min(trainX[:,2])                        # 通过使用 min 函数，计算出 trainX[:,2] ，即 trainX 第3列的最小值，并赋值给 y_min  
 59 |     x_max = max(trainX[:,1])                        # 通过使用 max 函数，计算出 trainX[:,1] ，即 trainX 第2列的最大值，并赋值给 x_max  
 60 |     y_max = max(trainX[:,2])                        # 通过使用 max 函数，计算出 trainX[:,2] ，即 trainX 第3列的最大值，并赋值给 y_max 
 61 | 
 62 |     # plot scatter  & legend
 63 |     fig = plt.figure(1)                              # 通过使用 plt.figure 函数，开始创建一个图形窗口，并赋值给 fig
 64 |     # 通过使用 plt.scatter 函数，绘制散点图，横坐标为 xcord1, 纵坐标为 ycord1，标记大小为30，颜色为红色，形状样式为 s (正方形)，代表 square, 图例标签为 'I like you'
 65 |     plt.scatter(xcord1, ycord1, s=30, c='red', marker='s', label='I like you')
 66 |     # 请补全 通过使用 plt.scatter 函数，绘制散点图，横坐标为 xcord2, 纵坐标为 ycord2，标记大小为30，颜色为绿色，形状样式为 o (圆形)，代表 circle, 图例标签为 'I don't like you'
 67 |     plt.scatter(xcord2, ycord2, s=30, c='green', marker='o', label='I dont like you')
 68 |     #请补全 
 69 |     
 70 |     plt.legend(loc='upper right')                   # 设置图例的位置为右上角
 71 | 
 72 |     # set axis and ticks
 73 |     delta_x = x_max-x_min                           # 计算横坐标的极差为横坐标最大值与最小值的差，并赋值给 delta_x
 74 |     delta_y = y_max-y_min                           # 计算纵坐标的极差为纵坐标最大值与最小值的差，并赋值给 delta_y
 75 |     # 设置横坐标的刻度：从 x_min - delta_x / 10 到 x_max + delta_x / 10，使用 np.arange 函数创建数组，步长为 1，并赋值给 my_x_ticks
 76 |     my_x_ticks = np.arange(x_min - delta_x / 10, x_max + delta_x / 10, 1)
 77 |     # 设置纵坐标的刻度：从 y_min - delta_y / 10 到 y_max + delta_y / 10，使用 np.arange 函数创建数组，步长为 1，并赋值给 my_y_ticks
 78 |     my_y_ticks = np.arange(y_min - delta_y / 10, y_max + delta_y / 10, 1)
 79 | 
 80 |     plt.xticks(my_x_ticks)                          # 通过使用 plt.xticks 函数，设置作图的横坐标的刻度为 my_x_ticks
 81 |     plt.yticks(my_y_ticks)                          # 通过使用 plt.yticks 函数，设置作图的纵坐标的刻度为 my_y_ticks
 82 |     # 通过使用 plt.axis 函数，设置作图的横坐标和纵坐标的显示范围，分别是[x_min-delta_x/10, x_max+delta_x/10] 和 [y_min-delta_y/10, y_max+delta_y/10]    
 83 |     plt.axis([x_min-delta_x/10, x_max+delta_x/10, y_min-delta_y/10, y_max+delta_y/10])
 84 | 
 85 |     # drwa a line：绘制一条直线，用于决策判断
 86 |     x = np.arange(x_min-delta_x/10, x_max+delta_x/10, 0.01) # 通过使用 np.arange 函数创建数组， 从 x_min - delta_x / 10 到 x_max + delta_x / 10，步长为 0.01，并赋值给 x
 87 |     y =  (-w[0]-w[1]*x)/w[2]         #请补全               # 通过公式计算得到直线的纵坐标： y = (-w[0]-w[1]*x)/w[2]
 88 |     plt.plot(x, y.T)     # 通过使用 plt.plot 函数绘制图象，其中，横坐标是 x , 纵坐标是 y.T， “.T” 表示的是矩阵的转置，因为绘图时需要横纵坐标的维度一致
 89 | 
 90 |     # figure name：设置图像的文件名和标题名
 91 |     # 设置图像的文件名为 'Training ' + str(iter_num) + ' times.png'，其中，str(iter_num) 表示将迭代次数 iter_num 转变成字符串，图片格式为 “png”
 92 |     fig_name = 'Training ' + str(iter_num) + ' times.png'   
 93 |     # 设置图像的标题名为'Training ' + str(iter_num) + ' times.png'，其中，str(iter_num) 表示将迭代次数 iter_num 转变成字符串，图片格式为 “png”
 94 |     plt.title(fig_name)
 95 |     fig.savefig(fig_name)                           # 通过使用 fig.savefig 函数，保存图片，分辨率等参数采取默认值
 96 |     plt.show(fig)    # 通过使用 plt.show 函数，显示绘制好的图片，注意的是必须关闭图像窗口，才可以进入执行后续的程序
 97 | 
 98 | # sigmoid: 定义一个 激活（激励）函数 sigmoid 函数 （activation function），输入参数是 wx， 返回的是 sigmoid 函数值
 99 | def sigmoid(wx):
100 |     sigmoidV =  1.0/(1.0+np.exp(-wx))  # 请补全  计算激活函数 sigmoid 函数 的函数值，计算公式为：1.0/(1.0+np.exp(-wx))
101 |     return sigmoidV                    
102 | 
103 | # loss fuc Y_ 预测值 Y 真值
104 | def loss(X, Y, w):       # 定义一个 损失函数 loss 函数 （loss function），输入参数是 X, Y, w， 返回的是 损失函数的值
105 |     m, n = np.shape(X)       # 通过使用 np.shape 函数，得到数据集 X 的形状大小，其中，m 为数据集 X 的行数，n 为数据集 X 的列数
106 |     trainMat = np.mat(X)   # 通过使用 np.mat 函数，将数据集 X 转变成矩阵类型，并赋值给 trainMat
107 |     Y_ = []    # 准备数据，定义一个空的列表，并赋值给 Y_，进行初始化, 后续会通过 append 的方法向空列表内不断添加新的元素
108 |     for i in np.arange(m):     # 通过 for 循环结构，遍历数据集 X 所有的行，其中，i 可取的数依次为：0，1 ，2，....，m-1， 数据集 X总共有 m 行
109 |         # 通过 append 的方法向空列表 Y_ 内不断添加新的元素，新元素是通过 训练的矩阵数据集  trainMat[i] 乘以权重 w 之后，再计算激活函数 sigmoid 的函数值	
110 |         Y_.append(sigmoid(trainMat[i]*w))
111 |     m = np.shape(Y_)[0]   # 通过使用 np.shape 函数，得到数据集 X 的形状大小，其中，np.shape(Y_)[0] 为数据集 X 的行数，并赋值给 m        
112 |     sum_err = 0.0    # 初始化误差的总和为 0.0, 赋值给 sum_err， 后续会不断更新 误差的总和 sum_err 的数值   
113 |     for i in range(m):   # 通过 for 循环结构，遍历数据集 Y_ 所有的行，其中，i 可取的数依次为：0，1 ，2，....，m-1， 数据集 Y_ 总共有 m 行    
114 |         # 请补全 更新误差的总和 sum_err 的数值， 每次 误差的总和 sum_err 递减 Y[i]*np.log(Y_[i])+(1-Y[i])*np.log(1-Y_[i])，这是 交叉熵损失函数（ Cross Entropy Loss ）的计算公式    	
115 |         sum_err -=  Y[i]*np.log(Y_[i])+(1-Y[i])*np.log(1-Y_[i])#请补全 
116 |     return sum_err/m                                # 返回 sum_err
117 | 
118 | # BGD 批量梯度下降法求最优参数
119 | # 定义一个BGD 函数，即：批量梯度下降法（Batch Gradient Descent，BGD），输入参数是 数据集 X 和 y, 
120 | # 迭代次数 iter_num, 学习率 alpha，又写作 lr (learning rate), 它表示每次向着J最陡峭的方向迈步的大小， 返回的是 权重 w
121 | # 通过批量梯度下降法（Batch Gradient Descent，BGD），不断更新权重 W
122 | def BGD(X, y, iter_num, alpha):
123 |     trainMat = np.mat(X)                            # 通过使用 np.mat 函数，将数据集 X 转换成矩阵类型，并赋值给 trainMat
124 |     trainY = np.mat(y).T                            # 通过使用 np.mat 函数，将数据集 y 转换成矩阵类型，并且转置，然后赋值给 trainY 
125 |     m, n = np.shape(X)    # 通过使用 np.shape 函数，得到数据集 X 的形状大小，其中，m 为数据集 X 的行数，n 为数据集 X 的列数    
126 |     w = np.ones((n,1))# 通过使用 np.ones 函数，创建元素全为 1 的矩阵，矩阵的大小为 n 行 1 列，并赋值给 w, 即：进行权重 w 的初始化，令其全为 1  
127 |     for i in range(iter_num):    # 通过 for 循环结构，开始进行迭代，其中，i 可取的数依次为：0，1 ，2，....，iter_num-1， 迭代次数总共有 iter_num 次
128 |         error =  sigmoid(trainMat*w)-trainY  #请补全 # 计算迭代的误差 error：将预测得到的激活函数的数值 sigmoid(trainMat*w) 减去 实际的 trainY 数值
129 |         w =   w - (1.0/m)*alpha*trainMat.T*error    #请补全  # 更新权重 w , BGD 批量梯度下降法 的核心， w = w - (1.0/m)*alpha*trainMat.T*error
130 |     return w                                        # 返回 w
131 | 
132 | # classify：定义一个 classify 函数，输入参数是 wx， 返回的是标志 1 或者 0
133 | def classify(wx):
134 |     prob = sigmoid(wx)                              # 计算概率：将激活函数 sigmoid(wx) 的数值作为预测的概率，并赋值给 prob
135 |     if prob > 0.5:                                  # 如果 概率 prob 大于 0.5， 那么返回数值 1  
136 |         return 1
137 |     else:                                           # 否则，如果 概率 prob 不大于 0.5， 那么返回数值 0
138 |         return 0
139 | 
140 | # predict：定义一个 predict 函数，输入参数是 测试集 testX 和权重 w， 返回的是预测的结果 result
141 | def predict(testX, w):
142 |     m, n = np.shape(testX)  # 通过使用 np.shape 函数，得到测试集 testX 的形状大小，其中，m 为测试集 testX 的行数，n 为测试集 testX 的列数  
143 |     testMat = np.mat(testX)                         # 通过使用 np.mat 函数，将测试集 testX 转换成矩阵类型，并赋值给 testMat
144 |     result = []   # 准备数据，定义一个空的列表，并赋值给结果 result，进行初始化, 后续会通过 append 的方法向空列表内不断添加新的元素
145 |     for i in np.arange(m):     # 通过 for 循环结构，遍历测试集 testX 所有的行，其中，i 可取的数依次为：0，1 ，2，....，m-1， 测试集 testX 总共有 m 行
146 |         # 通过 append 的方法向空列表 result 内不断添加新的元素，新元素是通过调用 classify 函数进行预测得到，将返回的浮点型的 1 或者 0 添加到 空列表 result 内
147 |         result.append(classify(float(testMat[i]*w)))
148 |     return result                                   # 返回预测结果result
149 | 
150 | # Precision：定义一个 Precision 函数，输入参数是数据集 X, Y 和权重 w， 返回的是 测试集的正确率 
151 | def Precision(X, Y, w):
152 |     result = predict(X, w)    # 通过调用 predict 函数，输入测试集 X 和权重 w， 计算得到预测结果，并把返回的结果赋值给 result 
153 |     right_sum = 0    # 进行初始化预测正确的数目，赋值 0 给 right_sum，后续如果预测正确，会不断增加 1 
154 |     # 通过 for 循环结构，开始进行遍历，其中，i 可取的数依次为：0，1 ，2，....，len(result)-1， 预测结果 result 内元素的个数总和为 len(result) 
155 |     for i in range(len(result)):
156 |         if result[i]-int(Y[i]) == 0:    # 通过条件判断语句 if, 如果结果 result 的元素与 int(Y[i])相等，即：预测正确！ 那么更新预测正确的数目 right_sum
157 |             right_sum += 1    # 如果预测正确！ 那么更新预测正确的数目 right_sum，每次递增加 1
158 |     # 最后返回测试集预测的正确率，计算公式为：1.0*right_sum/len(Y)，注意：乘以 1.0 的原因是把正确率变成浮点型，当然也可以直接用 float 强制转换
159 |     return 1.0*right_sum/len(Y)                     
160 | 
161 | # python 主程序，当本文件被执行的时候，运行下列语句：
162 | if __name__ == "__main__":
163 | 
164 |     # load data and visualization，加载数据并可视化
165 |     data = loadData('data.xls')                     # 通过调用 loadData 函数，导入原始数据集 文件 'data.xls'，并赋值给 data
166 |     X = data[:,:2]# 将数据集 data 的 第一列 和 第二列 的所有行的数据，赋值给　Ｘ, 实际对应的是 身高（m）、　月薪（元）的原始数据  
167 |     y = data[:,2] # 将数据集 data 的 第三列 所有行的数据，赋值给　y，实际对应的是 是否有兴趣尝试交往（Y=1/N=0）的原始数据，可取 0 或 1  
168 |     
169 |     # plot data
170 |     plt_data = plt.figure(1)
171 |     p1, p2 = plotData(X, y)                    # 通过调用 plotData 函数，输入参数为 数据集 X 和 y， 绘制图像
172 | 
173 |     #Labels and Legend
174 |     plt.xlabel('tall')                              # 通过调用 plt.xlabel 函数，设置图像的横坐标名称为'tall'，意思是： 身高（m）
175 |     plt.ylabel('salary')                            # 通过调用 plt.ylabel 函数，设置图像的纵坐标名称为'salary'，意思是： 月薪（元）
176 |     # 通过调用 plt.legend 函数，设置图像的图例分别为 'I like you' 和 "I don't like you"
177 |     # 设置 为线条图图例条目创建的标记点数 numpoints 为 1，图例句柄的长度 handlelength 为0，即：只用散点图形表示图例，没有图例句柄的长度的横线
178 |     plt.legend((p1, p2), ('I like you', "I don't like you"), numpoints=1, handlelength=0)
179 | 
180 |     # show and save visualized image
181 |     plt_data.savefig('visualization_org.jpg')     # 通过调用 plt.savefig 函数，保存图像，并且图像的文件名为：'visualization_org.jpg'，其中，图片的格式为 'jpg'
182 |     plt.show(plt_data)                            # 通过调用 plt.show 函数，显示图像
183 |     plt.close(plt_data)                           # 通过调用 plt.close 来关闭窗口
184 | 
185 |     # normalization and visualization：通过调用 normalization 函数，对原始数据集 X 进行归一化
186 |     X_norm = normalization(X)
187 |     # plot data
188 |     plt_norm = plt.figure(1)
189 |     # 通过调用 plotData 函数，进行绘图，输入参数是 归一化后的 X_norm 和标签数据 y，返回的是 plt_norm, p1_norm 和 p2_norm
190 |     p1_norm, p2_norm = plotData(X_norm, y)
191 | 
192 |     # Labels and Legend
193 |     plt.xlabel('tall')                         # 通过调用 plt.xlabel 函数，设置图像的横坐标名称为'tall'，意思是： 身高（m）
194 |     plt.ylabel('salary')                       # 通过调用 plt.ylabel 函数，设置图像的纵坐标名称为'salary'，意思是： 月薪（元）
195 |     # 通过调用 plt.legend 函数，设置图像的图例分别为 'I like you' 和 "I don't like you"
196 |     # 设置 为线条图图例条目创建的标记点数 numpoints 为 1，图例句柄的长度 handlelength 为0，即：只用散点图形表示图例，没有图例句柄的长度的横线    
197 |     plt.legend((p1_norm, p2_norm), ('I like you', "I don't like you"), numpoints=1, handlelength=0)
198 |     
199 |     # show and save visualized image
200 |     # 通过调用 plt.show 函数，显示图像
201 |     plt.show(plt_data)#请补全 
202 |     # 通过调用 plt.savefig 函数，保存图像，并且图像的文件名为：'visualization_norm.jpg'，其中，图片的格式为 'jpg'
203 |     plt.savefig('visualization_norm.jpg')#请补全 
204 |     # 通过调用 plt.close 函数，关闭窗口
205 |     plt.close(plt_data)#请补全 
206 |     
207 |     # optimizing by BSD
208 |     iter_num=200                                    # 进行初始化迭代的次数 iter_num，赋值 200 给 iter_num
209 |     lr=0.05                                        # 进行初始化学习率 lr，赋值 0.001 给 lr
210 |     m,n = np.shape(data)       # 通过使用 np.shape 函数，得到数据集 data 的形状大小，其中，m 为数据集 data 的行数，n 为数据集 data 的列数
211 |     offset = np.ones((m, 1))    # 通过使用 np.ones 函数，创建元素全为 1 的矩阵，矩阵的大小为 m 行 1 列，并赋值给 offset, 即：进行 offset  的初始化，令其全为 1
212 |     trainMat = np.c_[offset, X_norm] # 通过使用 np.c_ 函数将 offset 和 归一化后的 X_norm 数据集组合起来，并赋值给 trainMat
213 |     theta=BGD(trainMat,y,iter_num,lr) # 通过调用 BGD 函数，即：批量梯度下降法（Batch Gradient Descent，BGD），返回最优化后的权重， 并赋值给 theta
214 | 
215 |     ## Plot Boundary    # 通过调用 plotDecisionBoundary 函数，绘制分类决策的直线，其中，输入参数分别是：训练集 trainMat, 标签 y, 最优化后的权重 theta 和 迭代次数 iter_num
216 |     plotDecisionBoundary(trainMat, y, theta, iter_num)
217 |     cost = loss(trainMat, y, theta)    # 通过调用 loss 函数，计算出本模型算法的损失函数，其中, 输入参数分别是： 训练集 trainMat, 标签 y 和 最优化后的权重 theta， 并赋值给 cost   
218 |     print('Cost theta: {0}'.format(cost))# 在屏幕上输出 损失函数的数值，其中，.format(cost) 的格式是更加规范的输出格式，当然也可以用转义字符 %s
219 | 
220 |     # Compute accuracy on our training set
221 |     p = Precision(trainMat, y, theta)   # 通过调用 Precision 函数，计算出预测 测试集结果的正确率，其中，输入参数分别是： 训练集 trainMat, 标签 y 和 最优化后的权重 theta， 并赋值给 p
222 |     print('Train Accuracy: {0}'.format(p))  # 在屏幕上输出 测试集正确率的数值，其中，.format(p) 的格式是更加规范的输出格式，当然也可以用转义字符 %s    
223 |     print('finished!')                              # 在屏幕上输出完成的信息，'finished!' 
224 |     
225 |     
226 | 


--------------------------------------------------------------------------------
/homework-02/袁曼雪-homework-02.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/袁曼雪-homework-02.pdf


--------------------------------------------------------------------------------
/homework-02/说明.md:
--------------------------------------------------------------------------------
1 | #作业提交说明  
2 | 补充我给你们代码的剩余部分,并将代码跑通之后的图片和源码上传.图片命名:姓名拼音+序号,源码直接以姓名拼音命名.  
3 | 有任何问题在微信群里问.  
4 | 代码可以优化，请自行了解Pandas库.   
5 | 作业时间为一周内完成.    
6 | 


--------------------------------------------------------------------------------
/homework-03/MINST/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/t10k-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/homework-03/MINST/t10k-images.idx3-ubyte:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/t10k-images.idx3-ubyte


--------------------------------------------------------------------------------
/homework-03/MINST/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/t10k-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------
/homework-03/MINST/t10k-labels.idx1-ubyte:
--------------------------------------------------------------------------------
1 |     ' 		 	 		   		  				 						   		  	  	 				 		 		  					      			  	 	   		 			 		   		 			 		 	 		 	   		 	  	   	 		  	 				    					  	    	  		 		  	 	   	 		 	   			 			  	  		 	 	 	 		 		 			 			 	 	 	 	  	   	   	 	 		   		   						 	 		 	    	 						 	 		 	  		 		 		  	   			 	    			   	 		 				 			 		  	  	  	 		       				  	  			   			 		 	 			   	 	 	 	 				   		   	  		   		 	 	  	 	  		  		 								   		 		    				   	  	 	 	 		 			  	 			   			 	 	  			  	 			  	   		  	    		 			     	 			  		    	  		  	    						 			  					 	 		   	 		 			   				    		   			    					    	 	    		      		        	     	 				 		   	     						  			 				 		 	 	   			  	      		  		   	 			   			   	 		    		 			  					  	 				 					   			  	 			 	 		 						 	  	 	    	   	  								  	  		 				  	 			 			 				 	  		 										 		  	 	   	 	 	 	   								 		  	     			 				   	 		 							 	  	   	  		 				 	 								  	  	 		  	 		  	  					  	 	  				   		   						  		  						   	 	 	 		 	 		   	 		  	  	 					 	  	      			 	 	 	   		  	  		   					 	 	 	 	   	  	    		   	 	  		 	 			 	  	 	 				    		 	    			 	 	   		 	 				 		  	    	  	 		 	 		 	 	   	 	 	 		 	    		   			 		 	 	 	  	  		     	 				 	 	 	 					 		        		 	     	 		  						    	 	 	     	 				 			    		 		    	  		 		 	 	 				 		 	 	 		      		  	 		  	 	 	    	 		  							    	 	 	  	     							  		  	 	 	  				 		 	 	 	   	 	 	    	 	 					 	   	 	 	 		 	  	 		   		 		  	    			 	 		 			  	 	 	   	 	 					 	   	 	 			 		  		  	 	   	  	 	 		  	 			 	  		  	  	 		 	  	 	  		   	 	 	 		   				  	  	 		   	 	 	 	 		   	 	 		    			   		 	 	 	 	 	 	 	  		 		 		     	 		 	 	  	 	 			 	 	 		    			  	 	 	 	    		 		    	  		 		 	 	 				    		 	    			 	 	 	 			 	       		 				 	 	 			 				    		   			   		 	 	 			    			  	 	  	 	 				       					 	  			  	  	   	 	 	 	  		 	 		 	      				 	 	 	   		 	 					 	 	 	    		  	  		   		 	 	 	 		    				   	  	 		 	   		 	  		 	 	     	 	 


--------------------------------------------------------------------------------
/homework-03/MINST/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/train-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/homework-03/MINST/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/train-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------
/homework-03/MINST/train-labels.idx1-ubyte:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/train-labels.idx1-ubyte


--------------------------------------------------------------------------------
/homework-03/ex3-part1/tensorflow-MNIST-logistRes.py:
--------------------------------------------------------------------------------
  1 | 
  2 | #以下函数的使用方法，请参考：https://tensorflow.google.cn/api_docs/python/
  3 | 
  4 | from tensorflow.examples.tutorials.mnist import input_data   # 导入 tensorflow 中的 input_data 子模块，目的是为了后续的导入读取数据
  5 | import tensorflow as tf # 导入 tensorflow 库，并且重名为 tf, 便于后面的简写 tf 
  6 | import numpy as np  # 导入 numpy 库，并且重名为 np, 便于后面的简写 np
  7 | 
  8 | #基本参数设置
  9 | batchSize = 30   #batchsize的大小，代表每次训练载入的图像张数
 10 | lr = 0.005       #学习率的大小，若后面启用learning rate decay策略，则该值为学习率的初始值
 11 | iter = 1000000   #训练的迭代次数
 12 | saveInter = 100  #保存结果的频率，即每训练100次保存一次模型训练参数及模型性能
 13 | sample_size = 55000  #学习example的总大小，MNIST中官方写60000张，实际为55000（训练）+ 5000（校验），本例中只使用了55000 train
 14 | 
 15 | # 对模型输出的结果进行评判，>0.5为“正”，<0.5为“负”
 16 | def predict(X):   # 定义一个函数 predict， 作用是用来进行预测
 17 |     num = X.shape[0]  # 通过 shape 属性，得到 X 行的个数
 18 |     result = [] # 定义一个空的列表 result ，后面通过 append 的方式，向里面添加元素
 19 |     for i in range(num):  # for循环语句， i 从0，1，2, 到 num -1
 20 |         if X[i]>0.5: # 如果 X[i] 大于 0.5
 21 |             result.append(1.0) # 将 1.0 添加到列表 result 中
 22 |         else: # 否则，X[i] 小于或等于 0.5
 23 |             result.append(0.0)  # 将 0.0 添加到列表 result 中
 24 |     return result # 返回 result 的结果
 25 | 
 26 | # 加载数据集，建议提前到官网上下载MNIST数据集，并解压到./MNIST文件夹下
 27 | # MNIST下载地址：http://yann.lecun.com/exdb/mnist/
 28 | def loadData(): # 定义一个 loadData 函数
 29 |     file = "../MNIST" # 数据集 MINIST 
 30 |     mnist = input_data.read_data_sets(file, one_hot=True)  # input_data.read_data_sets 读取数据
 31 |     return mnist # 返回读取的数据 mnist
 32 | 
 33 | # 申请模型输入输出的占位符
 34 | def create_placeholder(n_x=784,n_y=0): # 定义一个 create_placeholder  函数
 35 |     X = tf.placeholder(tf.float32,shape=[None,n_x],name='X')   # 调用tf.placeholder函数，tensorflow 中定义 X
 36 |     Y = tf.placeholder(tf.float32, shape=[None,], name='Y')  # 调用tf.placeholder函数，tensorflow 中定义 Y
 37 |     return X,Y  #返回 X 和 Y　的数值
 38 | 
 39 | # 定义参数，W,b
 40 | def initialize_parameters(): # 定义一个 initialize_parameters 函数
 41 |     W = tf.Variable({0})  #调用tf.Variable函数，设置模型参数W，W的维度为[784,1]，且初始化为0
 42 |     b = tf.Variable({0})  #调用tf.Variable函数，设置模型参数b，b的维度为[1  ,1],且初始化为0
 43 |     parameters={'W': W,  # 参数权重 W
 44 |                 'b': b}  # 参数偏置 b
 45 |     return parameters  # 返回参数
 46 | 
 47 | # 将标签转换为one-hot形式，本例中未用到该函数，是因为tensorflow中封装了one-hot功能
 48 | def convert_one_hot(Y,C):  # 定义一个 convert_one_hot 函数
 49 |     one_hot=np.eye(C)[Y.reshape(-1)].T  # 初始化 one_hot 为对角矩阵
 50 |     return one_hot  # 返回 one_hot 
 51 | 
 52 | # 定义网络模型
 53 | def forward_propagation(X,parameters):  # 定义一个 forward_propagation 函数
 54 |     W = parameters['W']  # 参数权重 W 
 55 |     b = parameters['b']  # 参数偏置 b
 56 | 
 57 |     Z1={0}  #调用tensorflow函数，实现Z1=X*W+b
 58 |     A1={0}  #调用tf.nn.sigmoid，实现A1 = sigmoid(Z1)
 59 |     A1 = tf.clip_by_value({0})  #调用clip_by_value，将A1进行裁剪，使其在[0.001，1.0]之间，是为了避免出现接近于0的极小值，输入np.log()中出现nan的情况
 60 |     return A1 # 返回 A1
 61 | 
 62 | # 定义loss function
 63 | def compute_cost(y_,y,W):  # 定义一个 compute_cost 函数
 64 |     #以下的cross_entropy经过了简单变化，在(1.0-y_)*tf.log(1.0-y)之前乘以0.1，是因为正负样本比例基本上为1：9，严重偏向负样本
 65 |     #以下添加了正则，也可以尝试去掉
 66 |     cross_entropy = -(1.0/batchSize)*tf.reduce_sum({0}) #调用tf.reduce_sum函数，实现交叉熵函数
 67 |     return cross_entropy   # 返回 交叉熵函数 的数值 cross_entropy 
 68 | 
 69 | # 模型搭建、训练、存储
 70 | def model(mnist,Num): # 定义一个  model 函数
 71 |     x,y_ = create_placeholder(784, 0) # 调用 create_placeholder 函数，初始化  x,y_ 
 72 |     parameters = initialize_parameters() # 调用 initialize_parameters 函数， 初始化 参数
 73 |     A1 = forward_propagation(x, parameters)   # 调用 forward_propagation 函数，实现前向反馈
 74 | 
 75 |     #设置learning rate decay策略，随着迭代次数的增加，学习率成指数逐渐减小，减小公式为：decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
 76 |     global_step = tf.Variable(0)  # 调用  tf.Variable 函数， 初始化 global_step 变量
 77 |     learning_rate = tf.train.exponential_decay(lr,global_step,decay_steps=sample_size/batchSize,decay_rate=0.98,staircase=True) # 设置指数衰减的 学习率，调用tf.train.exponential_decay。
 78 |     
 79 |     cost = compute_cost(y_, A1,parameters['W']) # 调用 compute_cost 函数，计算损失函数
 80 |     train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost,global_step=global_step) # 调用 tf.train.GradientDescentOptimizer 函数， 实现梯度下降的优化
 81 |     sess = {0}   #调用tf.InteractiveSession()函数，创建Session
 82 |     sess.run({0}) #执行tf.global_variables_initializer()，初始化参数
 83 |     
 84 |     #利用全部样本对模型进行测试
 85 |     testbatchX = mnist.train.images  # 导入 mnist 数据中的训练集 图片
 86 |     testbatchY = mnist.train.labels  # 导入 mnist 数据中的训练集 标签
 87 |      
 88 |     modelLast = []  # 定义一个空的列表 modelLast 
 89 |     logName = "./log"+str(Num)+".txt" # 新建文件名为  log"+str(Num)+".txt
 90 |     
 91 |     #保存模型，且设定保存最大迭代次数的4个
 92 |     saver = tf.train.Saver(max_to_keep=4)  # 调用 tf.train.Saver 函数，保存模型
 93 |     pf = open(logName, "w") # 以 写入的方式 打开文件  log"+str(Num)+".txt
 94 |     for i in range(iter): # for 循环结构， 遍历　iter
 95 |         #加载minibatch=50个训练样本
 96 |         batch = mnist.train.next_batch(batchSize) # 调用  mnist.train.next_batch 函数，复制给 batch
 97 |         batchX = batch[0] # 赋值给 batchX 为 batch 中第一个元素
 98 |         batchY = batch[1] # 赋值给 batchY为 batch 中第二个元素
 99 |         #执行训练
100 |         train_step.run(feed_dict={0})  #执行tensor流图，并为其添加输入x: batchX, y_: batchY[:,Num]
101 | 
102 |         #每隔saveInter次迭代，保存当前模型的状态，并测试模型精度
103 |         if i % saveInter == 0:  #条件判断语句 if， 如果 i 整除 iter
104 |             [total_cross_entropy,pred,Wsum,lrr] = sess.run([cost,A1,parameters['W'],learning_rate],feed_dict={x:batchX,y_:batchY[:,Num]}) # 调用 sess.run， 启动 tensoflow
105 |             pred1 = predict(pred)  # 调用 predict 函数，进行预测
106 |             
107 |             #保存当前模型的学习率lr、在minibatch上的测试精度
108 |             print('lr:{:f},train Set Accuracy: {:f}'.format(lrr,(np.mean(pred1 == batchY[:,Num]) * 100))) # 输出训练集的准确率等
109 |             pf.write('lr:{:f},train Set Accuracy: {:f}\n'.format(lrr,(np.mean(pred1 == batchY[:,Num]) * 100))) # 写入训练集的准确率
110 |  
111 |             #保存迭代次数、cross entropy
112 |             print("handwrite: %d, iterate times: %d , cross entropy:%g"%(Num,i,total_cross_entropy)) # 输出迭代次数，交叉熵损失函数等
113 |             pf.write("handwrite: %d, iterate times: %d , cross entropy:%g, W sum is: %g\n" %(Num,i,total_cross_entropy,np.sum(Wsum))) # 写入出迭代次数，交叉熵损失函数等
114 |             
115 |             #保存当前参数状态、测试testbatch上的精度
116 |             [testpred] = sess.run([A1],feed_dict={x: testbatchX, y_: testbatchY[:, Num]})  # 调用 sess.run， 启动 tensoflow
117 |             testpred1 = predict(testpred)   # 调用 predict 函数，进行预测
118 |             print('predict sum is: {:f},Testing Set Accuracy: {:f}\n'.format(np.sum(testpred1),(np.mean(testpred1 == testbatchY[:, Num]) * 100)))  # 输出测试集的准确率等
119 |             pf.write('predict sum is: {:f},Testing Set Accuracy: {:f}\n'.format(np.sum(testpred1),(np.mean(testpred1 == testbatchY[:,Num]) * 100))) # 写入测试集的准确率等
120 |             pf.write("\n") # 写入换行字符
121 |             
122 |             #保存当前模型
123 |             saveName = "model/my-model-" + str(Num) # 保存模型为 "model/my-model-" + str(Num)
124 |             saver.save(sess, saveName, global_step=i) # 调用  saver.save 函数，保存模型
125 |             pf.write("save model completed\n") # 写入 save model completed
126 |             
127 |             #若交叉熵出现nan（出现极值），此时停止训练，保存最新的一次模型名称
128 |             if total_cross_entropy != total_cross_entropy: # 条件判断语句 if ， 如果 total_cross_entropy 不等于 total_cross_entropy
129 |                 print("is nan, stop") # 输出 is nan, stop
130 |                 pf.write("is nan, stop\n") # 写入 is nan, stop
131 |                 modelLast = "model/my-model-" + str(Num)+str(i-saveInter) # 模型文件名为  "model/my-model-" + str(Num)+str(i-saveInter)
132 |                 break; # break 跳出循环
133 |     pf.close() # close 关闭打开的文件 
134 |     return modelLast # 返回 modelLast  
135 |     
136 | # 模型测试
137 | def test_model(): # 定义 test_model 函数
138 |     mnist = loadData() # 调用 loadData 函数， 导入数据 
139 |     classNum = 10 # 类别 初始化赋值为 10 ， 共有 10 类
140 |     modelNames = [] # 定义一个空的列表 modelNames
141 |     logName = "./logModelNames.txt" #  文件名为 logModelNames.txt
142 |     pf = open(logName, "w") # 以写入的方式打开  logModelNames.txt
143 |     
144 |     #循环训练每个类别与其他类别的二分类器，保存10个分类器模型
145 |     for i in range(classNum): # for 循环语句， 遍历所有 classNum的类别， 
146 |         modelNames.append(model(mnist,i)) # 通过 append 的方式， 向 modelNames 里面添加 model(mnist,i)
147 |         pf.write(modelNames[i]) # 写入 modelNames[i]
148 |         pf.write("\n") # 写入 换行字符
149 |     pf.close() # 关闭文件
150 | 
151 | if __name__ == '__main__': # 主程序
152 |     test_model() # 调用 test_model 函数 
153 |     
154 | 
155 | 


--------------------------------------------------------------------------------
/homework-03/ex3-part2/ann_classification_two_hidden_layers.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import matplotlib.pyplot as plt
  3 | import xlrd     # 导入必备的 xlrd 库，目的是为了调用 xlrd.open_workbook 函数打开 excel 文件，读取数据
  4 | 
  5 | class Config:
  6 |     input_dim = 2  # input layer dimensionality
  7 |     output_dim = 1  # output layer dimensionality
  8 |     # Gradient descent parameters (I picked these by hand)
  9 |     lr = 5  # learning rate for gradient descent
 10 |     reg_lambda = 0 #0.01  # regularization strength
 11 | 
 12 | # 定义函数loadData函数，输入参数是 filename 指代文件名，返回数据data，目的是从.xls文件中加载数据，并存储为numpy中的array格式
 13 | def loadData(filename):
 14 |     workbook = xlrd.open_workbook(filename)         # 通过调用 xlrd.open_workbook 函数打开 excel 文件，读取数据，并返回给 workbook 变量
 15 |     boyinfo = workbook.sheet_by_index(0)            # 通过使用属性 sheet_by_index 得到  excel 文件 中的工作簿，其中 sheet_by_index(0) 表示是第一个工作簿，在 python 中，下标从 0 开始
 16 |     col_num = boyinfo.ncols                         # 通过使用属性 ncols 得到 excel 文件 中第一个工作簿的 列数，并赋值给 col_num
 17 |     row_num = boyinfo.nrows                         # 通过使用属性 nrows 得到 excel 文件 中第一个工作簿的 行数，并赋值给 row_num
 18 |     col0 = boyinfo.col_values(0)[1:]                # 通过使用属性 col_values(0)[1:] 得到 excel 文件 中第一列数据中，从第2行到最后一行的所有数据，并赋值给 col0
 19 |     data = np.array(col0)                           # 通过使用 np.array 函数， 将 col0 转换成数组，并赋值给 data
 20 |     if col_num == 1:                                # 条件判断语句： 如果列数 col_num 为1， 只有一列，那么直接返回数据 data
 21 |         return data                                     # 返回data
 22 |     else:                                           # 否则，如果不止一列数据，需要遍历所有列的数据
 23 |         for i in range(col_num-1):                      # 通过使用for循环达到遍历的目的
 24 |             coltemp = boyinfo.col_values(i+1)[1:]           # 从第二行开始，表头不算，遍历从 第二列 开始到最后一列的数据
 25 |             data = np.c_[data, coltemp]                     # 通过使用 np.c_ 函数将 第一列的数据 和后面 所有列的数据组合起来，并赋值给 data
 26 |     return data                                     # 返回data
 27 | 
 28 | # 定义一个 plotData 函数，输入参数是 数据 X 和标志 flag: y，返回作图操作 plt, p1, p2 ， 目的是为了画图
 29 | def plotData(X, y):
 30 |     pos = np.where(y==1)                            # 通过使用 np.where 函数查找所有满足条件的数据，查找所有满足标志 y == 1 的数据，并赋值给 pos
 31 |     neg = np.where(y==0)                            # 通过使用 np.where 函数查找所有满足条件的数据，查找所有满足标志 y == 0 的数据，并赋值给 neg
 32 |     # 通过使用 plt.plot 函数作图，对所有满足标志 y == 1 的数据作图，点采用 s (正方形)，代表 square, 点的大小为 7 单位，颜色为 红色 red
 33 |     p1 = plt.plot(X[pos, 0], X[pos, 1], marker='s', markersize=3, color='red')[0]
 34 |     # 通过使用 plt.plot 函数作图，对所有满足标志 y == 1 的数据作图，点采用 o (圆形)，代表 circle, 点的大小为 7 单位，颜色为 绿色 green
 35 |     p2 = plt.plot(X[neg, 0], X[neg, 1], marker='o', markersize=3, color='green')[0]
 36 |     return plt,p1,p2                            # 返回作图操作plt, p1, p2
 37 | 
 38 | # normalization： 定义一个 normalization 函数，输入参数是原始数据 X ，返回归一化后的数据 X_norm ， 目的是为了数据预处理，得到归一化后的数据 X_norm
 39 | def normalization(X):
 40 |     mu = np.mean(X, axis=0)  # 对数据X的每列求均值，axis = 0 代表在矩阵第一个维度上求均值
 41 |     Xmin = np.min(X, axis=0)  # 对数据X的每列求最小值，axis = 0 代表在矩阵第一个维度上求最小值
 42 |     Xmax = np.max(X, axis=0)  # 对数据X的每列求最大值，axis = 0 代表在矩阵第一个维度上求最大值
 43 |     X_norm = (X-mu)/(Xmax-Xmin)  # 计算归一化后的数据，归一化公式为：(2*(X-Xmin)/(Xmax-Xmin))-1，归一化后数据范围为 [-1,1]
 44 |     return X_norm  # 返回数据预处理，归一化后的数据 X_norm
 45 | 
 46 | 
 47 | # visualize: 定义一个visualize函数，输入参数为特征矩阵X，标签数据y和分类模型model，函数的作用是：展示出模型的的分类边界
 48 | def visualize(X, y, model):
 49 |     plot_decision_boundary(lambda x:predict(model,x), X, y)  # 调用plot_decision_boundary函数
 50 |     plt.savefig("result.png")  # 将plot_decision_boundary函数中返回的图片保存，图片名字为result.png
 51 |     plt.show()  # 展示plot_decision_boundary中返回的图片
 52 | 
 53 | 
 54 | # plot_decision_boundary:定义决策边界函数，输入为预测函数 特征矩阵数据X 标签数据y
 55 | def plot_decision_boundary(pred_func, X, y):
 56 |     # Set min and max values and give it some padding
 57 |     x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5  # 将特征矩阵X中第一列中最小值与最大值分别加上0.5 赋予x_min,x_max
 58 |     y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5  # 将特征矩阵X中第二列中最小值与最大值分别加上0.5 赋予y_min,y_max
 59 |     h = 0.01  # 步长为0.01
 60 |     # Generate a grid of points with distance h between them
 61 |     # 由np.arrange生成一维数组作为np.meshgrid的参数，返回xx矩阵，yy矩阵
 62 |     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
 63 |     # Predict the function value for the whole gid
 64 |     # .ravel()方法将xx,yy矩阵压缩为一维向量；np.c_：是按行连接两个矩阵，就是把两矩阵左右相加，要求行数相等
 65 |     # 合成的矩阵作为pred_func的输入，返回预测值
 66 |     Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
 67 |     Z = (Z.T).reshape(xx.shape)  # Z矩阵转置并将维度调整和xx的维度一致
 68 |     p=plt.figure()  # 生成一个“画布”
 69 |     _,p1,p2=plotData(X,y)  # 将特征矩阵X与标签数据y传入plotData函数，返回图操作p1,p2,其中‘_’用来接没有用到的返回值
 70 |     p3=plt.contour(xx, yy, Z, levels=0,linewidths=2).collections[0]  # 画登高线，即决策边界
 71 |     # label & Legend, specific for the exercise
 72 |     plt.xlabel("tall")  # 横坐标的标签为tall
 73 |     plt.ylabel("salary")  # 纵坐标的标签为salary
 74 |     plt.legend((p1, p2, p3), ('y = I like you', "y = I don't like you", 'Decision Boundary'), numpoints=1,handlelength=0)  # 为每一个绘图添加图例
 75 |     plt.title("ANN")  # 设置图标题
 76 | 
 77 | 
 78 | # 定义sigmoid激活函数，将输入数据压缩在0-1之间
 79 | def sigmoid(z):
 80 |     g = 1 / (1 + np.exp(-z))  # 根据sigmoid 函数公式写出
 81 |     return g  # 返回函数输出值
 82 | 
 83 | # 定义sigmoidGradient函数，计算sigmoid函数的梯度值
 84 | def sigmoidGradient(z):
 85 |     g = 1.0 / (1.0 + np.exp(-z))  # 根据sigmoid 函数公式写出
 86 |     g = g * (1 - g)  # 根据sigmoid 函数公式写出
 87 |     return g  # 返回梯度值
 88 | 
 89 | # Helper function to evaluate the total loss on the dataset
 90 | # 定义损失函数，计算所有样本的损失值
 91 | def calculate_loss(model, X, y):
 92 |     num_examples = X.shape[1]  # training set size  # X的第二个维度为训练集样本个数
 93 |     W1, W2, W3,= model['W1'], model['W2'] , model['W3']  # 神经网络为两层隐藏层，对应的参数矩阵分别为W1 W2 W3
 94 |     # Forward propagation to calculate our predictions 需要补全
 95 |     a1={0}  # 将特征矩阵X赋值给a1
 96 |     z2 = {0}  # 参数矩阵W1与a1做矩阵乘法，得到z2矩阵
 97 |     a2 = {0}  # 对z2矩阵进行sigmoid激活函数处理得到激活后的矩阵a2，即第一层隐藏层数值
 98 |     a2 = {0}  # 为矩阵a2增加一列值为1的偏置
 99 |     z3 = {0}  # 参数矩阵W2与a2做矩阵乘法，得到z3矩阵
100 |     a3 = {0}  # 对z3矩阵进行sigmoid激活函数处理得到激活后的矩阵a3，即第二层隐藏层数值
101 |     a3 = {0}  # 为矩阵a3增加一列值为1的偏置
102 |     z4 = {0}  # 参数矩阵W3与a3做矩阵乘法，得到z4矩阵
103 |     a4 = {0}  # 对z4矩阵进行sigmoid激活函数处理得到激活后的矩阵a4，即输出值
104 | 
105 |     # Calculating the loss
106 |     one = np.multiply(y, np.log(a4))  # 将真实标签y与预测值a4的对数值对应相乘
107 |     two = np.multiply((1 - y), np.log(1-a4))  # 将真实标签（1-y)与预测值（1-a4）对数值对应相乘
108 |     data_loss = -(1. / num_examples) * (one + two).sum()  # 损失函数的和，对应交叉熵公式
109 |     return data_loss  # 返回损失值
110 | 
111 | 
112 | # 定义compare函数，将预测值大于0.5的归为正例，小于0.5的归为负例
113 | def compare(X):
114 |     num = X.shape[1]  # X的第二个维度为训练集样本个数，注意X为函数的形参，真正数据调用时传入的实参
115 |     result = []  # 声明一个存放结果的列表
116 |     for i in range(num):  # 遍历所有结果
117 |         if X[:,i]>0.5:  # 判断预测结果是否大于0.5
118 |             result.append(1.0)  # 如果大于0.5，则在result列表中增加一个1.0
119 |         else:
120 |             result.append(0.0)  # 否则在result列表中增加一个0.0
121 |     return result  # 返回result列表，里面是预测为正例与反例的结果
122 | 
123 | 
124 | # 定义predict预测函数，输入为训练好的模型和特征矩阵X，返回预测值
125 | def predict(model, X):
126 |     m = X.shape[0]  # 将输入矩阵的第一个维度赋值给m
127 |     W1, W2, W3= model['W1'], model['W2'] , model['W3']  #  将模型训练好的参数分别赋值给W1 W2 W3
128 |     # Forward propagation 需要补全
129 |     X_m = np.transpose(np.column_stack((np.ones((m, 1)), X)))  # 为输入矩阵增加一列值为1的偏置
130 |     a1={0}  # 将矩阵X_m赋予a1
131 |     z2 = {0}  #  参数W1与a1做矩阵乘法
132 |     a2 = {0}  # 对矩阵z2进行做sigmoid激活
133 |     a2 = {0}  # 为第一层隐藏层的矩阵a2增加一列值为1的偏置
134 |     z3 = {0}  # 参数W2与a2做矩阵乘法
135 |     a3 = {0}  # 对矩阵z3做sigmoid激活
136 |     a3 = {0}  # 为第二层隐藏层的矩阵a3增加一列值为1的偏置
137 |     z4 = {0}  # 参数W3与a3做矩阵乘法
138 |     a4 = {0}  # 对矩阵z4做sigmoid激活
139 |     return a4  # 返回输出矩阵
140 | 
141 | 
142 | # 定义precision函数：输入为训练模型，与特征矩阵，目的是返回样本预测结果，正例为1，反例为0
143 | def precision(model, x):
144 |     W1, W2, W3= model['W1'], model['W2'],  model['W3']  # 将模型更新后的参数赋值给W1 W2 W3
145 |     # Forward propagation 需要补全
146 |     a1={0}  # 将矩阵x赋予a1
147 |     z2 = {0}  #  参数W1与a1做矩阵乘法
148 |     a2 = {0}  # 对矩阵z2进行做sigmoid激活
149 |     a2 = {0}  # 为第一层隐藏层的矩阵a2增加一列值为1的偏置
150 |     z3 = {0}  # 参数W2与a2做矩阵乘法
151 |     a3 = {0}  # 对矩阵z3做sigmoid激活
152 |     a3 = {0}  # 为第二层隐藏层的矩阵a3增加一列值为1的偏置
153 |     z4 = {0}  # 参数W3与a3做矩阵乘法
154 |     a4 = {0}  # 对矩阵z4做sigmoid激活
155 |     result = compare(a4)  # 调用compare函数，返回预测结果
156 |     return result  #  返回预测结果
157 | 
158 | 
159 | # 定义randInitializeWeights，参数为输入维度和输出维度，作用是随机初始化参数矩阵
160 | def randInitializeWeights(L_in, L_out):
161 |     W = np.zeros((L_out, 1 + L_in))  #生成一个维度为(L_out, 1 + L_in)的全0矩阵
162 |     # Randomly initialize the weights to small values
163 |     epsilon_init = 0.12  # 初始化一个很小的数
164 |     W = np.random.rand(L_out, 1 + L_in)*(2*epsilon_init) - epsilon_init  #  随机生成维度为(L_out, 1 + L_in)的参数矩阵
165 |     return W  # 返回参数矩阵
166 | 
167 | # This function learns parameters for the neural network and returns the model.
168 | # - hidden1_dim: Number of nodes in the hidden layer 1
169 | # - hidden2_dim: Number of nodes in the hidden layer 2
170 | # - iterNum: Number of passes through the training data for gradient descent
171 | # - print_loss: If True, print the loss every 1000 iterations
172 | # 定义build_model函数，输入为特征矩阵X，标签向量y，第一层隐藏层神经元个数，第二层隐藏层神经元个数，迭代次数，是否打印损失函数的布尔变量
173 | # 作用是完成神经网络的前向和反向传播，训练参数W1 W2 W3
174 | def build_model(X, y, hidden1_dim,hidden2_dim, iterNum=2000, print_loss=False):
175 |     # Initialize the parameters to random values. We need to learn these.
176 |     m = X.shape[0]  #将输入矩阵X的第一个维度赋予m
177 |     
178 |     W1 = randInitializeWeights(Config.input_dim, hidden1_dim)  # 调用randInitializeWeights函数，初始化W1
179 |     W2 = randInitializeWeights(hidden1_dim, hidden2_dim)  # 调用randInitializeWeights函数，初始化W2
180 |     W3 = randInitializeWeights(hidden2_dim, Config.output_dim)  # 调用randInitializeWeights函数，初始化W3
181 | 
182 |     # This is what we return at the end
183 |     model = {}  # 将model声明为字典数据格式
184 |     # Gradient descent.
185 |     logName = "logText.txt"  # 日志文件名称
186 |     logFile = open(logName, "w")  # 调用open函数,打开文件，模式为写
187 |     for t in range(0, iterNum):  # 从0循环至iterNum
188 |         # Forward propagation 需要补全
189 |         X_m = np.transpose(np.column_stack((np.ones((m, 1)), X)))  # 为输入矩阵X增加一列偏置为1的值
190 |         a1={0}  # 将X_m赋给a1
191 |         z2 = {0}  #  参数W1与a1做矩阵乘法
192 |         a2 = {0}  # 对矩阵z2进行做sigmoid激活
193 |         a2 = {0}  # 为第一层隐藏层的矩阵a2增加一列值为1的偏置
194 |         z3 = {0}  # 参数W2与a2做矩阵乘法
195 |         a3 = {0}  # 对矩阵z3做sigmoid激活
196 |         a3 = {0}  # 为第二层隐藏层的矩阵a3增加一列值为1的偏置
197 |         z4 = {0}  # 参数W3与a3做矩阵乘法
198 |         a4 = {0}  # 对矩阵z4做sigmoid激活
199 | 
200 |         # Back propagation
201 |         y_m = np.transpose(np.reshape(y, [-1, 1])) #reshape y_m from (n,)to (1,n)
202 |         delta4 ={0}  # 计算delta4，将预测标签向量a4与y_m做差
203 |         delta3 = {0} # 计算delta3，参数矩阵W3转置后与delta4做矩阵乘法，然后与sigmoidGradient(z3)对应位相乘
204 |         delta2 = {0}  # 计算delta2，参数矩阵W2转置后与delta3做矩阵乘法，然后与sigmoidGradient(z2)对应位相乘
205 | 
206 |         # layer 4
207 |         bigDelta3 = np.zeros(W3.shape)  # 初始化一个与W3维度一致的全零矩阵bigDelta3
208 |         DW3 = np.zeros(W3.shape)  # 初始化一个与W3维度一致的全零矩阵bigDelta3
209 |         for i in range(W3.shape[0]):  # 根据W3第一个维度大小遍历
210 |             for j in range ((W3.shape[1])):  # 根据W3的第二个维度大小进行遍历
211 |                 for n in range(0, m):  # 第n样本
212 |                     bigDelta3[i,j] += a3[j,n]*delta4[i,n]   # 将a3[j,n]与delta4[i,n]对应为相乘，然后全部加和求出bigDelta3[i,j]
213 |                 DW3[i,j]= (1./m) * bigDelta3[i,j]  #对bigDelta3[i,j]乘样本个数的倒数得出DW3[i,j]
214 |                 W3[i,j] += -Config.lr * DW3[i,j]   # 学习率-lr乘DW3[i,j]并加和得出W3[i,j]
215 | 
216 |         # layer 3
217 |         bigDelta2 = np.zeros(W2.shape)  # 初始化一个与W2维度一致的全零矩阵bigDelta2
218 |         DW2 = np.zeros(W2.shape)  # 初始化一个与W2维度一致的全零矩阵bigDelta2
219 |         for i in range(W2.shape[0]):  # 根据W2第一个维度大小遍历
220 |             for j in range((W2.shape[1])):   # 根据W2的第二个维度大小进行遍历
221 |                 for n in range(0, m):   # 第n样本
222 |                     bigDelta2[i, j] += a2[j, n] * delta3[i, n]  # 将a2[j,n]与delta3[i,n]对应为相乘，然后全部加和求出bigDelta2[i,j]
223 |                 DW2[i,j] = (1. / m) * bigDelta2[i, j]  #对bigDelta2[i,j]乘样本个数的倒数得出DW2[i,j]
224 |                 W2[i, j] += -Config.lr * DW2[i,j]  # 学习率-lr乘DW2[i,j]并加和得出W2[i,j]
225 | 
226 |         # layer 2
227 |         bigDelta1 = np.zeros(W1.shape)  # 初始化一个与W1维度一致的全零矩阵bigDelta1
228 |         DW1 = np.zeros(W1.shape)  # 初始化一个与W1维度一致的全零矩阵bigDelta1
229 |         for i in range(W1.shape[0]):  # 根据W1第一个维度大小遍历
230 |             for j in range((W1.shape[1])):  # 根据W1第二个维度大小遍历
231 |                 for n in range(0, m):  # 第n样本
232 |                     bigDelta1[i, j] += a1[j, n] * delta2[i, n]  # 将a1[j,n]与delta2[i,n]对应为相乘，然后全部加和求出bigDelta1[i,j]
233 |                 DW1[i,j] = (1. / m) * bigDelta1[i, j]  #对bigDelta1[i,j]乘样本个数的倒数得出DW1[i,j]
234 |                 W1[i, j] += -Config.lr * DW1[i,j]  # 学习率-lr乘DW2[i,j]并加和得出W2[i,j]
235 | 
236 |         # 向量运算
237 |         # DW3 = (1./m) * np.dot(delta4,a3.T)
238 |         # DW2 = (1./m) * np.dot(delta3,a2.T)
239 |         # DW1 = (1./m) * np.dot(delta2,a1.T)
240 | 
241 |         # Gradient descent parameter update
242 |         # W1 += -Config.lr * DW1
243 |         # W2 += -Config.lr * DW2
244 |         # W3 += -Config.lr * DW3
245 | 
246 | 
247 |         # Assign new parameters to the model
248 |         model = {'W1': W1, 'W2': W2, 'W3': W3}  #模型的键值对分别对应更新后的参数W1 W2 W3
249 | 
250 |         # Optionally print the loss.
251 |         # This is expensive because it uses the whole dataset, so we don't want to do it too often.
252 |         if print_loss and t % 1000 == 0:  #如果print_loss 与 t是1000的整数倍同时为True，运行下面代码
253 |             print("Loss after iteration %i: %f" % (t, calculate_loss(model, X_m, y_m)))   #格式化打印语句，输出迭代t次后，损失值是多少
254 |             logFile.write("Loss after iteration %i: %f" % (t, calculate_loss(model, X_m, y_m)))  # 将输出语句写入日志文件
255 |             logFile.write("\n")
256 |             result = precision(model, X_m)  #调用precision函数，返回预测结果
257 |             print("Traning Set Accuracy: {:f}".format((np.mean(result == y) * 100)))  #计算准确率
258 |             logFile.write("Traning Set Accuracy: {:f}".format((np.mean(result == y) * 100)))  # 将输出语句写入日志文件
259 |             logFile.write("\n")  # 换行
260 |     logFile.close()  # 关闭文件
261 | 
262 |     return model  # 返回模型，实际是返回模型更新后的参数
263 | 
264 | def main():
265 |     # load data 加载数据
266 |     data = loadData('data.xls')  # 通过调用 loadData 函数，导入原始数据集 文件 'data.xls'，并赋值给 data
267 |     X = data[:, :2]  # 将数据集 data 的 第一列 和 第二列 的所有行的数据，赋值给　Ｘ, 实际对应的是 身高（m）、　月薪（元）的原始数据
268 |     y = data[:, 2]  # 将数据集 data 的 第三列 所有行的数据，赋值给　y，实际对应的是 是否有兴趣尝试交往（Y=1/N=0）的原始数据，可取 0 或 1
269 |     # normalization 通过调用 normalization 函数，对原始数据集 X 进行归一化
270 |     X_norm = normalization(X)
271 |     # 训练模型
272 |     model = build_model(X_norm, y, 5, 3, iterNum=20000, print_loss=True)
273 |     # 可视化
274 |     visualize(X_norm, y, model)
275 | 
276 | if __name__ == "__main__":
277 |     main()
278 | 


--------------------------------------------------------------------------------
/homework-03/ex3-part2/data.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/ex3-part2/data.xls


--------------------------------------------------------------------------------
/homework-03/ex3-part3/tensorflow-MNIST-nn.py:
--------------------------------------------------------------------------------
 1 | from tensorflow.examples.tutorials.mnist import input_data
 2 | import tensorflow as tf
 3 | 
 4 | # MNIST数据存放的路径
 5 | file = "../MNIST"
 6 | 
 7 | # 导入数据，首先检测file路径下是否存在数据集，若不存在，则到网上下载.
 8 | # MNIST下载地址：http://yann.lecun.com/exdb/mnist/
 9 | # 注意：下载后需要解压
10 | mnist = input_data.read_data_sets(file, one_hot=True)#读取数据集，标签数据设置为one-hot格式。即n维标签中只有一个数据为1，其余为0
11 | 
12 | # 模型的输入和输出
13 | # 为模型的输入输出申请占位符，作为外部数据与网络模型的交互接口
14 | # 784=28*28
15 | x  = tf.placeholder(tf.float32, shape={0})  #申请占位符 输入图像 N*784的矩阵 [None, 784]
16 | y_ = tf.placeholder(tf.float32, shape={0})  #申请占位符 输入label N*10的矩阵[None, 10]
17 | 
18 | # 将tensor图中的输入和变量进行计算  通过tf.layers.dense搭建全连接网络层，并为该层设置对应的输入、神经元个数、激活函数
19 | # 通过units设置神经元的个数，通过activation设置激活函数，可设定的激活函数，请参考https://tensorflow.google.cn/api_docs/python/tf/nn/softmax
20 | 
21 | A1 = tf.layers.dense(inputs=x, units={0},activation=tf.nn.{0})  #{0}为待补充, 添加全连接层，神经元个数为16个，激活函数为sigmoid、tanh或relu
22 | A2 = tf.layers.dense(inputs=A1,units={0},activation=tf.nn.{0})  #{0}为待补充，添加全连接层，神经元个数为16个，激活函数为sigmoid、tanh或relu
23 | y  = tf.layers.dense(inputs=A2,units=10, activation=tf.nn.{0})  #{0}为待补充，添加全连接层，设置激活函数为sigmoid或softmax，由于输出类别是10，所以输出层神经元个数为10
24 | 
25 | # 交叉熵 用来度量y_与y之间的差异性
26 | # y_表示样本的标签 one-hot形式 ; y表示tensor流图计算出的值，即预测值
27 | cross_entropy = -tf.reduce_sum(y_*tf.log(y))#对损失求和
28 | 
29 | # 训练 利用梯度下降法，以0.01的学习率最小化目标函数（cross_entropy）
30 | train_step = tf.train.GradientDescentOptimizer({0}).minimize({0}) #设置随机梯度下降的学习率为0.01，最小化目标函数为cross_entropy
31 | 
32 | # 创建Session，用于启动tensor图
33 | sess = tf.InteractiveSession()
34 | 
35 | # 调用global_variables_initializer函数，将前面定义的Variable变量按照设置的初始化方式，进行初始化
36 | sess.run({0})  #执行tf.global_variables_initializer()，初始化模型参数
37 | 
38 | #循环训练，设置迭代次数为10000
39 | for i in range({0}):
40 |     #选取mnist训练数据集，设置minibatchsize为50，即选取样本集中的50个样本
41 |     batch = mnist.train.next_batch({0})
42 |     #启动tensor流图，并执行训练，输入数据为图像（batch[0]）和对应的标签（batch[1]）
43 |     train_step.run(feed_dict={x: batch[0], y_: batch[1]})
44 | 
45 | ################################### 测试  ###################################
46 | # 计算模型预测结果与标签中相等的部分
47 | # 调用tf.equal计算模型预测结果y与标签结果y_的差异，预测正确则返回1，预测错误则返回0；
48 | # tf.argmax(y, 1)为计算y中每行数据最大值的索引;
49 | correct_prediction = tf.equal(tf.argmax(y, 1), {0})
50 | 
51 | # 根据correct_prediction计算模型预测精度
52 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
53 | 
54 | # 启动tensor流图，计算模型预测精度，模型输入数据为train/test的图像和对应标签
55 | print(sess.run(accuracy, feed_dict={x: mnist.train.images, y_:mnist.train.labels}))#计算模型在训练集上的准确率
56 | print(sess.run(accuracy, feed_dict={x: mnist.test.images,  y_:mnist.test.labels}))#计算模型在测试集上的准确率
57 | 
58 | # 结果输出
59 | logFileName = "logText.txt"
60 | logFile = open(logFileName, "w")
61 | logFile.write(str(sess.run(accuracy, feed_dict={x: mnist.train.images, y_:mnist.train.labels})))
62 | logFile.write("\n")
63 | logFile.write(str(sess.run(accuracy, feed_dict={x: mnist.test.images,  y_:mnist.test.labels})))
64 | logFile.close()
65 | 


--------------------------------------------------------------------------------
/homework-03/作业说明.txt:
--------------------------------------------------------------------------------
1 | 作业说明：
2 | 本次作业分为三个部分，以下是一些注意事项：
3 | 1. 作业形式依旧是程序填空的形式，需要填写的内容在文中用 {0} 代替，其余部分不需要修改，如果出现错误，在微信群里说明。
4 | 2. 作业工作过程在本文件夹中进行，作业提交时，将MINST挪出文件夹，其余部分压缩为一个压缩包，提交压缩包。
5 | 


--------------------------------------------------------------------------------
/homework-03/马士尧 homework 03.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/马士尧 homework 03.pdf


--------------------------------------------------------------------------------
/homework-04/requirement.md:
--------------------------------------------------------------------------------
1 | # 姓名.ipynb命名上传代码
2 | 
3 | # 上传时间下周五前
4 | 


--------------------------------------------------------------------------------
/homework-04/余欣灿.ipynb:
--------------------------------------------------------------------------------
 1 | import numpy as np 
 2 | import pandas as pd 
 3 | dataset = pd.read_csv('Data.csv') 
 4 | X = dataset.iloc[ : , :-1].values 
 5 | Y = dataset.iloc[ : , 3].values 
 6 | from sklearn.preprocessing import Imputer 
 7 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 
 8 | imputer = imputer.fit(X[ : , 1:3]) 
 9 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 
10 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 
11 | labelencoder_X = LabelEncoder() 
12 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 
13 | onehotencoder = OneHotEncoder(categorical_features = [0]) 
14 | X = onehotencoder.fit_transform(X).toarray() 
15 | labelencoder_Y = LabelEncoder() 
16 | Y =  labelencoder_Y.fit_transform(Y) 
17 | from sklearn.cross_validation import train_test_split 
18 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 
19 | from sklearn.preprocessing import StandardScaler 
20 | sc_X = StandardScaler() 
21 | X_train = sc_X.fit_transform(X_train) 
22 | X_test = sc_X.fit_transform(X_test) 
23 | 


--------------------------------------------------------------------------------
/homework-04/刘广升.ipynb:
--------------------------------------------------------------------------------
 1 | #Day 1: Data Prepocessing
 2 | 
 3 | #Step 1: Importing the libraries
 4 | import numpy as np
 5 | import pandas as pd
 6 | 
 7 | #Step 2: Importing dataset
 8 | dataset = pd.read_csv('../datasets/Data.csv')
 9 | X = dataset.iloc[ : , :-1].values
10 | Y = dataset.iloc[ : , 3].values
11 | print("Step 2: Importing dataset")
12 | print("X")
13 | print(X)
14 | print("Y")
15 | print(Y)
16 | 
17 | #Step 3: Handling the missing data
18 | from sklearn.preprocessing import Imputer
19 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
20 | imputer = imputer.fit(X[ : , 1:3])
21 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
22 | print("---------------------")
23 | print("Step 3: Handling the missing data")
24 | print("step2")
25 | print("X")
26 | print(X)
27 | 
28 | #Step 4: Encoding categorical data
29 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
30 | labelencoder_X = LabelEncoder()
31 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
32 | #Creating a dummy variable
33 | onehotencoder = OneHotEncoder(categorical_features = [0])
34 | X = onehotencoder.fit_transform(X).toarray()
35 | labelencoder_Y = LabelEncoder()
36 | Y =  labelencoder_Y.fit_transform(Y)
37 | print("---------------------")
38 | print("Step 4: Encoding categorical data")
39 | print("X")
40 | print(X)
41 | print("Y")
42 | print(Y)
43 | 
44 | #Step 5: Splitting the datasets into training sets and Test sets
45 | from sklearn.model_selection import train_test_split
46 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
47 | print("---------------------")
48 | print("Step 5: Splitting the datasets into training sets and Test sets")
49 | print("X_train")
50 | print(X_train)
51 | print("X_test")
52 | print(X_test)
53 | print("Y_train")
54 | print(Y_train)
55 | print("Y_test")
56 | print(Y_test)
57 | 
58 | #Step 6: Feature Scaling
59 | from sklearn.preprocessing import StandardScaler
60 | sc_X = StandardScaler()
61 | X_train = sc_X.fit_transform(X_train)
62 | X_test = sc_X.transform(X_test)
63 | print("---------------------")
64 | print("Step 6: Feature Scaling")
65 | print("X_train")
66 | print(X_train)
67 | print("X_test")
68 | print(X_test)
69 | 


--------------------------------------------------------------------------------
/homework-04/史一阳.ipynb:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as nd
 3 | dataset = pd.read_csv(' Data.csv ') //读取csv文件
 4 | X = dataset.iloc[ : , :-1].values//.iloc[行，列]
 5 | Y = dataset.iloc [: , 3 ] .values  //: 全部行 or 列；[a]第a行 or 列
 6 |                                  // [a,b,c]第 a,b,c 行 or 列
 7 | from sklearn.preprocessing import Imputer
 8 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
 9 | imputer = imputer.fit(X[ : , 1:3])
10 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
11 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
12 | labelencoder_X = LabelEncoder()
13 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
14 | onehotencoder = OneHotEncoder(categorical_features = [0])
15 | X = onehotencoder.fit_transform(X).toarray()
16 | labelencoder_Y = LabelEncoder()
17 | Y =  labelencoder_Y.fit_transform(Y)
18 | from sklearn.model_selection import train_test_split
19 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
20 | from sklearn.preprocessing import StandardScaler
21 | sc_X = StandardScaler()
22 | X_train = sc_X.fit_transform(X_train)
23 | X_test = sc_X.transform(X_test)
24 | 
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/homework-04/吴洁茹.ipynb:
--------------------------------------------------------------------------------
 1 | #Day 1: Data Prepocessing
 2 | 
 3 | #Step 1: Importing the libraries
 4 | import numpy as np
 5 | import pandas as pd
 6 | 
 7 | #Step 2: Importing dataset
 8 | dataset = pd.read_csv('../datasets/Data.csv')
 9 | X = dataset.iloc[ : , :-1].values
10 | Y = dataset.iloc[ : , 3].values
11 | print("Step 2: Importing dataset")
12 | print("X")
13 | print(X)
14 | print("Y")
15 | print(Y)
16 | 
17 | #Step 3: Handling the missing data
18 | from sklearn.preprocessing import Imputer
19 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
20 | imputer = imputer.fit(X[ : , 1:3])
21 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
22 | print("---------------------")
23 | print("Step 3: Handling the missing data")
24 | print("step2")
25 | print("X")
26 | print(X)
27 | 
28 | #Step 4: Encoding categorical data
29 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
30 | labelencoder_X = LabelEncoder()
31 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
32 | #Creating a dummy variable
33 | onehotencoder = OneHotEncoder(categorical_features = [0])
34 | X = onehotencoder.fit_transform(X).toarray()
35 | labelencoder_Y = LabelEncoder()
36 | Y =  labelencoder_Y.fit_transform(Y)
37 | print("---------------------")
38 | print("Step 4: Encoding categorical data")
39 | print("X")
40 | print(X)
41 | print("Y")
42 | print(Y)
43 | 
44 | #Step 5: Splitting the datasets into training sets and Test sets
45 | from sklearn.model_selection import train_test_split
46 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
47 | print("---------------------")
48 | print("Step 5: Splitting the datasets into training sets and Test sets")
49 | print("X_train")
50 | print(X_train)
51 | print("X_test")
52 | print(X_test)
53 | print("Y_train")
54 | print(Y_train)
55 | print("Y_test")
56 | print(Y_test)
57 | 
58 | #Step 6: Feature Scaling
59 | from sklearn.preprocessing import StandardScaler
60 | sc_X = StandardScaler()
61 | X_train = sc_X.fit_transform(X_train)
62 | X_test = sc_X.transform(X_test)
63 | print("---------------------")
64 | print("Step 6: Feature Scaling")
65 | print("X_train")
66 | print(X_train)
67 | print("X_test")
68 | print(X_test)
69 | 


--------------------------------------------------------------------------------
/homework-04/吴玉隆.ipynb:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Sun Mar 24 22:30:52 2019
 4 | 
 5 | @author: 15188
 6 | """
 7 | import numpy as np
 8 | import pandas as pd
 9 | dataset = pd.read_csv('Data.csv')//读取csv文件
10 | X = dataset.iloc[ : , :-1].values//.iloc[行，列]
11 | Y = dataset.iloc[ : , 3].values  // : 全部行 or 列；[a]第a行 or 列
12 |                                  // [a,b,c]第 a,b,c 行 or 列
13 | from sklearn.preprocessing import Imputer
14 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
15 | imputer = imputer.fit(X[ : , 1:3])
16 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
18 | labelencoder_X = LabelEncoder()
19 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
20 | onehotencoder = OneHotEncoder(categorical_features = [0])
21 | X = onehotencoder.fit_transform(X).toarray()
22 | labelencoder_Y = LabelEncoder()
23 | Y =  labelencoder_Y.fit_transform(Y)
24 | from sklearn.model_selection import train_test_split
25 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
26 | from sklearn.preprocessing import StandardScaler
27 | sc_X = StandardScaler()
28 | X_train = sc_X.fit_transform(X_train)
29 | X_test = sc_X.transform(X_test)
30 | 


--------------------------------------------------------------------------------
/homework-04/周小梅.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "\n",
 10 |     "#1: 导入类库\n",
 11 |     "\n",
 12 |     "import numpy as np\n",
 13 |     "import pandas as pd"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 5,
 19 |    "metadata": {},
 20 |    "outputs": [
 21 |     {
 22 |      "name": "stdout",
 23 |      "output_type": "stream",
 24 |      "text": [
 25 |       "Step 2: Importing dataset\n",
 26 |       "X\n",
 27 |       "[['France' 44.0 72000.0]\n",
 28 |       " ['Spain' 27.0 48000.0]\n",
 29 |       " ['Germany' 30.0 54000.0]\n",
 30 |       " ['Spain' 38.0 61000.0]\n",
 31 |       " ['Germany' 40.0 nan]\n",
 32 |       " ['France' 35.0 58000.0]\n",
 33 |       " ['Spain' nan 52000.0]\n",
 34 |       " ['France' 48.0 79000.0]\n",
 35 |       " ['Germany' 50.0 83000.0]\n",
 36 |       " ['France' 37.0 67000.0]]\n",
 37 |       "Y\n",
 38 |       "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
 39 |      ]
 40 |     }
 41 |    ],
 42 |    "source": [
 43 |     "#2：导入数据集\n",
 44 |     "dataset = pd.read_csv('D:\\Data.csv')\n",
 45 |     "# 不包括最后一列的所有列\n",
 46 |     "X = dataset.iloc[ : , :-1].values\n",
 47 |     "#取最后一列\n",
 48 |     "Y = dataset.iloc[ : , 3].values\n",
 49 |     "print(\"Step 2: Importing dataset\")\n",
 50 |     "print(\"X\")\n",
 51 |     "print(X)\n",
 52 |     "print(\"Y\")\n",
 53 |     "print(Y)"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 6,
 59 |    "metadata": {},
 60 |    "outputs": [
 61 |     {
 62 |      "name": "stdout",
 63 |      "output_type": "stream",
 64 |      "text": [
 65 |       "---------------------\n",
 66 |       "Step 3: Handling the missing data\n",
 67 |       "step2\n",
 68 |       "X\n",
 69 |       "[['France' 44.0 72000.0]\n",
 70 |       " ['Spain' 27.0 48000.0]\n",
 71 |       " ['Germany' 30.0 54000.0]\n",
 72 |       " ['Spain' 38.0 61000.0]\n",
 73 |       " ['Germany' 40.0 63777.77777777778]\n",
 74 |       " ['France' 35.0 58000.0]\n",
 75 |       " ['Spain' 38.77777777777778 52000.0]\n",
 76 |       " ['France' 48.0 79000.0]\n",
 77 |       " ['Germany' 50.0 83000.0]\n",
 78 |       " ['France' 37.0 67000.0]]\n"
 79 |      ]
 80 |     },
 81 |     {
 82 |      "name": "stderr",
 83 |      "output_type": "stream",
 84 |      "text": [
 85 |       "C:\\Users\\lenovo\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n",
 86 |       "  warnings.warn(msg, category=DeprecationWarning)\n"
 87 |      ]
 88 |     }
 89 |    ],
 90 |    "source": [
 91 |     "#3: 处理缺失的数据\n",
 92 |     "from sklearn.preprocessing import Imputer\n",
 93 |     "# axis=0表示按列进行\n",
 94 |     "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
 95 |     "imputer = imputer.fit(X[ : , 1:3])\n",
 96 |     "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
 97 |     "print(\"---------------------\")\n",
 98 |     "print(\"Step 3: Handling the missing data\")\n",
 99 |     "print(\"step2\")\n",
100 |     "print(\"X\")\n",
101 |     "print(X)"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "code",
106 |    "execution_count": 7,
107 |    "metadata": {},
108 |    "outputs": [
109 |     {
110 |      "name": "stdout",
111 |      "output_type": "stream",
112 |      "text": [
113 |       "---------------------\n",
114 |       "Step 4: Encoding categorical data\n",
115 |       "X\n",
116 |       "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
117 |       "  7.20000000e+04]\n",
118 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
119 |       "  4.80000000e+04]\n",
120 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n",
121 |       "  5.40000000e+04]\n",
122 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
123 |       "  6.10000000e+04]\n",
124 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
125 |       "  6.37777778e+04]\n",
126 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
127 |       "  5.80000000e+04]\n",
128 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
129 |       "  5.20000000e+04]\n",
130 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
131 |       "  7.90000000e+04]\n",
132 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n",
133 |       "  8.30000000e+04]\n",
134 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
135 |       "  6.70000000e+04]]\n",
136 |       "Y\n",
137 |       "[0 1 0 0 1 1 0 1 0 1]\n"
138 |      ]
139 |     },
140 |     {
141 |      "name": "stderr",
142 |      "output_type": "stream",
143 |      "text": [
144 |       "C:\\Users\\lenovo\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:368: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
145 |       "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
146 |       "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
147 |       "  warnings.warn(msg, FutureWarning)\n",
148 |       "C:\\Users\\lenovo\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:390: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n",
149 |       "  \"use the ColumnTransformer instead.\", DeprecationWarning)\n"
150 |      ]
151 |     }
152 |    ],
153 |    "source": [
154 |     "from sklearn.preprocessing import LabelEncoder,OneHotEncoder\n",
155 |     "labelencoder_X = LabelEncoder()\n",
156 |     "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
157 |     "#Creating a dummy variable\n",
158 |     "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
159 |     "X = onehotencoder.fit_transform(X).toarray()\n",
160 |     "labelencoder_Y = LabelEncoder()\n",
161 |     "Y =  labelencoder_Y.fit_transform(Y)\n",
162 |     "print(\"---------------------\")\n",
163 |     "print(\"Step 4: Encoding categorical data\")\n",
164 |     "print(\"X\")\n",
165 |     "print(X)\n",
166 |     "print(\"Y\")\n",
167 |     "print(Y)"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 8,
173 |    "metadata": {},
174 |    "outputs": [
175 |     {
176 |      "name": "stdout",
177 |      "output_type": "stream",
178 |      "text": [
179 |       "---------------------\n",
180 |       "Step 5: Splitting the datasets into training sets and Test sets\n",
181 |       "X_train\n",
182 |       "[[0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
183 |       "  6.37777778e+04]\n",
184 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
185 |       "  6.70000000e+04]\n",
186 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
187 |       "  4.80000000e+04]\n",
188 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
189 |       "  5.20000000e+04]\n",
190 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
191 |       "  7.90000000e+04]\n",
192 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
193 |       "  6.10000000e+04]\n",
194 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
195 |       "  7.20000000e+04]\n",
196 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
197 |       "  5.80000000e+04]]\n",
198 |       "X_test\n",
199 |       "[[0.0e+00 1.0e+00 0.0e+00 3.0e+01 5.4e+04]\n",
200 |       " [0.0e+00 1.0e+00 0.0e+00 5.0e+01 8.3e+04]]\n",
201 |       "Y_train\n",
202 |       "[1 1 1 0 1 0 0 1]\n",
203 |       "Y_test\n",
204 |       "[0 0]\n"
205 |      ]
206 |     }
207 |    ],
208 |    "source": [
209 |     "#Step 5: 切分数据集成训练数据和测试数据\n",
210 |     "from sklearn.model_selection import train_test_split\n",
211 |     "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n",
212 |     "print(\"---------------------\")\n",
213 |     "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n",
214 |     "print(\"X_train\")\n",
215 |     "print(X_train)\n",
216 |     "print(\"X_test\")\n",
217 |     "print(X_test)\n",
218 |     "print(\"Y_train\")\n",
219 |     "print(Y_train)\n",
220 |     "print(\"Y_test\")\n",
221 |     "print(Y_test)"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "code",
226 |    "execution_count": 9,
227 |    "metadata": {},
228 |    "outputs": [
229 |     {
230 |      "name": "stdout",
231 |      "output_type": "stream",
232 |      "text": [
233 |       "---------------------\n",
234 |       "Step 6: Feature Scaling\n",
235 |       "X_train\n",
236 |       "[[-1.          2.64575131 -0.77459667  0.26306757  0.12381479]\n",
237 |       " [ 1.         -0.37796447 -0.77459667 -0.25350148  0.46175632]\n",
238 |       " [-1.         -0.37796447  1.29099445 -1.97539832 -1.53093341]\n",
239 |       " [-1.         -0.37796447  1.29099445  0.05261351 -1.11141978]\n",
240 |       " [ 1.         -0.37796447 -0.77459667  1.64058505  1.7202972 ]\n",
241 |       " [-1.         -0.37796447  1.29099445 -0.0813118  -0.16751412]\n",
242 |       " [ 1.         -0.37796447 -0.77459667  0.95182631  0.98614835]\n",
243 |       " [ 1.         -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n",
244 |       "X_test\n",
245 |       "[[-1.          2.64575131 -0.77459667 -1.45882927 -0.90166297]\n",
246 |       " [-1.          2.64575131 -0.77459667  1.98496442  2.13981082]]\n"
247 |      ]
248 |     }
249 |    ],
250 |    "source": [
251 |     "#Step 6: 特征缩放\n",
252 |     "from sklearn.preprocessing import StandardScaler\n",
253 |     "sc_X = StandardScaler()\n",
254 |     "X_train = sc_X.fit_transform(X_train)\n",
255 |     "X_test = sc_X.transform(X_test)\n",
256 |     "print(\"---------------------\")\n",
257 |     "print(\"Step 6: Feature Scaling\")\n",
258 |     "print(\"X_train\")\n",
259 |     "print(X_train)\n",
260 |     "print(\"X_test\")\n",
261 |     "print(X_test)"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "code",
266 |    "execution_count": null,
267 |    "metadata": {},
268 |    "outputs": [],
269 |    "source": []
270 |   }
271 |  ],
272 |  "metadata": {
273 |   "kernelspec": {
274 |    "display_name": "Python 3",
275 |    "language": "python",
276 |    "name": "python3"
277 |   },
278 |   "language_info": {
279 |    "codemirror_mode": {
280 |     "name": "ipython",
281 |     "version": 3
282 |    },
283 |    "file_extension": ".py",
284 |    "mimetype": "text/x-python",
285 |    "name": "python",
286 |    "nbconvert_exporter": "python",
287 |    "pygments_lexer": "ipython3",
288 |    "version": "3.7.1"
289 |   },
290 |   "widgets": {
291 |    "application/vnd.jupyter.widget-state+json": {
292 |     "state": {},
293 |     "version_major": 2,
294 |     "version_minor": 0
295 |    }
296 |   }
297 |  },
298 |  "nbformat": 4,
299 |  "nbformat_minor": 2
300 | }
301 | 


--------------------------------------------------------------------------------
/homework-04/姚宇.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 32,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "#1: 导入类库\n",
 10 |     "\n",
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 33,
 18 |    "metadata": {},
 19 |    "outputs": [
 20 |     {
 21 |      "name": "stdout",
 22 |      "output_type": "stream",
 23 |      "text": [
 24 |       "Step 2: Importing dataset\n",
 25 |       "X\n",
 26 |       "[['France' 44.0 72000.0]\n",
 27 |       " ['Spain' 27.0 48000.0]\n",
 28 |       " ['Germany' 30.0 54000.0]\n",
 29 |       " ['Spain' 38.0 61000.0]\n",
 30 |       " ['Germany' 40.0 nan]\n",
 31 |       " ['France' 35.0 58000.0]\n",
 32 |       " ['Spain' nan 52000.0]\n",
 33 |       " ['France' 48.0 79000.0]\n",
 34 |       " ['Germany' 50.0 83000.0]\n",
 35 |       " ['France' 37.0 67000.0]]\n",
 36 |       "Y\n",
 37 |       "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
 38 |      ]
 39 |     }
 40 |    ],
 41 |    "source": [
 42 |     "dataset = pd.read_csv('Data.csv')\n",
 43 |     "# 不包括最后一列的所有列\n",
 44 |     "X = dataset.iloc[ : , :-1].values\n",
 45 |     "#取最后一列\n",
 46 |     "Y = dataset.iloc[ : , 3].values\n",
 47 |     "print(\"Step 2: Importing dataset\")\n",
 48 |     "print(\"X\")\n",
 49 |     "print(X)\n",
 50 |     "print(\"Y\")\n",
 51 |     "print(Y)"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 34,
 57 |    "metadata": {},
 58 |    "outputs": [
 59 |     {
 60 |      "name": "stdout",
 61 |      "output_type": "stream",
 62 |      "text": [
 63 |       "---------------------\n",
 64 |       "Step 3: Handling the missing data\n",
 65 |       "step2\n",
 66 |       "X\n",
 67 |       "[['France' 44.0 72000.0]\n",
 68 |       " ['Spain' 27.0 48000.0]\n",
 69 |       " ['Germany' 30.0 54000.0]\n",
 70 |       " ['Spain' 38.0 61000.0]\n",
 71 |       " ['Germany' 40.0 63777.77777777778]\n",
 72 |       " ['France' 35.0 58000.0]\n",
 73 |       " ['Spain' 38.77777777777778 52000.0]\n",
 74 |       " ['France' 48.0 79000.0]\n",
 75 |       " ['Germany' 50.0 83000.0]\n",
 76 |       " ['France' 37.0 67000.0]]\n"
 77 |      ]
 78 |     },
 79 |     {
 80 |      "name": "stderr",
 81 |      "output_type": "stream",
 82 |      "text": [
 83 |       "D:\\Anaconda3\\envs\\yaoyu\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n",
 84 |       "  warnings.warn(msg, category=DeprecationWarning)\n"
 85 |      ]
 86 |     }
 87 |    ],
 88 |    "source": [
 89 |     "#3: 处理缺失的数据\n",
 90 |     "from sklearn.preprocessing import Imputer\n",
 91 |     "# axis=0表示按列进行\n",
 92 |     "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
 93 |     "imputer = imputer.fit(X[ : , 1:3])\n",
 94 |     "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
 95 |     "print(\"---------------------\")\n",
 96 |     "print(\"Step 3: Handling the missing data\")\n",
 97 |     "print(\"step2\")\n",
 98 |     "print(\"X\")\n",
 99 |     "print(X)"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "code",
104 |    "execution_count": 35,
105 |    "metadata": {},
106 |    "outputs": [
107 |     {
108 |      "name": "stdout",
109 |      "output_type": "stream",
110 |      "text": [
111 |       "---------------------\n",
112 |       "Step 4: Encoding categorical data\n",
113 |       "X\n",
114 |       "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
115 |       "  7.20000000e+04]\n",
116 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
117 |       "  4.80000000e+04]\n",
118 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n",
119 |       "  5.40000000e+04]\n",
120 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
121 |       "  6.10000000e+04]\n",
122 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
123 |       "  6.37777778e+04]\n",
124 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
125 |       "  5.80000000e+04]\n",
126 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
127 |       "  5.20000000e+04]\n",
128 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
129 |       "  7.90000000e+04]\n",
130 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n",
131 |       "  8.30000000e+04]\n",
132 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
133 |       "  6.70000000e+04]]\n",
134 |       "Y\n",
135 |       "[0 1 0 0 1 1 0 1 0 1]\n"
136 |      ]
137 |     },
138 |     {
139 |      "name": "stderr",
140 |      "output_type": "stream",
141 |      "text": [
142 |       "D:\\Anaconda3\\envs\\yaoyu\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:371: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
143 |       "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
144 |       "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
145 |       "  warnings.warn(msg, FutureWarning)\n",
146 |       "D:\\Anaconda3\\envs\\yaoyu\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:392: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n",
147 |       "  \"use the ColumnTransformer instead.\", DeprecationWarning)\n"
148 |      ]
149 |     }
150 |    ],
151 |    "source": [
152 |     "from sklearn.preprocessing import LabelEncoder,OneHotEncoder\n",
153 |     "labelencoder_X = LabelEncoder()\n",
154 |     "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
155 |     "#Creating a dummy variable\n",
156 |     "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
157 |     "X = onehotencoder.fit_transform(X).toarray()\n",
158 |     "labelencoder_Y = LabelEncoder()\n",
159 |     "Y =  labelencoder_Y.fit_transform(Y)\n",
160 |     "print(\"---------------------\")\n",
161 |     "print(\"Step 4: Encoding categorical data\")\n",
162 |     "print(\"X\")\n",
163 |     "print(X)\n",
164 |     "print(\"Y\")\n",
165 |     "print(Y)"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": 36,
171 |    "metadata": {},
172 |    "outputs": [
173 |     {
174 |      "name": "stdout",
175 |      "output_type": "stream",
176 |      "text": [
177 |       "---------------------\n",
178 |       "Step 5: Splitting the datasets into training sets and Test sets\n",
179 |       "X_train\n",
180 |       "[[0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
181 |       "  6.37777778e+04]\n",
182 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
183 |       "  6.70000000e+04]\n",
184 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
185 |       "  4.80000000e+04]\n",
186 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
187 |       "  5.20000000e+04]\n",
188 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
189 |       "  7.90000000e+04]\n",
190 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
191 |       "  6.10000000e+04]\n",
192 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
193 |       "  7.20000000e+04]\n",
194 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
195 |       "  5.80000000e+04]]\n",
196 |       "X_test\n",
197 |       "[[0.0e+00 1.0e+00 0.0e+00 3.0e+01 5.4e+04]\n",
198 |       " [0.0e+00 1.0e+00 0.0e+00 5.0e+01 8.3e+04]]\n",
199 |       "Y_train\n",
200 |       "[1 1 1 0 1 0 0 1]\n",
201 |       "Y_test\n",
202 |       "[0 0]\n"
203 |      ]
204 |     }
205 |    ],
206 |    "source": [
207 |     "#Step 5: 切分数据集成训练数据和测试数据\n",
208 |     "from sklearn.model_selection import train_test_split\n",
209 |     "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n",
210 |     "print(\"---------------------\")\n",
211 |     "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n",
212 |     "print(\"X_train\")\n",
213 |     "print(X_train)\n",
214 |     "print(\"X_test\")\n",
215 |     "print(X_test)\n",
216 |     "print(\"Y_train\")\n",
217 |     "print(Y_train)\n",
218 |     "print(\"Y_test\")\n",
219 |     "print(Y_test)"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "code",
224 |    "execution_count": 37,
225 |    "metadata": {},
226 |    "outputs": [
227 |     {
228 |      "name": "stdout",
229 |      "output_type": "stream",
230 |      "text": [
231 |       "---------------------\n",
232 |       "Step 6: Feature Scaling\n",
233 |       "X_train\n",
234 |       "[[-1.          2.64575131 -0.77459667  0.26306757  0.12381479]\n",
235 |       " [ 1.         -0.37796447 -0.77459667 -0.25350148  0.46175632]\n",
236 |       " [-1.         -0.37796447  1.29099445 -1.97539832 -1.53093341]\n",
237 |       " [-1.         -0.37796447  1.29099445  0.05261351 -1.11141978]\n",
238 |       " [ 1.         -0.37796447 -0.77459667  1.64058505  1.7202972 ]\n",
239 |       " [-1.         -0.37796447  1.29099445 -0.0813118  -0.16751412]\n",
240 |       " [ 1.         -0.37796447 -0.77459667  0.95182631  0.98614835]\n",
241 |       " [ 1.         -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n",
242 |       "X_test\n",
243 |       "[[-1.          2.64575131 -0.77459667 -1.45882927 -0.90166297]\n",
244 |       " [-1.          2.64575131 -0.77459667  1.98496442  2.13981082]]\n"
245 |      ]
246 |     }
247 |    ],
248 |    "source": [
249 |     "#Step 6: 特征缩放\n",
250 |     "from sklearn.preprocessing import StandardScaler\n",
251 |     "sc_X = StandardScaler()\n",
252 |     "X_train = sc_X.fit_transform(X_train)\n",
253 |     "X_test = sc_X.transform(X_test)\n",
254 |     "print(\"---------------------\")\n",
255 |     "print(\"Step 6: Feature Scaling\")\n",
256 |     "print(\"X_train\")\n",
257 |     "print(X_train)\n",
258 |     "print(\"X_test\")\n",
259 |     "print(X_test)"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "code",
264 |    "execution_count": null,
265 |    "metadata": {},
266 |    "outputs": [],
267 |    "source": []
268 |   },
269 |   {
270 |    "cell_type": "code",
271 |    "execution_count": null,
272 |    "metadata": {},
273 |    "outputs": [],
274 |    "source": []
275 |   }
276 |  ],
277 |  "metadata": {
278 |   "kernelspec": {
279 |    "display_name": "Python 3",
280 |    "language": "python",
281 |    "name": "python3"
282 |   },
283 |   "language_info": {
284 |    "codemirror_mode": {
285 |     "name": "ipython",
286 |     "version": 3
287 |    },
288 |    "file_extension": ".py",
289 |    "mimetype": "text/x-python",
290 |    "name": "python",
291 |    "nbconvert_exporter": "python",
292 |    "pygments_lexer": "ipython3",
293 |    "version": "3.6.8"
294 |   }
295 |  },
296 |  "nbformat": 4,
297 |  "nbformat_minor": 2
298 | }
299 | 


--------------------------------------------------------------------------------
/homework-04/张博.ipynb:
--------------------------------------------------------------------------------
 1 | #Day 1: Data Prepocessing
 2 | 
 3 | #Step 1: Importing the libraries
 4 | import numpy as np
 5 | import pandas as pd
 6 | 
 7 | #Step 2: Importing dataset
 8 | dataset = pd.read_csv('D:\datasets\Data.csv')
 9 | X = dataset.iloc[ : , :-1].values
10 | Y = dataset.iloc[ : , 3].values
11 | print("Step 2: Importing dataset")
12 | print("X")
13 | print(X)
14 | print("Y")
15 | print(Y)
16 | 
17 | #Step 3: Handling the missing data
18 | from sklearn.preprocessing import Imputer
19 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
20 | imputer = imputer.fit(X[ : , 1:3])
21 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
22 | print("---------------------")
23 | print("Step 3: Handling the missing data")
24 | print("step2")
25 | print("X")
26 | print(X)
27 | 
28 | #Step 4: Encoding categorical data
29 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
30 | labelencoder_X = LabelEncoder()
31 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
32 | #Creating a dummy variable
33 | onehotencoder = OneHotEncoder(categorical_features = [0])
34 | X = onehotencoder.fit_transform(X).toarray()
35 | labelencoder_Y = LabelEncoder()
36 | Y =  labelencoder_Y.fit_transform(Y)
37 | print("---------------------")
38 | print("Step 4: Encoding categorical data")
39 | print("X")
40 | print(X)
41 | print("Y")
42 | print(Y)
43 | 
44 | #Step 5: Splitting the datasets into training sets and Test sets
45 | from sklearn.model_selection import train_test_split
46 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
47 | print("---------------------")
48 | print("Step 5: Splitting the datasets into training sets and Test sets")
49 | print("X_train")
50 | print(X_train)
51 | print("X_test")
52 | print(X_test)
53 | print("Y_train")
54 | print(Y_train)
55 | print("Y_test")
56 | print(Y_test)
57 | 
58 | #Step 6: Feature Scaling
59 | from sklearn.preprocessing import StandardScaler
60 | sc_X = StandardScaler()
61 | X_train = sc_X.fit_transform(X_train)
62 | X_test = sc_X.transform(X_test)
63 | print("---------------------")
64 | print("Step 6: Feature Scaling")
65 | print("X_train")
66 | print(X_train)
67 | print("X_test")
68 | print(X_test)
69 | 


--------------------------------------------------------------------------------
/homework-04/张晏铭.ipynb:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Sun Mar 24 20:53:21 2019
 4 | 
 5 | @author: 38653
 6 | """
 7 | 
 8 | import numpy as np
 9 | import pandas as pd
10 | dataset = pd.read_csv('Data.csv')//读取csv文件
11 | X = dataset.iloc[ : , :-1].values//.iloc[行，列]
12 | Y = dataset.iloc[ : , 3].values  // : 全部行 or 列；[a]第a行 or 列
13 |                                  // [a,b,c]第 a,b,c 行 or 列
14 | from sklearn.preprocessing import Imputer
15 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
16 | imputer = imputer.fit(X[ : , 1:3])
17 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
18 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
19 | labelencoder_X = LabelEncoder()
20 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
21 | onehotencoder = OneHotEncoder(categorical_features = [0])
22 | X = onehotencoder.fit_transform(X).toarray()
23 | labelencoder_Y = LabelEncoder()
24 | Y =  labelencoder_Y.fit_transform(Y)
25 | from sklearn.model_selection import train_test_split
26 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
27 | from sklearn.preprocessing import StandardScaler
28 | sc_X = StandardScaler()
29 | X_train = sc_X.fit_transform(X_train)
30 | X_test = sc_X.transform(X_test)


--------------------------------------------------------------------------------
/homework-04/张泷玲.ipynb:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | import pandas as pd
 4 | 
 5 | dataset = pd.read_csv('data.csv')
 6 | X = dataset.iloc[ : , :-1].values
 7 | Y = dataset.iloc[ : , : 3].values
 8 | 
 9 | from sklearn.perprocessing import Imputer
10 | imputer = Imputer(missing_values = "NAN",strategy = "mean", axis = 0)
11 | imputer = Imputer.fit(X[ : , 1 :3])
12 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
13 | 
14 | from sklearn.perprocessing import LabelEncoder , OneHotEncoder
15 | labelencoder_X = LabelEncoder()
16 | X[ : , 0]=labelencoder_X.fit_transform(X[ : , 0])
17 | 
18 | onehotencoder = OneHotEncoder(categorical_features= [0] )
19 | X = onehotencoder.fix_transform(X).toarray()
20 | labelencoder_Y = LabelEncoder()
21 | Y = labelencoder_Y.fit_transform(Y)
22 | 
23 | from sklearn.model_seletion import train_test_split
24 | X_train,X_test,Y_train,Y_test = train_test_split(X , Y ,test_size=0.2,random_state=0)
25 | 
26 | from sklearn.perprocessing import StandardScaler
27 | sc_X = StandardScaler()
28 | X_train = sc_X.fit_transform(X_train)
29 | X_test = sc_X.transform(X_test)
30 | 
31 | 


--------------------------------------------------------------------------------
/homework-04/戴振.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 24,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import numpy as np\n",
 12 |     "import pandas as pd"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 25,
 18 |    "metadata": {},
 19 |    "outputs": [
 20 |     {
 21 |      "name": "stdout",
 22 |      "output_type": "stream",
 23 |      "text": [
 24 |       "Step 2:Importing dataset\n",
 25 |       "X\n",
 26 |       "[['France' 44.0 72000.0]\n",
 27 |       " ['Spain' 27.0 48000.0]\n",
 28 |       " ['Germany' 30.0 54000.0]\n",
 29 |       " ['Spain' 38.0 61000.0]\n",
 30 |       " ['Germany' 40.0 nan]\n",
 31 |       " ['France' 35.0 58000.0]\n",
 32 |       " ['Spain' nan 52000.0]\n",
 33 |       " ['France' 48.0 79000.0]\n",
 34 |       " ['Germany' 50.0 83000.0]\n",
 35 |       " ['France' 37.0 67000.0]]\n",
 36 |       "Y\n",
 37 |       "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
 38 |      ]
 39 |     }
 40 |    ],
 41 |    "source": [
 42 |     "dataset=pd.read_csv('Data.csv')\n",
 43 |     "#iloc方法是对数据行列进行操作，中括号逗号左边是行取全部，使用冒号取左闭右开区间的范围\n",
 44 |     "#逗号右边的冒号右边-1是列取除最后一列的所有列，此处-1可用3替换，即数据只有4列，只取到前三列\n",
 45 |     "X=dataset.iloc[ : , :-1].values \n",
 46 |     "#取第四列数据\n",
 47 |     "Y=dataset.iloc[ : ,3].values\n",
 48 |     "print('Step 2:Importing dataset')\n",
 49 |     "print('X')\n",
 50 |     "print(X)\n",
 51 |     "print('Y')\n",
 52 |     "print(Y)"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 26,
 58 |    "metadata": {},
 59 |    "outputs": [
 60 |     {
 61 |      "name": "stdout",
 62 |      "output_type": "stream",
 63 |      "text": [
 64 |       "****************\n",
 65 |       "Step 3:Handling the missing data\n",
 66 |       "step2\n",
 67 |       "X\n",
 68 |       "[['France' 44.0 72000.0]\n",
 69 |       " ['Spain' 27.0 48000.0]\n",
 70 |       " ['Germany' 30.0 54000.0]\n",
 71 |       " ['Spain' 38.0 61000.0]\n",
 72 |       " ['Germany' 40.0 63777.77777777778]\n",
 73 |       " ['France' 35.0 58000.0]\n",
 74 |       " ['Spain' 38.77777777777778 52000.0]\n",
 75 |       " ['France' 48.0 79000.0]\n",
 76 |       " ['Germany' 50.0 83000.0]\n",
 77 |       " ['France' 37.0 67000.0]]\n"
 78 |      ]
 79 |     }
 80 |    ],
 81 |    "source": [
 82 |     "from sklearn.preprocessing import Imputer\n",
 83 |     "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\",axis=0)\n",
 84 |     "imputer = imputer.fit(X[ : ,1:3])\n",
 85 |     "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
 86 |     "print(\"****************\")\n",
 87 |     "print(\"Step 3:Handling the missing data\")\n",
 88 |     "print(\"step2\")\n",
 89 |     "print(\"X\")\n",
 90 |     "print(X)"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "code",
 95 |    "execution_count": 27,
 96 |    "metadata": {},
 97 |    "outputs": [
 98 |     {
 99 |      "name": "stdout",
100 |      "output_type": "stream",
101 |      "text": [
102 |       "**********************\n",
103 |       "Step 4: Encoding categorical data\n",
104 |       "X\n",
105 |       "[[  1.00000000e+00   0.00000000e+00   0.00000000e+00   4.40000000e+01\n",
106 |       "    7.20000000e+04]\n",
107 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   2.70000000e+01\n",
108 |       "    4.80000000e+04]\n",
109 |       " [  0.00000000e+00   1.00000000e+00   0.00000000e+00   3.00000000e+01\n",
110 |       "    5.40000000e+04]\n",
111 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   3.80000000e+01\n",
112 |       "    6.10000000e+04]\n",
113 |       " [  0.00000000e+00   1.00000000e+00   0.00000000e+00   4.00000000e+01\n",
114 |       "    6.37777778e+04]\n",
115 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   3.50000000e+01\n",
116 |       "    5.80000000e+04]\n",
117 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   3.87777778e+01\n",
118 |       "    5.20000000e+04]\n",
119 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   4.80000000e+01\n",
120 |       "    7.90000000e+04]\n",
121 |       " [  0.00000000e+00   1.00000000e+00   0.00000000e+00   5.00000000e+01\n",
122 |       "    8.30000000e+04]\n",
123 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   3.70000000e+01\n",
124 |       "    6.70000000e+04]]\n",
125 |       "Y\n",
126 |       "[0 1 0 0 1 1 0 1 0 1]\n"
127 |      ]
128 |     }
129 |    ],
130 |    "source": [
131 |     "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
132 |     "labelencoder_X=LabelEncoder()\n",
133 |     "X[ : , 0]=labelencoder_X.fit_transform(X[ : ,0])\n",
134 |     "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
135 |     "X = onehotencoder.fit_transform(X).toarray()\n",
136 |     "labelencoder_Y = LabelEncoder()\n",
137 |     "Y = labelencoder_Y.fit_transform(Y)\n",
138 |     "print(\"**********************\")\n",
139 |     "print(\"Step 4: Encoding categorical data\")\n",
140 |     "print(\"X\")\n",
141 |     "print(X)\n",
142 |     "print(\"Y\")\n",
143 |     "print(Y)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 28,
149 |    "metadata": {},
150 |    "outputs": [
151 |     {
152 |      "name": "stdout",
153 |      "output_type": "stream",
154 |      "text": [
155 |       "---------------------\n",
156 |       "Step 5: Splitting the datasets into training sets and Test sets\n",
157 |       "X_train\n",
158 |       "[[  0.00000000e+00   1.00000000e+00   0.00000000e+00   4.00000000e+01\n",
159 |       "    6.37777778e+04]\n",
160 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   3.70000000e+01\n",
161 |       "    6.70000000e+04]\n",
162 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   2.70000000e+01\n",
163 |       "    4.80000000e+04]\n",
164 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   3.87777778e+01\n",
165 |       "    5.20000000e+04]\n",
166 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   4.80000000e+01\n",
167 |       "    7.90000000e+04]\n",
168 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   3.80000000e+01\n",
169 |       "    6.10000000e+04]\n",
170 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   4.40000000e+01\n",
171 |       "    7.20000000e+04]\n",
172 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   3.50000000e+01\n",
173 |       "    5.80000000e+04]]\n",
174 |       "X_test\n",
175 |       "[[  0.00000000e+00   1.00000000e+00   0.00000000e+00   3.00000000e+01\n",
176 |       "    5.40000000e+04]\n",
177 |       " [  0.00000000e+00   1.00000000e+00   0.00000000e+00   5.00000000e+01\n",
178 |       "    8.30000000e+04]]\n",
179 |       "Y_train\n",
180 |       "[1 1 1 0 1 0 0 1]\n",
181 |       "Y_test\n",
182 |       "[0 0]\n"
183 |      ]
184 |     }
185 |    ],
186 |    "source": [
187 |     "from sklearn.model_selection import train_test_split\n",
188 |     "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n",
189 |     "print(\"---------------------\")\n",
190 |     "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n",
191 |     "print(\"X_train\")\n",
192 |     "print(X_train)\n",
193 |     "print(\"X_test\")\n",
194 |     "print(X_test)\n",
195 |     "print(\"Y_train\")\n",
196 |     "print(Y_train)\n",
197 |     "print(\"Y_test\")\n",
198 |     "print(Y_test)"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": 29,
204 |    "metadata": {},
205 |    "outputs": [
206 |     {
207 |      "name": "stdout",
208 |      "output_type": "stream",
209 |      "text": [
210 |       "---------------------\n",
211 |       "Step 6: Feature Scaling\n",
212 |       "X_train\n",
213 |       "[[-1.          2.64575131 -0.77459667  0.26306757  0.12381479]\n",
214 |       " [ 1.         -0.37796447 -0.77459667 -0.25350148  0.46175632]\n",
215 |       " [-1.         -0.37796447  1.29099445 -1.97539832 -1.53093341]\n",
216 |       " [-1.         -0.37796447  1.29099445  0.05261351 -1.11141978]\n",
217 |       " [ 1.         -0.37796447 -0.77459667  1.64058505  1.7202972 ]\n",
218 |       " [-1.         -0.37796447  1.29099445 -0.0813118  -0.16751412]\n",
219 |       " [ 1.         -0.37796447 -0.77459667  0.95182631  0.98614835]\n",
220 |       " [ 1.         -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n",
221 |       "X_test\n",
222 |       "[[-1.          2.64575131 -0.77459667 -1.45882927 -0.90166297]\n",
223 |       " [-1.          2.64575131 -0.77459667  1.98496442  2.13981082]]\n"
224 |      ]
225 |     }
226 |    ],
227 |    "source": [
228 |     "from sklearn.preprocessing import StandardScaler\n",
229 |     "sc_X = StandardScaler()\n",
230 |     "X_train = sc_X.fit_transform(X_train)\n",
231 |     "X_test = sc_X.transform(X_test)\n",
232 |     "print(\"---------------------\")\n",
233 |     "print(\"Step 6: Feature Scaling\")\n",
234 |     "print(\"X_train\")\n",
235 |     "print(X_train)\n",
236 |     "print(\"X_test\")\n",
237 |     "print(X_test)"
238 |    ]
239 |   },
240 |   {
241 |    "cell_type": "code",
242 |    "execution_count": null,
243 |    "metadata": {
244 |     "collapsed": true
245 |    },
246 |    "outputs": [],
247 |    "source": []
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": null,
252 |    "metadata": {
253 |     "collapsed": true
254 |    },
255 |    "outputs": [],
256 |    "source": []
257 |   },
258 |   {
259 |    "cell_type": "code",
260 |    "execution_count": null,
261 |    "metadata": {
262 |     "collapsed": true
263 |    },
264 |    "outputs": [],
265 |    "source": []
266 |   }
267 |  ],
268 |  "metadata": {
269 |   "kernelspec": {
270 |    "display_name": "Python 3",
271 |    "language": "python",
272 |    "name": "python3"
273 |   },
274 |   "language_info": {
275 |    "codemirror_mode": {
276 |     "name": "ipython",
277 |     "version": 3
278 |    },
279 |    "file_extension": ".py",
280 |    "mimetype": "text/x-python",
281 |    "name": "python",
282 |    "nbconvert_exporter": "python",
283 |    "pygments_lexer": "ipython3",
284 |    "version": "3.6.2"
285 |   }
286 |  },
287 |  "nbformat": 4,
288 |  "nbformat_minor": 2
289 | }
290 | 


--------------------------------------------------------------------------------
/homework-04/房增林.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 5,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "ename": "FileNotFoundError",
 10 |      "evalue": "File b'100-Days-Of-ML-Code/datasets/Data.csv' does not exist",
 11 |      "output_type": "error",
 12 |      "traceback": [
 13 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
 14 |       "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
 15 |       "\u001b[1;32m<ipython-input-5-f81a2e59e6c7>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;31m#Step 2: Importing dataset\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mdataset\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'100-Days-Of-ML-Code/datasets/Data.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m \u001b[0mdataset\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'100-Days-Of-ML-Code/datasets/Data.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m \u001b[0mX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[1;33m[\u001b[0m \u001b[1;33m:\u001b[0m \u001b[1;33m,\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 16 |       "\u001b[1;32mD:\\Anaconda\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[0;32m    676\u001b[0m                     skip_blank_lines=skip_blank_lines)\n\u001b[0;32m    677\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 678\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    679\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    680\u001b[0m     \u001b[0mparser_f\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 17 |       "\u001b[1;32mD:\\Anaconda\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m    438\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    439\u001b[0m     \u001b[1;31m# Create the parser.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 440\u001b[1;33m     \u001b[0mparser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    441\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    442\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 18 |       "\u001b[1;32mD:\\Anaconda\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m    785\u001b[0m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'has_index_names'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'has_index_names'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    786\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 787\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    788\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    789\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 19 |       "\u001b[1;32mD:\\Anaconda\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[1;34m(self, engine)\u001b[0m\n\u001b[0;32m   1012\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'c'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1013\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'c'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1014\u001b[1;33m             \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1015\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1016\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'python'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 20 |       "\u001b[1;32mD:\\Anaconda\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m   1706\u001b[0m         \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'usecols'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0musecols\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1707\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1708\u001b[1;33m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1709\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1710\u001b[0m         \u001b[0mpassed_names\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnames\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
 21 |       "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[1;34m()\u001b[0m\n",
 22 |       "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[1;34m()\u001b[0m\n",
 23 |       "\u001b[1;31mFileNotFoundError\u001b[0m: File b'100-Days-Of-ML-Code/datasets/Data.csv' does not exist"
 24 |      ]
 25 |     }
 26 |    ],
 27 |    "source": [
 28 |     "import pandas as pd\n",
 29 |     "dataset = pd.read_csv('100-Days-Of-ML-Code/datasets/Data.csv')\n",
 30 |     "dataset = pd.read_csv('100-Days-Of-ML-Code/datasets/Data.csv')\n",
 31 |     "X = dataset.iloc[ : , :-1].values\n",
 32 |     "Y = dataset.iloc[ : , 3].values\n",
 33 |     "print(\"Step 2: Importing dataset\")\n",
 34 |     "print(\"X\")\n",
 35 |     "print(X)\n",
 36 |     "print(\"Y\")\n",
 37 |     "print(Y)\n",
 38 |     "from sklearn.preprocessing import Imputer\n",
 39 |     "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
 40 |     "imputer = imputer.fit(X[ : , 1:3])\n",
 41 |     "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
 42 |     "print(\"---------------------\")\n",
 43 |     "print(\"Step 3: Handling the missing data\")\n",
 44 |     "print(\"step2\")\n",
 45 |     "print(\"X\")\n",
 46 |     "print(X)\n",
 47 |     "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
 48 |     "labelencoder_X = LabelEncoder()\n",
 49 |     "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
 50 |     "#Creating a dummy variable\n",
 51 |     "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
 52 |     "X = onehotencoder.fit_transform(X).toarray()\n",
 53 |     "labelencoder_Y = LabelEncoder()\n",
 54 |     "Y =  labelencoder_Y.fit_transform(Y)\n",
 55 |     "print(\"---------------------\")\n",
 56 |     "print(\"Step 4: Encoding categorical data\")\n",
 57 |     "print(\"X\")  \n",
 58 |     "print(X)\n",
 59 |     "print(\"Y\")\n",
 60 |     "print(Y)\n",
 61 |     "from sklearn.model_selection import train_test_split\n",
 62 |     "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n",
 63 |     "print(\"---------------------\")\n",
 64 |     "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n",
 65 |     "print(\"X_train\")\n",
 66 |     "print(X_train)\n",
 67 |     "print(\"X_test\")\n",
 68 |     "print(X_test)\n",
 69 |     "print(\"Y_train\")\n",
 70 |     "print(Y_train)\n",
 71 |     "print(\"Y_test\")\n",
 72 |     "print(Y_test)\n",
 73 |     "from sklearn.preprocessing import StandardScaler\n",
 74 |     "sc_X = StandardScaler()\n",
 75 |     "X_train = sc_X.fit_transform(X_train)\n",
 76 |     "X_test = sc_X.transform(X_test)\n",
 77 |     "print(\"---------------------\")\n",
 78 |     "print(\"Step 6: Feature Scaling\")\n",
 79 |     "print(\"X_train\")\n",
 80 |     "print(X_train)\n",
 81 |     "print(\"X_test\")\n",
 82 |     "print(X_test)"
 83 |    ]
 84 |   },
 85 |   {
 86 |    "cell_type": "code",
 87 |    "execution_count": null,
 88 |    "metadata": {},
 89 |    "outputs": [],
 90 |    "source": []
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "metadata": {},
 96 |    "outputs": [],
 97 |    "source": []
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": null,
102 |    "metadata": {},
103 |    "outputs": [],
104 |    "source": []
105 |   }
106 |  ],
107 |  "metadata": {
108 |   "kernelspec": {
109 |    "display_name": "Python 3",
110 |    "language": "python",
111 |    "name": "python3"
112 |   },
113 |   "language_info": {
114 |    "codemirror_mode": {
115 |     "name": "ipython",
116 |     "version": 3
117 |    },
118 |    "file_extension": ".py",
119 |    "mimetype": "text/x-python",
120 |    "name": "python",
121 |    "nbconvert_exporter": "python",
122 |    "pygments_lexer": "ipython3",
123 |    "version": "3.7.1"
124 |   }
125 |  },
126 |  "nbformat": 4,
127 |  "nbformat_minor": 2
128 | }
129 | 


--------------------------------------------------------------------------------
/homework-04/曲礼阳.ipynb:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | 
 4 | dataset = pd.read_csv('Data.csv')
 5 | X = dataset.iloc[ : , :-1].values
 6 | Y = dataset.iloc[ : , 3].values
 7 | 
 8 | from sklearn.preprocessing import Imputer
 9 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
10 | imputer = imputer.fit(X[ : , 1:3])
11 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
12 | 
13 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
14 | labelencoder_X = LabelEncoder()
15 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
16 | 
17 | onehotencoder = OneHotEncoder(categorical_features = [0])
18 | X = onehotencoder.fit_transform(X).toarray()
19 | labelencoder_Y = LabelEncoder()
20 | Y =  labelencoder_Y.fit_transform(Y)
21 | 
22 | from sklearn.cross_validation import train_test_split
23 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
24 | 
25 | from sklearn.preprocessing import StandardScaler
26 | sc_X = StandardScaler()
27 | X_train = sc_X.fit_transform(X_train)
28 | X_test = sc_X.fit_transform(X_test)
29 | 


--------------------------------------------------------------------------------
/homework-04/机器学习第一课.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-04/机器学习第一课.pptx


--------------------------------------------------------------------------------
/homework-04/李校宇.ipynb:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | 
 4 | dataset=pd.read_csv('Data.csv')
 5 | X=dataset.iloc[ : , :-1].values 
 6 | Y=dataset.iloc[ : ,3].values
 7 | print('Step 2:Importing dataset')
 8 | print('X')
 9 | print(X)
10 | print('Y')
11 | print(Y)
12 | 
13 | from sklearn.preprocessing import Imputer
14 | imputer = Imputer(missing_values = "NaN", strategy = "mean",axis=0)
15 | imputer = imputer.fit(X[ : ,1:3])
16 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
17 | print("****************")
18 | print("Step 3:Handling the missing data")
19 | print("step2")
20 | print("X")
21 | print(X)
22 | 
23 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
24 | labelencoder_X=LabelEncoder()
25 | X[ : , 0]=labelencoder_X.fit_transform(X[ : ,0])
26 | onehotencoder = OneHotEncoder(categorical_features = [0])
27 | X = onehotencoder.fit_transform(X).toarray()
28 | labelencoder_Y = LabelEncoder()
29 | Y = labelencoder_Y.fit_transform(Y)
30 | print("**********************")
31 | print("Step 4: Encoding categorical data")
32 | print("X")
33 | print(X)
34 | print("Y")
35 | print(Y)
36 | 
37 | from sklearn.model_selection import train_test_split
38 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
39 | print("---------------------")
40 | print("Step 5: Splitting the datasets into training sets and Test sets")
41 | print("X_train")
42 | print(X_train)
43 | print("X_test")
44 | print(X_test)
45 | print("Y_train")
46 | print(Y_train)
47 | print("Y_test")
48 | print(Y_test)
49 | 
50 | from sklearn.preprocessing import StandardScaler
51 | sc_X = StandardScaler()
52 | X_train = sc_X.fit_transform(X_train)
53 | X_test = sc_X.transform(X_test)
54 | print("---------------------")
55 | print("Step 6: Feature Scaling")
56 | print("X_train")
57 | print(X_train)
58 | print("X_test")
59 | print(X_test)
60 | 


--------------------------------------------------------------------------------
/homework-04/胡明玥.ipynb:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | 
 4 | dataset = pd.read_csv('Data.csv')//读取csv文件
 5 | X = dataset.iloc[ : , :-1].values//.iloc[行，列]
 6 | Y = dataset.iloc[ : , 3].values  // : 全部行 or 列；[a]第a行 or 列
 7 |                                  // [a,b,c]第 a,b,c 行 or 列
 8 | 
 9 | from sklearn.preprocessing import Imputer
10 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
11 | imputer = imputer.fit(X[ : , 1:3])
12 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
13 | 
14 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
15 | labelencoder_X = LabelEncoder()
16 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
17 | 
18 | onehotencoder = OneHotEncoder(categorical_features = [0])
19 | X = onehotencoder.fit_transform(X).toarray()
20 | labelencoder_Y = LabelEncoder()
21 | Y =  labelencoder_Y.fit_transform(Y)
22 | 
23 | from sklearn.model_selection import train_test_split
24 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
25 | 
26 | from sklearn.preprocessing import StandardScaler
27 | sc_X = StandardScaler()
28 | X_train = sc_X.fit_transform(X_train)
29 | X_test = sc_X.transform(X_test)
30 | 


--------------------------------------------------------------------------------
/homework-04/苏峥.ipynb:
--------------------------------------------------------------------------------
 1 | #Day 1: Data Prepocessing
 2 | 
 3 | #Step 1: Importing the libraries
 4 | import numpy as np
 5 | import pandas as pd
 6 | 
 7 | #Step 2: Importing dataset
 8 | dataset = pd.read_csv('../datasets/Data.csv')
 9 | X = dataset.iloc[ : , :-1].values
10 | Y = dataset.iloc[ : , 3].values
11 | print("Step 2: Importing dataset")
12 | print("X")
13 | print(X)
14 | print("Y")
15 | print(Y)
16 | 
17 | #Step 3: Handling the missing data
18 | from sklearn.preprocessing import Imputer
19 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
20 | imputer = imputer.fit(X[ : , 1:3])
21 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
22 | print("---------------------")
23 | print("Step 3: Handling the missing data")
24 | print("step2")
25 | print("X")
26 | print(X)
27 | 
28 | #Step 4: Encoding categorical data
29 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
30 | labelencoder_X = LabelEncoder()
31 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
32 | #Creating a dummy variable
33 | onehotencoder = OneHotEncoder(categorical_features = [0])
34 | X = onehotencoder.fit_transform(X).toarray()
35 | labelencoder_Y = LabelEncoder()
36 | Y =  labelencoder_Y.fit_transform(Y)
37 | print("---------------------")
38 | print("Step 4: Encoding categorical data")
39 | print("X")
40 | print(X)
41 | print("Y")
42 | print(Y)
43 | 
44 | #Step 5: Splitting the datasets into training sets and Test sets
45 | from sklearn.model_selection import train_test_split
46 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
47 | print("---------------------")
48 | print("Step 5: Splitting the datasets into training sets and Test sets")
49 | print("X_train")
50 | print(X_train)
51 | print("X_test")
52 | print(X_test)
53 | print("Y_train")
54 | print(Y_train)
55 | print("Y_test")
56 | print(Y_test)
57 | 
58 | #Step 6: Feature Scaling
59 | from sklearn.preprocessing import StandardScaler
60 | sc_X = StandardScaler()
61 | X_train = sc_X.fit_transform(X_train)
62 | X_test = sc_X.transform(X_test)
63 | print("---------------------")
64 | print("Step 6: Feature Scaling")
65 | print("X_train")
66 | print(X_train)
67 | print("X_test")
68 | print(X_test)
69 | 


--------------------------------------------------------------------------------
/homework-04/范升旭.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import pandas as pd\n",
 11 |     "from sklearn.preprocessing import Imputer"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {},
 18 |    "outputs": [
 19 |     {
 20 |      "name": "stdout",
 21 |      "output_type": "stream",
 22 |      "text": [
 23 |       "Step 2: Importing dataset\n",
 24 |       "X\n",
 25 |       "[['France' 44.0 72000.0]\n",
 26 |       " ['Spain' 27.0 48000.0]\n",
 27 |       " ['Germany' 30.0 54000.0]\n",
 28 |       " ['Spain' 38.0 61000.0]\n",
 29 |       " ['Germany' 40.0 nan]\n",
 30 |       " ['France' 35.0 58000.0]\n",
 31 |       " ['Spain' nan 52000.0]\n",
 32 |       " ['France' 48.0 79000.0]\n",
 33 |       " ['Germany' 50.0 83000.0]\n",
 34 |       " ['France' 37.0 67000.0]]\n",
 35 |       "Y\n",
 36 |       "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
 37 |      ]
 38 |     }
 39 |    ],
 40 |    "source": [
 41 |     "dataset = pd.read_csv('Data.csv')\n",
 42 |     "X = dataset.iloc[ : , :-1].values   ## 创建独立变量---选取前三列\n",
 43 |     "Y = dataset.iloc[ : , 3].values     ## 创建依赖变量-----选取最后一列    返回值的类型仍为 dataframe\n",
 44 |     "print(\"Step 2: Importing dataset\")\n",
 45 |     "print(\"X\")\n",
 46 |     "print(X)\n",
 47 |     "print(\"Y\")\n",
 48 |     "print(Y)"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "code",
 53 |    "execution_count": 3,
 54 |    "metadata": {},
 55 |    "outputs": [
 56 |     {
 57 |      "name": "stdout",
 58 |      "output_type": "stream",
 59 |      "text": [
 60 |       "---------------------\n",
 61 |       "Step 3: Handling the missing data\n",
 62 |       "step2\n",
 63 |       "X\n",
 64 |       "[['France' 44.0 72000.0]\n",
 65 |       " ['Spain' 27.0 48000.0]\n",
 66 |       " ['Germany' 30.0 54000.0]\n",
 67 |       " ['Spain' 38.0 61000.0]\n",
 68 |       " ['Germany' 40.0 63777.77777777778]\n",
 69 |       " ['France' 35.0 58000.0]\n",
 70 |       " ['Spain' 38.77777777777778 52000.0]\n",
 71 |       " ['France' 48.0 79000.0]\n",
 72 |       " ['Germany' 50.0 83000.0]\n",
 73 |       " ['France' 37.0 67000.0]]\n"
 74 |      ]
 75 |     },
 76 |     {
 77 |      "name": "stderr",
 78 |      "output_type": "stream",
 79 |      "text": [
 80 |       "D:\\anaconda\\envs\\ANACONDA\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n",
 81 |       "  warnings.warn(msg, category=DeprecationWarning)\n"
 82 |      ]
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "from sklearn.preprocessing import Imputer\n",
 87 |     "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
 88 |     "imputer = imputer.fit(X[ : , 1:3])\n",
 89 |     "X[ : , 1:3] = imputer.transform(X[ : , 1:3]) ## 将其应用到数据\n",
 90 |     "print(\"---------------------\")\n",
 91 |     "print(\"Step 3: Handling the missing data\")\n",
 92 |     "print(\"step2\")\n",
 93 |     "print(\"X\")\n",
 94 |     "print(X)"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": 4,
100 |    "metadata": {},
101 |    "outputs": [
102 |     {
103 |      "name": "stdout",
104 |      "output_type": "stream",
105 |      "text": [
106 |       "---------------------\n",
107 |       "Step 3: Handling the missing data\n",
108 |       "step2\n",
109 |       "X\n",
110 |       "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
111 |       "  7.20000000e+04]\n",
112 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
113 |       "  4.80000000e+04]\n",
114 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n",
115 |       "  5.40000000e+04]\n",
116 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
117 |       "  6.10000000e+04]\n",
118 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
119 |       "  6.37777778e+04]\n",
120 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
121 |       "  5.80000000e+04]\n",
122 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
123 |       "  5.20000000e+04]\n",
124 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
125 |       "  7.90000000e+04]\n",
126 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n",
127 |       "  8.30000000e+04]\n",
128 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
129 |       "  6.70000000e+04]]\n"
130 |      ]
131 |     },
132 |     {
133 |      "name": "stderr",
134 |      "output_type": "stream",
135 |      "text": [
136 |       "D:\\anaconda\\envs\\ANACONDA\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:371: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
137 |       "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
138 |       "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
139 |       "  warnings.warn(msg, FutureWarning)\n",
140 |       "D:\\anaconda\\envs\\ANACONDA\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:392: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n",
141 |       "  \"use the ColumnTransformer instead.\", DeprecationWarning)\n"
142 |      ]
143 |     }
144 |    ],
145 |    "source": [
146 |     "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
147 |     "labelencoder_X = LabelEncoder()\n",
148 |     "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
149 |     "\n",
150 |     "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
151 |     "X = onehotencoder.fit_transform(X).toarray()\n",
152 |     "\n",
153 |     "labelencoder_Y = LabelEncoder()   \n",
154 |     "Y =  labelencoder_Y.fit_transform(Y)\n",
155 |     "print(\"---------------------\")\n",
156 |     "print(\"Step 3: Handling the missing data\")\n",
157 |     "print(\"step2\")\n",
158 |     "print(\"X\")\n",
159 |     "print(X)"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": 5,
165 |    "metadata": {},
166 |    "outputs": [],
167 |    "source": [
168 |     "from sklearn.model_selection import train_test_split\n",
169 |     "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": 6,
175 |    "metadata": {},
176 |    "outputs": [
177 |     {
178 |      "name": "stdout",
179 |      "output_type": "stream",
180 |      "text": [
181 |       "---------------------\n",
182 |       "Step 6: Feature Scaling\n",
183 |       "X_train\n",
184 |       "[[-1.          2.64575131 -0.77459667  0.26306757  0.12381479]\n",
185 |       " [ 1.         -0.37796447 -0.77459667 -0.25350148  0.46175632]\n",
186 |       " [-1.         -0.37796447  1.29099445 -1.97539832 -1.53093341]\n",
187 |       " [-1.         -0.37796447  1.29099445  0.05261351 -1.11141978]\n",
188 |       " [ 1.         -0.37796447 -0.77459667  1.64058505  1.7202972 ]\n",
189 |       " [-1.         -0.37796447  1.29099445 -0.0813118  -0.16751412]\n",
190 |       " [ 1.         -0.37796447 -0.77459667  0.95182631  0.98614835]\n",
191 |       " [ 1.         -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n",
192 |       "X_test\n",
193 |       "[[ 0.  0.  0. -1. -1.]\n",
194 |       " [ 0.  0.  0.  1.  1.]]\n"
195 |      ]
196 |     }
197 |    ],
198 |    "source": [
199 |     "from sklearn.preprocessing import StandardScaler\n",
200 |     "sc_X = StandardScaler()\n",
201 |     "X_train = sc_X.fit_transform(X_train)\n",
202 |     "X_test = sc_X.fit_transform(X_test)\n",
203 |     "print(\"---------------------\")\n",
204 |     "print(\"Step 6: Feature Scaling\")\n",
205 |     "print(\"X_train\")\n",
206 |     "print(X_train)\n",
207 |     "print(\"X_test\")\n",
208 |     "print(X_test)"
209 |    ]
210 |   },
211 |   {
212 |    "cell_type": "code",
213 |    "execution_count": null,
214 |    "metadata": {},
215 |    "outputs": [],
216 |    "source": []
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": null,
221 |    "metadata": {},
222 |    "outputs": [],
223 |    "source": []
224 |   }
225 |  ],
226 |  "metadata": {
227 |   "kernelspec": {
228 |    "display_name": "Python 3",
229 |    "language": "python",
230 |    "name": "python3"
231 |   },
232 |   "language_info": {
233 |    "codemirror_mode": {
234 |     "name": "ipython",
235 |     "version": 3
236 |    },
237 |    "file_extension": ".py",
238 |    "mimetype": "text/x-python",
239 |    "name": "python",
240 |    "nbconvert_exporter": "python",
241 |    "pygments_lexer": "ipython3",
242 |    "version": "3.6.6"
243 |   }
244 |  },
245 |  "nbformat": 4,
246 |  "nbformat_minor": 2
247 | }
248 | 


--------------------------------------------------------------------------------
/homework-04/谢易凡.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "metadata": {},
 7 |    "outputs": [
 8 |     {
 9 |      "ename": "SyntaxError",
10 |      "evalue": "invalid syntax (<ipython-input-1-a0e3e07c3629>, line 5)",
11 |      "output_type": "error",
12 |      "traceback": [
13 |       "\u001b[1;36m  File \u001b[1;32m\"<ipython-input-1-a0e3e07c3629>\"\u001b[1;36m, line \u001b[1;32m5\u001b[0m\n\u001b[1;33m    : -1].values // .iloc[行,列]\u001b[0m\n\u001b[1;37m                    ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n"
14 |      ]
15 |     }
16 |    ],
17 |    "source": [
18 |     "import numpy as np\n",
19 |     "import pandas as pd\n",
20 |     "dataset = pd.read_csv('Data .csv') // 读取csv文件\n",
21 |     "X = dataset.iloc[ : ,\n",
22 |     "                 : -1].values // .iloc[行,列]\n",
23 |     "Y = dataset.iloc[ : , 3].values \n",
24 |     "// : 全部行 or 列;[a]第a行 or 列\n",
25 |     "// [a,b,c]第 a,b,c 行 or 列\n",
26 |     "    from sklearn.preprocessing import Imputer\n",
27 |     "    imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\",axis = 0)\n",
28 |     "    imputer = imputer.fit(X[ : , 1:3])\n",
29 |     "    X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
30 |     "    from sklearn.preprocessing import LabelEncoder ,OneHotEncoderEncoder()X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
31 |     "    onehotencoder = OneHotEncoder(categorical_features = [0])\n",
32 |     "    X = onehotencoder.fit_transform(X).toarr\n",
33 |     "    labelencoder_Y = LabelEncoder()\n",
34 |     "    Y = labelencoder_Y.fit_transform(Y)\n",
35 |     "    from sklearn.model_selection import train_tese_split\n",
36 |     "    X_train, X_test, Y_train, Y_test = train_tese_split(X ,Y ,test_size = 0.2, random_state = 0)\n",
37 |     "    from sklearn.preprocessing import StandardScaler \n",
38 |     "    sc_X = StandardScaler()\n",
39 |     "    X_train = sc_X.fit_transform(X_train)\n",
40 |     "    X_tese = sc_X.transform(X_test)"
41 |    ]
42 |   },
43 |   {
44 |    "cell_type": "code",
45 |    "execution_count": null,
46 |    "metadata": {},
47 |    "outputs": [],
48 |    "source": []
49 |   }
50 |  ],
51 |  "metadata": {
52 |   "kernelspec": {
53 |    "display_name": "Python 3",
54 |    "language": "python",
55 |    "name": "python3"
56 |   },
57 |   "language_info": {
58 |    "codemirror_mode": {
59 |     "name": "ipython",
60 |     "version": 3
61 |    },
62 |    "file_extension": ".py",
63 |    "mimetype": "text/x-python",
64 |    "name": "python",
65 |    "nbconvert_exporter": "python",
66 |    "pygments_lexer": "ipython3",
67 |    "version": "3.7.1"
68 |   }
69 |  },
70 |  "nbformat": 4,
71 |  "nbformat_minor": 2
72 | }
73 | 


--------------------------------------------------------------------------------
/homework-04/闫泳寰.ipynb:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | 
 4 | dataset = pd.read_csv('../datasets/Data.csv')
 5 | X = dataset.iloc[ : , :-1].values
 6 | Y = dataset.iloc[ : , 3].values
 7 | print("Step 2: Importing dataset")
 8 | print("X")
 9 | print(X)
10 | print("Y")
11 | print(Y)
12 | 
13 | from sklearn.preprocessing import Imputer
14 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
15 | imputer = imputer.fit(X[ : , 1:3])
16 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
17 | print("---------------------")
18 | print("Step 3: Handling the missing data")
19 | print("step2")
20 | print("X")
21 | print(X)
22 | 
23 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
24 | labelencoder_X = LabelEncoder()
25 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
26 | #Creating a dummy variable
27 | onehotencoder = OneHotEncoder(categorical_features = [0])
28 | X = onehotencoder.fit_transform(X).toarray()
29 | labelencoder_Y = LabelEncoder()
30 | Y =  labelencoder_Y.fit_transform(Y)
31 | print("---------------------")
32 | print("Step 4: Encoding categorical data")
33 | print("X")
34 | print(X)
35 | print("Y")
36 | print(Y)
37 | 
38 | from sklearn.model_selection import train_test_split
39 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
40 | print("---------------------")
41 | print("Step 5: Splitting the datasets into training sets and Test sets")
42 | print("X_train")
43 | print(X_train)
44 | print("X_test")
45 | print(X_test)
46 | print("Y_train")
47 | print(Y_train)
48 | print("Y_test")
49 | print(Y_test)
50 | 
51 | from sklearn.preprocessing import StandardScaler
52 | sc_X = StandardScaler()
53 | X_train = sc_X.fit_transform(X_train)
54 | X_test = sc_X.transform(X_test)
55 | print("---------------------")
56 | print("Step 6: Feature Scaling")
57 | print("X_train")
58 | print(X_train)
59 | print("X_test")
60 | print(X_test)


--------------------------------------------------------------------------------
/homework-04/阳治玖.ipynb:
--------------------------------------------------------------------------------
 1 | import numpy as np 
 2 | import pandas as pd 
 3 | dataset = pd.read_csv('Data.csv') 
 4 | X = dataset.iloc[ : , :-1].values 
 5 | Y = dataset.iloc[ : , 3].values 
 6 | from sklearn.preprocessing import Imputer 
 7 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 
 8 | imputer = imputer.fit(X[ : , 1:3]) 
 9 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 
10 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 
11 | labelencoder_X = LabelEncoder() 
12 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 
13 | onehotencoder = OneHotEncoder(categorical_features = [0]) 
14 | X = onehotencoder.fit_transform(X).toarray() 
15 | labelencoder_Y = LabelEncoder() 
16 | Y =  labelencoder_Y.fit_transform(Y) 
17 | from sklearn.cross_validation import train_test_split 
18 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 
19 | from sklearn.preprocessing import StandardScaler 
20 | sc_X = StandardScaler() 
21 | X_train = sc_X.fit_transform(X_train) 
22 | X_test = sc_X.fit_transform(X_test)


--------------------------------------------------------------------------------
/homework-04/陈宝旭.ipynb:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Spyder Editor
 4 | 
 5 | This is a temporary script file.
 6 | """
 7 | 
 8 | import numpy as np
 9 | import pandas as pd
10 | 
11 | dataset = pd.read_csv('data.csv')
12 | X = dataset.iloc[ : , :-1].values
13 | Y = dataset.iloc[ : , : 3].values
14 | 
15 | from sklearn.perprocessing import Imputer
16 | imputer = Imputer(missing_values = "NAN",strategy = "mean", axis = 0)
17 | imputer = Imputer.fit(X[ : , 1 :3])
18 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
19 | 
20 | from sklearn.perprocessing import LabelEncoder, OneHotEncoder
21 | labelencoder_X = LabelEncoder()
22 | X[ : , 0]=labelencoder_X.fit_transform(X[ : , 0])
23 | 
24 | onehotencoder = OneHotEncoder(categorical_features= [0] )
25 | X=onehotencoder.fix_transform(X).toarray()
26 | labelencoder_Y = LabelEncoder()
27 | Y = labelencoder_Y.fit_transform(Y)
28 | 
29 | from sklearn.model_seletion import train_test_split
30 | X_train,X_test,Y_train,Y_test = train_test_split(X , Y ,test_size=0.2,random_state=0)
31 | 
32 | from sklearn.perprocessing import StandardScaler
33 | sc_X = StandardScaler()
34 | X_train = sc_X.fit_transform(X_train)
35 | X_test = sc_X.transform(X_test)
36 | 
37 | 


--------------------------------------------------------------------------------
/homework-04/陈瑞.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 2,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Step 2: Importing dataset\n",
 13 |       "X\n",
 14 |       "[['France' 44.0 72000.0]\n",
 15 |       " ['Spain' 27.0 48000.0]\n",
 16 |       " ['Germany' 30.0 54000.0]\n",
 17 |       " ['Spain' 38.0 61000.0]\n",
 18 |       " ['Germany' 40.0 nan]\n",
 19 |       " ['France' 35.0 58000.0]\n",
 20 |       " ['Spain' nan 52000.0]\n",
 21 |       " ['France' 48.0 79000.0]\n",
 22 |       " ['Germany' 50.0 83000.0]\n",
 23 |       " ['France' 37.0 67000.0]]\n",
 24 |       "Y\n",
 25 |       "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
 26 |      ]
 27 |     }
 28 |    ],
 29 |    "source": [
 30 |     "import numpy as np\n",
 31 |     "import pandas as pd\n",
 32 |     "dataset = pd.read_csv('./Data.csv')\n",
 33 |     "# 不包括最后一列的所有列\n",
 34 |     "X = dataset.iloc[ : , :-1].values\n",
 35 |     "#取最后一列\n",
 36 |     "Y = dataset.iloc[ : , 3].values\n",
 37 |     "print(\"Step 2: Importing dataset\")\n",
 38 |     "print(\"X\")\n",
 39 |     "print(X)\n",
 40 |     "print(\"Y\")\n",
 41 |     "print(Y)"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 3,
 47 |    "metadata": {},
 48 |    "outputs": [
 49 |     {
 50 |      "name": "stdout",
 51 |      "output_type": "stream",
 52 |      "text": [
 53 |       "---------------------\n",
 54 |       "Step 3: Handling the missing data\n",
 55 |       "step2\n",
 56 |       "X\n",
 57 |       "[['France' 44.0 72000.0]\n",
 58 |       " ['Spain' 27.0 48000.0]\n",
 59 |       " ['Germany' 30.0 54000.0]\n",
 60 |       " ['Spain' 38.0 61000.0]\n",
 61 |       " ['Germany' 40.0 63777.77777777778]\n",
 62 |       " ['France' 35.0 58000.0]\n",
 63 |       " ['Spain' 38.77777777777778 52000.0]\n",
 64 |       " ['France' 48.0 79000.0]\n",
 65 |       " ['Germany' 50.0 83000.0]\n",
 66 |       " ['France' 37.0 67000.0]]\n"
 67 |      ]
 68 |     },
 69 |     {
 70 |      "name": "stderr",
 71 |      "output_type": "stream",
 72 |      "text": [
 73 |       "C:\\Users\\dell\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n",
 74 |       "  warnings.warn(msg, category=DeprecationWarning)\n"
 75 |      ]
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "from sklearn.preprocessing import Imputer\n",
 80 |     "# axis=0表示按列进行\n",
 81 |     "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
 82 |     "imputer = imputer.fit(X[ : , 1:3])\n",
 83 |     "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
 84 |     "print(\"---------------------\")\n",
 85 |     "print(\"Step 3: Handling the missing data\")\n",
 86 |     "print(\"step2\")\n",
 87 |     "print(\"X\")\n",
 88 |     "print(X)\n"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 4,
 94 |    "metadata": {},
 95 |    "outputs": [
 96 |     {
 97 |      "name": "stderr",
 98 |      "output_type": "stream",
 99 |      "text": [
100 |       "C:\\Users\\dell\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:368: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
101 |       "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
102 |       "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
103 |       "  warnings.warn(msg, FutureWarning)\n",
104 |       "C:\\Users\\dell\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:390: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n",
105 |       "  \"use the ColumnTransformer instead.\", DeprecationWarning)\n"
106 |      ]
107 |     },
108 |     {
109 |      "name": "stdout",
110 |      "output_type": "stream",
111 |      "text": [
112 |       "---------------------\n",
113 |       "Step 4: Encoding categorical data\n",
114 |       "X\n",
115 |       "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
116 |       "  7.20000000e+04]\n",
117 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
118 |       "  4.80000000e+04]\n",
119 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n",
120 |       "  5.40000000e+04]\n",
121 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
122 |       "  6.10000000e+04]\n",
123 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
124 |       "  6.37777778e+04]\n",
125 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
126 |       "  5.80000000e+04]\n",
127 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
128 |       "  5.20000000e+04]\n",
129 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
130 |       "  7.90000000e+04]\n",
131 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n",
132 |       "  8.30000000e+04]\n",
133 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
134 |       "  6.70000000e+04]]\n",
135 |       "Y\n",
136 |       "[0 1 0 0 1 1 0 1 0 1]\n"
137 |      ]
138 |     }
139 |    ],
140 |    "source": [
141 |     "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
142 |     "labelencoder_X = LabelEncoder()\n",
143 |     "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
144 |     "#Creating a dummy variable\n",
145 |     "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
146 |     "X = onehotencoder.fit_transform(X).toarray()\n",
147 |     "labelencoder_Y = LabelEncoder()\n",
148 |     "Y =  labelencoder_Y.fit_transform(Y)\n",
149 |     "print(\"---------------------\")\n",
150 |     "print(\"Step 4: Encoding categorical data\")\n",
151 |     "print(\"X\")\n",
152 |     "print(X)\n",
153 |     "print(\"Y\")\n",
154 |     "print(Y)\n"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 5,
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "name": "stdout",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "---------------------\n",
167 |       "Step 5: Splitting the datasets into training sets and Test sets\n",
168 |       "X_train\n",
169 |       "[[0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
170 |       "  6.37777778e+04]\n",
171 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
172 |       "  6.70000000e+04]\n",
173 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
174 |       "  4.80000000e+04]\n",
175 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
176 |       "  5.20000000e+04]\n",
177 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
178 |       "  7.90000000e+04]\n",
179 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
180 |       "  6.10000000e+04]\n",
181 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
182 |       "  7.20000000e+04]\n",
183 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
184 |       "  5.80000000e+04]]\n",
185 |       "X_test\n",
186 |       "[[0.0e+00 1.0e+00 0.0e+00 3.0e+01 5.4e+04]\n",
187 |       " [0.0e+00 1.0e+00 0.0e+00 5.0e+01 8.3e+04]]\n",
188 |       "Y_train\n",
189 |       "[1 1 1 0 1 0 0 1]\n",
190 |       "Y_test\n",
191 |       "[0 0]\n"
192 |      ]
193 |     }
194 |    ],
195 |    "source": [
196 |     "from sklearn.model_selection import train_test_split\n",
197 |     "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n",
198 |     "print(\"---------------------\")\n",
199 |     "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n",
200 |     "print(\"X_train\")\n",
201 |     "print(X_train)\n",
202 |     "print(\"X_test\")\n",
203 |     "print(X_test)\n",
204 |     "print(\"Y_train\")\n",
205 |     "print(Y_train)\n",
206 |     "print(\"Y_test\")\n",
207 |     "print(Y_test)\n"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": 6,
213 |    "metadata": {},
214 |    "outputs": [
215 |     {
216 |      "name": "stdout",
217 |      "output_type": "stream",
218 |      "text": [
219 |       "---------------------\n",
220 |       "Step 6: Feature Scaling\n",
221 |       "X_train\n",
222 |       "[[-1.          2.64575131 -0.77459667  0.26306757  0.12381479]\n",
223 |       " [ 1.         -0.37796447 -0.77459667 -0.25350148  0.46175632]\n",
224 |       " [-1.         -0.37796447  1.29099445 -1.97539832 -1.53093341]\n",
225 |       " [-1.         -0.37796447  1.29099445  0.05261351 -1.11141978]\n",
226 |       " [ 1.         -0.37796447 -0.77459667  1.64058505  1.7202972 ]\n",
227 |       " [-1.         -0.37796447  1.29099445 -0.0813118  -0.16751412]\n",
228 |       " [ 1.         -0.37796447 -0.77459667  0.95182631  0.98614835]\n",
229 |       " [ 1.         -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n",
230 |       "X_test\n",
231 |       "[[-1.          2.64575131 -0.77459667 -1.45882927 -0.90166297]\n",
232 |       " [-1.          2.64575131 -0.77459667  1.98496442  2.13981082]]\n"
233 |      ]
234 |     }
235 |    ],
236 |    "source": [
237 |     "from sklearn.preprocessing import StandardScaler\n",
238 |     "sc_X = StandardScaler()\n",
239 |     "X_train = sc_X.fit_transform(X_train)\n",
240 |     "X_test = sc_X.transform(X_test)\n",
241 |     "print(\"---------------------\")\n",
242 |     "print(\"Step 6: Feature Scaling\")\n",
243 |     "print(\"X_train\")\n",
244 |     "print(X_train)\n",
245 |     "print(\"X_test\")\n",
246 |     "print(X_test)\n"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": null,
252 |    "metadata": {},
253 |    "outputs": [],
254 |    "source": []
255 |   }
256 |  ],
257 |  "metadata": {
258 |   "kernelspec": {
259 |    "display_name": "Python 3",
260 |    "language": "python",
261 |    "name": "python3"
262 |   },
263 |   "language_info": {
264 |    "codemirror_mode": {
265 |     "name": "ipython",
266 |     "version": 3
267 |    },
268 |    "file_extension": ".py",
269 |    "mimetype": "text/x-python",
270 |    "name": "python",
271 |    "nbconvert_exporter": "python",
272 |    "pygments_lexer": "ipython3",
273 |    "version": "3.7.1"
274 |   }
275 |  },
276 |  "nbformat": 4,
277 |  "nbformat_minor": 2
278 | }
279 | 


--------------------------------------------------------------------------------
/homework-04/韩依格.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 50,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import pandas as pd"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 51,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "dataset = pd.read_csv('Data.csv')\n",
 20 |     "X = dataset.iloc[: , : - 1].values\n",
 21 |     "Y = dataset.iloc[: , 3 ].values"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 52,
 27 |    "metadata": {
 28 |     "scrolled": false
 29 |    },
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "from sklearn.preprocessing import Imputer\n",
 33 |     "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
 34 |     "imputer = imputer.fit(X[ : , 1:3])\n",
 35 |     "X[ : , 1:3] = imputer.transform(X[ : , 1:3])"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 53,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
 45 |     "labelencoder_X = LabelEncoder()\n",
 46 |     "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 54,
 52 |    "metadata": {},
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
 56 |     "X = onehotencoder.fit_transform(X).toarray()\n",
 57 |     "labelencoder_Y = LabelEncoder()\n",
 58 |     "Y =  labelencoder_Y.fit_transform(Y)"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": 55,
 64 |    "metadata": {},
 65 |    "outputs": [],
 66 |    "source": [
 67 |     "from sklearn.model_selection import train_test_split\n",
 68 |     "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 56,
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "from sklearn.preprocessing import StandardScaler\n",
 78 |     "sc_X = StandardScaler()\n",
 79 |     "X_train = sc_X.fit_transform(X_train)\n",
 80 |     "X_test = sc_X.transform(X_test)"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": null,
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": []
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": []
 96 |   }
 97 |  ],
 98 |  "metadata": {
 99 |   "kernelspec": {
100 |    "display_name": "Python 3",
101 |    "language": "python",
102 |    "name": "python3"
103 |   },
104 |   "language_info": {
105 |    "codemirror_mode": {
106 |     "name": "ipython",
107 |     "version": 3
108 |    },
109 |    "file_extension": ".py",
110 |    "mimetype": "text/x-python",
111 |    "name": "python",
112 |    "nbconvert_exporter": "python",
113 |    "pygments_lexer": "ipython3",
114 |    "version": "3.6.5"
115 |   }
116 |  },
117 |  "nbformat": 4,
118 |  "nbformat_minor": 2
119 | }
120 | 


--------------------------------------------------------------------------------
/homework-04/马士尧.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 34,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "[['France' 44.0 72000.0]\n",
 13 |       " ['Spain' 27.0 48000.0]\n",
 14 |       " ['Germany' 30.0 54000.0]\n",
 15 |       " ['Spain' 38.0 61000.0]\n",
 16 |       " ['Germany' 40.0 nan]\n",
 17 |       " ['France' 35.0 58000.0]\n",
 18 |       " ['Spain' nan 52000.0]\n",
 19 |       " ['France' 48.0 79000.0]\n",
 20 |       " ['Germany' 50.0 83000.0]\n",
 21 |       " ['France' 37.0 67000.0]]\n",
 22 |       "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n",
 23 |       "new X.1\n",
 24 |       "[['France' 44.0 72000.0]\n",
 25 |       " ['Spain' 27.0 48000.0]\n",
 26 |       " ['Germany' 30.0 54000.0]\n",
 27 |       " ['Spain' 38.0 61000.0]\n",
 28 |       " ['Germany' 40.0 48000.0]\n",
 29 |       " ['France' 35.0 58000.0]\n",
 30 |       " ['Spain' 27.0 52000.0]\n",
 31 |       " ['France' 48.0 79000.0]\n",
 32 |       " ['Germany' 50.0 83000.0]\n",
 33 |       " ['France' 37.0 67000.0]]\n",
 34 |       "new X.2\n",
 35 |       "[[0 44.0 72000.0]\n",
 36 |       " [2 27.0 48000.0]\n",
 37 |       " [1 30.0 54000.0]\n",
 38 |       " [2 38.0 61000.0]\n",
 39 |       " [1 40.0 48000.0]\n",
 40 |       " [0 35.0 58000.0]\n",
 41 |       " [2 27.0 52000.0]\n",
 42 |       " [0 48.0 79000.0]\n",
 43 |       " [1 50.0 83000.0]\n",
 44 |       " [0 37.0 67000.0]]\n",
 45 |       "StandardScaler(copy=True, with_mean=True, with_std=True)\n",
 46 |       "[[-1.          2.64575131 -0.77459667  0.4330127  -1.1851228 ]\n",
 47 |       " [ 1.         -0.37796447 -0.77459667  0.          0.59842834]\n",
 48 |       " [-1.         -0.37796447  1.29099445 -1.44337567 -1.1851228 ]\n",
 49 |       " [-1.         -0.37796447  1.29099445 -1.44337567 -0.80963835]\n",
 50 |       " [ 1.         -0.37796447 -0.77459667  1.58771324  1.72488169]\n",
 51 |       " [-1.         -0.37796447  1.29099445  0.14433757  0.03520167]\n",
 52 |       " [ 1.         -0.37796447 -0.77459667  1.01036297  1.0677839 ]\n",
 53 |       " [ 1.         -0.37796447 -0.77459667 -0.28867513 -0.24641167]]\n",
 54 |       "[[-1.          2.64575131 -0.77459667 -1.01036297 -0.62189612]\n",
 55 |       " [-1.          2.64575131 -0.77459667  1.87638837  2.10036614]]\n"
 56 |      ]
 57 |     },
 58 |     {
 59 |      "name": "stderr",
 60 |      "output_type": "stream",
 61 |      "text": [
 62 |       "C:\\Anaconda\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n",
 63 |       "  warnings.warn(msg, category=DeprecationWarning)\n",
 64 |       "C:\\Anaconda\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:371: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
 65 |       "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
 66 |       "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
 67 |       "  warnings.warn(msg, FutureWarning)\n",
 68 |       "C:\\Anaconda\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:392: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n",
 69 |       "  \"use the ColumnTransformer instead.\", DeprecationWarning)\n"
 70 |      ]
 71 |     }
 72 |    ],
 73 |    "source": [
 74 |     "import numpy as py\n",
 75 |     "import pandas as pd\n",
 76 |     "\n",
 77 |     "dateset = pd.read_csv(r\"C:\\Users\\Administrator\\Data.csv\")\n",
 78 |     "X = dateset.iloc[: , :-1].values\n",
 79 |     "Y = dateset.iloc[:,3].values\n",
 80 |     "print(X)\n",
 81 |     "print(Y)\n",
 82 |     "\n",
 83 |     "from sklearn.preprocessing import Imputer\n",
 84 |     "imputer = Imputer(missing_values =\"NaN\",strategy = \"most_frequent\", axis = 0)\n",
 85 |     "imputer = imputer.fit(X[ : , 1:3])\n",
 86 |     "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
 87 |     "print(\"new X.1\")\n",
 88 |     "print(X)\n",
 89 |     "\n",
 90 |     "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
 91 |     "labelencoder_X = LabelEncoder()\n",
 92 |     "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
 93 |     "print(\"new X.2\")\n",
 94 |     "print(X)\n",
 95 |     "\n",
 96 |     "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
 97 |     "X = onehotencoder.fit_transform(X).toarray()\n",
 98 |     "labelencoder_Y = LabelEncoder()\n",
 99 |     "Y =  labelencoder_Y.fit_transform(Y)\n",
100 |     "\n",
101 |     "from sklearn.model_selection import train_test_split\n",
102 |     "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n",
103 |     "\n",
104 |     "from sklearn.preprocessing import StandardScaler\n",
105 |     "sc_X = StandardScaler()\n",
106 |     "X_train = sc_X.fit_transform(X_train)\n",
107 |     "X_test = sc_X.transform(X_test)\n",
108 |     "print(sc_X,X_train,X_test,sep='\\n')"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": []
117 |   }
118 |  ],
119 |  "metadata": {
120 |   "kernelspec": {
121 |    "display_name": "Python 3",
122 |    "language": "python",
123 |    "name": "python3"
124 |   },
125 |   "language_info": {
126 |    "codemirror_mode": {
127 |     "name": "ipython",
128 |     "version": 3
129 |    },
130 |    "file_extension": ".py",
131 |    "mimetype": "text/x-python",
132 |    "name": "python",
133 |    "nbconvert_exporter": "python",
134 |    "pygments_lexer": "ipython3",
135 |    "version": "3.7.0"
136 |   }
137 |  },
138 |  "nbformat": 4,
139 |  "nbformat_minor": 2
140 | }
141 | 


--------------------------------------------------------------------------------
/homework-04/高一淇.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 12,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "Step 2: Importing dataset\n",
 13 |       "X\n",
 14 |       "[['France' 44.0 72000.0]\n",
 15 |       " ['Spain' 27.0 48000.0]\n",
 16 |       " ['Germany' 30.0 54000.0]\n",
 17 |       " ['Spain' 38.0 61000.0]\n",
 18 |       " ['Germany' 40.0 nan]\n",
 19 |       " ['France' 35.0 58000.0]\n",
 20 |       " ['Spain' nan 52000.0]\n",
 21 |       " ['France' 48.0 79000.0]\n",
 22 |       " ['Germany' 50.0 83000.0]\n",
 23 |       " ['France' 37.0 67000.0]]\n",
 24 |       "Y\n",
 25 |       "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
 26 |      ]
 27 |     }
 28 |    ],
 29 |    "source": [
 30 |     "import numpy as np\n",
 31 |     "import pandas as pd\n",
 32 |     "dataset = pd.read_csv('./Data.csv')\n",
 33 |     "# 不包括最后一列的所有列\n",
 34 |     "X = dataset.iloc[ : , :-1].values\n",
 35 |     "#取最后一列\n",
 36 |     "Y = dataset.iloc[ : , 3].values\n",
 37 |     "print(\"Step 2: Importing dataset\")\n",
 38 |     "print(\"X\")\n",
 39 |     "print(X)\n",
 40 |     "print(\"Y\")\n",
 41 |     "print(Y)\n"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 15,
 47 |    "metadata": {},
 48 |    "outputs": [
 49 |     {
 50 |      "name": "stdout",
 51 |      "output_type": "stream",
 52 |      "text": [
 53 |       "---------------------\n",
 54 |       "Step 3: Handling the missing data\n",
 55 |       "step2\n",
 56 |       "X\n",
 57 |       "[['France' 44.0 72000.0]\n",
 58 |       " ['Spain' 27.0 48000.0]\n",
 59 |       " ['Germany' 30.0 54000.0]\n",
 60 |       " ['Spain' 38.0 61000.0]\n",
 61 |       " ['Germany' 40.0 63777.77777777778]\n",
 62 |       " ['France' 35.0 58000.0]\n",
 63 |       " ['Spain' 38.77777777777778 52000.0]\n",
 64 |       " ['France' 48.0 79000.0]\n",
 65 |       " ['Germany' 50.0 83000.0]\n",
 66 |       " ['France' 37.0 67000.0]]\n"
 67 |      ]
 68 |     },
 69 |     {
 70 |      "name": "stderr",
 71 |      "output_type": "stream",
 72 |      "text": [
 73 |       "C:\\Users\\dell\\Anaconda3\\envs\\AAA\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n",
 74 |       "  warnings.warn(msg, category=DeprecationWarning)\n"
 75 |      ]
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "from sklearn.preprocessing import Imputer\n",
 80 |     "# axis=0表示按列进行\n",
 81 |     "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
 82 |     "imputer = imputer.fit(X[ : , 1:3])\n",
 83 |     "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
 84 |     "print(\"---------------------\")\n",
 85 |     "print(\"Step 3: Handling the missing data\")\n",
 86 |     "print(\"step2\")\n",
 87 |     "print(\"X\")\n",
 88 |     "print(X)"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "code",
 93 |    "execution_count": 16,
 94 |    "metadata": {},
 95 |    "outputs": [
 96 |     {
 97 |      "name": "stdout",
 98 |      "output_type": "stream",
 99 |      "text": [
100 |       "---------------------\n",
101 |       "Step 4: Encoding categorical data\n",
102 |       "X\n",
103 |       "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
104 |       "  7.20000000e+04]\n",
105 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
106 |       "  4.80000000e+04]\n",
107 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n",
108 |       "  5.40000000e+04]\n",
109 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
110 |       "  6.10000000e+04]\n",
111 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
112 |       "  6.37777778e+04]\n",
113 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
114 |       "  5.80000000e+04]\n",
115 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
116 |       "  5.20000000e+04]\n",
117 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
118 |       "  7.90000000e+04]\n",
119 |       " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n",
120 |       "  8.30000000e+04]\n",
121 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
122 |       "  6.70000000e+04]]\n",
123 |       "Y\n",
124 |       "[0 1 0 0 1 1 0 1 0 1]\n"
125 |      ]
126 |     },
127 |     {
128 |      "name": "stderr",
129 |      "output_type": "stream",
130 |      "text": [
131 |       "C:\\Users\\dell\\Anaconda3\\envs\\AAA\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:371: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n",
132 |       "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n",
133 |       "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n",
134 |       "  warnings.warn(msg, FutureWarning)\n",
135 |       "C:\\Users\\dell\\Anaconda3\\envs\\AAA\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:392: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n",
136 |       "  \"use the ColumnTransformer instead.\", DeprecationWarning)\n"
137 |      ]
138 |     }
139 |    ],
140 |    "source": [
141 |     "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
142 |     "labelencoder_X = LabelEncoder()\n",
143 |     "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
144 |     "#Creating a dummy variable\n",
145 |     "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
146 |     "X = onehotencoder.fit_transform(X).toarray()\n",
147 |     "labelencoder_Y = LabelEncoder()\n",
148 |     "Y =  labelencoder_Y.fit_transform(Y)\n",
149 |     "print(\"---------------------\")\n",
150 |     "print(\"Step 4: Encoding categorical data\")\n",
151 |     "print(\"X\")\n",
152 |     "print(X)\n",
153 |     "print(\"Y\")\n",
154 |     "print(Y)"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "code",
159 |    "execution_count": 17,
160 |    "metadata": {},
161 |    "outputs": [
162 |     {
163 |      "name": "stdout",
164 |      "output_type": "stream",
165 |      "text": [
166 |       "---------------------\n",
167 |       "Step 5: Splitting the datasets into training sets and Test sets\n",
168 |       "X_train\n",
169 |       "[[0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n",
170 |       "  6.37777778e+04]\n",
171 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n",
172 |       "  6.70000000e+04]\n",
173 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n",
174 |       "  4.80000000e+04]\n",
175 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n",
176 |       "  5.20000000e+04]\n",
177 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n",
178 |       "  7.90000000e+04]\n",
179 |       " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n",
180 |       "  6.10000000e+04]\n",
181 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n",
182 |       "  7.20000000e+04]\n",
183 |       " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n",
184 |       "  5.80000000e+04]]\n",
185 |       "X_test\n",
186 |       "[[0.0e+00 1.0e+00 0.0e+00 3.0e+01 5.4e+04]\n",
187 |       " [0.0e+00 1.0e+00 0.0e+00 5.0e+01 8.3e+04]]\n",
188 |       "Y_train\n",
189 |       "[1 1 1 0 1 0 0 1]\n",
190 |       "Y_test\n",
191 |       "[0 0]\n"
192 |      ]
193 |     }
194 |    ],
195 |    "source": [
196 |     "from sklearn.model_selection import train_test_split\n",
197 |     "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n",
198 |     "print(\"---------------------\")\n",
199 |     "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n",
200 |     "print(\"X_train\")\n",
201 |     "print(X_train)\n",
202 |     "print(\"X_test\")\n",
203 |     "print(X_test)\n",
204 |     "print(\"Y_train\")\n",
205 |     "print(Y_train)\n",
206 |     "print(\"Y_test\")\n",
207 |     "print(Y_test)\n"
208 |    ]
209 |   },
210 |   {
211 |    "cell_type": "code",
212 |    "execution_count": 18,
213 |    "metadata": {},
214 |    "outputs": [
215 |     {
216 |      "name": "stdout",
217 |      "output_type": "stream",
218 |      "text": [
219 |       "---------------------\n",
220 |       "Step 6: Feature Scaling\n",
221 |       "X_train\n",
222 |       "[[-1.          2.64575131 -0.77459667  0.26306757  0.12381479]\n",
223 |       " [ 1.         -0.37796447 -0.77459667 -0.25350148  0.46175632]\n",
224 |       " [-1.         -0.37796447  1.29099445 -1.97539832 -1.53093341]\n",
225 |       " [-1.         -0.37796447  1.29099445  0.05261351 -1.11141978]\n",
226 |       " [ 1.         -0.37796447 -0.77459667  1.64058505  1.7202972 ]\n",
227 |       " [-1.         -0.37796447  1.29099445 -0.0813118  -0.16751412]\n",
228 |       " [ 1.         -0.37796447 -0.77459667  0.95182631  0.98614835]\n",
229 |       " [ 1.         -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n",
230 |       "X_test\n",
231 |       "[[-1.          2.64575131 -0.77459667 -1.45882927 -0.90166297]\n",
232 |       " [-1.          2.64575131 -0.77459667  1.98496442  2.13981082]]\n"
233 |      ]
234 |     }
235 |    ],
236 |    "source": [
237 |     "from sklearn.preprocessing import StandardScaler\n",
238 |     "sc_X = StandardScaler()\n",
239 |     "X_train = sc_X.fit_transform(X_train)\n",
240 |     "X_test = sc_X.transform(X_test)\n",
241 |     "print(\"---------------------\")\n",
242 |     "print(\"Step 6: Feature Scaling\")\n",
243 |     "print(\"X_train\")\n",
244 |     "print(X_train)\n",
245 |     "print(\"X_test\")\n",
246 |     "print(X_test)"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": null,
252 |    "metadata": {},
253 |    "outputs": [],
254 |    "source": []
255 |   }
256 |  ],
257 |  "metadata": {
258 |   "kernelspec": {
259 |    "display_name": "Python 3",
260 |    "language": "python",
261 |    "name": "python3"
262 |   },
263 |   "language_info": {
264 |    "codemirror_mode": {
265 |     "name": "ipython",
266 |     "version": 3
267 |    },
268 |    "file_extension": ".py",
269 |    "mimetype": "text/x-python",
270 |    "name": "python",
271 |    "nbconvert_exporter": "python",
272 |    "pygments_lexer": "ipython3",
273 |    "version": "3.7.0"
274 |   }
275 |  },
276 |  "nbformat": 4,
277 |  "nbformat_minor": 2
278 | }
279 | 


--------------------------------------------------------------------------------
/homework-04/魏卓其.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 4,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "#Day 1: Data Prepocessing\n",
 12 |     "\n",
 13 |     "#Step 1: Importing the libraries\n",
 14 |     "import numpy as np\n",
 15 |     "import pandas as pd"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 5,
 21 |    "metadata": {},
 22 |    "outputs": [
 23 |     {
 24 |      "name": "stdout",
 25 |      "output_type": "stream",
 26 |      "text": [
 27 |       "Step 2: Importing dataset\n",
 28 |       "X\n",
 29 |       "[['France' 44.0 72000.0]\n",
 30 |       " ['Spain' 27.0 48000.0]\n",
 31 |       " ['Germany' 30.0 54000.0]\n",
 32 |       " ['Spain' 38.0 61000.0]\n",
 33 |       " ['Germany' 40.0 nan]\n",
 34 |       " ['France' 35.0 58000.0]\n",
 35 |       " ['Spain' nan 52000.0]\n",
 36 |       " ['France' 48.0 79000.0]\n",
 37 |       " ['Germany' 50.0 83000.0]\n",
 38 |       " ['France' 37.0 67000.0]]\n",
 39 |       "Y\n",
 40 |       "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n"
 41 |      ]
 42 |     }
 43 |    ],
 44 |    "source": [
 45 |     "#Step 2: Importing dataset\n",
 46 |     "dataset = pd.read_csv('Data.csv')\n",
 47 |     "X = dataset.iloc[ : , :-1].values\n",
 48 |     "Y = dataset.iloc[ : , 3].values\n",
 49 |     "print(\"Step 2: Importing dataset\")\n",
 50 |     "print(\"X\")\n",
 51 |     "print(X)\n",
 52 |     "print(\"Y\")\n",
 53 |     "print(Y)"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": 8,
 59 |    "metadata": {},
 60 |    "outputs": [
 61 |     {
 62 |      "name": "stdout",
 63 |      "output_type": "stream",
 64 |      "text": [
 65 |       "---------------------\n",
 66 |       "Step 3: Handling the missing data\n",
 67 |       "step2\n",
 68 |       "X\n",
 69 |       "[['France' 44.0 72000.0]\n",
 70 |       " ['Spain' 27.0 48000.0]\n",
 71 |       " ['Germany' 30.0 54000.0]\n",
 72 |       " ['Spain' 38.0 61000.0]\n",
 73 |       " ['Germany' 40.0 63777.77777777778]\n",
 74 |       " ['France' 35.0 58000.0]\n",
 75 |       " ['Spain' 38.77777777777778 52000.0]\n",
 76 |       " ['France' 48.0 79000.0]\n",
 77 |       " ['Germany' 50.0 83000.0]\n",
 78 |       " ['France' 37.0 67000.0]]\n"
 79 |      ]
 80 |     }
 81 |    ],
 82 |    "source": [
 83 |     "#Step 3: Handling the missing data\n",
 84 |     "from sklearn.preprocessing import Imputer\n",
 85 |     "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n",
 86 |     "imputer = imputer.fit(X[ : , 1:3])\n",
 87 |     "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n",
 88 |     "print(\"---------------------\")\n",
 89 |     "print(\"Step 3: Handling the missing data\")\n",
 90 |     "print(\"step2\")\n",
 91 |     "print(\"X\")\n",
 92 |     "print(X)"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 9,
 98 |    "metadata": {},
 99 |    "outputs": [
100 |     {
101 |      "name": "stdout",
102 |      "output_type": "stream",
103 |      "text": [
104 |       "---------------------\n",
105 |       "Step 4: Encoding categorical data\n",
106 |       "X\n",
107 |       "[[  1.00000000e+00   0.00000000e+00   0.00000000e+00   4.40000000e+01\n",
108 |       "    7.20000000e+04]\n",
109 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   2.70000000e+01\n",
110 |       "    4.80000000e+04]\n",
111 |       " [  0.00000000e+00   1.00000000e+00   0.00000000e+00   3.00000000e+01\n",
112 |       "    5.40000000e+04]\n",
113 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   3.80000000e+01\n",
114 |       "    6.10000000e+04]\n",
115 |       " [  0.00000000e+00   1.00000000e+00   0.00000000e+00   4.00000000e+01\n",
116 |       "    6.37777778e+04]\n",
117 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   3.50000000e+01\n",
118 |       "    5.80000000e+04]\n",
119 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   3.87777778e+01\n",
120 |       "    5.20000000e+04]\n",
121 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   4.80000000e+01\n",
122 |       "    7.90000000e+04]\n",
123 |       " [  0.00000000e+00   1.00000000e+00   0.00000000e+00   5.00000000e+01\n",
124 |       "    8.30000000e+04]\n",
125 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   3.70000000e+01\n",
126 |       "    6.70000000e+04]]\n",
127 |       "Y\n",
128 |       "[0 1 0 0 1 1 0 1 0 1]\n"
129 |      ]
130 |     }
131 |    ],
132 |    "source": [
133 |     "#Step 4: Encoding categorical data\n",
134 |     "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
135 |     "labelencoder_X = LabelEncoder()\n",
136 |     "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n",
137 |     "#Creating a dummy variable\n",
138 |     "onehotencoder = OneHotEncoder(categorical_features = [0])\n",
139 |     "X = onehotencoder.fit_transform(X).toarray()\n",
140 |     "labelencoder_Y = LabelEncoder()\n",
141 |     "Y =  labelencoder_Y.fit_transform(Y)\n",
142 |     "print(\"---------------------\")\n",
143 |     "print(\"Step 4: Encoding categorical data\")\n",
144 |     "print(\"X\")\n",
145 |     "print(X)\n",
146 |     "print(\"Y\")\n",
147 |     "print(Y)"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "code",
152 |    "execution_count": 10,
153 |    "metadata": {},
154 |    "outputs": [
155 |     {
156 |      "name": "stdout",
157 |      "output_type": "stream",
158 |      "text": [
159 |       "---------------------\n",
160 |       "Step 5: Splitting the datasets into training sets and Test sets\n",
161 |       "X_train\n",
162 |       "[[  0.00000000e+00   1.00000000e+00   0.00000000e+00   4.00000000e+01\n",
163 |       "    6.37777778e+04]\n",
164 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   3.70000000e+01\n",
165 |       "    6.70000000e+04]\n",
166 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   2.70000000e+01\n",
167 |       "    4.80000000e+04]\n",
168 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   3.87777778e+01\n",
169 |       "    5.20000000e+04]\n",
170 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   4.80000000e+01\n",
171 |       "    7.90000000e+04]\n",
172 |       " [  0.00000000e+00   0.00000000e+00   1.00000000e+00   3.80000000e+01\n",
173 |       "    6.10000000e+04]\n",
174 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   4.40000000e+01\n",
175 |       "    7.20000000e+04]\n",
176 |       " [  1.00000000e+00   0.00000000e+00   0.00000000e+00   3.50000000e+01\n",
177 |       "    5.80000000e+04]]\n",
178 |       "X_test\n",
179 |       "[[  0.00000000e+00   1.00000000e+00   0.00000000e+00   3.00000000e+01\n",
180 |       "    5.40000000e+04]\n",
181 |       " [  0.00000000e+00   1.00000000e+00   0.00000000e+00   5.00000000e+01\n",
182 |       "    8.30000000e+04]]\n",
183 |       "Y_train\n",
184 |       "[1 1 1 0 1 0 0 1]\n",
185 |       "Y_test\n",
186 |       "[0 0]\n"
187 |      ]
188 |     }
189 |    ],
190 |    "source": [
191 |     "#Step 5: Splitting the datasets into training sets and Test sets\n",
192 |     "from sklearn.model_selection import train_test_split\n",
193 |     "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n",
194 |     "print(\"---------------------\")\n",
195 |     "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n",
196 |     "print(\"X_train\")\n",
197 |     "print(X_train)\n",
198 |     "print(\"X_test\")\n",
199 |     "print(X_test)\n",
200 |     "print(\"Y_train\")\n",
201 |     "print(Y_train)\n",
202 |     "print(\"Y_test\")\n",
203 |     "print(Y_test)"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": 11,
209 |    "metadata": {},
210 |    "outputs": [
211 |     {
212 |      "name": "stdout",
213 |      "output_type": "stream",
214 |      "text": [
215 |       "---------------------\n",
216 |       "Step 6: Feature Scaling\n",
217 |       "X_train\n",
218 |       "[[-1.          2.64575131 -0.77459667  0.26306757  0.12381479]\n",
219 |       " [ 1.         -0.37796447 -0.77459667 -0.25350148  0.46175632]\n",
220 |       " [-1.         -0.37796447  1.29099445 -1.97539832 -1.53093341]\n",
221 |       " [-1.         -0.37796447  1.29099445  0.05261351 -1.11141978]\n",
222 |       " [ 1.         -0.37796447 -0.77459667  1.64058505  1.7202972 ]\n",
223 |       " [-1.         -0.37796447  1.29099445 -0.0813118  -0.16751412]\n",
224 |       " [ 1.         -0.37796447 -0.77459667  0.95182631  0.98614835]\n",
225 |       " [ 1.         -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n",
226 |       "X_test\n",
227 |       "[[-1.          2.64575131 -0.77459667 -1.45882927 -0.90166297]\n",
228 |       " [-1.          2.64575131 -0.77459667  1.98496442  2.13981082]]\n"
229 |      ]
230 |     }
231 |    ],
232 |    "source": [
233 |     "#Step 6: Feature Scaling\n",
234 |     "from sklearn.preprocessing import StandardScaler\n",
235 |     "sc_X = StandardScaler()\n",
236 |     "X_train = sc_X.fit_transform(X_train)\n",
237 |     "X_test = sc_X.transform(X_test)\n",
238 |     "print(\"---------------------\")\n",
239 |     "print(\"Step 6: Feature Scaling\")\n",
240 |     "print(\"X_train\")\n",
241 |     "print(X_train)\n",
242 |     "print(\"X_test\")\n",
243 |     "print(X_test)"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "code",
248 |    "execution_count": null,
249 |    "metadata": {
250 |     "collapsed": true
251 |    },
252 |    "outputs": [],
253 |    "source": []
254 |   }
255 |  ],
256 |  "metadata": {
257 |   "kernelspec": {
258 |    "display_name": "Python 3",
259 |    "language": "python",
260 |    "name": "python3"
261 |   },
262 |   "language_info": {
263 |    "codemirror_mode": {
264 |     "name": "ipython",
265 |     "version": 3
266 |    },
267 |    "file_extension": ".py",
268 |    "mimetype": "text/x-python",
269 |    "name": "python",
270 |    "nbconvert_exporter": "python",
271 |    "pygments_lexer": "ipython3",
272 |    "version": "3.6.2"
273 |   }
274 |  },
275 |  "nbformat": 4,
276 |  "nbformat_minor": 2
277 | }
278 | 


--------------------------------------------------------------------------------
/homework-04/黄禹霏.ipynb:
--------------------------------------------------------------------------------
 1 | #Day 1: Data Prepocessing
 2 | 
 3 | #Step 1: Importing the libraries
 4 | import numpy as np
 5 | import pandas as pd
 6 | 
 7 | #Step 2: Importing dataset
 8 | dataset = pd.read_csv('../datasets/Data.csv')
 9 | X = dataset.iloc[ : , :-1].values
10 | Y = dataset.iloc[ : , 3].values
11 | print("Step 2: Importing dataset")
12 | print("X")
13 | print(X)
14 | print("Y")
15 | print(Y)
16 | 
17 | #Step 3: Handling the missing data
18 | from sklearn.preprocessing import Imputer
19 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
20 | imputer = imputer.fit(X[ : , 1:3])
21 | X[ : , 1:3] = imputer.transform(X[ : , 1:3])
22 | print("---------------------")
23 | print("Step 3: Handling the missing data")
24 | print("step2")
25 | print("X")
26 | print(X)
27 | 
28 | #Step 4: Encoding categorical data
29 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
30 | labelencoder_X = LabelEncoder()
31 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
32 | #Creating a dummy variable
33 | onehotencoder = OneHotEncoder(categorical_features = [0])
34 | X = onehotencoder.fit_transform(X).toarray()
35 | labelencoder_Y = LabelEncoder()
36 | Y =  labelencoder_Y.fit_transform(Y)
37 | print("---------------------")
38 | print("Step 4: Encoding categorical data")
39 | print("X")
40 | print(X)
41 | print("Y")
42 | print(Y)
43 | 
44 | #Step 5: Splitting the datasets into training sets and Test sets
45 | from sklearn.model_selection import train_test_split
46 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)
47 | print("---------------------")
48 | print("Step 5: Splitting the datasets into training sets and Test sets")
49 | print("X_train")
50 | print(X_train)
51 | print("X_test")
52 | print(X_test)
53 | print("Y_train")
54 | print(Y_train)
55 | print("Y_test")
56 | print(Y_test)
57 | 
58 | #Step 6: Feature Scaling
59 | from sklearn.preprocessing import StandardScaler
60 | sc_X = StandardScaler()
61 | X_train = sc_X.fit_transform(X_train)
62 | X_test = sc_X.transform(X_test)
63 | print("---------------------")
64 | print("Step 6: Feature Scaling")
65 | print("X_train")
66 | print(X_train)
67 | print("X_test")
68 | print(X_test)
69 | 


--------------------------------------------------------------------------------
/homework-05/lixiaoyu.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | dataset=pd.read_csv('C:\Users\李校宇\Desktop\studentscores.csv')
 6 | X = dataset.iloc[ : ,  : 1].values
 7 | Y = dataset.iloc[ : , 1].values
 8 | 
 9 | from sklearn.model_selection import train_test_split
10 | X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size =1/4,random_state=0)
11 | 
12 | from sklearn.linear_model import LinearRegression
13 | regressor=LinearRegression()
14 | regressor=regressor.fit(X_train,Y_train)
15 | 
16 | Y_pred=regressor.predict(X_test)
17 | 
18 | plt.scatter(X_train,Y_train,color='red')
19 | plt.plot(X_train,regressor.predict(X_train),color='blue')
20 | plt.show()
21 | 
22 | plt.scatter(X_test,Y_test,color='red')
23 | plt.plot(X_test,regressor.predict(X_test),color='blue')
24 | plt.show()
25 | 


--------------------------------------------------------------------------------
/homework-05/zhangbo.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 4,
 6 |    "metadata": {},
 7 |    "outputs": [
 8 |     {
 9 |      "data": {
10 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHxtJREFUeJzt3XmUVOW19/HvrqZBmS4gBBkCqGE55lWSlqh40TjdmPiCiSYvigZNlAzG6I2JQ4ga4sIhGmMuer22ECWmFaJiQFduIqAomtjaDFEZBGUSRGkQIgiC3bXfP+p0dTc0dFXXcE5V/T5rsZrzUHVqi/Dj6ec5Zx9zd0REpPDFwi5ARESyQ4EuIlIkFOgiIkVCgS4iUiQU6CIiRUKBLiJSJBToIiJFQoEuIlIkFOgiIkWiXT4/rGfPnj5o0KB8fqSISMGbP3/+Jnfv1drr8hrogwYNoqamJp8fKSJS8MxsTSqv05KLiEiRUKCLiBQJBbqISJFQoIuIFAkFuohIkVCgi4iko6oKBg2CWCzxtaoq7IqS8nrZoohIQauqgrFjYceOxPGaNYljgNGjw6sroBm6iEiqxo1rDPMGO3YkxiNAgS4ikqq1a9MbzzMFuohIqgYMSG88zxToIiKpmjABOnZsPtaxY2I8AhToIiKpGj0aKith4EAwS3ytrNznhmh9fT1jxlzCjBkz8lKeuXtePgigoqLC1ZxLRErB008/zYgRIwE45phjeOON19t8LjOb7+4Vrb1Oly2KiGTRxx9/zGc+05sdwdUwJ554Ii+9NC8vn60lFxGRLJk4cSKdO3dJhvnChQv4+99fJhbLT9Rqhi4ikqH333+fPn36Jo+/+93vMGnSpLzXoRm6iEgGrrzyymZhvm7du6GEOSjQRUTa5M0338Qsxr333gfAnXf+Gvc4/fr1C60mLbmIiKShvr6edu3Kk8dlZWVs2fIhXbp0CbGqBM3QRURSdO211zYL8yeffIK6uk8jEeagGbqISKu2bt1K9+49mo198slOOnToEFJFLdMMXURkP0444cRmYX7vvRNxj0cuzEGBLiLSoqVLl2IWo7q6OjnmHueKK65I/SR5fhiGAl1EZA9mMY466ujk8dy5z+MeT+8kDQ/DWLMG3BsfhpHDUFegi4gEZs6ciVljLHbo0AH3OKecckr6JwvhYRjaFBWRkufuxGJlzcbWrFnNgEz6nIfwMAzN0EWkpN1yyy3NwnzEiBG4xzMLcwjlYRgKdBGJjjxuIm7fvh2zGDfddHNy7OOPtzNjxp+z8wEhPAxDgS4i0ZDHTUSzGF26dE0e33HH7bjH6bhnAGcizYdhZIMecCEi4amqSmwSrl2bmJXX1+/9moEDYfXqrHxcTU0Nxx8/tNlYPF6PmWXl/LmS6gMuNEMXkXDsOSNvKcwha5uIZrFmYd5wg1DUwzwduspFRMLR0mV9LclwE/GBBx7g+9//QbOxtK8pLxAKdBEJRyoz7ww2EVu6FLG6+hWGDh26j3cUPi25iEg49jXzLivLeBPx/PO/uVeYu8eLOsxBM3QRCcuECYk19KbLLh07ZnQlyLZt2+ja9d+ajdXWbqRnz56ZVFowNEMXkXBk+bI+s1izMB82bBju8ZIJc9AMXUTCNHp0xtdlL1myhKOPPqbZWH19HbFY6c1XS++/WESKhlmsWZjfeusE3OMlGeagGbqIFKBHH32U0aMvajZWrJcipqM0/xkTkYJlFmsW5s89N2fvMM/zgyWiQoEuIgXh8ssvb9arHBKz8i9/+cvNXxjCgyWiIqVAN7P/NLPFZvammT1mZgeY2SFmVm1mK8xsmpm1z3WxIlJ6du7ciVmMSZMmJ8fWr1+37yWWEB4sERWtBrqZ9QN+DFS4+zFAGTAKuAP4rbsPBrYA381loSJSenr27EXHjp2Sx0cddRTucfr27bvvN4XwYImoSHXJpR1woJm1AzoCG4DTgCeCX58CnJv98kSkFK1cuRKzGJs3b06O7d69i8WL32z9zSE8WCIqWg10d18P3AWsJRHk/wLmA1vdvS542TqgX0vvN7OxZlZjZjW1tbXZqVpEipZZjMMO+1zy+Nprf4Z7nPLy8tROEMKDJaIilSWX7sBI4BCgL9AJOLuFl7bYWN3dK929wt0revXqlUmtIlLEqqqqWtz0vOOOO9I7UQgPloiKVK5DPwNY5e61AGY2HTgJ6GZm7YJZen/gvdyVKSLFbM8gf+yxRxk1alTbT5iFO1ALUSqBvhY4wcw6AjuB04Ea4HngfGAqMAaYkasiRaQ4DRnyBRYtWtRsTDcItV0qa+jVJDY/FwBvBO+pBK4DfmJmbwMHAZP3eRIRkSZ27dqFWaxZmL/11jKFeYZSuvXf3W8Gbt5jeCVQ3M2FRSTr9lxeAc3Ks0V3iopIXixbtmyvMN+27SOFeRYp0EUk571PzGIceeRRyeN+/frhHqdz585Z/ZxSp0AXKXU57H0yefLkFi9FXLfu3YzPLXtToIuUuhz1PjGLcdlllyePf/GLcdFZXinSbozqhy5S6rLc++Skky7jH//4OdAT2AREbNOz4TuShn/EGr4jgYK/dl0zdJFSl6XeJ59+WofZLP7xj0nAocBJvPDC3GiFORR1N0YFukipy0LvE7MzaN++HXBmMPL/cP8zw4cPz1qZWVPE3RgV6CKlLoPeJ8uXr8FsAzA7GFnMBx98iPu0nJackSLuxqg1dBFpU+8Ts+8Av08ex2L/Tn39vCwXlgMTJjRfQ4ei6caoGbqIpOWhh2ZiBo1hPo36ei+MMIei7saoGbqIpMzsduD65PGFF95IVdUt4RXUVkXajVEzdBFp1YgRPw1m5Q1hPh53CjPMi5gCXUT2qb4+jtkMnn76ruTY1KnPkujXJ1GjJRcRaZHZvwPzSDywDOBi3B8BzgqvKNkvzdBFpJl16z7A7B0SYQ6witWrNwRhLlGmGbqIJJmNBqqA3sHIKbi/EGJFkg7N0EWE6dOfCzY9G5pUPU1dXVxhXmAU6CIlzuxmzjvvtOTxV75yNe7/l7IyxUOh0f8xkRJ1ySXjg1n5+GDk17jD//7vPZmduEhb0xYCraGLlJh43Ckrm0rTxwQ/8MB0xo69NvOTF3Fr2kJg7p63D6uoqPCampq8fZ6INHfAAaeya9fcJiOX4/5g9j5g0KBEiO9p4EBYvTp7n1NizGy+u1e09jotuYiUgM2b/4XZm03C/AOWLFmV3TCHom5NWwi05CJS5My+CTwO/Fswchbuz+bmwwYMaHmGXgStaQuBZugiUZLFDcXZs6uDTc/Hg5E5fPLJp7kLc8jKwzKk7RToIlHRsKG4Zg24N24otiHUza7lzDO/lDyuqLgU99Pp0KE8mxXvrYhb0xYCbYqKREUWNhSvueYe7r776iYjE3G/MhvVSYi0KSpSaDLcUDR7qFmY3377FIV5iVGgi0RFG5912b//yGCt/NJg5Erc4brrxmSzOikACnSRqEhzQ3Hbth2YVbN+/Yxg5CNee20J7hNzW6dElgJdJCrS2FA0G0HXrh2Bho3Pc3DvSkXFUXktWaJFm6IiBeSVV97gxBMPBToFI39n+/YhdOp0YJhlSY6luimqG4tECoTZVcDvkseDBn2DVaumh1eQRI6WXEQibsKE3webng1hPgl3FOayFwW6SISZ/Te/+MV3ksfXXTcR98tCrEiiTIEuEkHHHXdRMCv/YTDyM9zh9tt1XbnsmwJdJJ9a6dXyySe7MZvLP//5x2CkjjlzXsP9znxXKgVIm6Ii+dLKwx/M/gP4G3Bq8IbzcH8SOD7vpUph0gxdJF/GjWsM8wY7drD4ZzditolEmAMsZPPmj4IwF0mdZugi+dJCTxZjLGx4IHncufMZbNs2O59VSRFJaYZuZt3M7AkzW2ZmS83sRDPrYWazzGxF8LV7rosViYS29ixv0pPlv+mJ4UBDmP8RdxTmkpFUl1x+B/zV3Y8AjgWWAtcDc9x9MDAnOBYpbpn0LA96tRjOFdQmhy879VLcL8ph0VIqWg10M+sKDAcmA7j7bnffCowEpgQvmwKcm6siRSJjH+vgjBvX6ltPmLgW2/Fxk5Fb8D9W8eDzD2W3RilZrfZyMbPjgEpgCYnZ+XzgKmC9u3dr8rot7r7XsouZjQXGAgwYMOCLa1pq4C9SKGKxxMx8T2YQj7f4lrq6esrLy5qNVVW9xIUXnpyLCqUIZfMBF+2ALwD3u/sQ4GPSWF5x90p3r3D3il69eqX6NpFoSrNnudkv9wjzP+OOwlxyIpVAXwesc/fq4PgJEgH/gZn1AQi+bsxNiSIRkmLP8uXL3wvu9PxlcmzNmlrctTIpudNqoLv7+8C7ZnZ4MHQ6ieWXmUDDI1HGADNaeLtIcUmhZ7nZbA4/vG+TN12LOwwYoO9QJbdS6ocerKNPAtoDK0k86yoG/AkYAKwFvunuH+7vPOqHLsWssnIO3/ve6c3G6uudWMxCqkiKRVb7obv7IqClk53ewphIyUksrzT+dTjrrN/wt79dAyjMJX90679IBs4+++4gzBu5E4S5SH4p0EXaIB53zOCvf/1Jcuy++55t8YpGkXxRLxeRNJn9FLirychzuJ8GnBVSRSIJmqGLpGj16g+C5ZXGMF+6dH0Q5iLhU6CLpMDsSQ45pHeTkfG4wxFH9AutJpE9KdBF9mPy5GeDWfl5ybFdu+pwvzm0mkT2RYEuxaOtbW33wWwbl13WuC5+8ME/wB3at9fWk0STAl2KQyZtbfdw5pm/CmblXZJj7rBhw/3Zq1ckBxToUhwyaGvblBnMnn1T8vinP31IlyJKwdD3jlIcWni8237H92D2A6DpDLyGxJ3Wl2ZamUjeaIYuxSHNtrYNNm7cGiyvNIb5vHmLSaFthkjkKNClOKTY1rYps4fo3btbk5G7cYeTTz46NzWK5JgCXYpDCm1tGzz11N+DWXnjcsr27Z/g/pO9XitSSLSGLsVj9OgWA7wps7XAScnj8vJL2L37YeCAnJYmkg+aoUtJuOCC3wSz8sY1dXeCMBcpDgp0KXpmMHVqYzvbSy75L12KKEVJSy5StMy+Dfyhycg7uB8G/DikikRySzN0KTrbtu0Mllcaw/zpp18NwlykeCnQpaiYTaRr1wObjDyIO5xzztDQahLJFwW6FIXnnvtnMCu/Mjm2efM23C8PrSaRfNMauhQ8s9eBY5uMXI77gzRtriVSCjRDl4J15ZWVwaz8/yTH3AnCXKT0KNClIJnBvfeOTR6fc86tuhRRSp4CXQpKjx5jg1l5g824w9NP/zy9E2X5YRgiUaBAl4Kwa9enmMGWLZXJsUceeR73g9I/WRYfhiESJQp0iTyz2zjggPImI1Nxh4su+nLbTpilh2GIRI0CXSJr0aKVwfLKDcmxd9/dhPuozE6c4cMwRKJKgS6RZPYiQ4Yc2mTkatyhf/+emZ+8jQ/DEIk6BbpEyh13PBXMyocnx+rrHfd7svchbXgYhkghUKBLZJjB9dd/PXl8xhm34Q6xmO3nXW2QxsMwRAqJeR4v3q2oqPCampq8fZ4Uhi9+8SYWLPhVk5F63MtCq0ckasxsvqfwoFvN0CU09fVxzGgW5vff/zeFuUgbqZeLhMLsBuC2JiN/wf2rwH+EVJFI4dMMXfLq7bc3BJuejWG+fPl7QZiLSCYU6JI3Zs8weHCfJiPjcIfBg/uGVpNIMVGgl5KQ+pdUVj4bzMrPSY7V1cVx12WCItmkNfRS0dC/pOGW94b+JZDTy/XM6oCzksfHHXcjCxfeguYSItmnv1WlIs/9S84667ZgVt44Z3AnCPM0qCuiSMoU6KUiT/1L4nHHDGbNauy/cuut09vWq1xdEUXSknKgm1mZmS00s2eC40PMrNrMVpjZNDNrn7syJWN56F9idhVlZU3v6nwZd7jhhm+07YTqiiiSlnRm6FcBS5sc3wH81t0HA1uA72azMMmyHPYv2bDhw2B55XfJsQULVuI+LLMTqyuiSFpSCnQz6w98DZgUHBtwGvBE8JIpwLm5KFCyJEf9S8weo2/fHk1GbsedPToltpG6IoqkJdUZ+j3AtUA8OD4I2OrudcHxOqBflmuTbBs9Glavhng88TWDMH/88ZeDWfkFybFPPvkU9+szrbKRuiKKpKXVQDezc4CN7j6/6XALL21x28vMxppZjZnV1NbWtrFMiRKzWr71rcbllIEDf4Y7dOhQvp93tYG6IoqkpdVui2Z2G3AxUAccAHQFniLRdONgd68zsxOBX7r7fhtxqNtiYbvggv9i6tQfNxvLY7NOkZKVtW6L7n6Du/d390HAKOA5dx8NPA+cH7xsDDAjg3ol4sxoFubXXPMHhblIxGRyHfp1wE/M7G0Sa+qTs1OSREl5+Q+CtfIGb+IOd9317bBKEpF9SCvQ3X2uu58T/Hyluw9198+5+zfdfVduSpQwbNmyHTOoq7s/OfbCC4txPybEqkRkf3SnqOzFrJIePTo3GbkXdxg+/OjQahKR1inQJWnWrEXB8srY5NhHH+3E/Ueh1SQiqVO3RQHAbAVwXPK4c+cfsW3bvcCBodUkIunRDL3E/ehHk4NZ+eDkmDtBmItIIVGglzAzuO++xhY83/nO/6R+KaLa2opEjpZcSlDv3lexcePvmoy8i/tnge+ndoKQHpYhIvunGXoJ2blzN2Y0C/MZM14NwjwNamsrEkkK9BJh9hs6dmzasv5h3GHEiKHpn0xtbUUiSYFe5F5/fW2w6XlNcmzz5u24X9L2k6qtrUgkKdCLmNlrHHtsY8j263cr7uxx01AbqK2tSCQp0IvQPfc8G8zKj0+OucO6dT/Pzgeora1IJLXaPjeb1D4392yPTvWXX/4IlZUXh1OMiGRF1trnSmE444yJe4T5dtxRmIuUEAV6gaurq8cM5sy5Mjn21FPzcc9wnVxECo4CvYCZ/ZLy8rImI9Nxh3PP/WJoNYlIeBToBejttz8Illd+mRxbv34L7t8IqyQRiQAFehj21wellR4pZrMZPLh38rhLl1/hDn37ds9dTSJSENTLJd/21wcF9vlrU+oGcsklJwNnJF9aX+/EYjfltiZdiihSMHTZYr4NGpQIzD0NHJj42sKvGc3/H51//oM8/vjl+alp9ersfY6ItEmqly1qhp5vafRB+RZn8Dizmo0l/v3NYpinWZOIRJfW0PNtf31Qgl+Lk5iVNw3zhx9+KfVe5dmsSUQKhgI93/bXB2XCBL4Wu46yZkssc/A/VjFmzMnh1CQiBUNLLvnWsMk4blxiSWPAAJgwgQ/P/gYHHdT8+Z1v9z2cw359U+43JvdRkzZERQqLNkUjoF+/V3jvvROSx6ecMpe5c0/N/MRVVQppkSKgTdECUF39Hiec0BdoDPNPP43Trt2pmZ9clyKKlBytoYekXbv1QZgn3HBDNe7Qrl2W/pfoMXEiJUcz9Dx7+eV1nHxyf6Bfciyx6vWl7H6QLkUUKTmaoefRoYe+HIR5wmuvbdCliCKSNQr0PPjDH5ZgBqtWDQPg4otfxB0qKvrs/eJs9VTRpYgiJUdLLjm0e3c93bqtYOfOowAw28SmTZ3o0WN4y2/I5kamLkUUKTmaoe9LhjPl6657hQ4dyti58wgAJkyYTzzekx49Dtz3m7K9kTl6dKIXSzye+KowFylqmqG3JIOZ8vvvb6dPnw40XIrYrdtCamuPpV27FB46oY1MEcmAZugtaeNMeeTIufTp0xkoB2D69BVs2TIk9UsRtZEpIhlQoLckzZlyTc0GzGDmzFMB+PznX8Advv71wel9rjYyRSQDCvSWpDFTPvLIeRx/fOPVKgsXfsDrr5/Sts8dPRoqKxN9yM0SXysrtfYtIilRoLckhZnytGlvYQbLlv07AOedl5iVH3dcbzKijUwRaSNtirZkP5f81dXF6d59Mdu3fz548cds3Gj06tXGWbmISJZohr4vLcyUx49/jfLyWDLMb7zxVdw70atXx/2eai96ILOI5IBm6CnYtGkHvXrVAccD0KnTYj788Ajatx+a/snUBVFEcqTVGbqZfdbMnjezpWa22MyuCsZ7mNksM1sRfO2e+3Lzb9SoF4MZeFcAqqqWsX370bRvX9a2E6oLoojkSCpLLnXANe5+JIm7Za4ws6OA64E57j4YmBMcR0sGSxsrV27EbCvTpiVu0x88OPFMzwsvPCKzmnTzkIjkSKuB7u4b3H1B8PNtwFISvV9HAlOCl00Bzs1VkW3SsLSxZk2iP23D0kYKoT5y5P9w2GE7gG4AVFdvYPnyLD3TUzcPiUiOpLUpamaDgCFANdDb3TdAIvSBz2S7uIy0YWlj3ry3MKti5szvAzv44Q8fxR2GDm2hK2Jb6eYhEcmRlAPdzDoDTwJXu/tHabxvrJnVmFlNbW1tW2psmzSWNuJx5+ij72T48J7ANzEbT23tZ7nvvguzX5duHhKRHEkp0M2snESYV7n79GD4AzPrE/x6H2BjS+9190p3r3D3il69emWj5tSkuLTxyCMvU1Y2myVLfgYs47e/nUs8fjM9e3bJXW26eUhEciCVq1wMmAwsdfe7m/zSTGBM8PMxwIzsl5eBVpY2tm//hK5db+Hb3x4CfIm+fSewa9eXuPrqs/Jfq4hIFqQyQx8GXAycZmaLgh9fBW4HzjSzFcCZwXF07Gdp46ab/kyXLkvYtu1GYBYzZ77D+vXjaN9el+WLSOEyz9lDLfdWUVHhNTU16b2pqiprT91Zu3YTAwc+DFwN1DJs2J948cUfE4tZm84nIpIPZjbf3Stae120b/3P4NLDPY0aNYmBA/8F/BR4mEWLPuWll65SmItI0Yh2oGfhrspXX30Hs4eZNu0yoJ5LL52C+2Uce6yu+xaR4hLtReMM7qp0d0aMGMkzz/QFJgK3smHDDzn44DGtvVVEpCBFe4bexrsq582bRyxWxjPPPAM8yJ13/g33n3Pwwd2yX6OISEREe4Y+YULzzoSw37sqd+/ezRFHHMmqVasA+NznPseSJYspLy/PR7UiIqGK9gw9jbsqq6qq6NDhgGSYz5v3IitWLFeYi0jJiPYMHRLhvZ/LFLdu3Ur37j2Sx+eeey7Tpz9J4n4oEZHSEe0ZeivGjx/fLMyXL3+Lp56arjAXkZIU/Rl6C1atWsWhhx6WPL7++uu47bbbQqxIRCR8BRXo7s4FF1zItGnTkmObNtVy0EEHhViViEg0FMySS3V1NbFYWTLMf//7ybjHFeYiIoGCmKG/9dZbnHDCiQD07duXlSvfoUOHDiFXJSISLQUxQ+/WrRunnnoqs2fPYv36dQpzEZEWFMQMvXfv3jz//HNhlyEiEmkFMUMXEZHWKdBFRIqEAl1EpEgo0EVEioQCXUSkSCjQRUSKhAJdRKRIKNBFRIqEuXv+PsysFliTxlt6AptyVE5bRbEmiGZdUawJollXFGuCaNYVxZogt3UNdPderb0or4GeLjOrcfeKsOtoKoo1QTTrimJNEM26olgTRLOuKNYE0ahLSy4iIkVCgS4iUiSiHuiVYRfQgijWBNGsK4o1QTTrimJNEM26olgTRKCuSK+hi4hI6qI+QxcRkRRFMtDN7PdmttHM3gy7lgZm9lkze97MlprZYjO7KgI1HWBmr5rZP4OaxoddUwMzKzOzhWb2TNi1NDCz1Wb2hpktMrOasOtpYGbdzOwJM1sW/Pk6MeR6Dg9+jxp+fGRmV4dZUwMz+8/gz/qbZvaYmR0QgZquCupZHPbvUySXXMxsOLAd+IO7HxN2PQBm1gfo4+4LzKwLMB84192XhFiTAZ3cfbuZlQMvAVe5+yth1dTAzH4CVABd3f2csOuBRKADFe4eqWuYzWwKMM/dJ5lZe6Cju28Nuy5I/MMMrAe+5O7p3EOSi1r6kfgzfpS77zSzPwF/cfeHQ6zpGGAqMBTYDfwV+IG7rwijnkjO0N39ReDDsOtoyt03uPuC4OfbgKVAv5BrcnffHhyWBz9C/xfazPoDXwMmhV1L1JlZV2A4MBnA3XdHJcwDpwPvhB3mTbQDDjSzdkBH4L2Q6zkSeMXdd7h7HfAC8PWwiolkoEedmQ0ChgDV4VaSXNpYBGwEZrl76DUB9wDXAvGwC9mDA8+a2XwzGxt2MYFDgVrgoWCJapKZdQq7qCZGAY+FXQSAu68H7gLWAhuAf7n7s+FWxZvAcDM7yMw6Al8FPhtWMQr0NJlZZ+BJ4Gp3/yjsety93t2PA/oDQ4NvAUNjZucAG919fph17MMwd/8CcDZwRbC0F7Z2wBeA+919CPAxcH24JSUEyz8jgMfDrgXAzLoDI4FDgL5AJzO7KMya3H0pcAcwi8Ryyz+BurDqUaCnIVinfhKocvfpYdfTVPBt+lzgKyGXMgwYEaxXTwVOM7M/hltSgru/F3zdCDxFYt0zbOuAdU2+s3qCRMBHwdnAAnf/IOxCAmcAq9y91t0/BaYDJ4VcE+4+2d2/4O7DSSwVh7J+Dgr0lAUbkJOBpe5+d9j1AJhZLzPrFvz8QBJ/4JeFWZO73+Du/d19EIlv159z91BnUQBm1inYzCZY0jiLxLfLoXL394F3zezwYOh0ILSN9j1cQESWWwJrgRPMrGPw9/F0EntZoTKzzwRfBwDfIMTfs3ZhffD+mNljwKlATzNbB9zs7pPDrYphwMXAG8GaNcDP3f0vIdbUB5gSXIkQA/7k7pG5TDBiegNPJXKAdsCj7v7XcEtKuhKoCpY4VgKXhlwPwXrwmcD3wq6lgbtXm9kTwAISyxoLicDdmcCTZnYQ8ClwhbtvCauQSF62KCIi6dOSi4hIkVCgi4gUCQW6iEiRUKCLiBQJBbqISJFQoIuIFAkFuohIkVCgi4gUif8PwolKM54b4zgAAAAASUVORK5CYII=\n",
11 |       "text/plain": [
12 |        "<Figure size 432x288 with 1 Axes>"
13 |       ]
14 |      },
15 |      "metadata": {
16 |       "needs_background": "light"
17 |      },
18 |      "output_type": "display_data"
19 |     }
20 |    ],
21 |    "source": [
22 |     "# Data Preprocessing\n",
23 |     "import pandas as pd\n",
24 |     "import numpy as np\n",
25 |     "import matplotlib.pyplot as plt\n",
26 |     "\n",
27 |     "dataset = pd.read_csv('D:\\datasets\\studentscores.csv')\n",
28 |     "X = dataset.iloc[ : ,   : 1 ].values\n",
29 |     "Y = dataset.iloc[ : , 1 ].values\n",
30 |     "print(\"X\")\n",
31 |     "print(X)\n",
32 |     "print(\"Y\")\n",
33 |     "print(Y)\n",
34 |     "\n",
35 |     "\n",
36 |     "from sklearn.model_selection import train_test_split\n",
37 |     "X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size = 1/4, random_state = 0) \n",
38 |     "\n",
39 |     "# Fitting Simple Linear Regression Model to the training set\n",
40 |     "from sklearn.linear_model import LinearRegression\n",
41 |     "regressor = LinearRegression()\n",
42 |     "regressor = regressor.fit(X_train, Y_train)\n",
43 |     "\n",
44 |     "# Predecting the Result\n",
45 |     "Y_pred = regressor.predict(X_test)\n",
46 |     "\n",
47 |     "# Visualising the Training results\n",
48 |     "plt.scatter(X_train , Y_train, color = 'red')\n",
49 |     "plt.plot(X_train , regressor.predict(X_train), color ='blue')\n",
50 |     "\n",
51 |     "# Visualizing the test results\n",
52 |     "plt.scatter(X_test , Y_test, color = 'red')\n",
53 |     "plt.plot(X_test , regressor.predict(X_test), color ='blue')\n",
54 |     "plt.show()"
55 |    ]
56 |   },
57 |   {
58 |    "cell_type": "code",
59 |    "execution_count": null,
60 |    "metadata": {},
61 |    "outputs": [],
62 |    "source": []
63 |   },
64 |   {
65 |    "cell_type": "code",
66 |    "execution_count": null,
67 |    "metadata": {},
68 |    "outputs": [],
69 |    "source": []
70 |   }
71 |  ],
72 |  "metadata": {
73 |   "kernelspec": {
74 |    "display_name": "Python 3",
75 |    "language": "python",
76 |    "name": "python3"
77 |   },
78 |   "language_info": {
79 |    "codemirror_mode": {
80 |     "name": "ipython",
81 |     "version": 3
82 |    },
83 |    "file_extension": ".py",
84 |    "mimetype": "text/x-python",
85 |    "name": "python",
86 |    "nbconvert_exporter": "python",
87 |    "pygments_lexer": "ipython3",
88 |    "version": "3.7.3"
89 |   }
90 |  },
91 |  "nbformat": 4,
92 |  "nbformat_minor": 2
93 | }
94 | 


--------------------------------------------------------------------------------
/homework-06/requirement.md:
--------------------------------------------------------------------------------
1 | ## 完成GitHub上100天钟第二天的代码练习
2 | # 上传代码文件，命名为姓名的拼音.ipynb
3 | 


--------------------------------------------------------------------------------