├── First lesson ├── homework0.ipynb ├── 【姚超】工具下载&作业说明.pdf └── 【王明哲】青年AI自强项目第一讲-AI鸟瞰.pptx ├── Homework-01 ├── daizhen1.png ├── daizhen2.png ├── dongxu1.png ├── dongxu2.png ├── fanshengxu1.png.png ├── linxinhui1.png ├── linxinhui2.png ├── liuyi1.png ├── liuyi2.png ├── lixiaoyu1.png ├── lixiaoyu2.png ├── mashiyao1.png ├── mashiyao2.png ├── niexin_01.png ├── niexin_02.png ├── pengziye.png ├── pengziye1.png.png ├── weizhuoqi1.png ├── weizhuoqi2.png ├── yaoyu1.jpg ├── yaoyu2.jpg ├── yuanmanxue1.png └── yuanmanxue2.png ├── Lesson-02 ├── data.xls ├── homework02.py ├── 【王明哲】机器学习入门公式汇总.pdf └── 【王明哲】青年AI自强计划第二讲——机器学习入门.pdf ├── Lesson-03 ├── 青年自强计划第三章-神经网络.pdf └── 青年自强计划第三章-神经网络.pptx ├── README.md ├── fitbit-analyzer-master.zip ├── homework-02 ├── dongxu.ipynb ├── dongxu1.png ├── dongxu2.png ├── fengshuai.ipynb ├── fengshuai1.png ├── fengshuai2.png ├── fengshuai3.png ├── guanluotong(2)1.png ├── guanluotong(2)2.png ├── linxinhui1.png.png ├── linxinhui2.png.png ├── linxinhui3.png.png ├── lixiaoyu1.png ├── lixiaoyu2.png ├── lixiaoyu3.png ├── mashiyao 1.png ├── mashiyao 2.png ├── mashiyao 3.png ├── niexin_01.png ├── niexin_02.png ├── pengziye.ipynb ├── pengziye3.jpg ├── weizhuoqi1.png ├── weizhuoqi2.png ├── weizhuoqi3.png ├── 刘毅-homework-02.pdf ├── 姚宇-homework-02.pdf ├── 戴振-homework-02.pdf ├── 李校宇-homework-02.pdf ├── 袁曼雪-homework-02.pdf └── 说明.md ├── homework-03 ├── Fitbit_Data_Analysis_1.ipynb ├── MINST │ ├── t10k-images-idx3-ubyte.gz │ ├── t10k-images.idx3-ubyte │ ├── t10k-labels-idx1-ubyte.gz │ ├── t10k-labels.idx1-ubyte │ ├── train-images-idx3-ubyte.gz │ ├── train-labels-idx1-ubyte.gz │ └── train-labels.idx1-ubyte ├── ex3-part1 │ └── tensorflow-MNIST-logistRes.py ├── ex3-part2 │ ├── ann_classification_two_hidden_layers.py │ └── data.xls ├── ex3-part3 │ └── tensorflow-MNIST-nn.py ├── 作业说明.txt └── 马士尧 homework 03.pdf ├── homework-04 ├── requirement.md ├── 余欣灿.ipynb ├── 刘广升.ipynb ├── 史一阳.ipynb ├── 吴洁茹.ipynb ├── 吴玉隆.ipynb ├── 周小梅.ipynb ├── 姚宇.ipynb ├── 张博.ipynb ├── 张晏铭.ipynb ├── 张泷玲.ipynb ├── 戴振.ipynb ├── 房增林.ipynb ├── 曲礼阳.ipynb ├── 机器学习第一课.pptx ├── 李校宇.ipynb ├── 胡明玥.ipynb ├── 苏峥.ipynb ├── 范升旭.ipynb ├── 谢易凡.ipynb ├── 闫泳寰.ipynb ├── 阳治玖.ipynb ├── 陈宝旭.ipynb ├── 陈瑞.ipynb ├── 韩依格.ipynb ├── 马士尧.ipynb ├── 高一淇.ipynb ├── 魏卓其.ipynb └── 黄禹霏.ipynb ├── homework-05 ├── fangzenglin.ipynb ├── lixiaoyu.py └── zhangbo.ipynb └── homework-06 └── requirement.md /First lesson/homework0.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## 示例脚本" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "本脚本目的在于验证工作环境是否安装配置成功,在完成PPT展示的安装教程之后,需要在命令行窗口运行如下命令,使得本脚本工作。" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "pip install pygame" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "### 黑客帝国代码雨" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "pygame 1.9.4\n", 43 | "Hello from the pygame community. 
https://www.pygame.org/contribute.html\n" 44 | ] 45 | }, 46 | { 47 | "ename": "KeyboardInterrupt", 48 | "evalue": "", 49 | "output_type": "error", 50 | "traceback": [ 51 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 52 | "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 53 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 35\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 36\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mc\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtext\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 37\u001b[1;33m \u001b[0mscreen\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mblit\u001b[0m\u001b[1;33m(\u001b[0m \u001b[0mfont\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrender\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m255\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 38\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m+=\u001b[0m\u001b[1;36m20\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 39\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m-=\u001b[0m\u001b[0mfont_height\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 54 | "\u001b[1;31mKeyboardInterrupt\u001b[0m: " 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "import pygame\n", 60 | "import sys\n", 61 | "from pygame.locals import *\n", 62 | "from sys import exit\n", 63 | "import random\n", 64 | " \n", 65 | "pygame.init()\n", 66 | "SCREEN_SIZE = (960, 640)\n", 67 | "screen = pygame.display.set_mode(SCREEN_SIZE, 0, 32)\n", 68 | " \n", 69 | "font = pygame.font.SysFont(\"arial\", 16);\n", 70 | "font_height = font.get_linesize()\n", 71 | "#event_text = []\n", 72 | "texts=[['0']*80]\n", 73 | " \n", 74 | "while True:\n", 75 | " event= pygame.event.poll()\n", 76 | " i=0\n", 77 | " t=80\n", 78 | " tx=[]\n", 79 | " while i\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 64\u001b[0m \u001b[0mexit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 65\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 66\u001b[1;33m \u001b[0mscreen\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfill\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 67\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mgroup_count\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 68\u001b[0m \u001b[0mgroup\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0madd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mWord\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mi\u001b[0m \u001b[1;33m*\u001b[0m 
\u001b[0mFONT_SIZE\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mFONT_SIZE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 132 | "\u001b[1;31merror\u001b[0m: display Surface quit" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "# imports\n", 138 | "import pygame\n", 139 | "import random\n", 140 | "from pygame.locals import *\n", 141 | "from random import randint\n", 142 | "\n", 143 | "\n", 144 | "# define\n", 145 | "SCREEN_WIDTH = 900\n", 146 | "SCREEN_HEIGHT = 600\n", 147 | "LOW_SPEED = 10\n", 148 | "HIGH_SPEED = 20\n", 149 | "LOW_SIZE = 5\n", 150 | "HIGH_SIZE = 50\n", 151 | "FONT_SIZE = 10\n", 152 | "FONT_NAME = \"myfont.ttf\"\n", 153 | "FREQUENCE = 10\n", 154 | "times = 0\n", 155 | "\n", 156 | "# def func\n", 157 | "def randomcolor() :\n", 158 | " return (randint(0,255),randint(0,255),randint(0,255))\n", 159 | "def randomspeed() :\n", 160 | " return randint(LOW_SPEED,HIGH_SPEED)\n", 161 | "def randomposition() :\n", 162 | " return (randint(0,SCREEN_WIDTH),randint(0,SCREEN_HEIGHT))\n", 163 | "def randomsize() :\n", 164 | " return randint(LOW_SIZE,HIGH_SIZE)\n", 165 | "def randomoname() :\n", 166 | " return randint(0,100000)\n", 167 | "def randomvalue() :\n", 168 | " return randint(0,100) #this is your own display number range\n", 169 | "\n", 170 | "# class of sprite\n", 171 | "class Word(pygame.sprite.Sprite) :\n", 172 | " def __init__(self,bornposition) :\n", 173 | " pygame.sprite.Sprite.__init__(self)\n", 174 | " self.value = chr(random.randint(33, 126))\n", 175 | " self.font = pygame.font.Font(FONT_NAME,FONT_SIZE)\n", 176 | " self.image = self.font.render(str(self.value),True,randomcolor())\n", 177 | " self.speed = randomspeed()\n", 178 | " self.rect = self.image.get_rect()\n", 179 | " self.rect.topleft = bornposition\n", 180 | "\n", 181 | " def update(self) :\n", 182 | " self.rect = self.rect.move(0,self.speed)\n", 183 | " if self.rect.top > SCREEN_HEIGHT :\n", 184 | " self.kill()\n", 185 | "\n", 186 | "# init the available modules\n", 187 | "pygame.init()\n", 188 | "screen = pygame.display.set_mode((SCREEN_WIDTH,SCREEN_HEIGHT))\n", 189 | "pygame.display.set_caption(\"ViatorSun HACKER EMPIRE CodeRain\")\n", 190 | "clock = pygame.time.Clock()\n", 191 | "group = pygame.sprite.Group()\n", 192 | "group_count = int(SCREEN_WIDTH / FONT_SIZE)\n", 193 | "\n", 194 | "# mainloop\n", 195 | "while True :\n", 196 | " time = clock.tick(FREQUENCE)\n", 197 | " for event in pygame.event.get() :\n", 198 | " if event.type == QUIT :\n", 199 | " pygame.quit()\n", 200 | " exit()\n", 201 | "\n", 202 | " screen.fill((0,0,0))\n", 203 | " for i in range(0,group_count) :\n", 204 | " group.add(Word((i * FONT_SIZE,-FONT_SIZE)))\n", 205 | "\n", 206 | " group.update()\n", 207 | " group.draw(screen)\n", 208 | " pygame.display.update()\n" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | " 本脚本程序来源于CSDN,教学使用,非盈利目的 " 216 | ] 217 | } 218 | ], 219 | "metadata": { 220 | "kernelspec": { 221 | "display_name": "Python 3", 222 | "language": "python", 223 | "name": "python3" 224 | }, 225 | "language_info": { 226 | "codemirror_mode": { 227 | "name": "ipython", 228 | "version": 3 229 | }, 230 | "file_extension": ".py", 231 | "mimetype": "text/x-python", 232 | "name": "python", 233 | "nbconvert_exporter": "python", 234 | "pygments_lexer": "ipython3", 235 | "version": "3.5.6" 236 | } 237 | }, 238 | "nbformat": 4, 239 | "nbformat_minor": 2 240 | } 241 | 
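Before moving on, it can help to confirm that the rest of the course toolchain imports cleanly, not just pygame. The sketch below is not part of the original notebook; it is a minimal check added for illustration, and the package list is an assumption inferred from the imports used elsewhere in this repository (pygame, numpy, matplotlib, xlrd, tensorflow). It targets Python 3.5 (the notebook's kernel version), so it avoids f-strings.

```python
# Minimal environment check (assumed package list; adjust to your own setup).
import importlib

packages = ["pygame", "numpy", "matplotlib", "xlrd", "tensorflow"]  # assumption

for name in packages:
    try:
        module = importlib.import_module(name)                       # try the import
        version = getattr(module, "__version__", "unknown version")  # best-effort version lookup
        print("{}: OK ({})".format(name, version))
    except ImportError:
        print("{}: missing, try `pip install {}`".format(name, name))
```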
-------------------------------------------------------------------------------- /First lesson/【姚超】工具下载&作业说明.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/First lesson/【姚超】工具下载&作业说明.pdf -------------------------------------------------------------------------------- /First lesson/【王明哲】青年AI自强项目第一讲-AI鸟瞰.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/First lesson/【王明哲】青年AI自强项目第一讲-AI鸟瞰.pptx -------------------------------------------------------------------------------- /Homework-01/daizhen1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/daizhen1.png -------------------------------------------------------------------------------- /Homework-01/daizhen2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/daizhen2.png -------------------------------------------------------------------------------- /Homework-01/dongxu1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/dongxu1.png -------------------------------------------------------------------------------- /Homework-01/dongxu2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/dongxu2.png -------------------------------------------------------------------------------- /Homework-01/fanshengxu1.png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/fanshengxu1.png.png -------------------------------------------------------------------------------- /Homework-01/linxinhui1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/linxinhui1.png -------------------------------------------------------------------------------- /Homework-01/linxinhui2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/linxinhui2.png -------------------------------------------------------------------------------- /Homework-01/liuyi1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/liuyi1.png -------------------------------------------------------------------------------- /Homework-01/liuyi2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/liuyi2.png -------------------------------------------------------------------------------- /Homework-01/lixiaoyu1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/lixiaoyu1.png -------------------------------------------------------------------------------- /Homework-01/lixiaoyu2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/lixiaoyu2.png -------------------------------------------------------------------------------- /Homework-01/mashiyao1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/mashiyao1.png -------------------------------------------------------------------------------- /Homework-01/mashiyao2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/mashiyao2.png -------------------------------------------------------------------------------- /Homework-01/niexin_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/niexin_01.png -------------------------------------------------------------------------------- /Homework-01/niexin_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/niexin_02.png -------------------------------------------------------------------------------- /Homework-01/pengziye.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/pengziye.png -------------------------------------------------------------------------------- /Homework-01/pengziye1.png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/pengziye1.png.png -------------------------------------------------------------------------------- /Homework-01/weizhuoqi1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/weizhuoqi1.png -------------------------------------------------------------------------------- /Homework-01/weizhuoqi2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/weizhuoqi2.png -------------------------------------------------------------------------------- /Homework-01/yaoyu1.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/yaoyu1.jpg -------------------------------------------------------------------------------- /Homework-01/yaoyu2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/yaoyu2.jpg -------------------------------------------------------------------------------- /Homework-01/yuanmanxue1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/yuanmanxue1.png -------------------------------------------------------------------------------- /Homework-01/yuanmanxue2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Homework-01/yuanmanxue2.png -------------------------------------------------------------------------------- /Lesson-02/data.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Lesson-02/data.xls -------------------------------------------------------------------------------- /Lesson-02/【王明哲】机器学习入门公式汇总.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Lesson-02/【王明哲】机器学习入门公式汇总.pdf -------------------------------------------------------------------------------- /Lesson-02/【王明哲】青年AI自强计划第二讲——机器学习入门.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Lesson-02/【王明哲】青年AI自强计划第二讲——机器学习入门.pdf -------------------------------------------------------------------------------- /Lesson-03/青年自强计划第三章-神经网络.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Lesson-03/青年自强计划第三章-神经网络.pdf -------------------------------------------------------------------------------- /Lesson-03/青年自强计划第三章-神经网络.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/Lesson-03/青年自强计划第三章-神经网络.pptx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI-Machine-learning-via-HeilongjiangUniversity 2 | ## http://www.xuetangx.com/courses/course-v1:Tsinghua+20181011X+2018_T2/about 希望大家关注此课程! 
3 | -------------------------------------------------------------------------------- /fitbit-analyzer-master.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/fitbit-analyzer-master.zip -------------------------------------------------------------------------------- /homework-02/dongxu1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/dongxu1.png -------------------------------------------------------------------------------- /homework-02/dongxu2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/dongxu2.png -------------------------------------------------------------------------------- /homework-02/fengshuai1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/fengshuai1.png -------------------------------------------------------------------------------- /homework-02/fengshuai2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/fengshuai2.png -------------------------------------------------------------------------------- /homework-02/fengshuai3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/fengshuai3.png -------------------------------------------------------------------------------- /homework-02/guanluotong(2)1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/guanluotong(2)1.png -------------------------------------------------------------------------------- /homework-02/guanluotong(2)2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/guanluotong(2)2.png -------------------------------------------------------------------------------- /homework-02/linxinhui1.png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/linxinhui1.png.png -------------------------------------------------------------------------------- /homework-02/linxinhui2.png.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/linxinhui2.png.png -------------------------------------------------------------------------------- /homework-02/linxinhui3.png.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/linxinhui3.png.png -------------------------------------------------------------------------------- /homework-02/lixiaoyu1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/lixiaoyu1.png -------------------------------------------------------------------------------- /homework-02/lixiaoyu2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/lixiaoyu2.png -------------------------------------------------------------------------------- /homework-02/lixiaoyu3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/lixiaoyu3.png -------------------------------------------------------------------------------- /homework-02/mashiyao 1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/mashiyao 1.png -------------------------------------------------------------------------------- /homework-02/mashiyao 2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/mashiyao 2.png -------------------------------------------------------------------------------- /homework-02/mashiyao 3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/mashiyao 3.png -------------------------------------------------------------------------------- /homework-02/niexin_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/niexin_01.png -------------------------------------------------------------------------------- /homework-02/niexin_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/niexin_02.png -------------------------------------------------------------------------------- /homework-02/pengziye3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/pengziye3.jpg -------------------------------------------------------------------------------- /homework-02/weizhuoqi1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/weizhuoqi1.png -------------------------------------------------------------------------------- /homework-02/weizhuoqi2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/weizhuoqi2.png -------------------------------------------------------------------------------- /homework-02/weizhuoqi3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/weizhuoqi3.png -------------------------------------------------------------------------------- /homework-02/刘毅-homework-02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/刘毅-homework-02.pdf -------------------------------------------------------------------------------- /homework-02/姚宇-homework-02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/姚宇-homework-02.pdf -------------------------------------------------------------------------------- /homework-02/戴振-homework-02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/戴振-homework-02.pdf -------------------------------------------------------------------------------- /homework-02/李校宇-homework-02.pdf: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | #numpy库的使用,请参考:https://www.jianshu.com/p/a260a8c43e44 3 | #matplotlib库请参考:https://matplotlib.org/api/index.html 4 | 5 | import xlrd # 导入必备的 xlrd 库,目的是为了调用 xlrd.open_workbook 函数打开 excel 文件,读取数据 6 | import matplotlib.pyplot as plt # 导入必备的 matplotlib 库,并且将其中的 matplotlib.pyplot 重名为 plt, 目的是为了后续的绘图需要,也是为了编程方便,简写为 plt 7 | import numpy as np # 导入必备的 numpy 库,并且将其重名为 np, 目的是为了后续的矩阵的定义、运算、操作等,也是为了编程方便,简写为 np 8 | 9 | 10 | # 定义函数loadData函数,输入参数是 filename 指代文件名,返回数据data,目的是从.xls文件中加载数据,并存储为numpy中的array格式 11 | def loadData(filename): #filename 其实就是个形参 12 | workbook = xlrd.open_workbook(filename) # 通过调用 xlrd.open_workbook 函数打开 excel 文件,读取数据,并返回给 workbook 变量 13 | boyinfo = workbook.sheet_by_index(0) # 通过使用属性 sheet_by_index 得到 excel 文件 中的工作簿,其中 sheet_by_index(0) 表示是第一个工作簿,在 python 中,下标从 0 开始 14 | col_num = boyinfo.ncols # 通过使用属性 ncols 得到 excel 文件 中第一个工作簿的 列数,并赋值给 col_num 15 | row_num = boyinfo.nrows # 通过使用属性 nrows 得到 excel 文件 中第一个工作簿的 行数,并赋值给 row_num 16 | col0 = boyinfo.col_values(0)[1:] # 通过使用属性 col_values(0)[1:] 得到 excel 文件 中第一列数据中,从第2行到最后一行的所有数据,并赋值给 col0 17 | data = np.array(col0) # 通过使用 np.array 函数, 将 col0 转换成数组,并赋值给 data 18 | if col_num == 1: # 条件判断语句: 如果列数 col_num 为1, 只有一列,那么直接返回数据 data 19 | return data # 返回data 20 | else: # 否则,如果不止一列数据,需要遍历所有列的数据 21 | for i in range(col_num-1): # 通过使用for循环达到遍历的目的 22 | coltemp = boyinfo.col_values(i+1)[1:] # 从第二行开始,表头不算,遍历从 第二列 开始到最后一列的数据 23 | data = np.c_[data, coltemp] # 通过使用 np.c_ 函数将 第一列的数据 和后面 所有列的数据组合起来,并赋值给 data 24 | return data # 返回data 25 | 26 | # 定义一个 plotData 函数,输入参数是 数据 X 和标志 flag: y,返回作图操作 plt, p1, p2 , 目的是为了画图 27 | def plotData(X, y): 28 | pos = np.where(y==1) # 通过使用 np.where 函数查找所有满足条件的数据,查找所有满足标志 y == 1 的数据,并赋值给 pos 29 | neg = np.where(y==0) # 通过使用 np.where 函数查找所有满足条件的数据,查找所有满足标志 y == 0 的数据,并赋值给 neg 30 | # 通过使用 plt.plot 
函数作图,对所有满足标志 y == 1 的数据作图,点采用 s (正方形),代表 square, 点的大小为 7 单位,颜色为 红色 red 31 | p1 = plt.plot(X[pos, 0], X[pos, 1], marker='s', markersize=7, color='red')[0] 32 | # 请补全 通过使用 plt.plot 函数作图,对所有满足标志 y == 0 的数据作图,点采用 o (圆形),代表 circle, 点的大小为 7 单位,颜色为 绿色 green 33 | p2 = plt.plot(X[neg,0],X[neg,1],marker='o',markersize=7,color='green')[0] #请补全 34 | 35 | return p1, p2 # 返回作图操作plt, p1, p2 36 | 37 | # normalization: 定义一个 normalization 函数,输入参数是原始数据 X ,返回归一化后的数据 X_norm , 目的是为了数据预处理,得到归一化后的数据 X_norm 38 | def normalization(X): 39 | Xmin =np.min(X,axis=0) # 请补全 通过使用 np.min 函数,计算原始数据沿着 axis=0 方向的最小值,即:求每一列的最小值,并赋值给 Xmin 40 | Xmax = np.max(X,axis=0) # 请补全 通过使用 np.max 函数,计算原始数据沿着 axis=0 方向的最大值,即:求每一列的最大值,并赋值给 Xmax 41 | Xmu = np.mean ( X,axis=0) # 请补全 通过使用 np.mean 函数,计算原始数据均值,并赋值给 Xmu 42 | X_norm = (X-Xmu)/(Xmax-Xmin) # 请补全 计算归一化后的数据,归一化公式为:(X-Xmu)/(Xmax-Xmin),归一化后数据范围为 [-1,1] 43 | return X_norm # 返回数据预处理,归一化后的数据 X_norm 44 | 45 | # plot decision boundary:定义一个 plotDecisionBoundaryn 函数,输入参数是 训练集 trainX, 训练集 trainY, 直线斜率截距相关参数 w, 迭代次数 iter_num ,目的是为了画出决策的判断边界 46 | def plotDecisionBoundary(trainX, trainY, w, iter_num = 0): 47 | # prepare data 48 | xcord1 = [];ycord1 = [];xcord2 = [];ycord2 = [] # 准备数据,定义四个空的列表,并分别赋值给 xcord1、ycord1、xcord2、ycord2,进行初始化 49 | m, n = np.shape(trainX) # 通过使用 np.shape 函数,得到训练集 trainX 的形状大小,其中,m 为训练集 trainX 的行数,n 为训练集 trainX 的列数 50 | for i in range(m): # 通过使用 for 循环语句,遍历训练集 trainX 所有的行,其中,i 可以取得值分别是 0,1,2,...,m-1,总共是 m 行 51 | if trainY[i] == 1: # 通过使用 if 条件判断语句,如果训练集 trainY(标志)中的元素为 1,那么将训练集 trainX中的 trainX[i,1] 和 trainX[i,2] 分别添加到 xcord1 和 ycord1 列表中 52 | xcord1.append(trainX[i,1]) # 通过 append 的方法,将训练集 trainX中 的 trainX[i,1] 添加到 xcord1 列表中,保存的是 pos 的横坐标, 代表 positive 的数据 53 | ycord1.append(trainX[i,2]) # 通过 append 的方法,将训练集 trainX中 的 trainX[i,2] 添加到 ycord1 列表中,保存的是 pos 的纵坐标, 代表 positive 的数据 54 | else: # 否则,如果训练集 trainY(标志)中的元素不为 1,那么将训练集 trainX中的 trainX[i,1] 和 trainX[i,2] 分别添加到 xcord2 和 ycord2 列表中 55 | xcord2.append(trainX[i,1]) # 通过 append 的方法,将训练集 trainX中 的 trainX[i,1] 添加到 xcord2 列表中,保存的是 neg 的横坐标, 代表 negative 的数据 56 | ycord2.append(trainX[i,2]) # 通过 append 的方法,将训练集 trainX中 的 trainX[i,2] 添加到 ycord2 列表中,保存的是 neg 的纵坐标, 代表 negative 的数据 57 | x_min = min(trainX[:,1]) # 通过使用 min 函数,计算出 trainX[:,1] ,即 trainX 第2列的最小值,并赋值给 x_min 58 | y_min = min(trainX[:,2]) # 通过使用 min 函数,计算出 trainX[:,2] ,即 trainX 第3列的最小值,并赋值给 y_min 59 | x_max = max(trainX[:,1]) # 通过使用 max 函数,计算出 trainX[:,1] ,即 trainX 第2列的最大值,并赋值给 x_max 60 | y_max = max(trainX[:,2]) # 通过使用 max 函数,计算出 trainX[:,2] ,即 trainX 第3列的最大值,并赋值给 y_max 61 | 62 | # plot scatter & legend 63 | fig = plt.figure(1) # 通过使用 plt.figure 函数,开始创建一个图形窗口,并赋值给 fig 64 | # 通过使用 plt.scatter 函数,绘制散点图,横坐标为 xcord1, 纵坐标为 ycord1,标记大小为30,颜色为红色,形状样式为 s (正方形),代表 square, 图例标签为 'I like you' 65 | plt.scatter(xcord1, ycord1, s=30, c='red', marker='s', label='I like you') 66 | # 请补全 通过使用 plt.scatter 函数,绘制散点图,横坐标为 xcord2, 纵坐标为 ycord2,标记大小为30,颜色为绿色,形状样式为 o (圆形),代表 circle, 图例标签为 'I don't like you' 67 | plt.scatter(xcord2, ycord2, s=30, c='green', marker='o', label='I dont like you') 68 | #请补全 69 | 70 | plt.legend(loc='upper right') # 设置图例的位置为右上角 71 | 72 | # set axis and ticks 73 | delta_x = x_max-x_min # 计算横坐标的极差为横坐标最大值与最小值的差,并赋值给 delta_x 74 | delta_y = y_max-y_min # 计算纵坐标的极差为纵坐标最大值与最小值的差,并赋值给 delta_y 75 | # 设置横坐标的刻度:从 x_min - delta_x / 10 到 x_max + delta_x / 10,使用 np.arange 函数创建数组,步长为 1,并赋值给 my_x_ticks 76 | my_x_ticks = np.arange(x_min - delta_x / 10, x_max + delta_x / 10, 1) 77 | # 设置纵坐标的刻度:从 y_min - delta_y / 10 到 y_max + delta_y / 10,使用 np.arange 函数创建数组,步长为 1,并赋值给 my_y_ticks 78 | 
my_y_ticks = np.arange(y_min - delta_y / 10, y_max + delta_y / 10, 1) 79 | 80 | plt.xticks(my_x_ticks) # 通过使用 plt.xticks 函数,设置作图的横坐标的刻度为 my_x_ticks 81 | plt.yticks(my_y_ticks) # 通过使用 plt.yticks 函数,设置作图的纵坐标的刻度为 my_y_ticks 82 | # 通过使用 plt.axis 函数,设置作图的横坐标和纵坐标的显示范围,分别是[x_min-delta_x/10, x_max+delta_x/10] 和 [y_min-delta_y/10, y_max+delta_y/10] 83 | plt.axis([x_min-delta_x/10, x_max+delta_x/10, y_min-delta_y/10, y_max+delta_y/10]) 84 | 85 | # drwa a line:绘制一条直线,用于决策判断 86 | x = np.arange(x_min-delta_x/10, x_max+delta_x/10, 0.01) # 通过使用 np.arange 函数创建数组, 从 x_min - delta_x / 10 到 x_max + delta_x / 10,步长为 0.01,并赋值给 x 87 | y = (-w[0]-w[1]*x)/w[2] #请补全 # 通过公式计算得到直线的纵坐标: y = (-w[0]-w[1]*x)/w[2] 88 | plt.plot(x, y.T) # 通过使用 plt.plot 函数绘制图象,其中,横坐标是 x , 纵坐标是 y.T, “.T” 表示的是矩阵的转置,因为绘图时需要横纵坐标的维度一致 89 | 90 | # figure name:设置图像的文件名和标题名 91 | # 设置图像的文件名为 'Training ' + str(iter_num) + ' times.png',其中,str(iter_num) 表示将迭代次数 iter_num 转变成字符串,图片格式为 “png” 92 | fig_name = 'Training ' + str(iter_num) + ' times.png' 93 | # 设置图像的标题名为'Training ' + str(iter_num) + ' times.png',其中,str(iter_num) 表示将迭代次数 iter_num 转变成字符串,图片格式为 “png” 94 | plt.title(fig_name) 95 | fig.savefig(fig_name) # 通过使用 fig.savefig 函数,保存图片,分辨率等参数采取默认值 96 | plt.show(fig) # 通过使用 plt.show 函数,显示绘制好的图片,注意的是必须关闭图像窗口,才可以进入执行后续的程序 97 | 98 | # sigmoid: 定义一个 激活(激励)函数 sigmoid 函数 (activation function),输入参数是 wx, 返回的是 sigmoid 函数值 99 | def sigmoid(wx): 100 | sigmoidV = 1.0/(1.0+np.exp(-wx)) # 请补全 计算激活函数 sigmoid 函数 的函数值,计算公式为:1.0/(1.0+np.exp(-wx)) 101 | return sigmoidV 102 | 103 | # loss fuc Y_ 预测值 Y 真值 104 | def loss(X, Y, w): # 定义一个 损失函数 loss 函数 (loss function),输入参数是 X, Y, w, 返回的是 损失函数的值 105 | m, n = np.shape(X) # 通过使用 np.shape 函数,得到数据集 X 的形状大小,其中,m 为数据集 X 的行数,n 为数据集 X 的列数 106 | trainMat = np.mat(X) # 通过使用 np.mat 函数,将数据集 X 转变成矩阵类型,并赋值给 trainMat 107 | Y_ = [] # 准备数据,定义一个空的列表,并赋值给 Y_,进行初始化, 后续会通过 append 的方法向空列表内不断添加新的元素 108 | for i in np.arange(m): # 通过 for 循环结构,遍历数据集 X 所有的行,其中,i 可取的数依次为:0,1 ,2,....,m-1, 数据集 X总共有 m 行 109 | # 通过 append 的方法向空列表 Y_ 内不断添加新的元素,新元素是通过 训练的矩阵数据集 trainMat[i] 乘以权重 w 之后,再计算激活函数 sigmoid 的函数值 110 | Y_.append(sigmoid(trainMat[i]*w)) 111 | m = np.shape(Y_)[0] # 通过使用 np.shape 函数,得到数据集 X 的形状大小,其中,np.shape(Y_)[0] 为数据集 X 的行数,并赋值给 m 112 | sum_err = 0.0 # 初始化误差的总和为 0.0, 赋值给 sum_err, 后续会不断更新 误差的总和 sum_err 的数值 113 | for i in range(m): # 通过 for 循环结构,遍历数据集 Y_ 所有的行,其中,i 可取的数依次为:0,1 ,2,....,m-1, 数据集 Y_ 总共有 m 行 114 | # 请补全 更新误差的总和 sum_err 的数值, 每次 误差的总和 sum_err 递减 Y[i]*np.log(Y_[i])+(1-Y[i])*np.log(1-Y_[i]),这是 交叉熵损失函数( Cross Entropy Loss )的计算公式 115 | sum_err -= Y[i]*np.log(Y_[i])+(1-Y[i])*np.log(1-Y_[i])#请补全 116 | return sum_err/m # 返回 sum_err 117 | 118 | # BGD 批量梯度下降法求最优参数 119 | # 定义一个BGD 函数,即:批量梯度下降法(Batch Gradient Descent,BGD),输入参数是 数据集 X 和 y, 120 | # 迭代次数 iter_num, 学习率 alpha,又写作 lr (learning rate), 它表示每次向着J最陡峭的方向迈步的大小, 返回的是 权重 w 121 | # 通过批量梯度下降法(Batch Gradient Descent,BGD),不断更新权重 W 122 | def BGD(X, y, iter_num, alpha): 123 | trainMat = np.mat(X) # 通过使用 np.mat 函数,将数据集 X 转换成矩阵类型,并赋值给 trainMat 124 | trainY = np.mat(y).T # 通过使用 np.mat 函数,将数据集 y 转换成矩阵类型,并且转置,然后赋值给 trainY 125 | m, n = np.shape(X) # 通过使用 np.shape 函数,得到数据集 X 的形状大小,其中,m 为数据集 X 的行数,n 为数据集 X 的列数 126 | w = np.ones((n,1))# 通过使用 np.ones 函数,创建元素全为 1 的矩阵,矩阵的大小为 n 行 1 列,并赋值给 w, 即:进行权重 w 的初始化,令其全为 1 127 | for i in range(iter_num): # 通过 for 循环结构,开始进行迭代,其中,i 可取的数依次为:0,1 ,2,....,iter_num-1, 迭代次数总共有 iter_num 次 128 | error = sigmoid(trainMat*w)-trainY #请补全 # 计算迭代的误差 error:将预测得到的激活函数的数值 sigmoid(trainMat*w) 减去 实际的 trainY 数值 129 | w = w - (1.0/m)*alpha*trainMat.T*error #请补全 # 更新权重 w , BGD 批量梯度下降法 的核心, w = w - 
(1.0/m)*alpha*trainMat.T*error 130 | return w # 返回 w 131 | 132 | # classify:定义一个 classify 函数,输入参数是 wx, 返回的是标志 1 或者 0 133 | def classify(wx): 134 | prob = sigmoid(wx) # 计算概率:将激活函数 sigmoid(wx) 的数值作为预测的概率,并赋值给 prob 135 | if prob > 0.5: # 如果 概率 prob 大于 0.5, 那么返回数值 1 136 | return 1 137 | else: # 否则,如果 概率 prob 不大于 0.5, 那么返回数值 0 138 | return 0 139 | 140 | # predict:定义一个 predict 函数,输入参数是 测试集 testX 和权重 w, 返回的是预测的结果 result 141 | def predict(testX, w): 142 | m, n = np.shape(testX) # 通过使用 np.shape 函数,得到测试集 testX 的形状大小,其中,m 为测试集 testX 的行数,n 为测试集 testX 的列数 143 | testMat = np.mat(testX) # 通过使用 np.mat 函数,将测试集 testX 转换成矩阵类型,并赋值给 testMat 144 | result = [] # 准备数据,定义一个空的列表,并赋值给结果 result,进行初始化, 后续会通过 append 的方法向空列表内不断添加新的元素 145 | for i in np.arange(m): # 通过 for 循环结构,遍历测试集 testX 所有的行,其中,i 可取的数依次为:0,1 ,2,....,m-1, 测试集 testX 总共有 m 行 146 | # 通过 append 的方法向空列表 result 内不断添加新的元素,新元素是通过调用 classify 函数进行预测得到,将返回的浮点型的 1 或者 0 添加到 空列表 result 内 147 | result.append(classify(float(testMat[i]*w))) 148 | return result # 返回预测结果result 149 | 150 | # Precision:定义一个 Precision 函数,输入参数是数据集 X, Y 和权重 w, 返回的是 测试集的正确率 151 | def Precision(X, Y, w): 152 | result = predict(X, w) # 通过调用 predict 函数,输入测试集 X 和权重 w, 计算得到预测结果,并把返回的结果赋值给 result 153 | right_sum = 0 # 进行初始化预测正确的数目,赋值 0 给 right_sum,后续如果预测正确,会不断增加 1 154 | # 通过 for 循环结构,开始进行遍历,其中,i 可取的数依次为:0,1 ,2,....,len(result)-1, 预测结果 result 内元素的个数总和为 len(result) 155 | for i in range(len(result)): 156 | if result[i]-int(Y[i]) == 0: # 通过条件判断语句 if, 如果结果 result 的元素与 int(Y[i])相等,即:预测正确! 那么更新预测正确的数目 right_sum 157 | right_sum += 1 # 如果预测正确! 那么更新预测正确的数目 right_sum,每次递增加 1 158 | # 最后返回测试集预测的正确率,计算公式为:1.0*right_sum/len(Y),注意:乘以 1.0 的原因是把正确率变成浮点型,当然也可以直接用 float 强制转换 159 | return 1.0*right_sum/len(Y) 160 | 161 | # python 主程序,当本文件被执行的时候,运行下列语句: 162 | if __name__ == "__main__": 163 | 164 | # load data and visualization,加载数据并可视化 165 | data = loadData('data.xls') # 通过调用 loadData 函数,导入原始数据集 文件 'data.xls',并赋值给 data 166 | X = data[:,:2]# 将数据集 data 的 第一列 和 第二列 的所有行的数据,赋值给 X, 实际对应的是 身高(m)、 月薪(元)的原始数据 167 | y = data[:,2] # 将数据集 data 的 第三列 所有行的数据,赋值给 y,实际对应的是 是否有兴趣尝试交往(Y=1/N=0)的原始数据,可取 0 或 1 168 | 169 | # plot data 170 | plt_data = plt.figure(1) 171 | p1, p2 = plotData(X, y) # 通过调用 plotData 函数,输入参数为 数据集 X 和 y, 绘制图像 172 | 173 | #Labels and Legend 174 | plt.xlabel('tall') # 通过调用 plt.xlabel 函数,设置图像的横坐标名称为'tall',意思是: 身高(m) 175 | plt.ylabel('salary') # 通过调用 plt.ylabel 函数,设置图像的纵坐标名称为'salary',意思是: 月薪(元) 176 | # 通过调用 plt.legend 函数,设置图像的图例分别为 'I like you' 和 "I don't like you" 177 | # 设置 为线条图图例条目创建的标记点数 numpoints 为 1,图例句柄的长度 handlelength 为0,即:只用散点图形表示图例,没有图例句柄的长度的横线 178 | plt.legend((p1, p2), ('I like you', "I don't like you"), numpoints=1, handlelength=0) 179 | 180 | # show and save visualized image 181 | plt_data.savefig('visualization_org.jpg') # 通过调用 plt.savefig 函数,保存图像,并且图像的文件名为:'visualization_org.jpg',其中,图片的格式为 'jpg' 182 | plt.show(plt_data) # 通过调用 plt.show 函数,显示图像 183 | plt.close(plt_data) # 通过调用 plt.close 来关闭窗口 184 | 185 | # normalization and visualization:通过调用 normalization 函数,对原始数据集 X 进行归一化 186 | X_norm = normalization(X) 187 | # plot data 188 | plt_norm = plt.figure(1) 189 | # 通过调用 plotData 函数,进行绘图,输入参数是 归一化后的 X_norm 和标签数据 y,返回的是 plt_norm, p1_norm 和 p2_norm 190 | p1_norm, p2_norm = plotData(X_norm, y) 191 | 192 | # Labels and Legend 193 | plt.xlabel('tall') # 通过调用 plt.xlabel 函数,设置图像的横坐标名称为'tall',意思是: 身高(m) 194 | plt.ylabel('salary') # 通过调用 plt.ylabel 函数,设置图像的纵坐标名称为'salary',意思是: 月薪(元) 195 | # 通过调用 plt.legend 函数,设置图像的图例分别为 'I like you' 和 "I don't like you" 196 | # 设置 为线条图图例条目创建的标记点数 numpoints 为 1,图例句柄的长度 handlelength 
为0,即:只用散点图形表示图例,没有图例句柄的长度的横线 197 | plt.legend((p1_norm, p2_norm), ('I like you', "I don't like you"), numpoints=1, handlelength=0) 198 | 199 | # show and save visualized image 200 | # 通过调用 plt.show 函数,显示图像 201 | plt.show(plt_data)#请补全 202 | # 通过调用 plt.savefig 函数,保存图像,并且图像的文件名为:'visualization_norm.jpg',其中,图片的格式为 'jpg' 203 | plt.savefig('visualization_norm.jpg')#请补全 204 | # 通过调用 plt.close 函数,关闭窗口 205 | plt.close(plt_data)#请补全 206 | 207 | # optimizing by BSD 208 | iter_num=200 # 进行初始化迭代的次数 iter_num,赋值 200 给 iter_num 209 | lr=0.05 # 进行初始化学习率 lr,赋值 0.001 给 lr 210 | m,n = np.shape(data) # 通过使用 np.shape 函数,得到数据集 data 的形状大小,其中,m 为数据集 data 的行数,n 为数据集 data 的列数 211 | offset = np.ones((m, 1)) # 通过使用 np.ones 函数,创建元素全为 1 的矩阵,矩阵的大小为 m 行 1 列,并赋值给 offset, 即:进行 offset 的初始化,令其全为 1 212 | trainMat = np.c_[offset, X_norm] # 通过使用 np.c_ 函数将 offset 和 归一化后的 X_norm 数据集组合起来,并赋值给 trainMat 213 | theta=BGD(trainMat,y,iter_num,lr) # 通过调用 BGD 函数,即:批量梯度下降法(Batch Gradient Descent,BGD),返回最优化后的权重, 并赋值给 theta 214 | 215 | ## Plot Boundary # 通过调用 plotDecisionBoundary 函数,绘制分类决策的直线,其中,输入参数分别是:训练集 trainMat, 标签 y, 最优化后的权重 theta 和 迭代次数 iter_num 216 | plotDecisionBoundary(trainMat, y, theta, iter_num) 217 | cost = loss(trainMat, y, theta) # 通过调用 loss 函数,计算出本模型算法的损失函数,其中, 输入参数分别是: 训练集 trainMat, 标签 y 和 最优化后的权重 theta, 并赋值给 cost 218 | print('Cost theta: {0}'.format(cost))# 在屏幕上输出 损失函数的数值,其中,.format(cost) 的格式是更加规范的输出格式,当然也可以用转义字符 %s 219 | 220 | # Compute accuracy on our training set 221 | p = Precision(trainMat, y, theta) # 通过调用 Precision 函数,计算出预测 测试集结果的正确率,其中,输入参数分别是: 训练集 trainMat, 标签 y 和 最优化后的权重 theta, 并赋值给 p 222 | print('Train Accuracy: {0}'.format(p)) # 在屏幕上输出 测试集正确率的数值,其中,.format(p) 的格式是更加规范的输出格式,当然也可以用转义字符 %s 223 | print('finished!') # 在屏幕上输出完成的信息,'finished!' 224 | 225 | 226 | -------------------------------------------------------------------------------- /homework-02/袁曼雪-homework-02.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-02/袁曼雪-homework-02.pdf -------------------------------------------------------------------------------- /homework-02/说明.md: -------------------------------------------------------------------------------- 1 | #作业提交说明 2 | 补充我给你们代码的剩余部分,并将代码跑通之后的图片和源码上传.图片命名:姓名拼音+序号,源码直接以姓名拼音命名. 3 | 有任何问题在微信群里问. 4 | 代码可以优化,请自行了解Pandas库. 5 | 作业时间为一周内完成. 
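The note above (说明.md) suggests the homework code can be optimized with the Pandas library. As a hedged illustration only, and not part of any submitted homework, the sketch below loads the same data.xls with pandas instead of the xlrd-based loadData() used in the logistic-regression script above. The column layout (height, salary, 0/1 label in the first three columns) is taken from that script's comments, and depending on the pandas version an xlrd engine may still be needed for the legacy .xls format.

```python
# A possible pandas-based replacement for loadData() (sketch; assumptions noted above).
import pandas as pd

def load_data_pandas(filename):
    # read_excel consumes the header row itself, matching the original code's
    # col_values(...)[1:] behaviour of skipping the first (header) row.
    df = pd.read_excel(filename)
    return df.values  # numpy array, same layout as the original loadData()

data = load_data_pandas('data.xls')
X, y = data[:, :2], data[:, 2]   # height/salary features and the 0/1 label column
print(X.shape, y.shape)
```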
 6 | 
--------------------------------------------------------------------------------
/homework-03/MINST/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/t10k-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/homework-03/MINST/t10k-images.idx3-ubyte:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/t10k-images.idx3-ubyte
--------------------------------------------------------------------------------
/homework-03/MINST/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/t10k-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/homework-03/MINST/t10k-labels.idx1-ubyte:
--------------------------------------------------------------------------------
(binary IDX label file; raw byte content omitted)
--------------------------------------------------------------------------------
/homework-03/MINST/train-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/train-images-idx3-ubyte.gz
--------------------------------------------------------------------------------
/homework-03/MINST/train-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/train-labels-idx1-ubyte.gz
--------------------------------------------------------------------------------
/homework-03/MINST/train-labels.idx1-ubyte:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/MINST/train-labels.idx1-ubyte
--------------------------------------------------------------------------------
/homework-03/ex3-part1/tensorflow-MNIST-logistRes.py:
--------------------------------------------------------------------------------
1 | 
2 | #以下函数的使用方法，请参考：https://tensorflow.google.cn/api_docs/python/
3 | 
4 | from tensorflow.examples.tutorials.mnist import input_data # 导入 tensorflow 中的 input_data 子模块,目的是为了后续的导入读取数据 5 | import tensorflow as tf # 导入 tensorflow 库,并且重名为 tf, 便于后面的简写 tf 6 | import numpy as np # 导入 numpy 库,并且重名为 np, 便于后面的简写 np 7 | 8 | #基本参数设置 9 | batchSize = 30 #batchsize的大小,代表每次训练载入的图像张数 10 | lr = 0.005 #学习率的大小,若后面启用learning rate decay策略,则该值为学习率的初始值 11 | iter = 1000000 #训练的迭代次数 12 | saveInter = 100 #保存结果的频率,即每训练100次保存一次模型训练参数及模型性能 13 | sample_size = 55000 #学习example的总大小,MNIST中官方写60000张,实际为55000(训练)+ 5000(校验),本例中只使用了55000 train 14 | 15 | # 对模型输出的结果进行评判,>0.5为“正”,<0.5为“负” 16 | def predict(X): # 定义一个函数 predict, 作用是用来进行预测 17 | num = X.shape[0] # 通过 shape 属性,得到 X 行的个数 18 | result = [] # 定义一个空的列表 result ,后面通过 append 的方式,向里面添加元素 19 | for i in range(num): # for循环语句, i 从0,1,2, 到 num -1 20 | if X[i]>0.5: # 如果 X[i] 大于 0.5 21 | result.append(1.0) # 将 1.0 添加到列表 result 中 22 | else: # 否则,X[i] 小于或等于 0.5 23 | result.append(0.0) # 将 0.0 添加到列表 result 中 24 | return result # 返回 result 的结果 25 | 26 | # 加载数据集,建议提前到官网上下载MNIST数据集,并解压到./MNIST文件夹下 27 | # MNIST下载地址:http://yann.lecun.com/exdb/mnist/ 28 | def loadData(): # 定义一个 loadData 函数 29 | file = "../MNIST" # 数据集 MINIST 30 | mnist = input_data.read_data_sets(file, one_hot=True) # input_data.read_data_sets 读取数据 31 | return mnist # 返回读取的数据 mnist 32 | 33 | # 申请模型输入输出的占位符 34 | def create_placeholder(n_x=784,n_y=0): # 定义一个 create_placeholder 函数 35 | X = tf.placeholder(tf.float32,shape=[None,n_x],name='X') # 调用tf.placeholder函数,tensorflow 中定义 X 36 | Y = tf.placeholder(tf.float32, shape=[None,], name='Y') # 调用tf.placeholder函数,tensorflow 中定义 Y 37 | return X,Y #返回 X 和 Y 的数值 38 | 39 | # 定义参数,W,b 40 | def initialize_parameters(): # 定义一个 initialize_parameters 函数 41 | W = tf.Variable({0}) #调用tf.Variable函数,设置模型参数W,W的维度为[784,1],且初始化为0 42 | b = tf.Variable({0}) #调用tf.Variable函数,设置模型参数b,b的维度为[1 ,1],且初始化为0 43 | parameters={'W': W, # 参数权重 W 44 | 'b': b} # 参数偏置 b 45 | return parameters # 返回参数 46 | 47 | # 将标签转换为one-hot形式,本例中未用到该函数,是因为tensorflow中封装了one-hot功能 48 | def convert_one_hot(Y,C): # 定义一个 convert_one_hot 函数 49 | one_hot=np.eye(C)[Y.reshape(-1)].T # 初始化 one_hot 为对角矩阵 50 | return one_hot # 返回 one_hot 51 | 52 | # 定义网络模型 53 | def forward_propagation(X,parameters): # 定义一个 forward_propagation 函数 54 | W = parameters['W'] # 参数权重 W 55 | b = parameters['b'] # 参数偏置 b 56 | 57 | Z1={0} #调用tensorflow函数,实现Z1=X*W+b 58 | A1={0} #调用tf.nn.sigmoid,实现A1 = sigmoid(Z1) 59 | A1 = tf.clip_by_value({0}) #调用clip_by_value,将A1进行裁剪,使其在[0.001,1.0]之间,是为了避免出现接近于0的极小值,输入np.log()中出现nan的情况 60 | return A1 # 返回 A1 61 | 62 | # 定义loss function 63 | def compute_cost(y_,y,W): # 定义一个 compute_cost 函数 64 | #以下的cross_entropy经过了简单变化,在(1.0-y_)*tf.log(1.0-y)之前乘以0.1,是因为正负样本比例基本上为1:9,严重偏向负样本 65 | #以下添加了正则,也可以尝试去掉 66 | cross_entropy = -(1.0/batchSize)*tf.reduce_sum({0}) #调用tf.reduce_sum函数,实现交叉熵函数 67 | return cross_entropy # 返回 交叉熵函数 的数值 cross_entropy 68 | 69 | # 模型搭建、训练、存储 70 | def model(mnist,Num): # 定义一个 model 函数 71 | x,y_ = create_placeholder(784, 0) # 调用 create_placeholder 函数,初始化 x,y_ 72 | parameters = initialize_parameters() # 调用 initialize_parameters 函数, 初始化 参数 73 | A1 = forward_propagation(x, parameters) # 调用 forward_propagation 函数,实现前向反馈 74 | 75 | #设置learning rate decay策略,随着迭代次数的增加,学习率成指数逐渐减小,减小公式为:decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) 76 | global_step = tf.Variable(0) # 调用 tf.Variable 函数, 初始化 global_step 变量 77 | learning_rate = tf.train.exponential_decay(lr,global_step,decay_steps=sample_size/batchSize,decay_rate=0.98,staircase=True) # 设置指数衰减的 
学习率,调用tf.train.exponential_decay。 78 | 79 | cost = compute_cost(y_, A1,parameters['W']) # 调用 compute_cost 函数,计算损失函数 80 | train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost,global_step=global_step) # 调用 tf.train.GradientDescentOptimizer 函数, 实现梯度下降的优化 81 | sess = {0} #调用tf.InteractiveSession()函数,创建Session 82 | sess.run({0}) #执行tf.global_variables_initializer(),初始化参数 83 | 84 | #利用全部样本对模型进行测试 85 | testbatchX = mnist.train.images # 导入 mnist 数据中的训练集 图片 86 | testbatchY = mnist.train.labels # 导入 mnist 数据中的训练集 标签 87 | 88 | modelLast = [] # 定义一个空的列表 modelLast 89 | logName = "./log"+str(Num)+".txt" # 新建文件名为 log"+str(Num)+".txt 90 | 91 | #保存模型,且设定保存最大迭代次数的4个 92 | saver = tf.train.Saver(max_to_keep=4) # 调用 tf.train.Saver 函数,保存模型 93 | pf = open(logName, "w") # 以 写入的方式 打开文件 log"+str(Num)+".txt 94 | for i in range(iter): # for 循环结构, 遍历 iter 95 | #加载minibatch=50个训练样本 96 | batch = mnist.train.next_batch(batchSize) # 调用 mnist.train.next_batch 函数,复制给 batch 97 | batchX = batch[0] # 赋值给 batchX 为 batch 中第一个元素 98 | batchY = batch[1] # 赋值给 batchY为 batch 中第二个元素 99 | #执行训练 100 | train_step.run(feed_dict={0}) #执行tensor流图,并为其添加输入x: batchX, y_: batchY[:,Num] 101 | 102 | #每隔saveInter次迭代,保存当前模型的状态,并测试模型精度 103 | if i % saveInter == 0: #条件判断语句 if, 如果 i 整除 iter 104 | [total_cross_entropy,pred,Wsum,lrr] = sess.run([cost,A1,parameters['W'],learning_rate],feed_dict={x:batchX,y_:batchY[:,Num]}) # 调用 sess.run, 启动 tensoflow 105 | pred1 = predict(pred) # 调用 predict 函数,进行预测 106 | 107 | #保存当前模型的学习率lr、在minibatch上的测试精度 108 | print('lr:{:f},train Set Accuracy: {:f}'.format(lrr,(np.mean(pred1 == batchY[:,Num]) * 100))) # 输出训练集的准确率等 109 | pf.write('lr:{:f},train Set Accuracy: {:f}\n'.format(lrr,(np.mean(pred1 == batchY[:,Num]) * 100))) # 写入训练集的准确率 110 | 111 | #保存迭代次数、cross entropy 112 | print("handwrite: %d, iterate times: %d , cross entropy:%g"%(Num,i,total_cross_entropy)) # 输出迭代次数,交叉熵损失函数等 113 | pf.write("handwrite: %d, iterate times: %d , cross entropy:%g, W sum is: %g\n" %(Num,i,total_cross_entropy,np.sum(Wsum))) # 写入出迭代次数,交叉熵损失函数等 114 | 115 | #保存当前参数状态、测试testbatch上的精度 116 | [testpred] = sess.run([A1],feed_dict={x: testbatchX, y_: testbatchY[:, Num]}) # 调用 sess.run, 启动 tensoflow 117 | testpred1 = predict(testpred) # 调用 predict 函数,进行预测 118 | print('predict sum is: {:f},Testing Set Accuracy: {:f}\n'.format(np.sum(testpred1),(np.mean(testpred1 == testbatchY[:, Num]) * 100))) # 输出测试集的准确率等 119 | pf.write('predict sum is: {:f},Testing Set Accuracy: {:f}\n'.format(np.sum(testpred1),(np.mean(testpred1 == testbatchY[:,Num]) * 100))) # 写入测试集的准确率等 120 | pf.write("\n") # 写入换行字符 121 | 122 | #保存当前模型 123 | saveName = "model/my-model-" + str(Num) # 保存模型为 "model/my-model-" + str(Num) 124 | saver.save(sess, saveName, global_step=i) # 调用 saver.save 函数,保存模型 125 | pf.write("save model completed\n") # 写入 save model completed 126 | 127 | #若交叉熵出现nan(出现极值),此时停止训练,保存最新的一次模型名称 128 | if total_cross_entropy != total_cross_entropy: # 条件判断语句 if , 如果 total_cross_entropy 不等于 total_cross_entropy 129 | print("is nan, stop") # 输出 is nan, stop 130 | pf.write("is nan, stop\n") # 写入 is nan, stop 131 | modelLast = "model/my-model-" + str(Num)+str(i-saveInter) # 模型文件名为 "model/my-model-" + str(Num)+str(i-saveInter) 132 | break; # break 跳出循环 133 | pf.close() # close 关闭打开的文件 134 | return modelLast # 返回 modelLast 135 | 136 | # 模型测试 137 | def test_model(): # 定义 test_model 函数 138 | mnist = loadData() # 调用 loadData 函数, 导入数据 139 | classNum = 10 # 类别 初始化赋值为 10 , 共有 10 类 140 | modelNames = [] # 定义一个空的列表 modelNames 141 | logName = "./logModelNames.txt" # 文件名为 
logModelNames.txt 142 | pf = open(logName, "w") # 以写入的方式打开 logModelNames.txt 143 | 144 | #循环训练每个类别与其他类别的二分类器,保存10个分类器模型 145 | for i in range(classNum): # for 循环语句, 遍历所有 classNum的类别, 146 | modelNames.append(model(mnist,i)) # 通过 append 的方式, 向 modelNames 里面添加 model(mnist,i) 147 | pf.write(modelNames[i]) # 写入 modelNames[i] 148 | pf.write("\n") # 写入 换行字符 149 | pf.close() # 关闭文件 150 | 151 | if __name__ == '__main__': # 主程序 152 | test_model() # 调用 test_model 函数 153 | 154 | 155 | -------------------------------------------------------------------------------- /homework-03/ex3-part2/ann_classification_two_hidden_layers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import xlrd # 导入必备的 xlrd 库,目的是为了调用 xlrd.open_workbook 函数打开 excel 文件,读取数据 4 | 5 | class Config: 6 | input_dim = 2 # input layer dimensionality 7 | output_dim = 1 # output layer dimensionality 8 | # Gradient descent parameters (I picked these by hand) 9 | lr = 5 # learning rate for gradient descent 10 | reg_lambda = 0 #0.01 # regularization strength 11 | 12 | # 定义函数loadData函数,输入参数是 filename 指代文件名,返回数据data,目的是从.xls文件中加载数据,并存储为numpy中的array格式 13 | def loadData(filename): 14 | workbook = xlrd.open_workbook(filename) # 通过调用 xlrd.open_workbook 函数打开 excel 文件,读取数据,并返回给 workbook 变量 15 | boyinfo = workbook.sheet_by_index(0) # 通过使用属性 sheet_by_index 得到 excel 文件 中的工作簿,其中 sheet_by_index(0) 表示是第一个工作簿,在 python 中,下标从 0 开始 16 | col_num = boyinfo.ncols # 通过使用属性 ncols 得到 excel 文件 中第一个工作簿的 列数,并赋值给 col_num 17 | row_num = boyinfo.nrows # 通过使用属性 nrows 得到 excel 文件 中第一个工作簿的 行数,并赋值给 row_num 18 | col0 = boyinfo.col_values(0)[1:] # 通过使用属性 col_values(0)[1:] 得到 excel 文件 中第一列数据中,从第2行到最后一行的所有数据,并赋值给 col0 19 | data = np.array(col0) # 通过使用 np.array 函数, 将 col0 转换成数组,并赋值给 data 20 | if col_num == 1: # 条件判断语句: 如果列数 col_num 为1, 只有一列,那么直接返回数据 data 21 | return data # 返回data 22 | else: # 否则,如果不止一列数据,需要遍历所有列的数据 23 | for i in range(col_num-1): # 通过使用for循环达到遍历的目的 24 | coltemp = boyinfo.col_values(i+1)[1:] # 从第二行开始,表头不算,遍历从 第二列 开始到最后一列的数据 25 | data = np.c_[data, coltemp] # 通过使用 np.c_ 函数将 第一列的数据 和后面 所有列的数据组合起来,并赋值给 data 26 | return data # 返回data 27 | 28 | # 定义一个 plotData 函数,输入参数是 数据 X 和标志 flag: y,返回作图操作 plt, p1, p2 , 目的是为了画图 29 | def plotData(X, y): 30 | pos = np.where(y==1) # 通过使用 np.where 函数查找所有满足条件的数据,查找所有满足标志 y == 1 的数据,并赋值给 pos 31 | neg = np.where(y==0) # 通过使用 np.where 函数查找所有满足条件的数据,查找所有满足标志 y == 0 的数据,并赋值给 neg 32 | # 通过使用 plt.plot 函数作图,对所有满足标志 y == 1 的数据作图,点采用 s (正方形),代表 square, 点的大小为 7 单位,颜色为 红色 red 33 | p1 = plt.plot(X[pos, 0], X[pos, 1], marker='s', markersize=3, color='red')[0] 34 | # 通过使用 plt.plot 函数作图,对所有满足标志 y == 1 的数据作图,点采用 o (圆形),代表 circle, 点的大小为 7 单位,颜色为 绿色 green 35 | p2 = plt.plot(X[neg, 0], X[neg, 1], marker='o', markersize=3, color='green')[0] 36 | return plt,p1,p2 # 返回作图操作plt, p1, p2 37 | 38 | # normalization: 定义一个 normalization 函数,输入参数是原始数据 X ,返回归一化后的数据 X_norm , 目的是为了数据预处理,得到归一化后的数据 X_norm 39 | def normalization(X): 40 | mu = np.mean(X, axis=0) # 对数据X的每列求均值,axis = 0 代表在矩阵第一个维度上求均值 41 | Xmin = np.min(X, axis=0) # 对数据X的每列求最小值,axis = 0 代表在矩阵第一个维度上求最小值 42 | Xmax = np.max(X, axis=0) # 对数据X的每列求最大值,axis = 0 代表在矩阵第一个维度上求最大值 43 | X_norm = (X-mu)/(Xmax-Xmin) # 计算归一化后的数据,归一化公式为:(2*(X-Xmin)/(Xmax-Xmin))-1,归一化后数据范围为 [-1,1] 44 | return X_norm # 返回数据预处理,归一化后的数据 X_norm 45 | 46 | 47 | # visualize: 定义一个visualize函数,输入参数为特征矩阵X,标签数据y和分类模型model,函数的作用是:展示出模型的的分类边界 48 | def visualize(X, y, model): 49 | plot_decision_boundary(lambda x:predict(model,x), X, y) # 调用plot_decision_boundary函数 50 | 
plt.savefig("result.png") # 将plot_decision_boundary函数中返回的图片保存,图片名字为result.png 51 | plt.show() # 展示plot_decision_boundary中返回的图片 52 | 53 | 54 | # plot_decision_boundary:定义决策边界函数,输入为预测函数 特征矩阵数据X 标签数据y 55 | def plot_decision_boundary(pred_func, X, y): 56 | # Set min and max values and give it some padding 57 | x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 # 将特征矩阵X中第一列中最小值与最大值分别加上0.5 赋予x_min,x_max 58 | y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 # 将特征矩阵X中第二列中最小值与最大值分别加上0.5 赋予y_min,y_max 59 | h = 0.01 # 步长为0.01 60 | # Generate a grid of points with distance h between them 61 | # 由np.arrange生成一维数组作为np.meshgrid的参数,返回xx矩阵,yy矩阵 62 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 63 | # Predict the function value for the whole gid 64 | # .ravel()方法将xx,yy矩阵压缩为一维向量;np.c_:是按行连接两个矩阵,就是把两矩阵左右相加,要求行数相等 65 | # 合成的矩阵作为pred_func的输入,返回预测值 66 | Z = pred_func(np.c_[xx.ravel(), yy.ravel()]) 67 | Z = (Z.T).reshape(xx.shape) # Z矩阵转置并将维度调整和xx的维度一致 68 | p=plt.figure() # 生成一个“画布” 69 | _,p1,p2=plotData(X,y) # 将特征矩阵X与标签数据y传入plotData函数,返回图操作p1,p2,其中‘_’用来接没有用到的返回值 70 | p3=plt.contour(xx, yy, Z, levels=0,linewidths=2).collections[0] # 画登高线,即决策边界 71 | # label & Legend, specific for the exercise 72 | plt.xlabel("tall") # 横坐标的标签为tall 73 | plt.ylabel("salary") # 纵坐标的标签为salary 74 | plt.legend((p1, p2, p3), ('y = I like you', "y = I don't like you", 'Decision Boundary'), numpoints=1,handlelength=0) # 为每一个绘图添加图例 75 | plt.title("ANN") # 设置图标题 76 | 77 | 78 | # 定义sigmoid激活函数,将输入数据压缩在0-1之间 79 | def sigmoid(z): 80 | g = 1 / (1 + np.exp(-z)) # 根据sigmoid 函数公式写出 81 | return g # 返回函数输出值 82 | 83 | # 定义sigmoidGradient函数,计算sigmoid函数的梯度值 84 | def sigmoidGradient(z): 85 | g = 1.0 / (1.0 + np.exp(-z)) # 根据sigmoid 函数公式写出 86 | g = g * (1 - g) # 根据sigmoid 函数公式写出 87 | return g # 返回梯度值 88 | 89 | # Helper function to evaluate the total loss on the dataset 90 | # 定义损失函数,计算所有样本的损失值 91 | def calculate_loss(model, X, y): 92 | num_examples = X.shape[1] # training set size # X的第二个维度为训练集样本个数 93 | W1, W2, W3,= model['W1'], model['W2'] , model['W3'] # 神经网络为两层隐藏层,对应的参数矩阵分别为W1 W2 W3 94 | # Forward propagation to calculate our predictions 需要补全 95 | a1={0} # 将特征矩阵X赋值给a1 96 | z2 = {0} # 参数矩阵W1与a1做矩阵乘法,得到z2矩阵 97 | a2 = {0} # 对z2矩阵进行sigmoid激活函数处理得到激活后的矩阵a2,即第一层隐藏层数值 98 | a2 = {0} # 为矩阵a2增加一列值为1的偏置 99 | z3 = {0} # 参数矩阵W2与a2做矩阵乘法,得到z3矩阵 100 | a3 = {0} # 对z3矩阵进行sigmoid激活函数处理得到激活后的矩阵a3,即第二层隐藏层数值 101 | a3 = {0} # 为矩阵a3增加一列值为1的偏置 102 | z4 = {0} # 参数矩阵W3与a3做矩阵乘法,得到z4矩阵 103 | a4 = {0} # 对z4矩阵进行sigmoid激活函数处理得到激活后的矩阵a4,即输出值 104 | 105 | # Calculating the loss 106 | one = np.multiply(y, np.log(a4)) # 将真实标签y与预测值a4的对数值对应相乘 107 | two = np.multiply((1 - y), np.log(1-a4)) # 将真实标签(1-y)与预测值(1-a4)对数值对应相乘 108 | data_loss = -(1. 
/ num_examples) * (one + two).sum() # 损失函数的和,对应交叉熵公式 109 | return data_loss # 返回损失值 110 | 111 | 112 | # 定义compare函数,将预测值大于0.5的归为正例,小于0.5的归为负例 113 | def compare(X): 114 | num = X.shape[1] # X的第二个维度为训练集样本个数,注意X为函数的形参,真正数据调用时传入的实参 115 | result = [] # 声明一个存放结果的列表 116 | for i in range(num): # 遍历所有结果 117 | if X[:,i]>0.5: # 判断预测结果是否大于0.5 118 | result.append(1.0) # 如果大于0.5,则在result列表中增加一个1.0 119 | else: 120 | result.append(0.0) # 否则在result列表中增加一个0.0 121 | return result # 返回result列表,里面是预测为正例与反例的结果 122 | 123 | 124 | # 定义predict预测函数,输入为训练好的模型和特征矩阵X,返回预测值 125 | def predict(model, X): 126 | m = X.shape[0] # 将输入矩阵的第一个维度赋值给m 127 | W1, W2, W3= model['W1'], model['W2'] , model['W3'] # 将模型训练好的参数分别赋值给W1 W2 W3 128 | # Forward propagation 需要补全 129 | X_m = np.transpose(np.column_stack((np.ones((m, 1)), X))) # 为输入矩阵增加一列值为1的偏置 130 | a1={0} # 将矩阵X_m赋予a1 131 | z2 = {0} # 参数W1与a1做矩阵乘法 132 | a2 = {0} # 对矩阵z2进行做sigmoid激活 133 | a2 = {0} # 为第一层隐藏层的矩阵a2增加一列值为1的偏置 134 | z3 = {0} # 参数W2与a2做矩阵乘法 135 | a3 = {0} # 对矩阵z3做sigmoid激活 136 | a3 = {0} # 为第二层隐藏层的矩阵a3增加一列值为1的偏置 137 | z4 = {0} # 参数W3与a3做矩阵乘法 138 | a4 = {0} # 对矩阵z4做sigmoid激活 139 | return a4 # 返回输出矩阵 140 | 141 | 142 | # 定义precision函数:输入为训练模型,与特征矩阵,目的是返回样本预测结果,正例为1,反例为0 143 | def precision(model, x): 144 | W1, W2, W3= model['W1'], model['W2'], model['W3'] # 将模型更新后的参数赋值给W1 W2 W3 145 | # Forward propagation 需要补全 146 | a1={0} # 将矩阵x赋予a1 147 | z2 = {0} # 参数W1与a1做矩阵乘法 148 | a2 = {0} # 对矩阵z2进行做sigmoid激活 149 | a2 = {0} # 为第一层隐藏层的矩阵a2增加一列值为1的偏置 150 | z3 = {0} # 参数W2与a2做矩阵乘法 151 | a3 = {0} # 对矩阵z3做sigmoid激活 152 | a3 = {0} # 为第二层隐藏层的矩阵a3增加一列值为1的偏置 153 | z4 = {0} # 参数W3与a3做矩阵乘法 154 | a4 = {0} # 对矩阵z4做sigmoid激活 155 | result = compare(a4) # 调用compare函数,返回预测结果 156 | return result # 返回预测结果 157 | 158 | 159 | # 定义randInitializeWeights,参数为输入维度和输出维度,作用是随机初始化参数矩阵 160 | def randInitializeWeights(L_in, L_out): 161 | W = np.zeros((L_out, 1 + L_in)) #生成一个维度为(L_out, 1 + L_in)的全0矩阵 162 | # Randomly initialize the weights to small values 163 | epsilon_init = 0.12 # 初始化一个很小的数 164 | W = np.random.rand(L_out, 1 + L_in)*(2*epsilon_init) - epsilon_init # 随机生成维度为(L_out, 1 + L_in)的参数矩阵 165 | return W # 返回参数矩阵 166 | 167 | # This function learns parameters for the neural network and returns the model. 168 | # - hidden1_dim: Number of nodes in the hidden layer 1 169 | # - hidden2_dim: Number of nodes in the hidden layer 2 170 | # - iterNum: Number of passes through the training data for gradient descent 171 | # - print_loss: If True, print the loss every 1000 iterations 172 | # 定义build_model函数,输入为特征矩阵X,标签向量y,第一层隐藏层神经元个数,第二层隐藏层神经元个数,迭代次数,是否打印损失函数的布尔变量 173 | # 作用是完成神经网络的前向和反向传播,训练参数W1 W2 W3 174 | def build_model(X, y, hidden1_dim,hidden2_dim, iterNum=2000, print_loss=False): 175 | # Initialize the parameters to random values. We need to learn these. 176 | m = X.shape[0] #将输入矩阵X的第一个维度赋予m 177 | 178 | W1 = randInitializeWeights(Config.input_dim, hidden1_dim) # 调用randInitializeWeights函数,初始化W1 179 | W2 = randInitializeWeights(hidden1_dim, hidden2_dim) # 调用randInitializeWeights函数,初始化W2 180 | W3 = randInitializeWeights(hidden2_dim, Config.output_dim) # 调用randInitializeWeights函数,初始化W3 181 | 182 | # This is what we return at the end 183 | model = {} # 将model声明为字典数据格式 184 | # Gradient descent. 
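Before completing the forward/backward blanks in build_model below, it can help to confirm numerically that the sigmoidGradient defined earlier really is the derivative of sigmoid. The finite-difference check below is an editor's illustrative sketch, independent of the {0} blanks in the assignment:

import numpy as np

def sigmoid_demo(z):
    return 1.0 / (1.0 + np.exp(-z))                              # same formula as sigmoid() above

z = np.linspace(-3.0, 3.0, 7)
analytic = sigmoid_demo(z) * (1.0 - sigmoid_demo(z))             # the sigmoidGradient formula
eps = 1e-5
numeric = (sigmoid_demo(z + eps) - sigmoid_demo(z - eps)) / (2.0 * eps)   # central difference
print(np.max(np.abs(analytic - numeric)))                        # on the order of 1e-11: the two agree
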
185 | logName = "logText.txt" # 日志文件名称 186 | logFile = open(logName, "w") # 调用open函数,打开文件,模式为写 187 | for t in range(0, iterNum): # 从0循环至iterNum 188 | # Forward propagation 需要补全 189 | X_m = np.transpose(np.column_stack((np.ones((m, 1)), X))) # 为输入矩阵X增加一列偏置为1的值 190 | a1={0} # 将X_m赋给a1 191 | z2 = {0} # 参数W1与a1做矩阵乘法 192 | a2 = {0} # 对矩阵z2进行做sigmoid激活 193 | a2 = {0} # 为第一层隐藏层的矩阵a2增加一列值为1的偏置 194 | z3 = {0} # 参数W2与a2做矩阵乘法 195 | a3 = {0} # 对矩阵z3做sigmoid激活 196 | a3 = {0} # 为第二层隐藏层的矩阵a3增加一列值为1的偏置 197 | z4 = {0} # 参数W3与a3做矩阵乘法 198 | a4 = {0} # 对矩阵z4做sigmoid激活 199 | 200 | # Back propagation 201 | y_m = np.transpose(np.reshape(y, [-1, 1])) #reshape y_m from (n,)to (1,n) 202 | delta4 ={0} # 计算delta4,将预测标签向量a4与y_m做差 203 | delta3 = {0} # 计算delta3,参数矩阵W3转置后与delta4做矩阵乘法,然后与sigmoidGradient(z3)对应位相乘 204 | delta2 = {0} # 计算delta2,参数矩阵W2转置后与delta3做矩阵乘法,然后与sigmoidGradient(z2)对应位相乘 205 | 206 | # layer 4 207 | bigDelta3 = np.zeros(W3.shape) # 初始化一个与W3维度一致的全零矩阵bigDelta3 208 | DW3 = np.zeros(W3.shape) # 初始化一个与W3维度一致的全零矩阵bigDelta3 209 | for i in range(W3.shape[0]): # 根据W3第一个维度大小遍历 210 | for j in range ((W3.shape[1])): # 根据W3的第二个维度大小进行遍历 211 | for n in range(0, m): # 第n样本 212 | bigDelta3[i,j] += a3[j,n]*delta4[i,n] # 将a3[j,n]与delta4[i,n]对应为相乘,然后全部加和求出bigDelta3[i,j] 213 | DW3[i,j]= (1./m) * bigDelta3[i,j] #对bigDelta3[i,j]乘样本个数的倒数得出DW3[i,j] 214 | W3[i,j] += -Config.lr * DW3[i,j] # 学习率-lr乘DW3[i,j]并加和得出W3[i,j] 215 | 216 | # layer 3 217 | bigDelta2 = np.zeros(W2.shape) # 初始化一个与W2维度一致的全零矩阵bigDelta2 218 | DW2 = np.zeros(W2.shape) # 初始化一个与W2维度一致的全零矩阵bigDelta2 219 | for i in range(W2.shape[0]): # 根据W2第一个维度大小遍历 220 | for j in range((W2.shape[1])): # 根据W2的第二个维度大小进行遍历 221 | for n in range(0, m): # 第n样本 222 | bigDelta2[i, j] += a2[j, n] * delta3[i, n] # 将a2[j,n]与delta3[i,n]对应为相乘,然后全部加和求出bigDelta2[i,j] 223 | DW2[i,j] = (1. / m) * bigDelta2[i, j] #对bigDelta2[i,j]乘样本个数的倒数得出DW2[i,j] 224 | W2[i, j] += -Config.lr * DW2[i,j] # 学习率-lr乘DW2[i,j]并加和得出W2[i,j] 225 | 226 | # layer 2 227 | bigDelta1 = np.zeros(W1.shape) # 初始化一个与W1维度一致的全零矩阵bigDelta1 228 | DW1 = np.zeros(W1.shape) # 初始化一个与W1维度一致的全零矩阵bigDelta1 229 | for i in range(W1.shape[0]): # 根据W1第一个维度大小遍历 230 | for j in range((W1.shape[1])): # 根据W1第二个维度大小遍历 231 | for n in range(0, m): # 第n样本 232 | bigDelta1[i, j] += a1[j, n] * delta2[i, n] # 将a1[j,n]与delta2[i,n]对应为相乘,然后全部加和求出bigDelta1[i,j] 233 | DW1[i,j] = (1. / m) * bigDelta1[i, j] #对bigDelta1[i,j]乘样本个数的倒数得出DW1[i,j] 234 | W1[i, j] += -Config.lr * DW1[i,j] # 学习率-lr乘DW2[i,j]并加和得出W2[i,j] 235 | 236 | # 向量运算 237 | # DW3 = (1./m) * np.dot(delta4,a3.T) 238 | # DW2 = (1./m) * np.dot(delta3,a2.T) 239 | # DW1 = (1./m) * np.dot(delta2,a1.T) 240 | 241 | # Gradient descent parameter update 242 | # W1 += -Config.lr * DW1 243 | # W2 += -Config.lr * DW2 244 | # W3 += -Config.lr * DW3 245 | 246 | 247 | # Assign new parameters to the model 248 | model = {'W1': W1, 'W2': W2, 'W3': W3} #模型的键值对分别对应更新后的参数W1 W2 W3 249 | 250 | # Optionally print the loss. 251 | # This is expensive because it uses the whole dataset, so we don't want to do it too often. 
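The three nested loops above accumulate exactly the outer-product gradients that the commented-out vectorized lines express as (1/m) * dot(delta, a.T). A small sketch with made-up shapes showing that the two forms agree, which is also a handy sanity check after filling in the blanks:

import numpy as np

rng = np.random.RandomState(0)
m_demo = 5                                   # hypothetical number of training samples
delta_demo = rng.randn(1, m_demo)            # stands in for delta4 (output-layer error)
a_demo = rng.randn(4, m_demo)                # stands in for a3 (previous activations incl. bias row)

DW_loop = np.zeros((1, 4))                   # loop form, mirroring the for-loops above
for i in range(1):
    for j in range(4):
        for n in range(m_demo):
            DW_loop[i, j] += a_demo[j, n] * delta_demo[i, n]
DW_loop *= 1.0 / m_demo

DW_vec = (1.0 / m_demo) * np.dot(delta_demo, a_demo.T)   # vectorized form from the comments above
print(np.allclose(DW_loop, DW_vec))          # True
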
252 | if print_loss and t % 1000 == 0: #如果print_loss 与 t是1000的整数倍同时为True,运行下面代码 253 | print("Loss after iteration %i: %f" % (t, calculate_loss(model, X_m, y_m))) #格式化打印语句,输出迭代t次后,损失值是多少 254 | logFile.write("Loss after iteration %i: %f" % (t, calculate_loss(model, X_m, y_m))) # 将输出语句写入日志文件 255 | logFile.write("\n") 256 | result = precision(model, X_m) #调用precision函数,返回预测结果 257 | print("Training Set Accuracy: {:f}".format((np.mean(result == y) * 100))) #计算准确率 258 | logFile.write("Training Set Accuracy: {:f}".format((np.mean(result == y) * 100))) # 将输出语句写入日志文件 259 | logFile.write("\n") # 换行 260 | logFile.close() # 关闭文件 261 | 262 | return model # 返回模型,实际是返回模型更新后的参数 263 | 264 | def main(): 265 | # load data 加载数据 266 | data = loadData('data.xls') # 通过调用 loadData 函数,导入原始数据集 文件 'data.xls',并赋值给 data 267 | X = data[:, :2] # 将数据集 data 的 第一列 和 第二列 的所有行的数据,赋值给 X, 实际对应的是 身高(m)、 月薪(元)的原始数据 268 | y = data[:, 2] # 将数据集 data 的 第三列 所有行的数据,赋值给 y,实际对应的是 是否有兴趣尝试交往(Y=1/N=0)的原始数据,可取 0 或 1 269 | # normalization 通过调用 normalization 函数,对原始数据集 X 进行归一化 270 | X_norm = normalization(X) 271 | # 训练模型 272 | model = build_model(X_norm, y, 5, 3, iterNum=20000, print_loss=True) 273 | # 可视化 274 | visualize(X_norm, y, model) 275 | 276 | if __name__ == "__main__": 277 | main() 278 | -------------------------------------------------------------------------------- /homework-03/ex3-part2/data.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/ex3-part2/data.xls -------------------------------------------------------------------------------- /homework-03/ex3-part3/tensorflow-MNIST-nn.py: -------------------------------------------------------------------------------- 1 | from tensorflow.examples.tutorials.mnist import input_data 2 | import tensorflow as tf 3 | 4 | # MNIST数据存放的路径 5 | file = "../MNIST" 6 | 7 | # 导入数据,首先检测file路径下是否存在数据集,若不存在,则到网上下载.
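A sketch of what the data-loading call below returns, assuming the same TF 1.x tutorial module imported above (read_data_sets looks for the four MNIST .gz files in the given directory and downloads them if they are missing); the shapes shown are the standard split sizes for this loader:

from tensorflow.examples.tutorials.mnist import input_data

mnist_demo = input_data.read_data_sets("../MNIST", one_hot=True)
print(mnist_demo.train.images.shape)   # (55000, 784): each 28x28 image flattened to 784 floats
print(mnist_demo.train.labels.shape)   # (55000, 10): one-hot rows, e.g. digit 3 -> [0 0 0 1 0 0 0 0 0 0]
print(mnist_demo.test.images.shape)    # (10000, 784)
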
8 | # MNIST下载地址:http://yann.lecun.com/exdb/mnist/ 9 | # 注意:下载后需要解压 10 | mnist = input_data.read_data_sets(file, one_hot=True)#读取数据集,标签数据设置为one-hot格式。即n维标签中只有一个数据为1,其余为0 11 | 12 | # 模型的输入和输出 13 | # 为模型的输入输出申请占位符,作为外部数据与网络模型的交互接口 14 | # 784=28*28 15 | x = tf.placeholder(tf.float32, shape={0}) #申请占位符 输入图像 N*784的矩阵 [None, 784] 16 | y_ = tf.placeholder(tf.float32, shape={0}) #申请占位符 输入label N*10的矩阵[None, 10] 17 | 18 | # 将tensor图中的输入和变量进行计算 通过tf.layers.dense搭建全连接网络层,并为该层设置对应的输入、神经元个数、激活函数 19 | # 通过units设置神经元的个数,通过activation设置激活函数,可设定的激活函数,请参考https://tensorflow.google.cn/api_docs/python/tf/nn/softmax 20 | 21 | A1 = tf.layers.dense(inputs=x, units={0},activation=tf.nn.{0}) #{0}为待补充, 添加全连接层,神经元个数为16个,激活函数为sigmoid、tanh或relu 22 | A2 = tf.layers.dense(inputs=A1,units={0},activation=tf.nn.{0}) #{0}为待补充,添加全连接层,神经元个数为16个,激活函数为sigmoid、tanh或relu 23 | y = tf.layers.dense(inputs=A2,units=10, activation=tf.nn.{0}) #{0}为待补充,添加全连接层,设置激活函数为sigmoid或softmax,由于输出类别是10,所以输出层神经元个数为10 24 | 25 | # 交叉熵 用来度量y_与y之间的差异性 26 | # y_表示样本的标签 one-hot形式 ; y表示tensor流图计算出的值,即预测值 27 | cross_entropy = -tf.reduce_sum(y_*tf.log(y))#对损失求和 28 | 29 | # 训练 利用梯度下降法,以0.01的学习率最小化目标函数(cross_entropy) 30 | train_step = tf.train.GradientDescentOptimizer({0}).minimize({0}) #设置随机梯度下降的学习率为0.01,最小化目标函数为cross_entropy 31 | 32 | # 创建Session,用于启动tensor图 33 | sess = tf.InteractiveSession() 34 | 35 | # 调用global_variables_initializer函数,将前面定义的Variable变量按照设置的初始化方式,进行初始化 36 | sess.run({0}) #执行tf.global_variables_initializer(),初始化模型参数 37 | 38 | #循环训练,设置迭代次数为10000 39 | for i in range({0}): 40 | #选取mnist训练数据集,设置minibatchsize为50,即选取样本集中的50个样本 41 | batch = mnist.train.next_batch({0}) 42 | #启动tensor流图,并执行训练,输入数据为图像(batch[0])和对应的标签(batch[1]) 43 | train_step.run(feed_dict={x: batch[0], y_: batch[1]}) 44 | 45 | ################################### 测试 ################################### 46 | # 计算模型预测结果与标签中相等的部分 47 | # 调用tf.equal计算模型预测结果y与标签结果y_的差异,预测正确则返回1,预测错误则返回0; 48 | # tf.argmax(y, 1)为计算y中每行数据最大值的索引; 49 | correct_prediction = tf.equal(tf.argmax(y, 1), {0}) 50 | 51 | # 根据correct_prediction计算模型预测精度 52 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 53 | 54 | # 启动tensor流图,计算模型预测精度,模型输入数据为train/test的图像和对应标签 55 | print(sess.run(accuracy, feed_dict={x: mnist.train.images, y_:mnist.train.labels}))#计算模型在训练集上的准确率 56 | print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_:mnist.test.labels}))#计算模型在测试集上的准确率 57 | 58 | # 结果输出 59 | logFileName = "logText.txt" 60 | logFile = open(logFileName, "w") 61 | logFile.write(str(sess.run(accuracy, feed_dict={x: mnist.train.images, y_:mnist.train.labels}))) 62 | logFile.write("\n") 63 | logFile.write(str(sess.run(accuracy, feed_dict={x: mnist.test.images, y_:mnist.test.labels}))) 64 | logFile.close() 65 | -------------------------------------------------------------------------------- /homework-03/作业说明.txt: -------------------------------------------------------------------------------- 1 | 作业说明: 2 | 本次作业分为三个部分,以下是一些注意事项: 3 | 1. 作业形式依旧是程序填空的形式,需要填写的内容在文中用 {0} 代替,其余部分不需要修改,如果出现错误,在微信群里说明。 4 | 2. 
作业工作过程在本文件夹中进行,作业提交时,将MINST挪出文件夹,其余部分压缩为一个压缩包,提交压缩包。 5 | -------------------------------------------------------------------------------- /homework-03/马士尧 homework 03.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-03/马士尧 homework 03.pdf -------------------------------------------------------------------------------- /homework-04/requirement.md: -------------------------------------------------------------------------------- 1 | # 姓名.ipynb命名上传代码 2 | 3 | # 上传时间下周五前 4 | -------------------------------------------------------------------------------- /homework-04/余欣灿.ipynb: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | dataset = pd.read_csv('Data.csv') 4 | X = dataset.iloc[ : , :-1].values 5 | Y = dataset.iloc[ : , 3].values 6 | from sklearn.preprocessing import Imputer 7 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 8 | imputer = imputer.fit(X[ : , 1:3]) 9 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 10 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 11 | labelencoder_X = LabelEncoder() 12 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 13 | onehotencoder = OneHotEncoder(categorical_features = [0]) 14 | X = onehotencoder.fit_transform(X).toarray() 15 | labelencoder_Y = LabelEncoder() 16 | Y = labelencoder_Y.fit_transform(Y) 17 | from sklearn.cross_validation import train_test_split 18 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 19 | from sklearn.preprocessing import StandardScaler 20 | sc_X = StandardScaler() 21 | X_train = sc_X.fit_transform(X_train) 22 | X_test = sc_X.fit_transform(X_test) 23 | -------------------------------------------------------------------------------- /homework-04/刘广升.ipynb: -------------------------------------------------------------------------------- 1 | #Day 1: Data Prepocessing 2 | 3 | #Step 1: Importing the libraries 4 | import numpy as np 5 | import pandas as pd 6 | 7 | #Step 2: Importing dataset 8 | dataset = pd.read_csv('../datasets/Data.csv') 9 | X = dataset.iloc[ : , :-1].values 10 | Y = dataset.iloc[ : , 3].values 11 | print("Step 2: Importing dataset") 12 | print("X") 13 | print(X) 14 | print("Y") 15 | print(Y) 16 | 17 | #Step 3: Handling the missing data 18 | from sklearn.preprocessing import Imputer 19 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 20 | imputer = imputer.fit(X[ : , 1:3]) 21 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 22 | print("---------------------") 23 | print("Step 3: Handling the missing data") 24 | print("step2") 25 | print("X") 26 | print(X) 27 | 28 | #Step 4: Encoding categorical data 29 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 30 | labelencoder_X = LabelEncoder() 31 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 32 | #Creating a dummy variable 33 | onehotencoder = OneHotEncoder(categorical_features = [0]) 34 | X = onehotencoder.fit_transform(X).toarray() 35 | labelencoder_Y = LabelEncoder() 36 | Y = labelencoder_Y.fit_transform(Y) 37 | print("---------------------") 38 | print("Step 4: Encoding categorical data") 39 | print("X") 40 | print(X) 41 | print("Y") 42 | print(Y) 43 | 44 | #Step 5: Splitting the datasets into training sets and Test sets 45 | from sklearn.model_selection import train_test_split 46 | X_train, X_test, 
Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 47 | print("---------------------") 48 | print("Step 5: Splitting the datasets into training sets and Test sets") 49 | print("X_train") 50 | print(X_train) 51 | print("X_test") 52 | print(X_test) 53 | print("Y_train") 54 | print(Y_train) 55 | print("Y_test") 56 | print(Y_test) 57 | 58 | #Step 6: Feature Scaling 59 | from sklearn.preprocessing import StandardScaler 60 | sc_X = StandardScaler() 61 | X_train = sc_X.fit_transform(X_train) 62 | X_test = sc_X.transform(X_test) 63 | print("---------------------") 64 | print("Step 6: Feature Scaling") 65 | print("X_train") 66 | print(X_train) 67 | print("X_test") 68 | print(X_test) 69 | -------------------------------------------------------------------------------- /homework-04/史一阳.ipynb: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as nd 3 | dataset = pd.read_csv(' Data.csv ') //读取csv文件 4 | X = dataset.iloc[ : , :-1].values//.iloc[行,列] 5 | Y = dataset.iloc [: , 3 ] .values //: 全部行 or 列;[a]第a行 or 列 6 | // [a,b,c]第 a,b,c 行 or 列 7 | from sklearn.preprocessing import Imputer 8 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 9 | imputer = imputer.fit(X[ : , 1:3]) 10 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 11 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 12 | labelencoder_X = LabelEncoder() 13 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 14 | onehotencoder = OneHotEncoder(categorical_features = [0]) 15 | X = onehotencoder.fit_transform(X).toarray() 16 | labelencoder_Y = LabelEncoder() 17 | Y = labelencoder_Y.fit_transform(Y) 18 | from sklearn.model_selection import train_test_split 19 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 20 | from sklearn.preprocessing import StandardScaler 21 | sc_X = StandardScaler() 22 | X_train = sc_X.fit_transform(X_train) 23 | X_test = sc_X.transform(X_test) 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /homework-04/吴洁茹.ipynb: -------------------------------------------------------------------------------- 1 | #Day 1: Data Prepocessing 2 | 3 | #Step 1: Importing the libraries 4 | import numpy as np 5 | import pandas as pd 6 | 7 | #Step 2: Importing dataset 8 | dataset = pd.read_csv('../datasets/Data.csv') 9 | X = dataset.iloc[ : , :-1].values 10 | Y = dataset.iloc[ : , 3].values 11 | print("Step 2: Importing dataset") 12 | print("X") 13 | print(X) 14 | print("Y") 15 | print(Y) 16 | 17 | #Step 3: Handling the missing data 18 | from sklearn.preprocessing import Imputer 19 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 20 | imputer = imputer.fit(X[ : , 1:3]) 21 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 22 | print("---------------------") 23 | print("Step 3: Handling the missing data") 24 | print("step2") 25 | print("X") 26 | print(X) 27 | 28 | #Step 4: Encoding categorical data 29 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 30 | labelencoder_X = LabelEncoder() 31 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 32 | #Creating a dummy variable 33 | onehotencoder = OneHotEncoder(categorical_features = [0]) 34 | X = onehotencoder.fit_transform(X).toarray() 35 | labelencoder_Y = LabelEncoder() 36 | Y = labelencoder_Y.fit_transform(Y) 37 | print("---------------------") 38 | print("Step 4: Encoding categorical data") 39 | print("X") 40 | print(X) 41 | 
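The DeprecationWarning/FutureWarning messages captured in the notebooks further down indicate that sklearn's Imputer class and the categorical_features argument of OneHotEncoder were deprecated in 0.20 and removed in 0.22. A minimal sketch of the replacement API, assuming the same three-feature Data.csv layout used in these submissions (this is an editor's example, not part of any student's file):

import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, 3].values

X[:, 1:3] = SimpleImputer(strategy='mean').fit_transform(X[:, 1:3])       # replaces Imputer(..., axis=0)
ct = ColumnTransformer([('country', OneHotEncoder(), [0])], remainder='passthrough')
X = ct.fit_transform(X)                                                    # replaces categorical_features=[0]
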
print("Y") 42 | print(Y) 43 | 44 | #Step 5: Splitting the datasets into training sets and Test sets 45 | from sklearn.model_selection import train_test_split 46 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 47 | print("---------------------") 48 | print("Step 5: Splitting the datasets into training sets and Test sets") 49 | print("X_train") 50 | print(X_train) 51 | print("X_test") 52 | print(X_test) 53 | print("Y_train") 54 | print(Y_train) 55 | print("Y_test") 56 | print(Y_test) 57 | 58 | #Step 6: Feature Scaling 59 | from sklearn.preprocessing import StandardScaler 60 | sc_X = StandardScaler() 61 | X_train = sc_X.fit_transform(X_train) 62 | X_test = sc_X.transform(X_test) 63 | print("---------------------") 64 | print("Step 6: Feature Scaling") 65 | print("X_train") 66 | print(X_train) 67 | print("X_test") 68 | print(X_test) 69 | -------------------------------------------------------------------------------- /homework-04/吴玉隆.ipynb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Mar 24 22:30:52 2019 4 | 5 | @author: 15188 6 | """ 7 | import numpy as np 8 | import pandas as pd 9 | dataset = pd.read_csv('Data.csv')//读取csv文件 10 | X = dataset.iloc[ : , :-1].values//.iloc[行,列] 11 | Y = dataset.iloc[ : , 3].values // : 全部行 or 列;[a]第a行 or 列 12 | // [a,b,c]第 a,b,c 行 or 列 13 | from sklearn.preprocessing import Imputer 14 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 15 | imputer = imputer.fit(X[ : , 1:3]) 16 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 18 | labelencoder_X = LabelEncoder() 19 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 20 | onehotencoder = OneHotEncoder(categorical_features = [0]) 21 | X = onehotencoder.fit_transform(X).toarray() 22 | labelencoder_Y = LabelEncoder() 23 | Y = labelencoder_Y.fit_transform(Y) 24 | from sklearn.model_selection import train_test_split 25 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 26 | from sklearn.preprocessing import StandardScaler 27 | sc_X = StandardScaler() 28 | X_train = sc_X.fit_transform(X_train) 29 | X_test = sc_X.transform(X_test) 30 | -------------------------------------------------------------------------------- /homework-04/周小梅.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "\n", 10 | "#1: 导入类库\n", 11 | "\n", 12 | "import numpy as np\n", 13 | "import pandas as pd" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 5, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "Step 2: Importing dataset\n", 26 | "X\n", 27 | "[['France' 44.0 72000.0]\n", 28 | " ['Spain' 27.0 48000.0]\n", 29 | " ['Germany' 30.0 54000.0]\n", 30 | " ['Spain' 38.0 61000.0]\n", 31 | " ['Germany' 40.0 nan]\n", 32 | " ['France' 35.0 58000.0]\n", 33 | " ['Spain' nan 52000.0]\n", 34 | " ['France' 48.0 79000.0]\n", 35 | " ['Germany' 50.0 83000.0]\n", 36 | " ['France' 37.0 67000.0]]\n", 37 | "Y\n", 38 | "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "#2:导入数据集\n", 44 | "dataset = pd.read_csv('D:\\Data.csv')\n", 45 | "# 不包括最后一列的所有列\n", 46 | "X = dataset.iloc[ : , 
:-1].values\n", 47 | "#取最后一列\n", 48 | "Y = dataset.iloc[ : , 3].values\n", 49 | "print(\"Step 2: Importing dataset\")\n", 50 | "print(\"X\")\n", 51 | "print(X)\n", 52 | "print(\"Y\")\n", 53 | "print(Y)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 6, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "---------------------\n", 66 | "Step 3: Handling the missing data\n", 67 | "step2\n", 68 | "X\n", 69 | "[['France' 44.0 72000.0]\n", 70 | " ['Spain' 27.0 48000.0]\n", 71 | " ['Germany' 30.0 54000.0]\n", 72 | " ['Spain' 38.0 61000.0]\n", 73 | " ['Germany' 40.0 63777.77777777778]\n", 74 | " ['France' 35.0 58000.0]\n", 75 | " ['Spain' 38.77777777777778 52000.0]\n", 76 | " ['France' 48.0 79000.0]\n", 77 | " ['Germany' 50.0 83000.0]\n", 78 | " ['France' 37.0 67000.0]]\n" 79 | ] 80 | }, 81 | { 82 | "name": "stderr", 83 | "output_type": "stream", 84 | "text": [ 85 | "C:\\Users\\lenovo\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n", 86 | " warnings.warn(msg, category=DeprecationWarning)\n" 87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "#3: 处理缺失的数据\n", 92 | "from sklearn.preprocessing import Imputer\n", 93 | "# axis=0表示按列进行\n", 94 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", 95 | "imputer = imputer.fit(X[ : , 1:3])\n", 96 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", 97 | "print(\"---------------------\")\n", 98 | "print(\"Step 3: Handling the missing data\")\n", 99 | "print(\"step2\")\n", 100 | "print(\"X\")\n", 101 | "print(X)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 7, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "---------------------\n", 114 | "Step 4: Encoding categorical data\n", 115 | "X\n", 116 | "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 117 | " 7.20000000e+04]\n", 118 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 119 | " 4.80000000e+04]\n", 120 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", 121 | " 5.40000000e+04]\n", 122 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 123 | " 6.10000000e+04]\n", 124 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 125 | " 6.37777778e+04]\n", 126 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 127 | " 5.80000000e+04]\n", 128 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 129 | " 5.20000000e+04]\n", 130 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 131 | " 7.90000000e+04]\n", 132 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", 133 | " 8.30000000e+04]\n", 134 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 135 | " 6.70000000e+04]]\n", 136 | "Y\n", 137 | "[0 1 0 0 1 1 0 1 0 1]\n" 138 | ] 139 | }, 140 | { 141 | "name": "stderr", 142 | "output_type": "stream", 143 | "text": [ 144 | "C:\\Users\\lenovo\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:368: FutureWarning: The handling of integer data will change in version 0.22. 
Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n", 145 | "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n", 146 | "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n", 147 | " warnings.warn(msg, FutureWarning)\n", 148 | "C:\\Users\\lenovo\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:390: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n", 149 | " \"use the ColumnTransformer instead.\", DeprecationWarning)\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "from sklearn.preprocessing import LabelEncoder,OneHotEncoder\n", 155 | "labelencoder_X = LabelEncoder()\n", 156 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n", 157 | "#Creating a dummy variable\n", 158 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 159 | "X = onehotencoder.fit_transform(X).toarray()\n", 160 | "labelencoder_Y = LabelEncoder()\n", 161 | "Y = labelencoder_Y.fit_transform(Y)\n", 162 | "print(\"---------------------\")\n", 163 | "print(\"Step 4: Encoding categorical data\")\n", 164 | "print(\"X\")\n", 165 | "print(X)\n", 166 | "print(\"Y\")\n", 167 | "print(Y)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 8, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "---------------------\n", 180 | "Step 5: Splitting the datasets into training sets and Test sets\n", 181 | "X_train\n", 182 | "[[0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 183 | " 6.37777778e+04]\n", 184 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 185 | " 6.70000000e+04]\n", 186 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 187 | " 4.80000000e+04]\n", 188 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 189 | " 5.20000000e+04]\n", 190 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 191 | " 7.90000000e+04]\n", 192 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 193 | " 6.10000000e+04]\n", 194 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 195 | " 7.20000000e+04]\n", 196 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 197 | " 5.80000000e+04]]\n", 198 | "X_test\n", 199 | "[[0.0e+00 1.0e+00 0.0e+00 3.0e+01 5.4e+04]\n", 200 | " [0.0e+00 1.0e+00 0.0e+00 5.0e+01 8.3e+04]]\n", 201 | "Y_train\n", 202 | "[1 1 1 0 1 0 0 1]\n", 203 | "Y_test\n", 204 | "[0 0]\n" 205 | ] 206 | } 207 | ], 208 | "source": [ 209 | "#Step 5: 切分数据集成训练数据和测试数据\n", 210 | "from sklearn.model_selection import train_test_split\n", 211 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n", 212 | "print(\"---------------------\")\n", 213 | "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n", 214 | "print(\"X_train\")\n", 215 | "print(X_train)\n", 216 | "print(\"X_test\")\n", 217 | "print(X_test)\n", 218 | "print(\"Y_train\")\n", 219 | "print(Y_train)\n", 220 | "print(\"Y_test\")\n", 221 | "print(Y_test)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 9, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 
| "output_type": "stream", 232 | "text": [ 233 | "---------------------\n", 234 | "Step 6: Feature Scaling\n", 235 | "X_train\n", 236 | "[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n", 237 | " [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n", 238 | " [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n", 239 | " [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n", 240 | " [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n", 241 | " [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n", 242 | " [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n", 243 | " [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n", 244 | "X_test\n", 245 | "[[-1. 2.64575131 -0.77459667 -1.45882927 -0.90166297]\n", 246 | " [-1. 2.64575131 -0.77459667 1.98496442 2.13981082]]\n" 247 | ] 248 | } 249 | ], 250 | "source": [ 251 | "#Step 6: 特征缩放\n", 252 | "from sklearn.preprocessing import StandardScaler\n", 253 | "sc_X = StandardScaler()\n", 254 | "X_train = sc_X.fit_transform(X_train)\n", 255 | "X_test = sc_X.transform(X_test)\n", 256 | "print(\"---------------------\")\n", 257 | "print(\"Step 6: Feature Scaling\")\n", 258 | "print(\"X_train\")\n", 259 | "print(X_train)\n", 260 | "print(\"X_test\")\n", 261 | "print(X_test)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [] 270 | } 271 | ], 272 | "metadata": { 273 | "kernelspec": { 274 | "display_name": "Python 3", 275 | "language": "python", 276 | "name": "python3" 277 | }, 278 | "language_info": { 279 | "codemirror_mode": { 280 | "name": "ipython", 281 | "version": 3 282 | }, 283 | "file_extension": ".py", 284 | "mimetype": "text/x-python", 285 | "name": "python", 286 | "nbconvert_exporter": "python", 287 | "pygments_lexer": "ipython3", 288 | "version": "3.7.1" 289 | }, 290 | "widgets": { 291 | "application/vnd.jupyter.widget-state+json": { 292 | "state": {}, 293 | "version_major": 2, 294 | "version_minor": 0 295 | } 296 | } 297 | }, 298 | "nbformat": 4, 299 | "nbformat_minor": 2 300 | } 301 | -------------------------------------------------------------------------------- /homework-04/姚宇.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 32, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#1: 导入类库\n", 10 | "\n", 11 | "import numpy as np\n", 12 | "import pandas as pd" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 33, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "Step 2: Importing dataset\n", 25 | "X\n", 26 | "[['France' 44.0 72000.0]\n", 27 | " ['Spain' 27.0 48000.0]\n", 28 | " ['Germany' 30.0 54000.0]\n", 29 | " ['Spain' 38.0 61000.0]\n", 30 | " ['Germany' 40.0 nan]\n", 31 | " ['France' 35.0 58000.0]\n", 32 | " ['Spain' nan 52000.0]\n", 33 | " ['France' 48.0 79000.0]\n", 34 | " ['Germany' 50.0 83000.0]\n", 35 | " ['France' 37.0 67000.0]]\n", 36 | "Y\n", 37 | "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "dataset = pd.read_csv('Data.csv')\n", 43 | "# 不包括最后一列的所有列\n", 44 | "X = dataset.iloc[ : , :-1].values\n", 45 | "#取最后一列\n", 46 | "Y = dataset.iloc[ : , 3].values\n", 47 | "print(\"Step 2: Importing dataset\")\n", 48 | "print(\"X\")\n", 49 | "print(X)\n", 50 | "print(\"Y\")\n", 51 | "print(Y)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 34, 
57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "---------------------\n", 64 | "Step 3: Handling the missing data\n", 65 | "step2\n", 66 | "X\n", 67 | "[['France' 44.0 72000.0]\n", 68 | " ['Spain' 27.0 48000.0]\n", 69 | " ['Germany' 30.0 54000.0]\n", 70 | " ['Spain' 38.0 61000.0]\n", 71 | " ['Germany' 40.0 63777.77777777778]\n", 72 | " ['France' 35.0 58000.0]\n", 73 | " ['Spain' 38.77777777777778 52000.0]\n", 74 | " ['France' 48.0 79000.0]\n", 75 | " ['Germany' 50.0 83000.0]\n", 76 | " ['France' 37.0 67000.0]]\n" 77 | ] 78 | }, 79 | { 80 | "name": "stderr", 81 | "output_type": "stream", 82 | "text": [ 83 | "D:\\Anaconda3\\envs\\yaoyu\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n", 84 | " warnings.warn(msg, category=DeprecationWarning)\n" 85 | ] 86 | } 87 | ], 88 | "source": [ 89 | "#3: 处理缺失的数据\n", 90 | "from sklearn.preprocessing import Imputer\n", 91 | "# axis=0表示按列进行\n", 92 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", 93 | "imputer = imputer.fit(X[ : , 1:3])\n", 94 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", 95 | "print(\"---------------------\")\n", 96 | "print(\"Step 3: Handling the missing data\")\n", 97 | "print(\"step2\")\n", 98 | "print(\"X\")\n", 99 | "print(X)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 35, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "---------------------\n", 112 | "Step 4: Encoding categorical data\n", 113 | "X\n", 114 | "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 115 | " 7.20000000e+04]\n", 116 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 117 | " 4.80000000e+04]\n", 118 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", 119 | " 5.40000000e+04]\n", 120 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 121 | " 6.10000000e+04]\n", 122 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 123 | " 6.37777778e+04]\n", 124 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 125 | " 5.80000000e+04]\n", 126 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 127 | " 5.20000000e+04]\n", 128 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 129 | " 7.90000000e+04]\n", 130 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", 131 | " 8.30000000e+04]\n", 132 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 133 | " 6.70000000e+04]]\n", 134 | "Y\n", 135 | "[0 1 0 0 1 1 0 1 0 1]\n" 136 | ] 137 | }, 138 | { 139 | "name": "stderr", 140 | "output_type": "stream", 141 | "text": [ 142 | "D:\\Anaconda3\\envs\\yaoyu\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:371: FutureWarning: The handling of integer data will change in version 0.22. 
Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n", 143 | "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n", 144 | "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n", 145 | " warnings.warn(msg, FutureWarning)\n", 146 | "D:\\Anaconda3\\envs\\yaoyu\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:392: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n", 147 | " \"use the ColumnTransformer instead.\", DeprecationWarning)\n" 148 | ] 149 | } 150 | ], 151 | "source": [ 152 | "from sklearn.preprocessing import LabelEncoder,OneHotEncoder\n", 153 | "labelencoder_X = LabelEncoder()\n", 154 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n", 155 | "#Creating a dummy variable\n", 156 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 157 | "X = onehotencoder.fit_transform(X).toarray()\n", 158 | "labelencoder_Y = LabelEncoder()\n", 159 | "Y = labelencoder_Y.fit_transform(Y)\n", 160 | "print(\"---------------------\")\n", 161 | "print(\"Step 4: Encoding categorical data\")\n", 162 | "print(\"X\")\n", 163 | "print(X)\n", 164 | "print(\"Y\")\n", 165 | "print(Y)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 36, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "name": "stdout", 175 | "output_type": "stream", 176 | "text": [ 177 | "---------------------\n", 178 | "Step 5: Splitting the datasets into training sets and Test sets\n", 179 | "X_train\n", 180 | "[[0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 181 | " 6.37777778e+04]\n", 182 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 183 | " 6.70000000e+04]\n", 184 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 185 | " 4.80000000e+04]\n", 186 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 187 | " 5.20000000e+04]\n", 188 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 189 | " 7.90000000e+04]\n", 190 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 191 | " 6.10000000e+04]\n", 192 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 193 | " 7.20000000e+04]\n", 194 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 195 | " 5.80000000e+04]]\n", 196 | "X_test\n", 197 | "[[0.0e+00 1.0e+00 0.0e+00 3.0e+01 5.4e+04]\n", 198 | " [0.0e+00 1.0e+00 0.0e+00 5.0e+01 8.3e+04]]\n", 199 | "Y_train\n", 200 | "[1 1 1 0 1 0 0 1]\n", 201 | "Y_test\n", 202 | "[0 0]\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "#Step 5: 切分数据集成训练数据和测试数据\n", 208 | "from sklearn.model_selection import train_test_split\n", 209 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n", 210 | "print(\"---------------------\")\n", 211 | "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n", 212 | "print(\"X_train\")\n", 213 | "print(X_train)\n", 214 | "print(\"X_test\")\n", 215 | "print(X_test)\n", 216 | "print(\"Y_train\")\n", 217 | "print(Y_train)\n", 218 | "print(\"Y_test\")\n", 219 | "print(Y_test)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 37, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "name": "stdout", 229 
| "output_type": "stream", 230 | "text": [ 231 | "---------------------\n", 232 | "Step 6: Feature Scaling\n", 233 | "X_train\n", 234 | "[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n", 235 | " [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n", 236 | " [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n", 237 | " [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n", 238 | " [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n", 239 | " [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n", 240 | " [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n", 241 | " [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n", 242 | "X_test\n", 243 | "[[-1. 2.64575131 -0.77459667 -1.45882927 -0.90166297]\n", 244 | " [-1. 2.64575131 -0.77459667 1.98496442 2.13981082]]\n" 245 | ] 246 | } 247 | ], 248 | "source": [ 249 | "#Step 6: 特征缩放\n", 250 | "from sklearn.preprocessing import StandardScaler\n", 251 | "sc_X = StandardScaler()\n", 252 | "X_train = sc_X.fit_transform(X_train)\n", 253 | "X_test = sc_X.transform(X_test)\n", 254 | "print(\"---------------------\")\n", 255 | "print(\"Step 6: Feature Scaling\")\n", 256 | "print(\"X_train\")\n", 257 | "print(X_train)\n", 258 | "print(\"X_test\")\n", 259 | "print(X_test)" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [] 275 | } 276 | ], 277 | "metadata": { 278 | "kernelspec": { 279 | "display_name": "Python 3", 280 | "language": "python", 281 | "name": "python3" 282 | }, 283 | "language_info": { 284 | "codemirror_mode": { 285 | "name": "ipython", 286 | "version": 3 287 | }, 288 | "file_extension": ".py", 289 | "mimetype": "text/x-python", 290 | "name": "python", 291 | "nbconvert_exporter": "python", 292 | "pygments_lexer": "ipython3", 293 | "version": "3.6.8" 294 | } 295 | }, 296 | "nbformat": 4, 297 | "nbformat_minor": 2 298 | } 299 | -------------------------------------------------------------------------------- /homework-04/张博.ipynb: -------------------------------------------------------------------------------- 1 | #Day 1: Data Prepocessing 2 | 3 | #Step 1: Importing the libraries 4 | import numpy as np 5 | import pandas as pd 6 | 7 | #Step 2: Importing dataset 8 | dataset = pd.read_csv('D:\datasets\Data.csv') 9 | X = dataset.iloc[ : , :-1].values 10 | Y = dataset.iloc[ : , 3].values 11 | print("Step 2: Importing dataset") 12 | print("X") 13 | print(X) 14 | print("Y") 15 | print(Y) 16 | 17 | #Step 3: Handling the missing data 18 | from sklearn.preprocessing import Imputer 19 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 20 | imputer = imputer.fit(X[ : , 1:3]) 21 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 22 | print("---------------------") 23 | print("Step 3: Handling the missing data") 24 | print("step2") 25 | print("X") 26 | print(X) 27 | 28 | #Step 4: Encoding categorical data 29 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 30 | labelencoder_X = LabelEncoder() 31 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 32 | #Creating a dummy variable 33 | onehotencoder = OneHotEncoder(categorical_features = [0]) 34 | X = onehotencoder.fit_transform(X).toarray() 35 | labelencoder_Y = LabelEncoder() 36 | Y = labelencoder_Y.fit_transform(Y) 37 | print("---------------------") 38 | print("Step 4: Encoding categorical data") 39 | print("X") 40 | print(X) 41 | 
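Why these scripts label-encode the Country column first and then one-hot it: LabelEncoder alone produces integer codes that impose an artificial ordering on the countries, which the dummy columns then remove. A tiny standalone illustration (the values are made up and independent of Data.csv):

import numpy as np
from sklearn.preprocessing import LabelEncoder

countries = np.array(['France', 'Spain', 'Germany', 'Spain'])
codes = LabelEncoder().fit_transform(countries)
print(codes)              # [0 2 1 2] -- classes are sorted, so France=0, Germany=1, Spain=2
one_hot = np.eye(3)[codes]
print(one_hot)            # one dummy column per country; no spurious France < Germany < Spain ordering
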
print("Y") 42 | print(Y) 43 | 44 | #Step 5: Splitting the datasets into training sets and Test sets 45 | from sklearn.model_selection import train_test_split 46 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 47 | print("---------------------") 48 | print("Step 5: Splitting the datasets into training sets and Test sets") 49 | print("X_train") 50 | print(X_train) 51 | print("X_test") 52 | print(X_test) 53 | print("Y_train") 54 | print(Y_train) 55 | print("Y_test") 56 | print(Y_test) 57 | 58 | #Step 6: Feature Scaling 59 | from sklearn.preprocessing import StandardScaler 60 | sc_X = StandardScaler() 61 | X_train = sc_X.fit_transform(X_train) 62 | X_test = sc_X.transform(X_test) 63 | print("---------------------") 64 | print("Step 6: Feature Scaling") 65 | print("X_train") 66 | print(X_train) 67 | print("X_test") 68 | print(X_test) 69 | -------------------------------------------------------------------------------- /homework-04/张晏铭.ipynb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Mar 24 20:53:21 2019 4 | 5 | @author: 38653 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | dataset = pd.read_csv('Data.csv')  # 读取csv文件 11 | X = dataset.iloc[ : , :-1].values  # .iloc[行,列] 12 | Y = dataset.iloc[ : , 3].values  # : 全部行 or 列;[a]第a行 or 列 13 | # [a,b,c]第 a,b,c 行 or 列 14 | from sklearn.preprocessing import Imputer 15 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 16 | imputer = imputer.fit(X[ : , 1:3]) 17 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 18 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 19 | labelencoder_X = LabelEncoder() 20 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 21 | onehotencoder = OneHotEncoder(categorical_features = [0]) 22 | X = onehotencoder.fit_transform(X).toarray() 23 | labelencoder_Y = LabelEncoder() 24 | Y = labelencoder_Y.fit_transform(Y) 25 | from sklearn.model_selection import train_test_split 26 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 27 | from sklearn.preprocessing import StandardScaler 28 | sc_X = StandardScaler() 29 | X_train = sc_X.fit_transform(X_train) 30 | X_test = sc_X.transform(X_test) -------------------------------------------------------------------------------- /homework-04/张泷玲.ipynb: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import pandas as pd 4 | 5 | dataset = pd.read_csv('data.csv') 6 | X = dataset.iloc[ : , :-1].values 7 | Y = dataset.iloc[ : , 3].values 8 | 9 | from sklearn.preprocessing import Imputer 10 | imputer = Imputer(missing_values = "NaN",strategy = "mean", axis = 0) 11 | imputer = imputer.fit(X[ : , 1:3]) 12 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 13 | 14 | from sklearn.preprocessing import LabelEncoder , OneHotEncoder 15 | labelencoder_X = LabelEncoder() 16 | X[ : , 0]=labelencoder_X.fit_transform(X[ : , 0]) 17 | 18 | onehotencoder = OneHotEncoder(categorical_features= [0] ) 19 | X = onehotencoder.fit_transform(X).toarray() 20 | labelencoder_Y = LabelEncoder() 21 | Y = labelencoder_Y.fit_transform(Y) 22 | 23 | from sklearn.model_selection import train_test_split 24 | X_train,X_test,Y_train,Y_test = train_test_split(X , Y ,test_size=0.2,random_state=0) 25 | 26 | from sklearn.preprocessing import StandardScaler 27 | sc_X = StandardScaler() 28 | X_train = sc_X.fit_transform(X_train) 29 | X_test = 
sc_X.transform(X_test) 30 | 31 | -------------------------------------------------------------------------------- /homework-04/戴振.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 24, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 25, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "name": "stdout", 22 | "output_type": "stream", 23 | "text": [ 24 | "Step 2:Importing dataset\n", 25 | "X\n", 26 | "[['France' 44.0 72000.0]\n", 27 | " ['Spain' 27.0 48000.0]\n", 28 | " ['Germany' 30.0 54000.0]\n", 29 | " ['Spain' 38.0 61000.0]\n", 30 | " ['Germany' 40.0 nan]\n", 31 | " ['France' 35.0 58000.0]\n", 32 | " ['Spain' nan 52000.0]\n", 33 | " ['France' 48.0 79000.0]\n", 34 | " ['Germany' 50.0 83000.0]\n", 35 | " ['France' 37.0 67000.0]]\n", 36 | "Y\n", 37 | "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "dataset=pd.read_csv('Data.csv')\n", 43 | "#iloc方法是对数据行列进行操作,中括号逗号左边是行取全部,使用冒号取左闭右开区间的范围\n", 44 | "#逗号右边的冒号右边-1是列取除最后一列的所有列,此处-1可用3替换,即数据只有4列,只取到前三列\n", 45 | "X=dataset.iloc[ : , :-1].values \n", 46 | "#取第四列数据\n", 47 | "Y=dataset.iloc[ : ,3].values\n", 48 | "print('Step 2:Importing dataset')\n", 49 | "print('X')\n", 50 | "print(X)\n", 51 | "print('Y')\n", 52 | "print(Y)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 26, 58 | "metadata": {}, 59 | "outputs": [ 60 | { 61 | "name": "stdout", 62 | "output_type": "stream", 63 | "text": [ 64 | "****************\n", 65 | "Step 3:Handling the missing data\n", 66 | "step2\n", 67 | "X\n", 68 | "[['France' 44.0 72000.0]\n", 69 | " ['Spain' 27.0 48000.0]\n", 70 | " ['Germany' 30.0 54000.0]\n", 71 | " ['Spain' 38.0 61000.0]\n", 72 | " ['Germany' 40.0 63777.77777777778]\n", 73 | " ['France' 35.0 58000.0]\n", 74 | " ['Spain' 38.77777777777778 52000.0]\n", 75 | " ['France' 48.0 79000.0]\n", 76 | " ['Germany' 50.0 83000.0]\n", 77 | " ['France' 37.0 67000.0]]\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "from sklearn.preprocessing import Imputer\n", 83 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\",axis=0)\n", 84 | "imputer = imputer.fit(X[ : ,1:3])\n", 85 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", 86 | "print(\"****************\")\n", 87 | "print(\"Step 3:Handling the missing data\")\n", 88 | "print(\"step2\")\n", 89 | "print(\"X\")\n", 90 | "print(X)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 27, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "name": "stdout", 100 | "output_type": "stream", 101 | "text": [ 102 | "**********************\n", 103 | "Step 4: Encoding categorical data\n", 104 | "X\n", 105 | "[[ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 106 | " 7.20000000e+04]\n", 107 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 108 | " 4.80000000e+04]\n", 109 | " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", 110 | " 5.40000000e+04]\n", 111 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 112 | " 6.10000000e+04]\n", 113 | " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 114 | " 6.37777778e+04]\n", 115 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 116 | " 5.80000000e+04]\n", 117 | " [ 0.00000000e+00 0.00000000e+00 
1.00000000e+00 3.87777778e+01\n", 118 | " 5.20000000e+04]\n", 119 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 120 | " 7.90000000e+04]\n", 121 | " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", 122 | " 8.30000000e+04]\n", 123 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 124 | " 6.70000000e+04]]\n", 125 | "Y\n", 126 | "[0 1 0 0 1 1 0 1 0 1]\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", 132 | "labelencoder_X=LabelEncoder()\n", 133 | "X[ : , 0]=labelencoder_X.fit_transform(X[ : ,0])\n", 134 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 135 | "X = onehotencoder.fit_transform(X).toarray()\n", 136 | "labelencoder_Y = LabelEncoder()\n", 137 | "Y = labelencoder_Y.fit_transform(Y)\n", 138 | "print(\"**********************\")\n", 139 | "print(\"Step 4: Encoding categorical data\")\n", 140 | "print(\"X\")\n", 141 | "print(X)\n", 142 | "print(\"Y\")\n", 143 | "print(Y)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 28, 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "name": "stdout", 153 | "output_type": "stream", 154 | "text": [ 155 | "---------------------\n", 156 | "Step 5: Splitting the datasets into training sets and Test sets\n", 157 | "X_train\n", 158 | "[[ 0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 159 | " 6.37777778e+04]\n", 160 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 161 | " 6.70000000e+04]\n", 162 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 163 | " 4.80000000e+04]\n", 164 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 165 | " 5.20000000e+04]\n", 166 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 167 | " 7.90000000e+04]\n", 168 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 169 | " 6.10000000e+04]\n", 170 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 171 | " 7.20000000e+04]\n", 172 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 173 | " 5.80000000e+04]]\n", 174 | "X_test\n", 175 | "[[ 0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", 176 | " 5.40000000e+04]\n", 177 | " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", 178 | " 8.30000000e+04]]\n", 179 | "Y_train\n", 180 | "[1 1 1 0 1 0 0 1]\n", 181 | "Y_test\n", 182 | "[0 0]\n" 183 | ] 184 | } 185 | ], 186 | "source": [ 187 | "from sklearn.model_selection import train_test_split\n", 188 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n", 189 | "print(\"---------------------\")\n", 190 | "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n", 191 | "print(\"X_train\")\n", 192 | "print(X_train)\n", 193 | "print(\"X_test\")\n", 194 | "print(X_test)\n", 195 | "print(\"Y_train\")\n", 196 | "print(Y_train)\n", 197 | "print(\"Y_test\")\n", 198 | "print(Y_test)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 29, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "name": "stdout", 208 | "output_type": "stream", 209 | "text": [ 210 | "---------------------\n", 211 | "Step 6: Feature Scaling\n", 212 | "X_train\n", 213 | "[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n", 214 | " [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n", 215 | " [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n", 216 | " [-1. 
-0.37796447 1.29099445 0.05261351 -1.11141978]\n", 217 | " [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n", 218 | " [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n", 219 | " [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n", 220 | " [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n", 221 | "X_test\n", 222 | "[[-1. 2.64575131 -0.77459667 -1.45882927 -0.90166297]\n", 223 | " [-1. 2.64575131 -0.77459667 1.98496442 2.13981082]]\n" 224 | ] 225 | } 226 | ], 227 | "source": [ 228 | "from sklearn.preprocessing import StandardScaler\n", 229 | "sc_X = StandardScaler()\n", 230 | "X_train = sc_X.fit_transform(X_train)\n", 231 | "X_test = sc_X.transform(X_test)\n", 232 | "print(\"---------------------\")\n", 233 | "print(\"Step 6: Feature Scaling\")\n", 234 | "print(\"X_train\")\n", 235 | "print(X_train)\n", 236 | "print(\"X_test\")\n", 237 | "print(X_test)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "collapsed": true 245 | }, 246 | "outputs": [], 247 | "source": [] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": { 253 | "collapsed": true 254 | }, 255 | "outputs": [], 256 | "source": [] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "collapsed": true 263 | }, 264 | "outputs": [], 265 | "source": [] 266 | } 267 | ], 268 | "metadata": { 269 | "kernelspec": { 270 | "display_name": "Python 3", 271 | "language": "python", 272 | "name": "python3" 273 | }, 274 | "language_info": { 275 | "codemirror_mode": { 276 | "name": "ipython", 277 | "version": 3 278 | }, 279 | "file_extension": ".py", 280 | "mimetype": "text/x-python", 281 | "name": "python", 282 | "nbconvert_exporter": "python", 283 | "pygments_lexer": "ipython3", 284 | "version": "3.6.2" 285 | } 286 | }, 287 | "nbformat": 4, 288 | "nbformat_minor": 2 289 | } 290 | -------------------------------------------------------------------------------- /homework-04/房增林.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "ename": "FileNotFoundError", 10 | "evalue": "File b'100-Days-Of-ML-Code/datasets/Data.csv' does not exist", 11 | "output_type": "error", 12 | "traceback": [ 13 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 14 | "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", 15 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m#Step 2: Importing dataset\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mdataset\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'100-Days-Of-ML-Code/datasets/Data.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[0mdataset\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'100-Days-Of-ML-Code/datasets/Data.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mX\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mdataset\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[1;33m[\u001b[0m \u001b[1;33m:\u001b[0m \u001b[1;33m,\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 16 | "\u001b[1;32mD:\\Anaconda\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[0;32m 676\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[0;32m 677\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 678\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 679\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 680\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 17 | "\u001b[1;32mD:\\Anaconda\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 438\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 439\u001b[0m \u001b[1;31m# Create the parser.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 440\u001b[1;33m \u001b[0mparser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 441\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 442\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 18 | "\u001b[1;32mD:\\Anaconda\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 785\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'has_index_names'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'has_index_names'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 786\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 787\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 788\u001b[0m 
\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 789\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 19 | "\u001b[1;32mD:\\Anaconda\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[1;34m(self, engine)\u001b[0m\n\u001b[0;32m 1012\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'c'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1013\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'c'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1014\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1015\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1016\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'python'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 20 | "\u001b[1;32mD:\\Anaconda\\lib\\site-packages\\pandas\\io\\parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m 1706\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'usecols'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0musecols\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1707\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1708\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1709\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1710\u001b[0m \u001b[0mpassed_names\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnames\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 21 | "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[1;34m()\u001b[0m\n", 22 | "\u001b[1;32mpandas\\_libs\\parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[1;34m()\u001b[0m\n", 23 | "\u001b[1;31mFileNotFoundError\u001b[0m: File b'100-Days-Of-ML-Code/datasets/Data.csv' does not exist" 24 | ] 25 | } 26 | ], 27 | "source": [ 28 | "import pandas as pd\n", 29 | "dataset = pd.read_csv('100-Days-Of-ML-Code/datasets/Data.csv')\n", 30 | "dataset = pd.read_csv('100-Days-Of-ML-Code/datasets/Data.csv')\n", 31 | "X = dataset.iloc[ : , :-1].values\n", 32 | "Y = dataset.iloc[ : , 
3].values\n", 33 | "print(\"Step 2: Importing dataset\")\n", 34 | "print(\"X\")\n", 35 | "print(X)\n", 36 | "print(\"Y\")\n", 37 | "print(Y)\n", 38 | "from sklearn.preprocessing import Imputer\n", 39 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", 40 | "imputer = imputer.fit(X[ : , 1:3])\n", 41 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", 42 | "print(\"---------------------\")\n", 43 | "print(\"Step 3: Handling the missing data\")\n", 44 | "print(\"step2\")\n", 45 | "print(\"X\")\n", 46 | "print(X)\n", 47 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", 48 | "labelencoder_X = LabelEncoder()\n", 49 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n", 50 | "#Creating a dummy variable\n", 51 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 52 | "X = onehotencoder.fit_transform(X).toarray()\n", 53 | "labelencoder_Y = LabelEncoder()\n", 54 | "Y = labelencoder_Y.fit_transform(Y)\n", 55 | "print(\"---------------------\")\n", 56 | "print(\"Step 4: Encoding categorical data\")\n", 57 | "print(\"X\") \n", 58 | "print(X)\n", 59 | "print(\"Y\")\n", 60 | "print(Y)\n", 61 | "from sklearn.model_selection import train_test_split\n", 62 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n", 63 | "print(\"---------------------\")\n", 64 | "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n", 65 | "print(\"X_train\")\n", 66 | "print(X_train)\n", 67 | "print(\"X_test\")\n", 68 | "print(X_test)\n", 69 | "print(\"Y_train\")\n", 70 | "print(Y_train)\n", 71 | "print(\"Y_test\")\n", 72 | "print(Y_test)\n", 73 | "from sklearn.preprocessing import StandardScaler\n", 74 | "sc_X = StandardScaler()\n", 75 | "X_train = sc_X.fit_transform(X_train)\n", 76 | "X_test = sc_X.transform(X_test)\n", 77 | "print(\"---------------------\")\n", 78 | "print(\"Step 6: Feature Scaling\")\n", 79 | "print(\"X_train\")\n", 80 | "print(X_train)\n", 81 | "print(\"X_test\")\n", 82 | "print(X_test)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": null, 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [] 105 | } 106 | ], 107 | "metadata": { 108 | "kernelspec": { 109 | "display_name": "Python 3", 110 | "language": "python", 111 | "name": "python3" 112 | }, 113 | "language_info": { 114 | "codemirror_mode": { 115 | "name": "ipython", 116 | "version": 3 117 | }, 118 | "file_extension": ".py", 119 | "mimetype": "text/x-python", 120 | "name": "python", 121 | "nbconvert_exporter": "python", 122 | "pygments_lexer": "ipython3", 123 | "version": "3.7.1" 124 | } 125 | }, 126 | "nbformat": 4, 127 | "nbformat_minor": 2 128 | } 129 | -------------------------------------------------------------------------------- /homework-04/曲礼阳.ipynb: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | dataset = pd.read_csv('Data.csv') 5 | X = dataset.iloc[ : , :-1].values 6 | Y = dataset.iloc[ : , 3].values 7 | 8 | from sklearn.preprocessing import Imputer 9 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 10 | imputer = imputer.fit(X[ : , 1:3]) 11 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 12 | 13 | from 
sklearn.preprocessing import LabelEncoder, OneHotEncoder 14 | labelencoder_X = LabelEncoder() 15 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 16 | 17 | onehotencoder = OneHotEncoder(categorical_features = [0]) 18 | X = onehotencoder.fit_transform(X).toarray() 19 | labelencoder_Y = LabelEncoder() 20 | Y = labelencoder_Y.fit_transform(Y) 21 | 22 | from sklearn.cross_validation import train_test_split 23 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 24 | 25 | from sklearn.preprocessing import StandardScaler 26 | sc_X = StandardScaler() 27 | X_train = sc_X.fit_transform(X_train) 28 | X_test = sc_X.fit_transform(X_test) 29 | -------------------------------------------------------------------------------- /homework-04/机器学习第一课.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/heilongjianguniversity/AI-Team/6710f4383b5c33e1dc466112009fba1c05e8069b/homework-04/机器学习第一课.pptx -------------------------------------------------------------------------------- /homework-04/李校宇.ipynb: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | dataset=pd.read_csv('Data.csv') 5 | X=dataset.iloc[ : , :-1].values 6 | Y=dataset.iloc[ : ,3].values 7 | print('Step 2:Importing dataset') 8 | print('X') 9 | print(X) 10 | print('Y') 11 | print(Y) 12 | 13 | from sklearn.preprocessing import Imputer 14 | imputer = Imputer(missing_values = "NaN", strategy = "mean",axis=0) 15 | imputer = imputer.fit(X[ : ,1:3]) 16 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 17 | print("****************") 18 | print("Step 3:Handling the missing data") 19 | print("step2") 20 | print("X") 21 | print(X) 22 | 23 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 24 | labelencoder_X=LabelEncoder() 25 | X[ : , 0]=labelencoder_X.fit_transform(X[ : ,0]) 26 | onehotencoder = OneHotEncoder(categorical_features = [0]) 27 | X = onehotencoder.fit_transform(X).toarray() 28 | labelencoder_Y = LabelEncoder() 29 | Y = labelencoder_Y.fit_transform(Y) 30 | print("**********************") 31 | print("Step 4: Encoding categorical data") 32 | print("X") 33 | print(X) 34 | print("Y") 35 | print(Y) 36 | 37 | from sklearn.model_selection import train_test_split 38 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 39 | print("---------------------") 40 | print("Step 5: Splitting the datasets into training sets and Test sets") 41 | print("X_train") 42 | print(X_train) 43 | print("X_test") 44 | print(X_test) 45 | print("Y_train") 46 | print(Y_train) 47 | print("Y_test") 48 | print(Y_test) 49 | 50 | from sklearn.preprocessing import StandardScaler 51 | sc_X = StandardScaler() 52 | X_train = sc_X.fit_transform(X_train) 53 | X_test = sc_X.transform(X_test) 54 | print("---------------------") 55 | print("Step 6: Feature Scaling") 56 | print("X_train") 57 | print(X_train) 58 | print("X_test") 59 | print(X_test) 60 | -------------------------------------------------------------------------------- /homework-04/胡明玥.ipynb: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | dataset = pd.read_csv('Data.csv')//读取csv文件 5 | X = dataset.iloc[ : , :-1].values//.iloc[行,列] 6 | Y = dataset.iloc[ : , 3].values // : 全部行 or 列;[a]第a行 or 列 7 | // [a,b,c]第 a,b,c 行 or 列 8 | 9 | from sklearn.preprocessing import Imputer 10 | imputer = 
Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 11 | imputer = imputer.fit(X[ : , 1:3]) 12 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 13 | 14 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 15 | labelencoder_X = LabelEncoder() 16 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 17 | 18 | onehotencoder = OneHotEncoder(categorical_features = [0]) 19 | X = onehotencoder.fit_transform(X).toarray() 20 | labelencoder_Y = LabelEncoder() 21 | Y = labelencoder_Y.fit_transform(Y) 22 | 23 | from sklearn.model_selection import train_test_split 24 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 25 | 26 | from sklearn.preprocessing import StandardScaler 27 | sc_X = StandardScaler() 28 | X_train = sc_X.fit_transform(X_train) 29 | X_test = sc_X.transform(X_test) 30 | -------------------------------------------------------------------------------- /homework-04/苏峥.ipynb: -------------------------------------------------------------------------------- 1 | #Day 1: Data Prepocessing 2 | 3 | #Step 1: Importing the libraries 4 | import numpy as np 5 | import pandas as pd 6 | 7 | #Step 2: Importing dataset 8 | dataset = pd.read_csv('../datasets/Data.csv') 9 | X = dataset.iloc[ : , :-1].values 10 | Y = dataset.iloc[ : , 3].values 11 | print("Step 2: Importing dataset") 12 | print("X") 13 | print(X) 14 | print("Y") 15 | print(Y) 16 | 17 | #Step 3: Handling the missing data 18 | from sklearn.preprocessing import Imputer 19 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 20 | imputer = imputer.fit(X[ : , 1:3]) 21 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 22 | print("---------------------") 23 | print("Step 3: Handling the missing data") 24 | print("step2") 25 | print("X") 26 | print(X) 27 | 28 | #Step 4: Encoding categorical data 29 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 30 | labelencoder_X = LabelEncoder() 31 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 32 | #Creating a dummy variable 33 | onehotencoder = OneHotEncoder(categorical_features = [0]) 34 | X = onehotencoder.fit_transform(X).toarray() 35 | labelencoder_Y = LabelEncoder() 36 | Y = labelencoder_Y.fit_transform(Y) 37 | print("---------------------") 38 | print("Step 4: Encoding categorical data") 39 | print("X") 40 | print(X) 41 | print("Y") 42 | print(Y) 43 | 44 | #Step 5: Splitting the datasets into training sets and Test sets 45 | from sklearn.model_selection import train_test_split 46 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 47 | print("---------------------") 48 | print("Step 5: Splitting the datasets into training sets and Test sets") 49 | print("X_train") 50 | print(X_train) 51 | print("X_test") 52 | print(X_test) 53 | print("Y_train") 54 | print(Y_train) 55 | print("Y_test") 56 | print(Y_test) 57 | 58 | #Step 6: Feature Scaling 59 | from sklearn.preprocessing import StandardScaler 60 | sc_X = StandardScaler() 61 | X_train = sc_X.fit_transform(X_train) 62 | X_test = sc_X.transform(X_test) 63 | print("---------------------") 64 | print("Step 6: Feature Scaling") 65 | print("X_train") 66 | print(X_train) 67 | print("X_test") 68 | print(X_test) 69 | -------------------------------------------------------------------------------- /homework-04/范升旭.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 
| "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "from sklearn.preprocessing import Imputer" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Step 2: Importing dataset\n", 24 | "X\n", 25 | "[['France' 44.0 72000.0]\n", 26 | " ['Spain' 27.0 48000.0]\n", 27 | " ['Germany' 30.0 54000.0]\n", 28 | " ['Spain' 38.0 61000.0]\n", 29 | " ['Germany' 40.0 nan]\n", 30 | " ['France' 35.0 58000.0]\n", 31 | " ['Spain' nan 52000.0]\n", 32 | " ['France' 48.0 79000.0]\n", 33 | " ['Germany' 50.0 83000.0]\n", 34 | " ['France' 37.0 67000.0]]\n", 35 | "Y\n", 36 | "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" 37 | ] 38 | } 39 | ], 40 | "source": [ 41 | "dataset = pd.read_csv('Data.csv')\n", 42 | "X = dataset.iloc[ : , :-1].values ## 创建独立变量---选取前三列\n", 43 | "Y = dataset.iloc[ : , 3].values ## 创建依赖变量-----选取最后一列 返回值的类型仍为 dataframe\n", 44 | "print(\"Step 2: Importing dataset\")\n", 45 | "print(\"X\")\n", 46 | "print(X)\n", 47 | "print(\"Y\")\n", 48 | "print(Y)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stdout", 58 | "output_type": "stream", 59 | "text": [ 60 | "---------------------\n", 61 | "Step 3: Handling the missing data\n", 62 | "step2\n", 63 | "X\n", 64 | "[['France' 44.0 72000.0]\n", 65 | " ['Spain' 27.0 48000.0]\n", 66 | " ['Germany' 30.0 54000.0]\n", 67 | " ['Spain' 38.0 61000.0]\n", 68 | " ['Germany' 40.0 63777.77777777778]\n", 69 | " ['France' 35.0 58000.0]\n", 70 | " ['Spain' 38.77777777777778 52000.0]\n", 71 | " ['France' 48.0 79000.0]\n", 72 | " ['Germany' 50.0 83000.0]\n", 73 | " ['France' 37.0 67000.0]]\n" 74 | ] 75 | }, 76 | { 77 | "name": "stderr", 78 | "output_type": "stream", 79 | "text": [ 80 | "D:\\anaconda\\envs\\ANACONDA\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. 
Import impute.SimpleImputer from sklearn instead.\n", 81 | " warnings.warn(msg, category=DeprecationWarning)\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "from sklearn.preprocessing import Imputer\n", 87 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", 88 | "imputer = imputer.fit(X[ : , 1:3])\n", 89 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3]) ## 将其应用到数据\n", 90 | "print(\"---------------------\")\n", 91 | "print(\"Step 3: Handling the missing data\")\n", 92 | "print(\"step2\")\n", 93 | "print(\"X\")\n", 94 | "print(X)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 4, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "name": "stdout", 104 | "output_type": "stream", 105 | "text": [ 106 | "---------------------\n", 107 | "Step 3: Handling the missing data\n", 108 | "step2\n", 109 | "X\n", 110 | "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 111 | " 7.20000000e+04]\n", 112 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 113 | " 4.80000000e+04]\n", 114 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", 115 | " 5.40000000e+04]\n", 116 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 117 | " 6.10000000e+04]\n", 118 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 119 | " 6.37777778e+04]\n", 120 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 121 | " 5.80000000e+04]\n", 122 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 123 | " 5.20000000e+04]\n", 124 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 125 | " 7.90000000e+04]\n", 126 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", 127 | " 8.30000000e+04]\n", 128 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 129 | " 6.70000000e+04]]\n" 130 | ] 131 | }, 132 | { 133 | "name": "stderr", 134 | "output_type": "stream", 135 | "text": [ 136 | "D:\\anaconda\\envs\\ANACONDA\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:371: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n", 137 | "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n", 138 | "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n", 139 | " warnings.warn(msg, FutureWarning)\n", 140 | "D:\\anaconda\\envs\\ANACONDA\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:392: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. 
You can use the ColumnTransformer instead.\n", 141 | " \"use the ColumnTransformer instead.\", DeprecationWarning)\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", 147 | "labelencoder_X = LabelEncoder()\n", 148 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n", 149 | "\n", 150 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 151 | "X = onehotencoder.fit_transform(X).toarray()\n", 152 | "\n", 153 | "labelencoder_Y = LabelEncoder() \n", 154 | "Y = labelencoder_Y.fit_transform(Y)\n", 155 | "print(\"---------------------\")\n", 156 | "print(\"Step 3: Handling the missing data\")\n", 157 | "print(\"step2\")\n", 158 | "print(\"X\")\n", 159 | "print(X)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 5, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "from sklearn.model_selection import train_test_split\n", 169 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 6, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "name": "stdout", 179 | "output_type": "stream", 180 | "text": [ 181 | "---------------------\n", 182 | "Step 6: Feature Scaling\n", 183 | "X_train\n", 184 | "[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n", 185 | " [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n", 186 | " [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n", 187 | " [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n", 188 | " [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n", 189 | " [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n", 190 | " [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n", 191 | " [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n", 192 | "X_test\n", 193 | "[[ 0. 0. 0. -1. -1.]\n", 194 | " [ 0. 0. 0. 1. 
1.]]\n" 195 | ] 196 | } 197 | ], 198 | "source": [ 199 | "from sklearn.preprocessing import StandardScaler\n", 200 | "sc_X = StandardScaler()\n", 201 | "X_train = sc_X.fit_transform(X_train)\n", 202 | "X_test = sc_X.fit_transform(X_test)\n", 203 | "print(\"---------------------\")\n", 204 | "print(\"Step 6: Feature Scaling\")\n", 205 | "print(\"X_train\")\n", 206 | "print(X_train)\n", 207 | "print(\"X_test\")\n", 208 | "print(X_test)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [] 224 | } 225 | ], 226 | "metadata": { 227 | "kernelspec": { 228 | "display_name": "Python 3", 229 | "language": "python", 230 | "name": "python3" 231 | }, 232 | "language_info": { 233 | "codemirror_mode": { 234 | "name": "ipython", 235 | "version": 3 236 | }, 237 | "file_extension": ".py", 238 | "mimetype": "text/x-python", 239 | "name": "python", 240 | "nbconvert_exporter": "python", 241 | "pygments_lexer": "ipython3", 242 | "version": "3.6.6" 243 | } 244 | }, 245 | "nbformat": 4, 246 | "nbformat_minor": 2 247 | } 248 | -------------------------------------------------------------------------------- /homework-04/谢易凡.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "ename": "SyntaxError", 10 | "evalue": "invalid syntax (, line 5)", 11 | "output_type": "error", 12 | "traceback": [ 13 | "\u001b[1;36m File \u001b[1;32m\"\"\u001b[1;36m, line \u001b[1;32m5\u001b[0m\n\u001b[1;33m : -1].values // .iloc[行,列]\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "dataset = pd.read_csv('Data .csv') // 读取csv文件\n", 21 | "X = dataset.iloc[ : ,\n", 22 | " : -1].values // .iloc[行,列]\n", 23 | "Y = dataset.iloc[ : , 3].values \n", 24 | "// : 全部行 or 列;[a]第a行 or 列\n", 25 | "// [a,b,c]第 a,b,c 行 or 列\n", 26 | " from sklearn.preprocessing import Imputer\n", 27 | " imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\",axis = 0)\n", 28 | " imputer = imputer.fit(X[ : , 1:3])\n", 29 | " X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", 30 | " from sklearn.preprocessing import LabelEncoder ,OneHotEncoderEncoder()X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n", 31 | " onehotencoder = OneHotEncoder(categorical_features = [0])\n", 32 | " X = onehotencoder.fit_transform(X).toarr\n", 33 | " labelencoder_Y = LabelEncoder()\n", 34 | " Y = labelencoder_Y.fit_transform(Y)\n", 35 | " from sklearn.model_selection import train_tese_split\n", 36 | " X_train, X_test, Y_train, Y_test = train_tese_split(X ,Y ,test_size = 0.2, random_state = 0)\n", 37 | " from sklearn.preprocessing import StandardScaler \n", 38 | " sc_X = StandardScaler()\n", 39 | " X_train = sc_X.fit_transform(X_train)\n", 40 | " X_tese = sc_X.transform(X_test)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [] 49 | } 50 | ], 51 | "metadata": { 52 | "kernelspec": { 53 | "display_name": "Python 3", 54 | "language": "python", 55 | "name": "python3" 56 | }, 57 | "language_info": { 58 | "codemirror_mode": { 59 | "name": "ipython", 60 | "version": 
3 61 | }, 62 | "file_extension": ".py", 63 | "mimetype": "text/x-python", 64 | "name": "python", 65 | "nbconvert_exporter": "python", 66 | "pygments_lexer": "ipython3", 67 | "version": "3.7.1" 68 | } 69 | }, 70 | "nbformat": 4, 71 | "nbformat_minor": 2 72 | } 73 | -------------------------------------------------------------------------------- /homework-04/闫泳寰.ipynb: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | dataset = pd.read_csv('../datasets/Data.csv') 5 | X = dataset.iloc[ : , :-1].values 6 | Y = dataset.iloc[ : , 3].values 7 | print("Step 2: Importing dataset") 8 | print("X") 9 | print(X) 10 | print("Y") 11 | print(Y) 12 | 13 | from sklearn.preprocessing import Imputer 14 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 15 | imputer = imputer.fit(X[ : , 1:3]) 16 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 17 | print("---------------------") 18 | print("Step 3: Handling the missing data") 19 | print("step2") 20 | print("X") 21 | print(X) 22 | 23 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 24 | labelencoder_X = LabelEncoder() 25 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 26 | #Creating a dummy variable 27 | onehotencoder = OneHotEncoder(categorical_features = [0]) 28 | X = onehotencoder.fit_transform(X).toarray() 29 | labelencoder_Y = LabelEncoder() 30 | Y = labelencoder_Y.fit_transform(Y) 31 | print("---------------------") 32 | print("Step 4: Encoding categorical data") 33 | print("X") 34 | print(X) 35 | print("Y") 36 | print(Y) 37 | 38 | from sklearn.model_selection import train_test_split 39 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 40 | print("---------------------") 41 | print("Step 5: Splitting the datasets into training sets and Test sets") 42 | print("X_train") 43 | print(X_train) 44 | print("X_test") 45 | print(X_test) 46 | print("Y_train") 47 | print(Y_train) 48 | print("Y_test") 49 | print(Y_test) 50 | 51 | from sklearn.preprocessing import StandardScaler 52 | sc_X = StandardScaler() 53 | X_train = sc_X.fit_transform(X_train) 54 | X_test = sc_X.transform(X_test) 55 | print("---------------------") 56 | print("Step 6: Feature Scaling") 57 | print("X_train") 58 | print(X_train) 59 | print("X_test") 60 | print(X_test) -------------------------------------------------------------------------------- /homework-04/阳治玖.ipynb: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | dataset = pd.read_csv('Data.csv') 4 | X = dataset.iloc[ : , :-1].values 5 | Y = dataset.iloc[ : , 3].values 6 | from sklearn.preprocessing import Imputer 7 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 8 | imputer = imputer.fit(X[ : , 1:3]) 9 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 10 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 11 | labelencoder_X = LabelEncoder() 12 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 13 | onehotencoder = OneHotEncoder(categorical_features = [0]) 14 | X = onehotencoder.fit_transform(X).toarray() 15 | labelencoder_Y = LabelEncoder() 16 | Y = labelencoder_Y.fit_transform(Y) 17 | from sklearn.cross_validation import train_test_split 18 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 19 | from sklearn.preprocessing import StandardScaler 20 | sc_X = StandardScaler() 21 | X_train = 
sc_X.fit_transform(X_train) 22 | X_test = sc_X.fit_transform(X_test) -------------------------------------------------------------------------------- /homework-04/陈宝旭.ipynb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 6 | """ 7 | 8 | import numpy as np 9 | import pandas as pd 10 | 11 | dataset = pd.read_csv('data.csv') 12 | X = dataset.iloc[ : , :-1].values 13 | Y = dataset.iloc[ : , : 3].values 14 | 15 | from sklearn.perprocessing import Imputer 16 | imputer = Imputer(missing_values = "NAN",strategy = "mean", axis = 0) 17 | imputer = Imputer.fit(X[ : , 1 :3]) 18 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 19 | 20 | from sklearn.perprocessing import LabelEncoder, OneHotEncoder 21 | labelencoder_X = LabelEncoder() 22 | X[ : , 0]=labelencoder_X.fit_transform(X[ : , 0]) 23 | 24 | onehotencoder = OneHotEncoder(categorical_features= [0] ) 25 | X=onehotencoder.fix_transform(X).toarray() 26 | labelencoder_Y = LabelEncoder() 27 | Y = labelencoder_Y.fit_transform(Y) 28 | 29 | from sklearn.model_seletion import train_test_split 30 | X_train,X_test,Y_train,Y_test = train_test_split(X , Y ,test_size=0.2,random_state=0) 31 | 32 | from sklearn.perprocessing import StandardScaler 33 | sc_X = StandardScaler() 34 | X_train = sc_X.fit_transform(X_train) 35 | X_test = sc_X.transform(X_test) 36 | 37 | -------------------------------------------------------------------------------- /homework-04/陈瑞.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Step 2: Importing dataset\n", 13 | "X\n", 14 | "[['France' 44.0 72000.0]\n", 15 | " ['Spain' 27.0 48000.0]\n", 16 | " ['Germany' 30.0 54000.0]\n", 17 | " ['Spain' 38.0 61000.0]\n", 18 | " ['Germany' 40.0 nan]\n", 19 | " ['France' 35.0 58000.0]\n", 20 | " ['Spain' nan 52000.0]\n", 21 | " ['France' 48.0 79000.0]\n", 22 | " ['Germany' 50.0 83000.0]\n", 23 | " ['France' 37.0 67000.0]]\n", 24 | "Y\n", 25 | "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "import numpy as np\n", 31 | "import pandas as pd\n", 32 | "dataset = pd.read_csv('./Data.csv')\n", 33 | "# 不包括最后一列的所有列\n", 34 | "X = dataset.iloc[ : , :-1].values\n", 35 | "#取最后一列\n", 36 | "Y = dataset.iloc[ : , 3].values\n", 37 | "print(\"Step 2: Importing dataset\")\n", 38 | "print(\"X\")\n", 39 | "print(X)\n", 40 | "print(\"Y\")\n", 41 | "print(Y)" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "---------------------\n", 54 | "Step 3: Handling the missing data\n", 55 | "step2\n", 56 | "X\n", 57 | "[['France' 44.0 72000.0]\n", 58 | " ['Spain' 27.0 48000.0]\n", 59 | " ['Germany' 30.0 54000.0]\n", 60 | " ['Spain' 38.0 61000.0]\n", 61 | " ['Germany' 40.0 63777.77777777778]\n", 62 | " ['France' 35.0 58000.0]\n", 63 | " ['Spain' 38.77777777777778 52000.0]\n", 64 | " ['France' 48.0 79000.0]\n", 65 | " ['Germany' 50.0 83000.0]\n", 66 | " ['France' 37.0 67000.0]]\n" 67 | ] 68 | }, 69 | { 70 | "name": "stderr", 71 | "output_type": "stream", 72 | "text": [ 73 | "C:\\Users\\dell\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class 
Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n", 74 | " warnings.warn(msg, category=DeprecationWarning)\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "from sklearn.preprocessing import Imputer\n", 80 | "# axis=0表示按列进行\n", 81 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", 82 | "imputer = imputer.fit(X[ : , 1:3])\n", 83 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", 84 | "print(\"---------------------\")\n", 85 | "print(\"Step 3: Handling the missing data\")\n", 86 | "print(\"step2\")\n", 87 | "print(\"X\")\n", 88 | "print(X)\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stderr", 98 | "output_type": "stream", 99 | "text": [ 100 | "C:\\Users\\dell\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:368: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n", 101 | "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n", 102 | "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n", 103 | " warnings.warn(msg, FutureWarning)\n", 104 | "C:\\Users\\dell\\Anaconda3\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:390: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n", 105 | " \"use the ColumnTransformer instead.\", DeprecationWarning)\n" 106 | ] 107 | }, 108 | { 109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "---------------------\n", 113 | "Step 4: Encoding categorical data\n", 114 | "X\n", 115 | "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 116 | " 7.20000000e+04]\n", 117 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 118 | " 4.80000000e+04]\n", 119 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", 120 | " 5.40000000e+04]\n", 121 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 122 | " 6.10000000e+04]\n", 123 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 124 | " 6.37777778e+04]\n", 125 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 126 | " 5.80000000e+04]\n", 127 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 128 | " 5.20000000e+04]\n", 129 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 130 | " 7.90000000e+04]\n", 131 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", 132 | " 8.30000000e+04]\n", 133 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 134 | " 6.70000000e+04]]\n", 135 | "Y\n", 136 | "[0 1 0 0 1 1 0 1 0 1]\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", 142 | "labelencoder_X = LabelEncoder()\n", 143 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n", 144 | "#Creating a dummy variable\n", 145 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 146 | "X = onehotencoder.fit_transform(X).toarray()\n", 147 | "labelencoder_Y = LabelEncoder()\n", 148 | "Y = labelencoder_Y.fit_transform(Y)\n", 149 | 
"print(\"---------------------\")\n", 150 | "print(\"Step 4: Encoding categorical data\")\n", 151 | "print(\"X\")\n", 152 | "print(X)\n", 153 | "print(\"Y\")\n", 154 | "print(Y)\n" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 5, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "---------------------\n", 167 | "Step 5: Splitting the datasets into training sets and Test sets\n", 168 | "X_train\n", 169 | "[[0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 170 | " 6.37777778e+04]\n", 171 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 172 | " 6.70000000e+04]\n", 173 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 174 | " 4.80000000e+04]\n", 175 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 176 | " 5.20000000e+04]\n", 177 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 178 | " 7.90000000e+04]\n", 179 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 180 | " 6.10000000e+04]\n", 181 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 182 | " 7.20000000e+04]\n", 183 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 184 | " 5.80000000e+04]]\n", 185 | "X_test\n", 186 | "[[0.0e+00 1.0e+00 0.0e+00 3.0e+01 5.4e+04]\n", 187 | " [0.0e+00 1.0e+00 0.0e+00 5.0e+01 8.3e+04]]\n", 188 | "Y_train\n", 189 | "[1 1 1 0 1 0 0 1]\n", 190 | "Y_test\n", 191 | "[0 0]\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "from sklearn.model_selection import train_test_split\n", 197 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n", 198 | "print(\"---------------------\")\n", 199 | "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n", 200 | "print(\"X_train\")\n", 201 | "print(X_train)\n", 202 | "print(\"X_test\")\n", 203 | "print(X_test)\n", 204 | "print(\"Y_train\")\n", 205 | "print(Y_train)\n", 206 | "print(\"Y_test\")\n", 207 | "print(Y_test)\n" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 6, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "name": "stdout", 217 | "output_type": "stream", 218 | "text": [ 219 | "---------------------\n", 220 | "Step 6: Feature Scaling\n", 221 | "X_train\n", 222 | "[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n", 223 | " [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n", 224 | " [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n", 225 | " [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n", 226 | " [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n", 227 | " [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n", 228 | " [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n", 229 | " [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n", 230 | "X_test\n", 231 | "[[-1. 2.64575131 -0.77459667 -1.45882927 -0.90166297]\n", 232 | " [-1. 
2.64575131 -0.77459667 1.98496442 2.13981082]]\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "from sklearn.preprocessing import StandardScaler\n", 238 | "sc_X = StandardScaler()\n", 239 | "X_train = sc_X.fit_transform(X_train)\n", 240 | "X_test = sc_X.transform(X_test)\n", 241 | "print(\"---------------------\")\n", 242 | "print(\"Step 6: Feature Scaling\")\n", 243 | "print(\"X_train\")\n", 244 | "print(X_train)\n", 245 | "print(\"X_test\")\n", 246 | "print(X_test)\n" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [] 255 | } 256 | ], 257 | "metadata": { 258 | "kernelspec": { 259 | "display_name": "Python 3", 260 | "language": "python", 261 | "name": "python3" 262 | }, 263 | "language_info": { 264 | "codemirror_mode": { 265 | "name": "ipython", 266 | "version": 3 267 | }, 268 | "file_extension": ".py", 269 | "mimetype": "text/x-python", 270 | "name": "python", 271 | "nbconvert_exporter": "python", 272 | "pygments_lexer": "ipython3", 273 | "version": "3.7.1" 274 | } 275 | }, 276 | "nbformat": 4, 277 | "nbformat_minor": 2 278 | } 279 | -------------------------------------------------------------------------------- /homework-04/韩依格.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 50, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 51, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "dataset = pd.read_csv('Data.csv')\n", 20 | "X = dataset.iloc[: , : - 1].values\n", 21 | "Y = dataset.iloc[: , 3 ].values" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 52, 27 | "metadata": { 28 | "scrolled": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "from sklearn.preprocessing import Imputer\n", 33 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", 34 | "imputer = imputer.fit(X[ : , 1:3])\n", 35 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 53, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", 45 | "labelencoder_X = LabelEncoder()\n", 46 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 54, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 56 | "X = onehotencoder.fit_transform(X).toarray()\n", 57 | "labelencoder_Y = LabelEncoder()\n", 58 | "Y = labelencoder_Y.fit_transform(Y)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 55, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "from sklearn.model_selection import train_test_split\n", 68 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 56, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "from sklearn.preprocessing import StandardScaler\n", 78 | "sc_X = StandardScaler()\n", 79 | "X_train = sc_X.fit_transform(X_train)\n", 80 | "X_test = sc_X.transform(X_test)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | 
"outputs": [], 88 | "source": [] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [] 96 | } 97 | ], 98 | "metadata": { 99 | "kernelspec": { 100 | "display_name": "Python 3", 101 | "language": "python", 102 | "name": "python3" 103 | }, 104 | "language_info": { 105 | "codemirror_mode": { 106 | "name": "ipython", 107 | "version": 3 108 | }, 109 | "file_extension": ".py", 110 | "mimetype": "text/x-python", 111 | "name": "python", 112 | "nbconvert_exporter": "python", 113 | "pygments_lexer": "ipython3", 114 | "version": "3.6.5" 115 | } 116 | }, 117 | "nbformat": 4, 118 | "nbformat_minor": 2 119 | } 120 | -------------------------------------------------------------------------------- /homework-04/马士尧.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 34, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "[['France' 44.0 72000.0]\n", 13 | " ['Spain' 27.0 48000.0]\n", 14 | " ['Germany' 30.0 54000.0]\n", 15 | " ['Spain' 38.0 61000.0]\n", 16 | " ['Germany' 40.0 nan]\n", 17 | " ['France' 35.0 58000.0]\n", 18 | " ['Spain' nan 52000.0]\n", 19 | " ['France' 48.0 79000.0]\n", 20 | " ['Germany' 50.0 83000.0]\n", 21 | " ['France' 37.0 67000.0]]\n", 22 | "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n", 23 | "new X.1\n", 24 | "[['France' 44.0 72000.0]\n", 25 | " ['Spain' 27.0 48000.0]\n", 26 | " ['Germany' 30.0 54000.0]\n", 27 | " ['Spain' 38.0 61000.0]\n", 28 | " ['Germany' 40.0 48000.0]\n", 29 | " ['France' 35.0 58000.0]\n", 30 | " ['Spain' 27.0 52000.0]\n", 31 | " ['France' 48.0 79000.0]\n", 32 | " ['Germany' 50.0 83000.0]\n", 33 | " ['France' 37.0 67000.0]]\n", 34 | "new X.2\n", 35 | "[[0 44.0 72000.0]\n", 36 | " [2 27.0 48000.0]\n", 37 | " [1 30.0 54000.0]\n", 38 | " [2 38.0 61000.0]\n", 39 | " [1 40.0 48000.0]\n", 40 | " [0 35.0 58000.0]\n", 41 | " [2 27.0 52000.0]\n", 42 | " [0 48.0 79000.0]\n", 43 | " [1 50.0 83000.0]\n", 44 | " [0 37.0 67000.0]]\n", 45 | "StandardScaler(copy=True, with_mean=True, with_std=True)\n", 46 | "[[-1. 2.64575131 -0.77459667 0.4330127 -1.1851228 ]\n", 47 | " [ 1. -0.37796447 -0.77459667 0. 0.59842834]\n", 48 | " [-1. -0.37796447 1.29099445 -1.44337567 -1.1851228 ]\n", 49 | " [-1. -0.37796447 1.29099445 -1.44337567 -0.80963835]\n", 50 | " [ 1. -0.37796447 -0.77459667 1.58771324 1.72488169]\n", 51 | " [-1. -0.37796447 1.29099445 0.14433757 0.03520167]\n", 52 | " [ 1. -0.37796447 -0.77459667 1.01036297 1.0677839 ]\n", 53 | " [ 1. -0.37796447 -0.77459667 -0.28867513 -0.24641167]]\n", 54 | "[[-1. 2.64575131 -0.77459667 -1.01036297 -0.62189612]\n", 55 | " [-1. 2.64575131 -0.77459667 1.87638837 2.10036614]]\n" 56 | ] 57 | }, 58 | { 59 | "name": "stderr", 60 | "output_type": "stream", 61 | "text": [ 62 | "C:\\Anaconda\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n", 63 | " warnings.warn(msg, category=DeprecationWarning)\n", 64 | "C:\\Anaconda\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:371: FutureWarning: The handling of integer data will change in version 0.22. 
Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n", 65 | "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n", 66 | "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n", 67 | " warnings.warn(msg, FutureWarning)\n", 68 | "C:\\Anaconda\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:392: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n", 69 | " \"use the ColumnTransformer instead.\", DeprecationWarning)\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "import numpy as py\n", 75 | "import pandas as pd\n", 76 | "\n", 77 | "dateset = pd.read_csv(r\"C:\\Users\\Administrator\\Data.csv\")\n", 78 | "X = dateset.iloc[: , :-1].values\n", 79 | "Y = dateset.iloc[:,3].values\n", 80 | "print(X)\n", 81 | "print(Y)\n", 82 | "\n", 83 | "from sklearn.preprocessing import Imputer\n", 84 | "imputer = Imputer(missing_values =\"NaN\",strategy = \"most_frequent\", axis = 0)\n", 85 | "imputer = imputer.fit(X[ : , 1:3])\n", 86 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", 87 | "print(\"new X.1\")\n", 88 | "print(X)\n", 89 | "\n", 90 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", 91 | "labelencoder_X = LabelEncoder()\n", 92 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n", 93 | "print(\"new X.2\")\n", 94 | "print(X)\n", 95 | "\n", 96 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 97 | "X = onehotencoder.fit_transform(X).toarray()\n", 98 | "labelencoder_Y = LabelEncoder()\n", 99 | "Y = labelencoder_Y.fit_transform(Y)\n", 100 | "\n", 101 | "from sklearn.model_selection import train_test_split\n", 102 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n", 103 | "\n", 104 | "from sklearn.preprocessing import StandardScaler\n", 105 | "sc_X = StandardScaler()\n", 106 | "X_train = sc_X.fit_transform(X_train)\n", 107 | "X_test = sc_X.transform(X_test)\n", 108 | "print(sc_X,X_train,X_test,sep='\\n')" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [] 117 | } 118 | ], 119 | "metadata": { 120 | "kernelspec": { 121 | "display_name": "Python 3", 122 | "language": "python", 123 | "name": "python3" 124 | }, 125 | "language_info": { 126 | "codemirror_mode": { 127 | "name": "ipython", 128 | "version": 3 129 | }, 130 | "file_extension": ".py", 131 | "mimetype": "text/x-python", 132 | "name": "python", 133 | "nbconvert_exporter": "python", 134 | "pygments_lexer": "ipython3", 135 | "version": "3.7.0" 136 | } 137 | }, 138 | "nbformat": 4, 139 | "nbformat_minor": 2 140 | } 141 | -------------------------------------------------------------------------------- /homework-04/高一淇.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 12, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Step 2: Importing dataset\n", 13 | "X\n", 14 | "[['France' 44.0 72000.0]\n", 15 | " ['Spain' 27.0 48000.0]\n", 16 | " ['Germany' 30.0 54000.0]\n", 17 | " ['Spain' 38.0 61000.0]\n", 18 | " ['Germany' 40.0 nan]\n", 19 | " ['France' 35.0 
58000.0]\n", 20 | " ['Spain' nan 52000.0]\n", 21 | " ['France' 48.0 79000.0]\n", 22 | " ['Germany' 50.0 83000.0]\n", 23 | " ['France' 37.0 67000.0]]\n", 24 | "Y\n", 25 | "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "import numpy as np\n", 31 | "import pandas as pd\n", 32 | "dataset = pd.read_csv('./Data.csv')\n", 33 | "# 不包括最后一列的所有列\n", 34 | "X = dataset.iloc[ : , :-1].values\n", 35 | "#取最后一列\n", 36 | "Y = dataset.iloc[ : , 3].values\n", 37 | "print(\"Step 2: Importing dataset\")\n", 38 | "print(\"X\")\n", 39 | "print(X)\n", 40 | "print(\"Y\")\n", 41 | "print(Y)\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 15, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | "---------------------\n", 54 | "Step 3: Handling the missing data\n", 55 | "step2\n", 56 | "X\n", 57 | "[['France' 44.0 72000.0]\n", 58 | " ['Spain' 27.0 48000.0]\n", 59 | " ['Germany' 30.0 54000.0]\n", 60 | " ['Spain' 38.0 61000.0]\n", 61 | " ['Germany' 40.0 63777.77777777778]\n", 62 | " ['France' 35.0 58000.0]\n", 63 | " ['Spain' 38.77777777777778 52000.0]\n", 64 | " ['France' 48.0 79000.0]\n", 65 | " ['Germany' 50.0 83000.0]\n", 66 | " ['France' 37.0 67000.0]]\n" 67 | ] 68 | }, 69 | { 70 | "name": "stderr", 71 | "output_type": "stream", 72 | "text": [ 73 | "C:\\Users\\dell\\Anaconda3\\envs\\AAA\\lib\\site-packages\\sklearn\\utils\\deprecation.py:58: DeprecationWarning: Class Imputer is deprecated; Imputer was deprecated in version 0.20 and will be removed in 0.22. Import impute.SimpleImputer from sklearn instead.\n", 74 | " warnings.warn(msg, category=DeprecationWarning)\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "from sklearn.preprocessing import Imputer\n", 80 | "# axis=0表示按列进行\n", 81 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", 82 | "imputer = imputer.fit(X[ : , 1:3])\n", 83 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", 84 | "print(\"---------------------\")\n", 85 | "print(\"Step 3: Handling the missing data\")\n", 86 | "print(\"step2\")\n", 87 | "print(\"X\")\n", 88 | "print(X)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 16, 94 | "metadata": {}, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "---------------------\n", 101 | "Step 4: Encoding categorical data\n", 102 | "X\n", 103 | "[[1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 104 | " 7.20000000e+04]\n", 105 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 106 | " 4.80000000e+04]\n", 107 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", 108 | " 5.40000000e+04]\n", 109 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 110 | " 6.10000000e+04]\n", 111 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 112 | " 6.37777778e+04]\n", 113 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 114 | " 5.80000000e+04]\n", 115 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 116 | " 5.20000000e+04]\n", 117 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 118 | " 7.90000000e+04]\n", 119 | " [0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", 120 | " 8.30000000e+04]\n", 121 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 122 | " 6.70000000e+04]]\n", 123 | "Y\n", 124 | "[0 1 0 0 1 1 0 1 0 1]\n" 125 | ] 126 | }, 127 | { 128 | 
"name": "stderr", 129 | "output_type": "stream", 130 | "text": [ 131 | "C:\\Users\\dell\\Anaconda3\\envs\\AAA\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:371: FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.\n", 132 | "If you want the future behaviour and silence this warning, you can specify \"categories='auto'\".\n", 133 | "In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.\n", 134 | " warnings.warn(msg, FutureWarning)\n", 135 | "C:\\Users\\dell\\Anaconda3\\envs\\AAA\\lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:392: DeprecationWarning: The 'categorical_features' keyword is deprecated in version 0.20 and will be removed in 0.22. You can use the ColumnTransformer instead.\n", 136 | " \"use the ColumnTransformer instead.\", DeprecationWarning)\n" 137 | ] 138 | } 139 | ], 140 | "source": [ 141 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", 142 | "labelencoder_X = LabelEncoder()\n", 143 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n", 144 | "#Creating a dummy variable\n", 145 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 146 | "X = onehotencoder.fit_transform(X).toarray()\n", 147 | "labelencoder_Y = LabelEncoder()\n", 148 | "Y = labelencoder_Y.fit_transform(Y)\n", 149 | "print(\"---------------------\")\n", 150 | "print(\"Step 4: Encoding categorical data\")\n", 151 | "print(\"X\")\n", 152 | "print(X)\n", 153 | "print(\"Y\")\n", 154 | "print(Y)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 17, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "---------------------\n", 167 | "Step 5: Splitting the datasets into training sets and Test sets\n", 168 | "X_train\n", 169 | "[[0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 170 | " 6.37777778e+04]\n", 171 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 172 | " 6.70000000e+04]\n", 173 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 174 | " 4.80000000e+04]\n", 175 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 176 | " 5.20000000e+04]\n", 177 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 178 | " 7.90000000e+04]\n", 179 | " [0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 180 | " 6.10000000e+04]\n", 181 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 182 | " 7.20000000e+04]\n", 183 | " [1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 184 | " 5.80000000e+04]]\n", 185 | "X_test\n", 186 | "[[0.0e+00 1.0e+00 0.0e+00 3.0e+01 5.4e+04]\n", 187 | " [0.0e+00 1.0e+00 0.0e+00 5.0e+01 8.3e+04]]\n", 188 | "Y_train\n", 189 | "[1 1 1 0 1 0 0 1]\n", 190 | "Y_test\n", 191 | "[0 0]\n" 192 | ] 193 | } 194 | ], 195 | "source": [ 196 | "from sklearn.model_selection import train_test_split\n", 197 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n", 198 | "print(\"---------------------\")\n", 199 | "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n", 200 | "print(\"X_train\")\n", 201 | "print(X_train)\n", 202 | "print(\"X_test\")\n", 203 | "print(X_test)\n", 204 | "print(\"Y_train\")\n", 205 | 
"print(Y_train)\n", 206 | "print(\"Y_test\")\n", 207 | "print(Y_test)\n" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 18, 213 | "metadata": {}, 214 | "outputs": [ 215 | { 216 | "name": "stdout", 217 | "output_type": "stream", 218 | "text": [ 219 | "---------------------\n", 220 | "Step 6: Feature Scaling\n", 221 | "X_train\n", 222 | "[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n", 223 | " [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n", 224 | " [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n", 225 | " [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n", 226 | " [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n", 227 | " [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n", 228 | " [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n", 229 | " [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n", 230 | "X_test\n", 231 | "[[-1. 2.64575131 -0.77459667 -1.45882927 -0.90166297]\n", 232 | " [-1. 2.64575131 -0.77459667 1.98496442 2.13981082]]\n" 233 | ] 234 | } 235 | ], 236 | "source": [ 237 | "from sklearn.preprocessing import StandardScaler\n", 238 | "sc_X = StandardScaler()\n", 239 | "X_train = sc_X.fit_transform(X_train)\n", 240 | "X_test = sc_X.transform(X_test)\n", 241 | "print(\"---------------------\")\n", 242 | "print(\"Step 6: Feature Scaling\")\n", 243 | "print(\"X_train\")\n", 244 | "print(X_train)\n", 245 | "print(\"X_test\")\n", 246 | "print(X_test)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": {}, 253 | "outputs": [], 254 | "source": [] 255 | } 256 | ], 257 | "metadata": { 258 | "kernelspec": { 259 | "display_name": "Python 3", 260 | "language": "python", 261 | "name": "python3" 262 | }, 263 | "language_info": { 264 | "codemirror_mode": { 265 | "name": "ipython", 266 | "version": 3 267 | }, 268 | "file_extension": ".py", 269 | "mimetype": "text/x-python", 270 | "name": "python", 271 | "nbconvert_exporter": "python", 272 | "pygments_lexer": "ipython3", 273 | "version": "3.7.0" 274 | } 275 | }, 276 | "nbformat": 4, 277 | "nbformat_minor": 2 278 | } 279 | -------------------------------------------------------------------------------- /homework-04/魏卓其.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#Day 1: Data Prepocessing\n", 12 | "\n", 13 | "#Step 1: Importing the libraries\n", 14 | "import numpy as np\n", 15 | "import pandas as pd" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 5, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | "Step 2: Importing dataset\n", 28 | "X\n", 29 | "[['France' 44.0 72000.0]\n", 30 | " ['Spain' 27.0 48000.0]\n", 31 | " ['Germany' 30.0 54000.0]\n", 32 | " ['Spain' 38.0 61000.0]\n", 33 | " ['Germany' 40.0 nan]\n", 34 | " ['France' 35.0 58000.0]\n", 35 | " ['Spain' nan 52000.0]\n", 36 | " ['France' 48.0 79000.0]\n", 37 | " ['Germany' 50.0 83000.0]\n", 38 | " ['France' 37.0 67000.0]]\n", 39 | "Y\n", 40 | "['No' 'Yes' 'No' 'No' 'Yes' 'Yes' 'No' 'Yes' 'No' 'Yes']\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "#Step 2: Importing dataset\n", 46 | "dataset = pd.read_csv('Data.csv')\n", 47 | "X = dataset.iloc[ : , :-1].values\n", 48 | "Y = dataset.iloc[ : , 3].values\n", 49 | "print(\"Step 2: Importing dataset\")\n", 50 | "print(\"X\")\n", 51 
| "print(X)\n", 52 | "print(\"Y\")\n", 53 | "print(Y)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 8, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "---------------------\n", 66 | "Step 3: Handling the missing data\n", 67 | "step2\n", 68 | "X\n", 69 | "[['France' 44.0 72000.0]\n", 70 | " ['Spain' 27.0 48000.0]\n", 71 | " ['Germany' 30.0 54000.0]\n", 72 | " ['Spain' 38.0 61000.0]\n", 73 | " ['Germany' 40.0 63777.77777777778]\n", 74 | " ['France' 35.0 58000.0]\n", 75 | " ['Spain' 38.77777777777778 52000.0]\n", 76 | " ['France' 48.0 79000.0]\n", 77 | " ['Germany' 50.0 83000.0]\n", 78 | " ['France' 37.0 67000.0]]\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "#Step 3: Handling the missing data\n", 84 | "from sklearn.preprocessing import Imputer\n", 85 | "imputer = Imputer(missing_values = \"NaN\", strategy = \"mean\", axis = 0)\n", 86 | "imputer = imputer.fit(X[ : , 1:3])\n", 87 | "X[ : , 1:3] = imputer.transform(X[ : , 1:3])\n", 88 | "print(\"---------------------\")\n", 89 | "print(\"Step 3: Handling the missing data\")\n", 90 | "print(\"step2\")\n", 91 | "print(\"X\")\n", 92 | "print(X)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 9, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "---------------------\n", 105 | "Step 4: Encoding categorical data\n", 106 | "X\n", 107 | "[[ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 108 | " 7.20000000e+04]\n", 109 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 110 | " 4.80000000e+04]\n", 111 | " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", 112 | " 5.40000000e+04]\n", 113 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 114 | " 6.10000000e+04]\n", 115 | " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 116 | " 6.37777778e+04]\n", 117 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 118 | " 5.80000000e+04]\n", 119 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 120 | " 5.20000000e+04]\n", 121 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 122 | " 7.90000000e+04]\n", 123 | " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", 124 | " 8.30000000e+04]\n", 125 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 126 | " 6.70000000e+04]]\n", 127 | "Y\n", 128 | "[0 1 0 0 1 1 0 1 0 1]\n" 129 | ] 130 | } 131 | ], 132 | "source": [ 133 | "#Step 4: Encoding categorical data\n", 134 | "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n", 135 | "labelencoder_X = LabelEncoder()\n", 136 | "X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])\n", 137 | "#Creating a dummy variable\n", 138 | "onehotencoder = OneHotEncoder(categorical_features = [0])\n", 139 | "X = onehotencoder.fit_transform(X).toarray()\n", 140 | "labelencoder_Y = LabelEncoder()\n", 141 | "Y = labelencoder_Y.fit_transform(Y)\n", 142 | "print(\"---------------------\")\n", 143 | "print(\"Step 4: Encoding categorical data\")\n", 144 | "print(\"X\")\n", 145 | "print(X)\n", 146 | "print(\"Y\")\n", 147 | "print(Y)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 10, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "---------------------\n", 160 | "Step 5: Splitting the datasets into training sets 
and Test sets\n", 161 | "X_train\n", 162 | "[[ 0.00000000e+00 1.00000000e+00 0.00000000e+00 4.00000000e+01\n", 163 | " 6.37777778e+04]\n", 164 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.70000000e+01\n", 165 | " 6.70000000e+04]\n", 166 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 2.70000000e+01\n", 167 | " 4.80000000e+04]\n", 168 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.87777778e+01\n", 169 | " 5.20000000e+04]\n", 170 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.80000000e+01\n", 171 | " 7.90000000e+04]\n", 172 | " [ 0.00000000e+00 0.00000000e+00 1.00000000e+00 3.80000000e+01\n", 173 | " 6.10000000e+04]\n", 174 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 4.40000000e+01\n", 175 | " 7.20000000e+04]\n", 176 | " [ 1.00000000e+00 0.00000000e+00 0.00000000e+00 3.50000000e+01\n", 177 | " 5.80000000e+04]]\n", 178 | "X_test\n", 179 | "[[ 0.00000000e+00 1.00000000e+00 0.00000000e+00 3.00000000e+01\n", 180 | " 5.40000000e+04]\n", 181 | " [ 0.00000000e+00 1.00000000e+00 0.00000000e+00 5.00000000e+01\n", 182 | " 8.30000000e+04]]\n", 183 | "Y_train\n", 184 | "[1 1 1 0 1 0 0 1]\n", 185 | "Y_test\n", 186 | "[0 0]\n" 187 | ] 188 | } 189 | ], 190 | "source": [ 191 | "#Step 5: Splitting the datasets into training sets and Test sets\n", 192 | "from sklearn.model_selection import train_test_split\n", 193 | "X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0)\n", 194 | "print(\"---------------------\")\n", 195 | "print(\"Step 5: Splitting the datasets into training sets and Test sets\")\n", 196 | "print(\"X_train\")\n", 197 | "print(X_train)\n", 198 | "print(\"X_test\")\n", 199 | "print(X_test)\n", 200 | "print(\"Y_train\")\n", 201 | "print(Y_train)\n", 202 | "print(\"Y_test\")\n", 203 | "print(Y_test)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 11, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "---------------------\n", 216 | "Step 6: Feature Scaling\n", 217 | "X_train\n", 218 | "[[-1. 2.64575131 -0.77459667 0.26306757 0.12381479]\n", 219 | " [ 1. -0.37796447 -0.77459667 -0.25350148 0.46175632]\n", 220 | " [-1. -0.37796447 1.29099445 -1.97539832 -1.53093341]\n", 221 | " [-1. -0.37796447 1.29099445 0.05261351 -1.11141978]\n", 222 | " [ 1. -0.37796447 -0.77459667 1.64058505 1.7202972 ]\n", 223 | " [-1. -0.37796447 1.29099445 -0.0813118 -0.16751412]\n", 224 | " [ 1. -0.37796447 -0.77459667 0.95182631 0.98614835]\n", 225 | " [ 1. -0.37796447 -0.77459667 -0.59788085 -0.48214934]]\n", 226 | "X_test\n", 227 | "[[-1. 2.64575131 -0.77459667 -1.45882927 -0.90166297]\n", 228 | " [-1. 
2.64575131 -0.77459667 1.98496442 2.13981082]]\n" 229 | ] 230 | } 231 | ], 232 | "source": [ 233 | "#Step 6: Feature Scaling\n", 234 | "from sklearn.preprocessing import StandardScaler\n", 235 | "sc_X = StandardScaler()\n", 236 | "X_train = sc_X.fit_transform(X_train)\n", 237 | "X_test = sc_X.transform(X_test)\n", 238 | "print(\"---------------------\")\n", 239 | "print(\"Step 6: Feature Scaling\")\n", 240 | "print(\"X_train\")\n", 241 | "print(X_train)\n", 242 | "print(\"X_test\")\n", 243 | "print(X_test)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "collapsed": true 251 | }, 252 | "outputs": [], 253 | "source": [] 254 | } 255 | ], 256 | "metadata": { 257 | "kernelspec": { 258 | "display_name": "Python 3", 259 | "language": "python", 260 | "name": "python3" 261 | }, 262 | "language_info": { 263 | "codemirror_mode": { 264 | "name": "ipython", 265 | "version": 3 266 | }, 267 | "file_extension": ".py", 268 | "mimetype": "text/x-python", 269 | "name": "python", 270 | "nbconvert_exporter": "python", 271 | "pygments_lexer": "ipython3", 272 | "version": "3.6.2" 273 | } 274 | }, 275 | "nbformat": 4, 276 | "nbformat_minor": 2 277 | } 278 | -------------------------------------------------------------------------------- /homework-04/黄禹霏.ipynb: -------------------------------------------------------------------------------- 1 | #Day 1: Data Prepocessing 2 | 3 | #Step 1: Importing the libraries 4 | import numpy as np 5 | import pandas as pd 6 | 7 | #Step 2: Importing dataset 8 | dataset = pd.read_csv('../datasets/Data.csv') 9 | X = dataset.iloc[ : , :-1].values 10 | Y = dataset.iloc[ : , 3].values 11 | print("Step 2: Importing dataset") 12 | print("X") 13 | print(X) 14 | print("Y") 15 | print(Y) 16 | 17 | #Step 3: Handling the missing data 18 | from sklearn.preprocessing import Imputer 19 | imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0) 20 | imputer = imputer.fit(X[ : , 1:3]) 21 | X[ : , 1:3] = imputer.transform(X[ : , 1:3]) 22 | print("---------------------") 23 | print("Step 3: Handling the missing data") 24 | print("step2") 25 | print("X") 26 | print(X) 27 | 28 | #Step 4: Encoding categorical data 29 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 30 | labelencoder_X = LabelEncoder() 31 | X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0]) 32 | #Creating a dummy variable 33 | onehotencoder = OneHotEncoder(categorical_features = [0]) 34 | X = onehotencoder.fit_transform(X).toarray() 35 | labelencoder_Y = LabelEncoder() 36 | Y = labelencoder_Y.fit_transform(Y) 37 | print("---------------------") 38 | print("Step 4: Encoding categorical data") 39 | print("X") 40 | print(X) 41 | print("Y") 42 | print(Y) 43 | 44 | #Step 5: Splitting the datasets into training sets and Test sets 45 | from sklearn.model_selection import train_test_split 46 | X_train, X_test, Y_train, Y_test = train_test_split( X , Y , test_size = 0.2, random_state = 0) 47 | print("---------------------") 48 | print("Step 5: Splitting the datasets into training sets and Test sets") 49 | print("X_train") 50 | print(X_train) 51 | print("X_test") 52 | print(X_test) 53 | print("Y_train") 54 | print(Y_train) 55 | print("Y_test") 56 | print(Y_test) 57 | 58 | #Step 6: Feature Scaling 59 | from sklearn.preprocessing import StandardScaler 60 | sc_X = StandardScaler() 61 | X_train = sc_X.fit_transform(X_train) 62 | X_test = sc_X.transform(X_test) 63 | print("---------------------") 64 | print("Step 6: Feature Scaling") 65 | print("X_train") 66 | 
print(X_train) 67 | print("X_test") 68 | print(X_test) 69 | -------------------------------------------------------------------------------- /homework-05/lixiaoyu.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | dataset=pd.read_csv('C:\Users\李校宇\Desktop\studentscores.csv') 6 | X = dataset.iloc[ : , : 1].values 7 | Y = dataset.iloc[ : , 1].values 8 | 9 | from sklearn.model_selection import train_test_split 10 | X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size =1/4,random_state=0) 11 | 12 | from sklearn.linear_model import LinearRegression 13 | regressor=LinearRegression() 14 | regressor=regressor.fit(X_train,Y_train) 15 | 16 | Y_pred=regressor.predict(X_test) 17 | 18 | plt.scatter(X_train,Y_train,color='red') 19 | plt.plot(X_train,regressor.predict(X_train),color='blue') 20 | plt.show() 21 | 22 | plt.scatter(X_test,Y_test,color='red') 23 | plt.plot(X_test,regressor.predict(X_test),color='blue') 24 | plt.show() 25 | -------------------------------------------------------------------------------- /homework-05/zhangbo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHxtJREFUeJzt3XmUVOW19/HvrqZBmS4gBBkCqGE55lWSlqh40TjdmPiCiSYvigZNlAzG6I2JQ4ga4sIhGmMuer22ECWmFaJiQFduIqAomtjaDFEZBGUSRGkQIgiC3bXfP+p0dTc0dFXXcE5V/T5rsZrzUHVqi/Dj6ec5Zx9zd0REpPDFwi5ARESyQ4EuIlIkFOgiIkVCgS4iUiQU6CIiRUKBLiJSJBToIiJFQoEuIlIkFOgiIkWiXT4/rGfPnj5o0KB8fqSISMGbP3/+Jnfv1drr8hrogwYNoqamJp8fKSJS8MxsTSqv05KLiEiRUKCLiBQJBbqISJFQoIuIFAkFuohIkVCgi4iko6oKBg2CWCzxtaoq7IqS8nrZoohIQauqgrFjYceOxPGaNYljgNGjw6sroBm6iEiqxo1rDPMGO3YkxiNAgS4ikqq1a9MbzzMFuohIqgYMSG88zxToIiKpmjABOnZsPtaxY2I8AhToIiKpGj0aKith4EAwS3ytrNznhmh9fT1jxlzCjBkz8lKeuXtePgigoqLC1ZxLRErB008/zYgRIwE45phjeOON19t8LjOb7+4Vrb1Oly2KiGTRxx9/zGc+05sdwdUwJ554Ii+9NC8vn60lFxGRLJk4cSKdO3dJhvnChQv4+99fJhbLT9Rqhi4ikqH333+fPn36Jo+/+93vMGnSpLzXoRm6iEgGrrzyymZhvm7du6GEOSjQRUTa5M0338Qsxr333gfAnXf+Gvc4/fr1C60mLbmIiKShvr6edu3Kk8dlZWVs2fIhXbp0CbGqBM3QRURSdO211zYL8yeffIK6uk8jEeagGbqISKu2bt1K9+49mo198slOOnToEFJFLdMMXURkP0444cRmYX7vvRNxj0cuzEGBLiLSoqVLl2IWo7q6OjnmHueKK65I/SR5fhiGAl1EZA9mMY466ujk8dy5z+MeT+8kDQ/DWLMG3BsfhpHDUFegi4gEZs6ciVljLHbo0AH3OKecckr6JwvhYRjaFBWRkufuxGJlzcbWrFnNgEz6nIfwMAzN0EWkpN1yyy3NwnzEiBG4xzMLcwjlYRgKdBGJjjxuIm7fvh2zGDfddHNy7OOPtzNjxp+z8wEhPAxDgS4i0ZDHTUSzGF26dE0e33HH7bjH6bhnAGcizYdhZIMecCEi4amqSmwSrl2bmJXX1+/9moEDYfXqrHxcTU0Nxx8/tNlYPF6PmWXl/LmS6gMuNEMXkXDsOSNvKcwha5uIZrFmYd5wg1DUwzwduspFRMLR0mV9LclwE/GBBx7g+9//QbOxtK8pLxAKdBEJRyoz7ww2EVu6FLG6+hWGDh26j3cUPi25iEg49jXzLivLeBPx/PO/uVeYu8eLOsxBM3QRCcuECYk19KbLLh07ZnQlyLZt2+ja9d+ajdXWbqRnz56ZVFowNEMXkXBk+bI+s1izMB82bBju8ZIJc9AMXUTCNHp0xtdlL1myhKOPPqbZWH19HbFY6c1XS++/WESKhlmsWZjfeusE3OMlGeagGbqIFKBHH32U0aMvajZWrJcipqM0/xkTkYJlFmsW5s89N2fvMM/zgyWiQoEuIgXh8ssvb9arHBKz8i9/+cvNXxjCgyWiIqVAN7P/NLPFZvammT1mZgeY2SFmVm1mK8xsmpm1z3WxIlJ6du7ciVmMSZMmJ8fWr1+37yWWEB4sERWtBrqZ9QN+DFS4+zFAGTAKuAP4rbsPBrYA381loSJSenr27EXHjp2Sx0cddRTucfr27bvvN4XwYImoSHXJpR1woJm1AzoCG4DTgCeCX58CnJv98kSkFK1cuRKzGJs3b06O7d69i8WL32z9zSE8WCIqWg10d18P3AWsJRHk/wLmA1vdvS542TqgX0vvN7OxZlZjZjW1tbXZqVpEipZZjMMO+1zy+Nprf4Z7nPLy8tROEMKDJaIilSWX7sBI4BCgL9AJOLuFl7
bYWN3dK929wt0revXqlUmtIlLEqqqqWtz0vOOOO9I7UQgPloiKVK5DPwNY5e61AGY2HTgJ6GZm7YJZen/gvdyVKSLFbM8gf+yxRxk1alTbT5iFO1ALUSqBvhY4wcw6AjuB04Ea4HngfGAqMAaYkasiRaQ4DRnyBRYtWtRsTDcItV0qa+jVJDY/FwBvBO+pBK4DfmJmbwMHAZP3eRIRkSZ27dqFWaxZmL/11jKFeYZSuvXf3W8Gbt5jeCVQ3M2FRSTr9lxeAc3Ks0V3iopIXixbtmyvMN+27SOFeRYp0EUk571PzGIceeRRyeN+/frhHqdz585Z/ZxSp0AXKXU57H0yefLkFi9FXLfu3YzPLXtToIuUuhz1PjGLcdlllyePf/GLcdFZXinSbozqhy5S6rLc++Skky7jH//4OdAT2AREbNOz4TuShn/EGr4jgYK/dl0zdJFSl6XeJ59+WofZLP7xj0nAocBJvPDC3GiFORR1N0YFukipy0LvE7MzaN++HXBmMPL/cP8zw4cPz1qZWVPE3RgV6CKlLoPeJ8uXr8FsAzA7GFnMBx98iPu0nJackSLuxqg1dBFpU+8Ts+8Av08ex2L/Tn39vCwXlgMTJjRfQ4ei6caoGbqIpOWhh2ZiBo1hPo36ei+MMIei7saoGbqIpMzsduD65PGFF95IVdUt4RXUVkXajVEzdBFp1YgRPw1m5Q1hPh53CjPMi5gCXUT2qb4+jtkMnn76ruTY1KnPkujXJ1GjJRcRaZHZvwPzSDywDOBi3B8BzgqvKNkvzdBFpJl16z7A7B0SYQ6witWrNwRhLlGmGbqIJJmNBqqA3sHIKbi/EGJFkg7N0EWE6dOfCzY9G5pUPU1dXVxhXmAU6CIlzuxmzjvvtOTxV75yNe7/l7IyxUOh0f8xkRJ1ySXjg1n5+GDk17jD//7vPZmduEhb0xYCraGLlJh43Ckrm0rTxwQ/8MB0xo69NvOTF3Fr2kJg7p63D6uoqPCampq8fZ6INHfAAaeya9fcJiOX4/5g9j5g0KBEiO9p4EBYvTp7n1NizGy+u1e09jotuYiUgM2b/4XZm03C/AOWLFmV3TCHom5NWwi05CJS5My+CTwO/Fswchbuz+bmwwYMaHmGXgStaQuBZugiUZLFDcXZs6uDTc/Hg5E5fPLJp7kLc8jKwzKk7RToIlHRsKG4Zg24N24otiHUza7lzDO/lDyuqLgU99Pp0KE8mxXvrYhb0xYCbYqKREUWNhSvueYe7r776iYjE3G/MhvVSYi0KSpSaDLcUDR7qFmY3377FIV5iVGgi0RFG5912b//yGCt/NJg5Erc4brrxmSzOikACnSRqEhzQ3Hbth2YVbN+/Yxg5CNee20J7hNzW6dElgJdJCrS2FA0G0HXrh2Bho3Pc3DvSkXFUXktWaJFm6IiBeSVV97gxBMPBToFI39n+/YhdOp0YJhlSY6luimqG4tECoTZVcDvkseDBn2DVaumh1eQRI6WXEQibsKE3webng1hPgl3FOayFwW6SISZ/Te/+MV3ksfXXTcR98tCrEiiTIEuEkHHHXdRMCv/YTDyM9zh9tt1XbnsmwJdJJ9a6dXyySe7MZvLP//5x2CkjjlzXsP9znxXKgVIm6Ii+dLKwx/M/gP4G3Bq8IbzcH8SOD7vpUph0gxdJF/GjWsM8wY7drD4ZzditolEmAMsZPPmj4IwF0mdZugi+dJCTxZjLGx4IHncufMZbNs2O59VSRFJaYZuZt3M7AkzW2ZmS83sRDPrYWazzGxF8LV7rosViYS29ixv0pPlv+mJ4UBDmP8RdxTmkpFUl1x+B/zV3Y8AjgWWAtcDc9x9MDAnOBYpbpn0LA96tRjOFdQmhy879VLcL8ph0VIqWg10M+sKDAcmA7j7bnffCowEpgQvmwKcm6siRSJjH+vgjBvX6ltPmLgW2/Fxk5Fb8D9W8eDzD2W3RilZrfZyMbPjgEpgCYnZ+XzgKmC9u3dr8rot7r7XsouZjQXGAgwYMOCLa1pq4C9SKGKxxMx8T2YQj7f4lrq6esrLy5qNVVW9xIUXnpyLCqUIZfMBF+2ALwD3u/sQ4GPSWF5x90p3r3D3il69eqX6NpFoSrNnudkv9wjzP+OOwlxyIpVAXwesc/fq4PgJEgH/gZn1AQi+bsxNiSIRkmLP8uXL3wvu9PxlcmzNmlrctTIpudNqoLv7+8C7ZnZ4MHQ6ieWXmUDDI1HGADNaeLtIcUmhZ7nZbA4/vG+TN12LOwwYoO9QJbdS6ocerKNPAtoDK0k86yoG/AkYAKwFvunuH+7vPOqHLsWssnIO3/ve6c3G6uudWMxCqkiKRVb7obv7IqClk53ewphIyUksrzT+dTjrrN/wt79dAyjMJX90679IBs4+++4gzBu5E4S5SH4p0EXaIB53zOCvf/1Jcuy++55t8YpGkXxRLxeRNJn9FLirychzuJ8GnBVSRSIJmqGLpGj16g+C5ZXGMF+6dH0Q5iLhU6CLpMDsSQ45pHeTkfG4wxFH9AutJpE9KdBF9mPy5GeDWfl5ybFdu+pwvzm0mkT2RYEuxaOtbW33wWwbl13WuC5+8ME/wB3at9fWk0STAl2KQyZtbfdw5pm/CmblXZJj7rBhw/3Zq1ckBxToUhwyaGvblBnMnn1T8vinP31IlyJKwdD3jlIcWni8237H92D2A6DpDLyGxJ3Wl2ZamUjeaIYuxSHNtrYNNm7cGiyvNIb5vHmLSaFthkjkKNClOKTY1rYps4fo3btbk5G7cYeTTz46NzWK5JgCXYpDCm1tGzz11N+DWXnjcsr27Z/g/pO9XitSSLSGLsVj9OgWA7wps7XAScnj8vJL2L37YeCAnJYmkg+aoUtJuOCC3wSz8sY1dXeCMBcpDgp0KXpmMHVqYzvbSy75L12KKEVJSy5StMy+Dfyhycg7uB8G/DikikRySzN0KTrbtu0Mllcaw/zpp18NwlykeCnQpaiYTaRr1wObjDyIO5xzztDQahLJFwW6FIXnnvtnMCu/Mjm2efM23C8PrSaRfNMauhQ8s9eBY5uMXI77gzRtriVSCjRDl4J15ZWVwaz8/yTH3AnCXKT0KNClIJnBvfeOTR6fc86tuhRRSp4CXQpKjx5jg1l5g824w9NP/zy9E2X5YRgiUaBAl4Kwa9enmMGWLZXJsUceeR73g9I/WRYfhiESJQp0iTyz2zjggPImI1Nxh4su+nLbTpilh2GIRI0CXSJr0aKVwfLKDcmxd9/dhPuozE6c4cMwRKJKgS6RZPYiQ4Yc2mTkatyhf/+emZ+8jQ/DEIk6BbpEyh13PBXMyocnx+rrHfd7svchbXgYhkghUKBLZJjB9dd/PXl8xhm34Q6xmO3nXW2QxsMwRAqJeR4v3q2oqPCampq8fZ4Uhi9+8SYWLPhVk5F63MtCq0ckasxsvqfwoFvN0CU09fVxzGgW5vff/zeFuUgbqZeLhMLsBuC2JiN/wf2rwH+EVJFI4dMMXfLq7bc3BJuejWG+fPl7QZiLSCYU6JI3Zs8weHCfJiPjcIfBg/uGVpNIMVGgl5KQ+pdUV
j4bzMrPSY7V1cVx12WCItmkNfRS0dC/pOGW94b+JZDTy/XM6oCzksfHHXcjCxfeguYSItmnv1WlIs/9S84667ZgVt44Z3AnCPM0qCuiSMoU6KUiT/1L4nHHDGbNauy/cuut09vWq1xdEUXSknKgm1mZmS00s2eC40PMrNrMVpjZNDNrn7syJWN56F9idhVlZU3v6nwZd7jhhm+07YTqiiiSlnRm6FcBS5sc3wH81t0HA1uA72azMMmyHPYv2bDhw2B55XfJsQULVuI+LLMTqyuiSFpSCnQz6w98DZgUHBtwGvBE8JIpwLm5KFCyJEf9S8weo2/fHk1GbsedPToltpG6IoqkJdUZ+j3AtUA8OD4I2OrudcHxOqBflmuTbBs9Glavhng88TWDMH/88ZeDWfkFybFPPvkU9+szrbKRuiKKpKXVQDezc4CN7j6/6XALL21x28vMxppZjZnV1NbWtrFMiRKzWr71rcbllIEDf4Y7dOhQvp93tYG6IoqkpdVui2Z2G3AxUAccAHQFniLRdONgd68zsxOBX7r7fhtxqNtiYbvggv9i6tQfNxvLY7NOkZKVtW6L7n6Du/d390HAKOA5dx8NPA+cH7xsDDAjg3ol4sxoFubXXPMHhblIxGRyHfp1wE/M7G0Sa+qTs1OSREl5+Q+CtfIGb+IOd9317bBKEpF9SCvQ3X2uu58T/Hyluw9198+5+zfdfVduSpQwbNmyHTOoq7s/OfbCC4txPybEqkRkf3SnqOzFrJIePTo3GbkXdxg+/OjQahKR1inQJWnWrEXB8srY5NhHH+3E/Ueh1SQiqVO3RQHAbAVwXPK4c+cfsW3bvcCBodUkIunRDL3E/ehHk4NZ+eDkmDtBmItIIVGglzAzuO++xhY83/nO/6R+KaLa2opEjpZcSlDv3lexcePvmoy8i/tnge+ndoKQHpYhIvunGXoJ2blzN2Y0C/MZM14NwjwNamsrEkkK9BJh9hs6dmzasv5h3GHEiKHpn0xtbUUiSYFe5F5/fW2w6XlNcmzz5u24X9L2k6qtrUgkKdCLmNlrHHtsY8j263cr7uxx01AbqK2tSCQp0IvQPfc8G8zKj0+OucO6dT/Pzgeora1IJLXaPjeb1D4392yPTvWXX/4IlZUXh1OMiGRF1trnSmE444yJe4T5dtxRmIuUEAV6gaurq8cM5sy5Mjn21FPzcc9wnVxECo4CvYCZ/ZLy8rImI9Nxh3PP/WJoNYlIeBToBejttz8Illd+mRxbv34L7t8IqyQRiQAFehj21wellR4pZrMZPLh38rhLl1/hDn37ds9dTSJSENTLJd/21wcF9vlrU+oGcsklJwNnJF9aX+/EYjfltiZdiihSMHTZYr4NGpQIzD0NHJj42sKvGc3/H51//oM8/vjl+alp9ersfY6ItEmqly1qhp5vafRB+RZn8Dizmo0l/v3NYpinWZOIRJfW0PNtf31Qgl+Lk5iVNw3zhx9+KfVe5dmsSUQKhgI93/bXB2XCBL4Wu46yZkssc/A/VjFmzMnh1CQiBUNLLvnWsMk4blxiSWPAAJgwgQ/P/gYHHdT8+Z1v9z2cw359U+43JvdRkzZERQqLNkUjoF+/V3jvvROSx6ecMpe5c0/N/MRVVQppkSKgTdECUF39Hiec0BdoDPNPP43Trt2pmZ9clyKKlBytoYekXbv1QZgn3HBDNe7Qrl2W/pfoMXEiJUcz9Dx7+eV1nHxyf6Bfciyx6vWl7H6QLkUUKTmaoefRoYe+HIR5wmuvbdCliCKSNQr0PPjDH5ZgBqtWDQPg4otfxB0qKvrs/eJs9VTRpYgiJUdLLjm0e3c93bqtYOfOowAw28SmTZ3o0WN4y2/I5kamLkUUKTmaoe9LhjPl6657hQ4dyti58wgAJkyYTzzekx49Dtz3m7K9kTl6dKIXSzye+KowFylqmqG3JIOZ8vvvb6dPnw40XIrYrdtCamuPpV27FB46oY1MEcmAZugtaeNMeeTIufTp0xkoB2D69BVs2TIk9UsRtZEpIhlQoLckzZlyTc0GzGDmzFMB+PznX8Advv71wel9rjYyRSQDCvSWpDFTPvLIeRx/fOPVKgsXfsDrr5/Sts8dPRoqKxN9yM0SXysrtfYtIilRoLckhZnytGlvYQbLlv07AOedl5iVH3dcbzKijUwRaSNtirZkP5f81dXF6d59Mdu3fz548cds3Gj06tXGWbmISJZohr4vLcyUx49/jfLyWDLMb7zxVdw70atXx/2eai96ILOI5IBm6CnYtGkHvXrVAccD0KnTYj788Ajatx+a/snUBVFEcqTVGbqZfdbMnjezpWa22MyuCsZ7mNksM1sRfO2e+3Lzb9SoF4MZeFcAqqqWsX370bRvX9a2E6oLoojkSCpLLnXANe5+JIm7Za4ws6OA64E57j4YmBMcR0sGSxsrV27EbCvTpiVu0x88OPFMzwsvPCKzmnTzkIjkSKuB7u4b3H1B8PNtwFISvV9HAlOCl00Bzs1VkW3SsLSxZk2iP23D0kYKoT5y5P9w2GE7gG4AVFdvYPnyLD3TUzcPiUiOpLUpamaDgCFANdDb3TdAIvSBz2S7uIy0YWlj3ry3MKti5szvAzv44Q8fxR2GDm2hK2Jb6eYhEcmRlAPdzDoDTwJXu/tHabxvrJnVmFlNbW1tW2psmzSWNuJx5+ij72T48J7ANzEbT23tZ7nvvguzX5duHhKRHEkp0M2snESYV7n79GD4AzPrE/x6H2BjS+9190p3r3D3il69emWj5tSkuLTxyCMvU1Y2myVLfgYs47e/nUs8fjM9e3bJXW26eUhEciCVq1wMmAwsdfe7m/zSTGBM8PMxwIzsl5eBVpY2tm//hK5db+Hb3x4CfIm+fSewa9eXuPrqs/Jfq4hIFqQyQx8GXAycZmaLgh9fBW4HzjSzFcCZwXF07Gdp46ab/kyXLkvYtu1GYBYzZ77D+vXjaN9el+WLSOEyz9lDLfdWUVHhNTU16b2pqiprT91Zu3YTAwc+DFwN1DJs2J948cUfE4tZm84nIpIPZjbf3Stae120b/3P4NLDPY0aNYmBA/8F/BR4mEWLPuWll65SmItI0Yh2oGfhrspXX30Hs4eZNu0yoJ5LL52C+2Uce6yu+xaR4hLtReMM7qp0d0aMGMkzz/QFJgK3smHDDzn44DGtvVVEpCBFe4bexrsq582bRyxWxjPPPAM8yJ13/g33n3Pwwd2yX6OISEREe4Y+YULzzoSw37sqd+/ezRFHHMmqVasA+NznPseSJYspLy/PR7UiIqGK9gw9jbsqq6qq6NDhgGSYz5v3IitWLFeYi0jJiPYMHRLhvZ/LFLdu3Ur37j2Sx+eeey7Tpz9J4n4oEZHSEe0ZeivGjx/fLMyXL3+Lp56arjAXkZIU/Rl6C1atWsWhhx6WPL7++uu47bbbQqxIRCR8BRXo7s4FF1zItGnTkmObNtVy0EEHhViViEg0FMySS3V1NbFYWTLMf//7ybjHFeYiIoGCmKG/9dZbnHDCiQD07duXlSvfoUOHDiFXJSISLQUxQ+/WrRunnnoq
s2fPYv36dQpzEZEWFMQMvXfv3jz//HNhlyEiEmkFMUMXEZHWKdBFRIqEAl1EpEgo0EVEioQCXUSkSCjQRUSKhAJdRKRIKNBFRIqEuXv+PsysFliTxlt6AptyVE5bRbEmiGZdUawJollXFGuCaNYVxZogt3UNdPderb0or4GeLjOrcfeKsOtoKoo1QTTrimJNEM26olgTRLOuKNYE0ahLSy4iIkVCgS4iUiSiHuiVYRfQgijWBNGsK4o1QTTrimJNEM26olgTRKCuSK+hi4hI6qI+QxcRkRRFMtDN7PdmttHM3gy7lgZm9lkze97MlprZYjO7KgI1HWBmr5rZP4OaxoddUwMzKzOzhWb2TNi1NDCz1Wb2hpktMrOasOtpYGbdzOwJM1sW/Pk6MeR6Dg9+jxp+fGRmV4dZUwMz+8/gz/qbZvaYmR0QgZquCupZHPbvUySXXMxsOLAd+IO7HxN2PQBm1gfo4+4LzKwLMB84192XhFiTAZ3cfbuZlQMvAVe5+yth1dTAzH4CVABd3f2csOuBRKADFe4eqWuYzWwKMM/dJ5lZe6Cju28Nuy5I/MMMrAe+5O7p3EOSi1r6kfgzfpS77zSzPwF/cfeHQ6zpGGAqMBTYDfwV+IG7rwijnkjO0N39ReDDsOtoyt03uPuC4OfbgKVAv5BrcnffHhyWBz9C/xfazPoDXwMmhV1L1JlZV2A4MBnA3XdHJcwDpwPvhB3mTbQDDjSzdkBH4L2Q6zkSeMXdd7h7HfAC8PWwiolkoEedmQ0ChgDV4VaSXNpYBGwEZrl76DUB9wDXAvGwC9mDA8+a2XwzGxt2MYFDgVrgoWCJapKZdQq7qCZGAY+FXQSAu68H7gLWAhuAf7n7s+FWxZvAcDM7yMw6Al8FPhtWMQr0NJlZZ+BJ4Gp3/yjsety93t2PA/oDQ4NvAUNjZucAG919fph17MMwd/8CcDZwRbC0F7Z2wBeA+919CPAxcH24JSUEyz8jgMfDrgXAzLoDI4FDgL5AJzO7KMya3H0pcAcwi8Ryyz+BurDqUaCnIVinfhKocvfpYdfTVPBt+lzgKyGXMgwYEaxXTwVOM7M/hltSgru/F3zdCDxFYt0zbOuAdU2+s3qCRMBHwdnAAnf/IOxCAmcAq9y91t0/BaYDJ4VcE+4+2d2/4O7DSSwVh7J+Dgr0lAUbkJOBpe5+d9j1AJhZLzPrFvz8QBJ/4JeFWZO73+Du/d19EIlv159z91BnUQBm1inYzCZY0jiLxLfLoXL394F3zezwYOh0ILSN9j1cQESWWwJrgRPMrGPw9/F0EntZoTKzzwRfBwDfIMTfs3ZhffD+mNljwKlATzNbB9zs7pPDrYphwMXAG8GaNcDP3f0vIdbUB5gSXIkQA/7k7pG5TDBiegNPJXKAdsCj7v7XcEtKuhKoCpY4VgKXhlwPwXrwmcD3wq6lgbtXm9kTwAISyxoLicDdmcCTZnYQ8ClwhbtvCauQSF62KCIi6dOSi4hIkVCgi4gUCQW6iEiRUKCLiBQJBbqISJFQoIuIFAkFuohIkVCgi4gUif8PwolKM54b4zgAAAAASUVORK5CYII=\n", 11 | "text/plain": [ 12 | "
" 13 | ] 14 | }, 15 | "metadata": { 16 | "needs_background": "light" 17 | }, 18 | "output_type": "display_data" 19 | } 20 | ], 21 | "source": [ 22 | "# Data Preprocessing\n", 23 | "import pandas as pd\n", 24 | "import numpy as np\n", 25 | "import matplotlib.pyplot as plt\n", 26 | "\n", 27 | "dataset = pd.read_csv('D:\\datasets\\studentscores.csv')\n", 28 | "X = dataset.iloc[ : , : 1 ].values\n", 29 | "Y = dataset.iloc[ : , 1 ].values\n", 30 | "print(\"X\")\n", 31 | "print(X)\n", 32 | "print(\"Y\")\n", 33 | "print(Y)\n", 34 | "\n", 35 | "\n", 36 | "from sklearn.model_selection import train_test_split\n", 37 | "X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size = 1/4, random_state = 0) \n", 38 | "\n", 39 | "# Fitting Simple Linear Regression Model to the training set\n", 40 | "from sklearn.linear_model import LinearRegression\n", 41 | "regressor = LinearRegression()\n", 42 | "regressor = regressor.fit(X_train, Y_train)\n", 43 | "\n", 44 | "# Predecting the Result\n", 45 | "Y_pred = regressor.predict(X_test)\n", 46 | "\n", 47 | "# Visualising the Training results\n", 48 | "plt.scatter(X_train , Y_train, color = 'red')\n", 49 | "plt.plot(X_train , regressor.predict(X_train), color ='blue')\n", 50 | "\n", 51 | "# Visualizing the test results\n", 52 | "plt.scatter(X_test , Y_test, color = 'red')\n", 53 | "plt.plot(X_test , regressor.predict(X_test), color ='blue')\n", 54 | "plt.show()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.7.3" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 2 93 | } 94 | -------------------------------------------------------------------------------- /homework-06/requirement.md: -------------------------------------------------------------------------------- 1 | ## 完成GitHub上100天钟第二天的代码练习 2 | # 上传代码文件,命名为姓名的拼音.ipynb 3 | --------------------------------------------------------------------------------