└── temp222.py


/temp222.py:
--------------------------------------------------------------------------------
   1 | # -*- coding: utf-8 -*-
   2 | """
   3 | Spyder Editor
   4 | 
   5 | This is a temporary script file.
   6 | """
   7 | import numpy as np
   8 | 
   9 | 
  10 | 
  11 | MyList = [1,2,3,4,5,6,7,8,9,10]
  12 | 
  13 | list(filter(lambda x: x % 3 == 0, MyList))
  14 | 
  15 | list(map(lambda x: x * 2, MyList))
  16 | 
  17 | 
  18 | from functools import reduce ##二元计算函数
  19 | reduce(lambda x, y: x + y, MyList) #求和计算
  20 | 
  21 | import os
  22 | print(os.getcwd())
  23 | 
  24 | os.chdir(r'E:\python documents')  # raw string: '\p' in a plain literal is an invalid escape sequence (same path value)
  25 | print(os.getcwd())  # confirm the new working directory
  26 | 
  27 | MyArray1 = np.arange(1,20) 
  28 | MyArray1
  29 | 
  30 | range(1,10,2)
  31 | 
  32 | list(range(1,10,2))
  33 | 
  34 | MyArray2=np.array([1,2,3,4,3,5])
  35 | MyArray2
  36 | 
  37 | MyArray3=np.zeros((5,5)) 
  38 | MyArray3
  39 | 
  40 | MyArray4=np.ones((5,5))
  41 | MyArray4
  42 | 
  43 | np.full((3,5),2)
  44 | 
  45 | ##设置随机数种子，方便日后复原结果
  46 | rand=np.random.RandomState(1)
  47 | 
  48 | MyArray5=rand.randint(0,100,[3,5])   #3x5 array of random integers drawn from [0, 100): low is inclusive, high is exclusive
  49 | MyArray5
  50 | 
  51 | MyArray6=np.zeros([4,5],dtype=np.int)
  52 | MyArray6
  53 | 
  54 | myArray=np.array(range(0,10))
  55 | 
  56 | print("myArray=",myArray)
  57 | print("myArray[1:9:2]=",myArray[1:9:2]) 
  58 | print("myArray[:9:2]=",myArray[:9:2]) 
  59 | print("myArray[::2]=",myArray[::2]) 
  60 | print("myArray[::]=",myArray[::])   
  61 | print("myArray[:8:]=",myArray[:8:])  
  62 | print("myArray[:8]=",myArray[0:8])  
  63 | print("myArray[4::]=",myArray[4::])  
  64 | print("myArray[9:1:-2]=",myArray[9:1:-2]) 
  65 | print("myArray[::-2]=",myArray[::-2])  
  66 | ##Common beginner mistake: myArray[2,5,6] is read as a 3-D index and fails on a 1-D array; pass a list for fancy indexing
  67 | print("myArray[[2,5,6]]=",myArray[[2,5,6]]) 
  68 | print("myArray[myArray>5]=",myArray[myArray>5]) 
  69 | 
  70 | MyArray7=np.arange(1,21)  
  71 | MyArray7
  72 | 
  73 | MyArray7.shape
  74 | 
  75 | MyArray8=MyArray7.reshape(4,5)  
  76 | MyArray8
  77 | 
  78 | #Swap axes 0 and 1 (i.e. transpose), turning the 4x5 array into a 5x4 array
  79 | MyArray8=MyArray8.swapaxes(0,1)
  80 | MyArray8
  81 | #将多行数据转化成一行的数据
  82 | MyArray8.flatten()
  83 | 
  84 | MyArray8.tolist()
  85 | 
  86 | MyArray8.astype(float)  # returns a float64 copy; the np.float alias was removed in NumPy 1.24 and builtin float is equivalent
  87 | 
  88 | np.ndim(MyArray5)  # number of dimensions; np.rank was removed from NumPy (1.18) and np.ndim is its replacement
  89 | 
  90 | np.ndim(MyArray5)
  91 | 
  92 | np.shape(MyArray5)
  93 | 
  94 | MyArray5.shape
  95 | 
  96 | MyArray5.size
  97 | 
  98 | type(MyArray5)  
  99 | 
 100 | MyArray5*10
 101 | 
 102 | x=np.array([11,12,13,14,15,16,17,18])
 103 | x1,x2,x3=np.split(x,[3,5]) 
 104 | print(x1,x2,x3)
 105 | 
 106 | upper,lower=np.vsplit(MyArray5.reshape(5,3),[1]) ##split the 5x3 array before row 1: upper holds row 0, lower holds rows 1-4
 107 | print("上半部分为\n",upper)
 108 | print("\n\n下半部分为\n",lower)
 109 | 
 110 | 
 111 | np.concatenate((lower,upper),axis=0)
 112 | 
 113 | np.vstack([upper,lower])  ##stack vertically (row-wise), upper first and lower second, which rebuilds the 5x3 array
 114 | 
 115 | np.hstack([upper,lower])  ##水平堆叠
 116 | 
 117 | np.add(MyArray5,1)       ##做的是矩阵加法 每个元素都相加
 118 | 
 119 | np.zeros(10,dtype="int16")
 120 | 
 121 | np.zeros(10,dtype="float")
 122 | 
 123 | a1=np.array([1,2,3,None])
 124 | a1
 125 | 
 126 | a1=np.array([1,2,3,None,np.nan])
 127 | a1
 128 | 
 129 | myArray1=np.array([11,12,13,14,15,16,17,18])
 130 | np.delete(myArray1,2)
 131 | 
 132 | np.insert(myArray1,1,88) #数组、位置、值
 133 | 
 134 | ##缺失值处理
 135 | np.isnan(myArray1)
 136 | 
 137 | np.any(np.isnan(myArray1))
 138 | 
 139 | np.all(np.isnan(myArray1))
 140 | 
 141 | MyArray=np.array([1,2,3,np.nan])
 142 | np.nansum(MyArray)   #sum that treats NaN as zero, so the NaN entry is skipped rather than added
 143 | 
 144 | 
 145 | 
 146 | ##这个是表示广播原则
 147 | A1=np.array(range(1,10)).reshape([3,3])
 148 | A1
 149 | 
 150 | A2=np.array([10,10,10])
 151 | A2
 152 | 
 153 | A1+A2
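 154 | ##Broadcasting requires compatible shapes (each trailing dimension equal or 1); the 2x5 and 4x4 arrays below are not compatible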
 155 | A3=np.arange(10).reshape(2,5)    
 156 | A3
 157 | 
 158 | A4=np.arange(16).reshape(4,4)
 159 | A4
 160 | 
 161 | A3+A4  # raises ValueError: shapes (2,5) and (4,4) cannot be broadcast; presumably a deliberate counter-example, but it stops a top-to-bottom run
 162 | 
 163 | ##ndarray的排序原则
 164 | myArray=np.array([11,18,13,12,19,15,14,17,16])
 165 | myArray
 166 | 
 167 | np.sort(myArray)
 168 | 
 169 | np.argsort(myArray)
 170 | 
 171 | MyArray=np.array([[21, 22, 23, 24,25],
 172 |        [35,  34,33, 32, 31],
 173 |        [ 1, 2,  3, 100, 4]])
 174 | 
 175 | np.sort(MyArray,axis=1) 
 176 | 
 177 | np.sort(MyArray,axis=0)
 178 | 
 179 | import pandas as pd
 180 | mySeries1=pd.Series(data = [11,12,13,14,15,16,17],index=["a","b","c","d","e","f","g"]) 
 181 | mySeries1
 182 | 
 183 | mySeries2=pd.Series([10], index=["a","b","c","d","e","f","g"]) 
 184 | mySeries2
 185 | 
 186 | mySeries4=pd.Series([21,22,23,24,25,26,27], index=["a","b","c","d","e","f","g"]) 
 187 | mySeries4.index
 188 | 
 189 | mySeries4.values  
 190 | 
 191 | mySeries4['b']
 192 | 
 193 | mySeries4[["a","b","c"]] 
 194 | 
 195 | mySeries4["a":"d"] 
 196 | 
 197 | mySeries4[1:4:2]
 198 | 
 199 | mySeries4
 200 | 
 201 | "c" in mySeries4
 202 | 
 203 | mySeries4=pd.Series([21,22,23,24,25,26,27], index=["a","b","c","d","e","f","g"]) 
 204 | mySeries5=mySeries4.reindex(index=["b","c","a","d","e","g","f"])
 205 | mySeries5 
 206 | 
 207 | ##关于dataframe的相关操作
 208 | import numpy as np
 209 | 
 210 | df2=pd.DataFrame(np.arange(10).reshape(2,5))
 211 | df2
 212 | df2.index
 213 | 
 214 | df2.index.size
 215 | 
 216 | df2.columns
 217 | 
 218 | df2.columns.size
 219 | 
 220 | df2 = pd.read_csv('C:/Users/Administrator/Desktop/数据分析课件/PythonFromDAToDS-master/DataSets/bc_data.csv')
 221 | df2.shape
 222 | 
 223 | df2=df2[["id","diagnosis","area_mean"]]  ##取三列，然后查看这三列
 224 | 
 225 | df2.head()
 226 | 
 227 | df2.shape
 228 | df2.index.size
 229 | df2.columns
 230 | df2.columns.size
 231 | 
 232 | ##引用行或者列
 233 | df2['id'].head()
 234 | df2.id.head()
 235 | 
 236 | df2["id"][2]
 237 | 
 238 | df2.id[2]
 239 | 
 240 | df2["id"][[2,4]]
 241 | 
 242 | ##第二种方法我们可以称之为iloc方法
 243 | df2.loc[1,"id"] 
 244 | df2.iloc[1,0]
 245 | df2.ix[1,"id"]
 246 | df2.ix[[1,5],["id"]]
 247 | df2.ix[1:5,["id"]]
 248 | df2[["area_mean","id"]].head()  ##可以调整列输出新的数据框
 249 | 
 250 | ###index操作
 251 | df2.index
 252 | df2.columns
 253 | df2["id"].head()
 254 | df2.reindex(index=["1","2","3"],columns=["1","2","3"])
 255 | df2.head()
 256 | 
 257 | df2.reindex(index=[2,3,1], columns=["diagnosis","id","area_mean"])##调整列的位置
 258 | 
 259 | df3=df2.reindex(index=[2,3,1], columns=["diagnosis","id","area_mean","MyNewColumn"],fill_value=100)
 260 | df3
 261 | 
 262 | df2=df2[["id","diagnosis","area_mean"]]
 263 | df2.head()
 264 | df2.drop([2]).head()   ##删除行
 265 | df2.head()
 266 | 
 267 | 
 268 | df2.drop([3,4],axis=0, inplace=True) ##drop rows by passing a list of index labels (axis=0); axis=1 looked for columns named 3 and 4 and raised KeyError
 269 | df2.drop('id', axis=1, inplace=True)  ##drop a column by name (axis=1); inplace=True modifies df2 itself instead of returning a new frame
 270 | 
 271 | del df2["area_mean"] 
 272 | df2.head()
 273 | 
 274 | df2 =pd.read_csv('C:/Users/Administrator/Desktop/数据分析课件/PythonFromDAToDS-master/DataSets/bc_data.csv')
 275 | df2=df2[["id","diagnosis","area_mean"]]
 276 | df2[df2.area_mean> 1000].head()
 277 | df2[df2.area_mean> 1000][["id","diagnosis"]].head()
 278 | 
 279 | df2.loc[df2.area_mean> 1000,["id","diagnosis"]].head()   #关于上述的表达式，该表达式也是可以的
 280 | 
 281 | df4=pd.DataFrame(np.arange(6).reshape(2,3))
 282 | df4
 283 | df5=pd.DataFrame(np.arange(10).reshape(2,5)) 
 284 | df5
 285 | 
 286 | df4+df5
 287 | df6=df4.add(df5,fill_value=10)  #这个fill_value表示如何将两个表格进行相加，同时将缺失值使用某一个具体值进行替代
 288 | df6
 289 | 
 290 | s1=pd.Series(np.arange(4))
 291 | s1
 292 | df6-s1          ##subtracts s1 from every row, matching s1's index labels to df6's column labels; columns without a match become NaN
 293 | 
 294 | df5=pd.DataFrame(np.arange(10).reshape(2,5))
 295 | s1=pd.Series(np.arange(3))
 296 | df5-s1
 297 | 
 298 | df7=pd.DataFrame(np.arange(20).reshape(4,5))
 299 | df7+2
 300 | 
 301 | df7.cumsum()
 302 | df7
 303 | 
 304 | df7.rolling(2).sum()   ##rolling window of 2 along axis 0: each row is summed with the row above it (first row is NaN)
 305 | df7.T.rolling(2).sum().T ##same window along axis 1 (adjacent columns); rolling(axis=1) is deprecated since pandas 2.1, so transpose instead
 306 | 
 307 | df7.cov()
 308 | df7.corr()    ##表示求皮尔逊相关系数  两个变量的变化趋势完全一样，相关系数肯定是等于1
 309 | 
 310 | ##如果单单只是用类似列表的方式来取值，那么就要使用下面的这种方法，如果说要是使用iloc的方法，那就可以使用简便方法
 311 | df2=df2[["id","diagnosis","area_mean"]][2:5]
 312 | df2.T
 313 | 
 314 | 
 315 | 
 316 | df6
 317 | df6>5
 318 | df6>s1
 319 | df6>(2,18)
 320 | 
 321 | 
 322 | df2.describe()  #统计信息
 323 | dt = df2[df2.diagnosis=='M']
 324 | 
 325 | dt.head()
 326 | dt.tail()
 327 | df2[df2.diagnosis=='M'].count()
 328 | df2[["area_mean","id"]].head()
 329 | 
 330 | df2.head(8)
 331 | df2.sort_values(by="area_mean",axis=0,ascending=True).head()
 332 | 
 333 | df2.sort_index(axis=1).head(3)  ##按照索引排序
 334 | 
 335 | df2.sort_index(axis=0,ascending=False).head(3)  ##按照字母顺序来
 336 | 
 337 | df2.head(3).to_excel("df3.xlsx")  ##export the first 3 rows to Excel (to_csv works the same way); pandas 2.0 dropped the legacy .xls writer, so write .xlsx
 338 | 
 339 | ##缺失值数据处理
 340 | df2.empty    ##True only when the DataFrame has no elements at all; it does NOT detect missing values (df2.isnull() does that)
 341 | 
 342 | A=pd.DataFrame(np.array([10,10,20,20]).reshape(2,2),columns=list("ab"),index=list("SW"))
 343 | A
 344 | list("ab")
 345 | B=pd.DataFrame(np.array([1,1,1,2,2,2,3,3,3]).reshape(3,3), columns=list("abc"),index=list("SWT"))
 346 | B
 347 | C=A+B 
 348 | C
 349 | A.add(B,fill_value=0) 
 350 | 
 351 | A.add(B,fill_value=A.stack().mean())
 352 | 
 353 | A.mean()
 354 | A.stack() 
 355 | A.stack().mean()
 356 | C
 357 | C.isnull()
 358 | C.dropna(axis='index')
 359 | C.fillna(0)
 360 | C.bfill()   ##backward fill: each NaN takes the next valid value below it in the same column; replaces the deprecated fillna(method="bfill")
 361 | C.ffill(axis=1) ##forward fill along each row: each NaN takes the last valid value to its left; replaces the deprecated fillna(method="ffill",axis=1)
 362 | 
 363 | 
 364 | ##分组操作
 365 | df2
 366 | df2.groupby("diagnosis")["area_mean"].mean()
 367 | 
 368 | df2.groupby("diagnosis")["area_mean"].aggregate(["mean","sum","max","median"])
 369 | 
 370 | df2.groupby("diagnosis")["area_mean"].describe()
 371 | 
 372 | df2.groupby("diagnosis")["area_mean"].aggregate(["mean","sum"])  ##一个是横向排列，下面的是纵向排列
 373 | 
 374 | df2.groupby("diagnosis")["area_mean"].aggregate(["mean","sum"]).unstack()
 375 | 
 376 | def myfunc(x):
 377 |    x["area_mean"]=x["area_mean"].sum()
 378 |    return x
 379 | 
 380 | a=df2.groupby("diagnosis").apply(myfunc).head()
 381 | 
 382 | 
 383 | import pandas as pd
 384 | df = pd.read_excel(r"..\Data\Chapter05.xlsx",sheet_name=2)
 385 | df.drop_duplicates() #drop rows that are exact duplicates across all columns (rows, not columns)
 386 | df.drop_duplicates(subset = "唯一识别码") #only compare the given column when looking for duplicates
 387 | df.drop_duplicates(subset = ["客户姓名","唯一识别码"])
 388 | df.drop_duplicates(subset = ["客户姓名","唯一识别码"],keep = "last") #keep: "first" keeps the first occurrence, "last" keeps the last one, False drops every duplicated row
 389 | 
 390 | df["唯一识别码"].astype("float64")#convert the 唯一识别码 column to float64; the result is not assigned back, so df is unchanged
 391 | 
 392 | ##为数据框设置索引
 393 | df.columns = ["订单编号","客户姓名","唯一识别码","成交时间"]#header需要设置为None，否则会覆盖第一行数据
 394 | df.index = [1,2,3,4,5]
 395 | 
 396 | ##数据框重新设置索引和列名
 397 | df.rename(columns={"订单编号":"新订单编号","客户姓名":"新客户姓名"}) #重命名列索引
 398 | df.rename(index = {1:"一",2:"二",3:"三"}) #重命名行索引
 399 | df.rename(columns={"订单编号":"新订单编号","客户姓名":"新客户姓名"},index = {1:"一",2:"二",3:"三",4:'四'})#同时重命名列和行索引
 400 | 
 401 | ##选择某几列
 402 | df['客户姓名']
 403 | df[['订单编号','客户姓名']]
 404 | df.iloc[:,[0,2]]
 405 | 
 406 | #请切记，列表中关于取值的操作和pandas当中的iloc列表取值完全是两回事情
 407 | #列表中的第一个框其实是规定了行的数量，而第二个框事实上是规定了列的数量
 408 | 
 409 | 
 410 | #Select rows where 年龄 < 200 and 唯一识别码 < 102; each condition must be wrapped in parentheses
 411 | df[(df['年龄']<200) & (df['唯一识别码']<102)]
 412 | 
 413 | 
 414 | df = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =0)
 415 | #对某一列进行数值替换
 416 | df["年龄"].replace(240,33,inplace = True) #第一个值表示表中的值，第二个数字表示要替换的值
 417 | df
 418 | #对全表中的缺失值进行替换
 419 | df.replace(np.NaN,0)
 420 | 
 421 | df.replace([240,260,280],35)
 422 | 
 423 | df.replace({240:32,260:33,280:34})
 424 | 
 425 | df.sort_values(by=["销售ID"])
 426 | #按照销售ID进行降序排序
 427 | df.sort_values(by=["销售ID"],ascending= False)
 428 | 
 429 | 
 430 | df1 = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =2)
 431 | df1
 432 | #默认空值是排在最后面
 433 | df1.sort_values( by = ["销售ID"])
 434 | #通过设置na_position参数将缺失的值显示在前面，默认参数值是last
 435 | df1.sort_values(by = ["销售ID"],na_position = "first")
 436 | 
 437 | ##按照多列数字进行排序
 438 | df3 = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =3)
 439 | df3
 440 | #将需要排序的by里面，然后在设置升降序
 441 | df3.sort_values(by=["销售ID","成交时间"],ascending = [True,False])
 442 | 
 443 | df4 = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =4)
 444 | df4
 445 | 
 446 | df5 = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =1)
 447 | df5["销售ID"]
 448 | #method取average时与Excel中的RANK.AVG函数一样
 449 | df5["销售ID"].rank(method ="average")
 450 | 
 451 | df5["销售ID"].rank(method ="first")  ##排列的是index如果两个相同的话取第一个
 452 | df5["销售ID"].rank(method ="min")    ##如果两个相同取最小的排名
 453 | df5["销售ID"].rank(method ="max")    ##如果两个相同取最大的排名
 454 | 
 455 | ##删除列方法
 456 | df5.drop(["销售ID","成交时间"],axis =1)
 457 | 
 458 | df5.drop(df5.columns[[4,5]],axis=1)     ##这两种方法要注意
 459 | 
 460 | df5.drop(columns = ["销售ID","成交时间"])
 461 | ##删除行的方法
 462 | df5.drop(["0a","1b"],axis = 0)
 463 | df5.drop(df.index[[0,1]])
 464 | df5.drop(index = ["0a","1b"])
 465 | 
 466 | ##删除特定的行
 467 | df5[df5["年龄"]<40]
 468 | 
 469 | ##数值计算
 470 | df5["销售ID"].value_counts()
 471 | #计算销售ID的值占比
 472 | df5["销售ID"].value_counts(normalize = True)
 473 | 
 474 | df5["销售ID"].unique()
 475 | 
 476 | ##采用的是数值查找的形式
 477 | df5['年龄'].isin([31,21])
 478 | 
 479 | df5.isin(["A2",31])
 480 | 
 481 | df6 = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =5)
 482 | df6
 483 | pd.cut(df6["年龄"],bins = [0,3,6,9,10])  ##这个函数是用来进行分组的,十分有用
 484 | 
 485 | pd.qcut(df6["年龄"],3)        ##这个函数表示均匀地切分
 486 | 
 487 | ##插入列数据
 488 | df7 = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =3)
 489 | df7
 490 | df7.insert(2,"商品类别",["cat01","cat02","cat03","cat04","cat05"])
 491 | 
 492 | df7["商品类别"]= ["cat01","cat02","cat03","cat04","cat05"]
 493 | df7
 494 | 
 495 | ##Inserting rows: DataFrame.insert only adds columns, so the workaround used here is to transpose, insert, then transpose back
 496 | a=df7.T
 497 | #再转置则回到原来的结果
 498 | df7.T.T
 499 | m=df["成交时间"]
 500 | 
 501 | df8 = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =6)
 502 | df8
 503 | df8.stack()
 504 | 
 505 | 
 506 | ###宽表转为长表
 507 | import pandas as pd
 508 | df = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =7)
 509 | df
 510 | #设置索引
 511 | df.set_index(["Company","Name"])
 512 | #将列索引转为行索引
 513 | df.set_index(["Company","Name"]).stack()
 514 | #重置索引
 515 | df.set_index(["Company","Name"]).stack().reset_index()
 516 | #重命名索引
 517 | df.set_index(["Company","Name"]).stack().reset_index().rename(columns={"level_2":"Year",0:"sale"})
 518 | 
 519 | df.melt(id_vars=["Company","Name"],var_name="Year",value_name = "Sale")
 520 | 
 521 | ##长表转化为宽表
 522 | df2 = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =8)
 523 | df2
 524 | 
 525 | df2.pivot_table(index=["Company","Name"],columns="Year",values="Sale")
 526 | 
 527 | ###apply函数和applymap函数
 528 | df = pd.read_excel(r"C:\Users\Administrator\Desktop\Excel-Python-master\Data\Chapter07.xlsx",sheet_name =9)
 529 | df
 530 | df["C1"].apply(lambda x:x+1)  ##这个表示局部函数，下面那个表示全局函数
 531 | 
 532 | df.applymap(lambda x:x+1)  ##对表内每一个元素都加1
 533 | 
 534 | df.apply(lambda x:x+1,axis=0)
 535 | 
 536 | df.iloc[0,:]=df.iloc[0,:].apply(lambda x:x+1)  ##apply的按行操作
 537 | 
 538 | ##算数相加
 539 | df = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter08.xlsx",sheet_name = 0)
 540 | df
 541 | df.index=["S1","S2","S3"]
 542 | 
 543 | df["C1"]+df["C2"]
 544 | 
 545 | df["C1"]-df["C2"]
 546 | 
 547 | df["C1"]*df["C2"]
 548 | 
 549 | df["C1"]/df["C2"]
 550 | 
 551 | df["C1"]+1
 552 | df["C1"]-1
 553 | 
 554 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter08.xlsx",sheet_name = 0)
 555 | #添加行索引
 556 | df1.index=["S1","S2","S3"]
 557 | df1
 558 | df1["C1"] > df1["C2"]
 559 | df1["C1"] < df1["C2"]
 560 | df1["C1"] != df1["C2"]
 561 | 
 562 | df1.count()
 563 | 
 564 | df1.count(axis =0)
 565 | 
 566 | df.sum(axis=1)
 567 | 
 568 | df["C1"].sum()
 569 | 
 570 | df.mean()
 571 | 
 572 | df.mean( axis =1)
 573 | 
 574 | df["C1"].mean()
 575 | 
 576 | df.max()
 577 | 
 578 | df.max( axis =1)
 579 | 
 580 | df["C1"].max()
 581 | 
 582 | ##求众数
 583 | df3 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter08.xlsx",sheet_name=1)
 584 | df3.index=["S1","S2","S3"]
 585 | df3.mode()
 586 | 
 587 | ##四分位数
 588 | df5 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter08.xlsx",sheet_name=1)
 589 | df5.index=["S1","S2","S3","S4","S5"]
 590 | df5
 591 | df5.quantile(0.25)#求四分之一分数位
 592 | df5.quantile(0.75)#求四分之三分数位
 593 | df5.quantile(0.25,axis = 1)#求每一行的四分之一分数位
 594 | 
 595 | df5.corr()
 596 | 
 597 | ##关于时间的计算
 598 | from datetime import datetime
 599 | datetime.now()
 600 | 
 601 | datetime.now().year 
 602 | datetime.now().month 
 603 | datetime.now().day 
 604 | 
 605 | 
 606 | datetime.now().weekday()+1      ##周几
 607 | 
 608 | datetime.now().isocalendar()    ##returns (ISO year, ISO week number, ISO weekday), not year/month/day
 609 | 
 610 | datetime.now().isocalendar()[1]  ##第几周
 611 | 
 612 | ##显示日期
 613 | datetime.now().date()
 614 | 
 615 | datetime.now().time()
 616 | 
 617 | datetime.now().strftime("%Y-%m-%d")
 618 | 
 619 | index = pd.DatetimeIndex(['2018-01-01','2018-01-02','2018-01-03','2018-01-04','2018-01-05',
 620 |                           '2018-01-06','2018-01-07','2018-01-08','2018-01-09','2018-01-10'])
 621 | data = pd.DataFrame(np.arange(1,11),columns =["num"],index = index)
 622 | data
 623 | 
 624 | data.loc["2018"]  # all rows dated 2018 (partial-string match on the DatetimeIndex); the data["2018"] row lookup was removed in pandas 2.0
 625 | 
 626 | data["2018-01-01":"2018-01-05"]
 627 | 
 628 | ##时间的比较与计算
 629 | import pandas as pd
 630 | from datetime import datetime
 631 | df9 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter06.xlsx",sheet_name = 4)
 632 | df9[df9["成交时间"]>datetime(2018,8,8)]
 633 | 
 634 | df9[df9["成交时间"] == datetime(2018,8,8)]
 635 | 
 636 | df9[(df9["成交时间"]>datetime(2018,8,8))&(df9["成交时间"]< datetime(2018,8,11))]
 637 | 
 638 | cha = datetime(2018,5,21,19,50)-datetime(2018,5,18,20,32)
 639 | cha
 640 | 
 641 | cha.days
 642 | 
 643 | cha.seconds
 644 | 
 645 | from datetime import timedelta,datetime
 646 | date = datetime.now()
 647 | date
 648 | 
 649 | date+timedelta(days =1)
 650 | 
 651 | date+timedelta(seconds = 60)
 652 | 
 653 | date - timedelta(days =1)
 654 | 
 655 | from pandas.tseries.offsets import Hour,Minute,Day,MonthEnd
 656 | date = datetime.now()
 657 | date
 658 | date+Day(1)
 659 | date+Hour(1)
 660 | date+Minute(10)
 661 | date+MonthEnd(1)
 662 | 
 663 | ###数据分组***
 664 | import pandas as pd
 665 | df3 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter10.xlsx",sheet_name =0)
 666 | df3
 667 | 
 668 | df3.groupby("客户分类").count()
 669 | g=df3.groupby(df3["客户分类"])
 670 | g.mean()
 671 | g.describe()
 672 | 
 673 | df3.groupby("客户分类").sum()
 674 | 
 675 | df3.groupby(["客户分类","区域"]).count()
 676 | 
 677 | df3.groupby(["客户分类","区域"]).count().reset_index()
 678 | 
 679 | df3.groupby(["客户分类","区域"]).describe()
 680 | 
 681 | df3["客户分类"]
 682 | df3.groupby(df3["客户分类"]).count()
 683 | df3.groupby([df3["客户分类"],df3["用户ID"]]).sum()
 684 | 
 685 | df3.groupby(df3["客户分类"])["用户ID"].count()
 686 | 
 687 | df3.groupby("客户分类").aggregate(["count","sum"])
 688 | df3.groupby("客户分类").aggregate({"用户ID":"count","7月销量":"sum","8月销量":"sum"})
 689 | 
 690 | df3.groupby("客户分类").sum()
 691 | 
 692 | df3.groupby("客户分类").sum().reset_index()
 693 | 
 694 | ##Pivot tables****
 695 | df7 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter10.xlsx",sheet_name =0)
 696 | pd.pivot_table(df7,values = "用户ID",columns ="区域",index="客户分类",aggfunc="count")
 697 | 
 698 | pd.pivot_table(df7,values = "用户ID",columns ="区域",index="客户分类",aggfunc="count",margins = True)
 699 | 
 700 | pd.pivot_table(df7,values = "用户ID",columns ="区域",index="客户分类",aggfunc="count",margins = True,fill_value =0)
 701 | 
 702 | ###pivot_table is driven by three main arguments: values, columns and index; once these are right the pivot table is essentially done
 703 | pd.pivot_table(df7,values = ["用户ID","7月销量"],columns="区域",index="客户分类",aggfunc={"用户ID":"count","7月销量":"sum"})
 704 | 
 705 | pd.pivot_table(df7,values="用户ID",columns="区域",index="客户分类",aggfunc="count")
 706 | 
 707 | pd.pivot_table(df7,values="用户ID",columns="区域",index="客户分类",aggfunc="count").reset_index()
 708 | 
 709 | 
 710 | #关于多表拼接
 711 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =0)
 712 | df1
 713 | 
 714 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =1)
 715 | df2
 716 | ###一对一
 717 | pd.merge(df1,df2)  ##如果id有两个相似值的话会出现排列组合的情况
 718 | 
 719 | df3 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =2)
 720 | df3
 721 | 
 722 | ##多对一
 723 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =2)
 724 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =3)
 725 | pd.merge(df1,df2,on = "学号")
 726 | 
 727 | ##多对多
 728 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =4)
 729 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =3)
 730 | pd.merge(df1,df2)
 731 | 
 732 | ##指定连接键 on
 733 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =0)
 734 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =1)
 735 | df1
 736 | df2
 737 | pd.merge(df1,df2)
 738 | ##使用on作为键名连接的方式
 739 | pd.merge(df1,df2,on="学号")
 740 | 
 741 | ###指定多个键名
 742 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =0)
 743 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =5)
 744 | pd.merge(df1,df2,on=["姓名","学号"])    ###两个变量叠加形成一个新的独一无二的索引
 745 | 
 746 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =6)
 747 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =1)
 748 | pd.merge(df1,df2,left_on = "编号",right_on = "学号")
 749 | 
 750 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =7)
 751 | df1.set_index("编号")
 752 | 
 753 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =1)
 754 | df2.set_index("学号")
 755 | 
 756 | ##左右表的连接键均为索引
 757 | pd.merge(df1.set_index("编号"),df2.set_index("学号"),left_index = True,right_index = True)
 758 | 
 759 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/chapter11.xlsx",sheet_name =1)
 760 | df2
 761 | 
 762 | pd.merge(df1.set_index("编号"),df2,left_index = True,right_on = "学号")
 763 | 
 764 | ##连接方式，使用how来指定具体的连接方式
 765 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =0)
 766 | df1
 767 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =8)
 768 | df2
 769 | pd.merge(df1,df2,on="学号",how="inner")  ##这个连接方式表示取交集
 770 | 
 771 | pd.merge(df1,df2,on="学号",how="left") ##这个连接方式表示取左集合全部
 772 | 
 773 | pd.merge(df1,df2,on="学号",how="right")##这个连接方式表示取右集合的全部
 774 | 
 775 | pd.merge(df1,df2,on="学号",how="outer") ##外连接，两个表的并集
 776 | 
 777 | ###表的纵向合并
 778 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =9)
 779 | df1
 780 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =10)
 781 | df2
 782 | pd.concat([df1,df2])
 783 | 
 784 | ##索引设置
 785 | pd.concat([df1.set_index("编号"),df2.set_index("编号")],ignore_index = True)
 786 | 
 787 | ##重叠数据合并
 788 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =11)
 789 | df1
 790 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter11.xlsx",sheet_name =10)
 791 | df2
 792 | pd.concat([df1.set_index("编号"),df2.set_index("编号")],ignore_index = True)
 793 | 
 794 | ##删除重复值
 795 | pd.concat([df1.set_index("编号"),df2.set_index("编号")],ignore_index = True).drop_duplicates()
 796 | 
 797 | ##导出为xlsx文件
 798 | import pandas as pd
 799 | df = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter12.xlsx",sheet_name =0 )
 800 | df.to_excel(excel_writer = r"C:\Users\Administrator\Desktop\Excel-Python-master\Note\测试文档01.xlsx")
 801 | 
 802 | df.to_excel(excel_writer = r"C:\Users\Administrator\Desktop\Excel-Python-master\Note\测试文档02.xlsx",
 803 |             sheet_name ="测试")
 804 | df.to_excel(excel_writer = r"C:\Users\Administrator\Desktop\Excel-Python-master\Note\测试文档03.xlsx",
 805 |             index = False)
 806 | 
 807 | df = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter12.xlsx",sheet_name =0 )
 808 | df.to_excel(excel_writer = r"C:\Users\Administrator\Desktop\Excel-Python-master\Note\测试文档04.xlsx",
 809 |             sheet_name = "测试文档",
 810 |             index=False,columns = ["用户ID","7月销量","8月销量","9月销量"])
 811 | 
 812 | ##关于Excel的输出，需要具备以下几个参数：excel_writer、sheet_name、index、columns
 813 | ##encoding、na_rep=0、
 814 | 
 815 | ##导出为csv格式文件
 816 | df = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter12.xlsx",sheet_name =2)
 817 | df.to_csv(path_or_buf = r"C:/Users/Administrator/Desktop/Excel-Python-master/Note/测试文档06.csv" ,
 818 |           index= False,
 819 |           columns = ["用户ID","7月销量","8月销量","9月销量"],
 820 |           sep=",",
 821 |           na_rep = 0,
 822 |           encoding = "gbk" #设置为gbk或者utf-8-sig
 823 |          )
 824 | 
 825 | ###关于多表的输出的问题
 826 | df1 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter12.xlsx",sheet_name =0)
 827 | df2 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter12.xlsx",sheet_name =1)
 828 | df3 = pd.read_excel(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/Chapter12.xlsx",sheet_name =2)
 829 | #声明一个对象
 830 | writer = pd.ExcelWriter(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/test03.xlsx",
 831 |                         engine = "xlsxwriter")
 832 | #将df1、df2、df3写入Excel中的sheet1、sheet2、sheet3
 833 | #重命名表1、表2、表3
 834 | df1.to_excel(writer,sheet_name ="表1",index=False)
 835 | df2.to_excel(writer,sheet_name ="表2",index=False)
 836 | df3.to_excel(writer,sheet_name ="表3",index=False)
 837 | #保存读写的内容
 838 | writer.save()
 839 | 
 840 | 
 841 | 
 842 | 
 843 | 
 844 | ###关于Python画图的问题
 845 | ##建立画布
 846 | #导入matplotlib库中的pyplot并起名为plt
 847 | import matplotlib.pyplot as plt
 848 | #让画布直接在jupyter Notebook中展示出来
 849 | %matplotlib inline
 850 | #解决中文乱码问题
 851 | plt.rcParams["font.sans-serif"]='SimHei'  ##字体simhei表示中文中的黑体
 852 | #解决负号无法正常显示问题
 853 | plt.rcParams["axes.unicode_minus"]= False
 854 | #设置为矢量图
 855 | %config InlineBackend.figure_format = 'svg'
 856 | #建立画布
 857 | fig = plt.figure()
 858 | #设置画布的高与长
 859 | plt.figure(figsize = (8,6))
 860 | 
 861 | ##用add_subplot函数建立坐标系
 862 | fig = plt.figure()
 863 | ax1 = fig.add_subplot(1,1,1)
 864 | #建立4个坐标系
 865 | fig = plt.figure()
 866 | ax1 = fig.add_subplot(2,2,1)
 867 | ax2 = fig.add_subplot(2,2,2)
 868 | ax3 = fig.add_subplot(2,2,3)
 869 | ax4 = fig.add_subplot(2,2,4)
 870 | 
 871 | ##用plt.subplot2grid函数建立坐标系
 872 | plt.subplot2grid((2,2),(0,0))
 873 | 
 874 | import numpy as np
 875 | x = np.arange(6)
 876 | y = np.arange(6)
 877 | 
 878 | plt.subplot2grid((2,2),(0,0))
 879 | plt.plot(x,y)
 880 | plt.subplot2grid((2,2),(0,1))
 881 | plt.bar(x,y)
 882 | 
 883 | ##用plt.subplot函数建立坐标系
 884 | #将图表分成2行2列，并在第1个坐标系里面绘图
 885 | plt.subplot(2,2,1)
 886 | import numpy as np
 887 | x = np.arange(6)
 888 | y = np.arange(6)
 889 | #在第1个坐标系上做折线图
 890 | plt.subplot(2,2,1)
 891 | plt.plot(x,y)
 892 | #在第4个坐标系上做柱状图
 893 | plt.subplot(2,2,4)
 894 | plt.bar(x,y)
 895 | 
 896 | ##Create axes with plt.subplots
 897 | #将图表整个区域分成2行2列，并将4个坐标系全部返回
 898 | fig,axes = plt.subplots(2,2)
 899 | import numpy as np
 900 | x = np.arange(6)
 901 | y = np.arange(6)
 902 | #在[0,0]坐标系中绘制折线图
 903 | axes[0,0].plot(x,y)
 904 | #在[1,1]坐标系中绘制柱状图
 905 | axes[1,1].bar(x,y)
 906 | 
 907 | ###设置坐标轴
 908 | plt.subplot(1,1,1)
 909 | x = np.array([1,2,3,4,5,6,7,8,9])
 910 | y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
 911 | plt.plot(x,y)
 912 | plt.xlabel("月份")
 913 | plt.ylabel("注册量")
 914 | 
 915 | ##通过设置label参数设置坐标到x和y轴的距离
 916 | plt.subplot(1,1,1)
 917 | x = np.array([1,2,3,4,5,6,7,8,9])
 918 | y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
 919 | plt.plot(x,y)
 920 | plt.xlabel("月份",labelpad = 10)
 921 | plt.ylabel("注册量",labelpad = 10)
 922 | 
 923 | #设置坐标轴的样式,坐标轴的样式可以进行调整的
 924 | plt.subplot(1,1,1)
 925 | x = np.array([1,2,3,4,5,6,7,8,9])
 926 | y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
 927 | plt.plot(x,y)
 928 | plt.xlabel("月份",fontsize="xx-large",color="#70AD47",fontweight="bold")
 929 | plt.ylabel("注册量",labelpad = 10)
 930 | 
 931 | ##设置坐标轴的刻度，这个是设置坐标轴刻度的设置
 932 | ##plt库中使用xticks、yticks，支持文本相关性质设置，使用方法与xlabel、ylabel的文本相关性质设置方法一致
 933 | plt.subplot(1,1,1)
 934 | x = np.array([1,2,3,4,5,6,7,8,9])
 935 | y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
 936 | plt.plot(x,y)
 937 | plt.xlabel("月份")
 938 | plt.ylabel("注册量")
 939 | plt.xticks(np.arange(1,10),["1月份","2月份","3月份","4月份","5月份","6月份","7月份","8月份","9月份"])  # ticks at x=1..9 to match the data; np.arange(9) put "1月份" at x=0 and left x=9 unlabeled
 940 | plt.yticks(np.arange(1000,7000,1000),["1000人","2000人","3000人","4000人","5000人","6000人"])
 941 | 
 942 | ##隐藏坐标轴的刻度
 943 | plt.subplot(1,1,1)
 944 | x = np.array([1,2,3,4,5,6,7,8,9])
 945 | y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
 946 | plt.plot(x,y)
 947 | plt.xlabel("月份")
 948 | plt.ylabel("注册量")
 949 | plt.xticks([])
 950 | plt.yticks([])
 951 | 
 952 | '''
 953 | tick_params函数可以对刻度进行设置
 954 | axis：对那个轴的刻度线进行设置,x、y、both三个可选
 955 | reset：是否重置所有设置，True/False
 956 | which：对哪种刻度进行设置,major(主刻度线)、minor(次刻度线)、both三个可选
 957 | direction：刻度的朝向，in(朝里)、out(朝外)、inout(里外均有)三个可选
 958 | length：刻度线长度
 959 | width：刻度线的宽度
 960 | color：刻度线的颜色
 961 | pad：刻度线与刻度标签之间的距离
 962 | labelsize：刻度标签大小 labelcolor：刻度标签的颜色
 963 | top、bottom、left、right：True/False可选，控制上、下、左、右刻度线是否显示
 964 | labeltop、labelbottom、labelleft、labelright：True/False可选，控制上、下、左、右刻度标签是否显示
 965 | '''
 966 | 
 967 | plt.figure(figsize = (6,8))
 968 | x = np.array([1,2,3,4,5,6,7,8,9])
 969 | y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
 970 | #在2X1坐标系上的第一个坐标系中绘图
 971 | plt.subplot(2,1,1)
 972 | plt.plot(x,y)
 973 | plt.xlabel("月份")
 974 | plt.ylabel("注册人数")
 975 | plt.yticks(np.arange(1000,7000,1000),["1000","2000","3000","4000","5000","6000"])
 976 | #Ticks on both axes (major and minor) point inward; the bottom ticks remain visible
 977 | plt.tick_params(axis= "both",which = "both", direction = "in" ,bottom=True)
 978 | 
 979 | #在2X1坐标系上的第二个坐标系中绘图
 980 | plt.subplot(2,1,2)
 981 | plt.plot(x,y)
 982 | plt.xlabel("月份")
 983 | plt.ylabel("注册人数")
 984 | plt.yticks(np.arange(1000,7000,1000),["1000","2000","3000","4000","5000","6000"])
 985 | #Ticks on both axes (major and minor) point outward; the bottom tick labels are hidden
 986 | plt.tick_params(axis= "both",which = "both", direction = "out" ,labelbottom=False)
 987 | 
## Set explicit axis ranges
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y)
# Widen both axes beyond the data range (x: 0-10, y: 0-8000).
plt.xlim(0,10)
plt.ylim(0,8000)

## Control whether the axes are displayed at all
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y)
# "off" turns off the axis lines, ticks and labels.
plt.axis("off")
1000 | 
1001 | '''
网格线设置
通过grid的第一个参数（旧版本名为b，matplotlib 3.5起更名为visible）设为True来启用网格线
通过axis的值(x、y)控制打开哪个轴的网格线
1005 | linestyle设置网格线样式
1006 | linewidth设置网格线宽度
1007 | '''
1008 | 
# Four grid-line examples on the same series.
# The on/off flag is passed positionally: its keyword was `b` in older
# matplotlib and became `visible` in 3.5 (with `b` later removed), so the
# positional form is the only spelling that works across versions. It also
# matches the plt.grid(True) / plt.grid(False) calls used elsewhere in this file.

# Enable grid lines on both axes
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y)
plt.grid(True)

# Enable grid lines for the x axis only
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y)
plt.grid(True,axis ="x")

# Enable grid lines for the y axis only
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y)
plt.grid(True,axis ="y")

# Enable grid lines drawn dashed with a line width of 1
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y)
plt.grid(True,linestyle="dashed",linewidth =1)
1032 | 
## Legends: every artist given a `label` gets a legend entry
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y,label ="折线图")
plt.bar(x,y,label="柱形图")
# No arguments: matplotlib picks the legend location itself.
plt.legend()

x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y,label ="折线图")
plt.bar(x,y,label="柱形图")
# Two spellings of the same placement: the location string and its numeric
# code (2 == "upper left"). The second call simply replaces the first legend.
plt.legend(loc ="upper left")
plt.legend(loc=2)

x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y,label ="折线图")
plt.bar(x,y,label="柱形图")
# Legend in the upper-right corner with a smaller font and a legend title.
plt.legend(loc ="upper right",fontsize=9,title="测试")
1052 | 
### Chart title
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y)
# loc="left" aligns the title with the left edge of the axes.
plt.title('1-9月份公司注册用户数',loc ="left")
1058 | 
1059 | '''
1060 | 设置数据标签
1061 | 数据标签实现就是根据实际坐标值在对应的位置显示相应的数值，用text函数实现
1062 | plt.text(x,y,str,ha,va,fontsize)
1063 | 
1064 | 参数	说明
1065 | 参数(x、y)	分别表示可以在哪里显示数据
1066 | str	表示显示的具体数值
1067 | horizontalalignment	简称ha,表示str在水平方向的位置，有center、left、right三个值可选
1068 | verticalalignment	简称va,表示str在垂直方向的位置，有center、top、bottom三个值可选
1069 | fontsize	设置str字体大小
1070 | '''
1071 | 
# Place a single text label at data coordinates (5, 1605)
import numpy as np
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y)
plt.title("1-9月份公司注册用户数",loc = "center")
#plt.text(5,1605,"极值点")
# NOTE(review): the value 1605 belongs to x=6 in the arrays above (y at x=5 is
# 6120), so this label sits beside the curve rather than on a data point --
# confirm whether (6, 1605) was intended.
plt.text(5,1605,"1605")

# Label every data point with its own y value
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y)
plt.title("1-9月份公司注册用户数",loc = "center")
# a is the x position, b is both the y position and the text that is drawn;
# the text is centred horizontally and sits just above the point.
for a,b in zip(x,y):
    plt.text(a,b,b,ha="center",va="bottom",fontsize=11)
1088 |     
1089 | '''
1090 | 图表注释
1091 | 图表注释是为了便于更快的获取图表的信息实现方法如下:
1092 | plt.annotate(s,xy,xytext,arrowprops)
1093 | 
1094 | 参数	说明
1095 | s	表示要注释的文本内容
1096 | xy	表示要注释的位置
1097 | xytext	表示要注释的文本的显示位置
1098 | arrowprops	设置箭相关参数、颜色、箭类型设置
1099 | '''
1100 | 
# Annotate one point of a line chart with an arrow and a text note.
x = np.array(["1月份","2月份","3月份","4月份","5月份","6月份","7月份","8月份","9月份"])
y = np.array([886,2335,5710,6482,6120,1605,3813,4428,4631])
plt.plot(x,y)
plt.title("1-9月份公司注册用户数",loc = "center")
# xy is the point the arrow points at and xytext is where the note is drawn.
# Because x holds strings, the positions 5 and 6 are presumably category
# indices (index 5 -> "6月份", whose value is 1605) -- verify on the plot.
plt.annotate("服务器宕机了",
            xy=(5,1605),xytext=(6,1605),
            arrowprops=dict(facecolor="black",arrowstyle="->"))
1108 | 
1109 | '''
1110 | 数据表
1111 | 数据表就是在图表的基础上在添加一个表格，使用plt库中的table函数
table(cellText=None,cellColours=None,
cellLoc="right",colWidths=None,
rowLabels=None,rowColours=None,rowLoc="left",
colLabels=None, colColours=None, colLoc="center",
loc="bottom")
1117 | table函数中参数说明：
1118 | 参数	说明
1119 | cellText	数据表内的值
1120 | cellColours	数据表的颜色
1121 | cellLoc	数据表中数值的位置，可选left、right、center
1122 | cellWidths	列宽
1123 | rowLabels	行标签
1124 | rowColours	行标签颜色
1125 | rowLoc	行标签位置
1126 | colLabels	列标签
1127 | colColours	列标签颜色
1128 | colLoc	列标签位置
1129 | loc	整个数据表的位置，可选坐标系上、下、左、右
1130 | '''
1131 | 
# Bar chart of task volume vs. completed volume per region, with the same
# numbers repeated in a table attached underneath the axes.
plt.subplot(1,1,1)
x= np.array(["东区","南区","西区","北区"])
y1 = np.array([8566,5335,7310,6482])
y2=  np.array([4283,2667,3655,3241])
# Both series share the same x positions, so the second set of bars is drawn
# over the first one.
plt.bar(x,y1,width=0.3,label="任务量")
plt.bar(x,y2,width=0.3,label="完成量")
# Remove the x ticks; the table's column headers take their place.
plt.xticks([])
plt.legend(loc ="upper center",fontsize=9,ncol =2)
plt.title("全国各分区任务量和完成量")
# One table row per series, one column per region.
cellText=[y1,y2]
rows = ["任务量","完成量"]
plt.table(cellText= cellText,
         cellLoc="center",
         rowLabels= rows,
         rowColours=["red","yellow"],
         rowLoc="center",
         colLabels= x,
         colColours=["red","yellow","red","yellow"],
         colLoc="left",
         loc="bottom")
# Save the figure to a hard-coded desktop path.
plt.savefig(r"C:\Users\Administrator\Desktop\test.jpg")
1153 | 
1154 | ###绘制常用的图表系列
1155 | '''
1156 | 使用plt库的plot方法，具体参数如下：
plt.plot(x,y,color,linestyle,linewidth,marker,markeredgecolor,markeredgewidth,markerfacecolor,markersize,label)
1158 | x、y分别表示x轴和y轴的数据（必须项）
1159 | color表示折线的颜色
1160 | linestyle表示线的风格
1161 | linewidth表示线的宽度，传入一个表示宽度的浮点数即可
1162 | marker表示折线图中每点的标记物的形状
1163 | '''
# Line chart of monthly registrations, with data labels, grid and legend.

# Import matplotlib's pyplot module under the alias plt
import matplotlib.pyplot as plt
# Show figures inline when running under IPython/Jupyter. The `%matplotlib`
# magic is not valid Python syntax in a .py file, so the equivalent API call
# is used and skipped when no IPython shell exists.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
# Use a font that contains Chinese glyphs so the labels are not garbled
plt.rcParams["font.sans-serif"]='SimHei'
import numpy as np
# Create the axes
plt.subplot(1,1,1)

# x and y values
x = np.array([1,2,3,4,5,6,7,8,9])
y = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])

# Plot: black dash-dot line, width 1, circle markers of size 5
plt.plot(x,y,color="k",linestyle="dashdot",linewidth=1,marker="o",markersize=5,label="注册用户")

# Title, centred
plt.title("1-9月份注册用户量",loc="center")

# Data labels: write each y value just above its point
for a,b in zip(x,y):
    plt.text(a,b,b,ha="center",va="bottom",fontsize=10)

# Grid lines on
plt.grid(True)

# Legend
plt.legend()

# Save to a hard-coded local path
plt.savefig(r"C:\Users\Administrator\Desktop\plot.png")
1196 | 
1197 | '''
1198 | 绘制柱状图
1199 | 使用plt库中的bar方法，具体参数如下：
1200 | plt.bar(x,height,width=0.8 ,bottom=None,align ="center",color,edgecolor)
1201 | x表示在什么位置显示柱状图
1202 | height表示每根柱子的高度
1203 | width表示每个柱子的宽度，每个柱子的宽度可以都一样，也可以各不相同
1204 | bottom表示每个柱子的底部位置，梅根柱子的底部位置可以都不一样，也可以各不相同
1205 | align表示柱子的位置与x值的关系，有center、edge两个参数可选，center表示柱子位于x值的中心位置，edge表示柱子位于x值的边缘位置
1206 | color表示柱子颜色
1207 | edgecolor表示柱子边缘的颜色
1208 | '''
# Vertical bar chart of task volume per region.

# Create the axes
plt.subplot(1,1,1)

# x (region names) and y (task volume) values
x= np.array(["东区","北区","南区","西区"])
y = np.array([8566,6482,5335,7310,])

# Plot: bars of width 0.5 centred on their x position
plt.bar(x,y,width=0.5,align="center",label="任务量")

# Title
plt.title("全国分区任务量",loc="center")

# Data labels: write each value just above its bar
for a,b in zip(x,y):
    plt.text(a,b,b,ha="center",va="bottom",fontsize=12)

# Axis names
plt.xlabel("分区")
plt.ylabel("任务量")

# Show the legend
plt.legend()

# Save to a hard-coded local path
plt.savefig(r"C:\Users\Administrator\Desktop\bar.png")
1235 | 
### Grouped (side-by-side) bar chart: task volume vs. completed volume

# Create the axes
plt.subplot(1,1,1)

# Numeric x positions (needed so the second series can be offset) and the two series
x = np.array([1,2,3,4])
y1 = np.array([8566,5335,7310,6482])
y2 = np.array([4283,2667,3655,3241])

# First series: bars of width 0.3 at x
plt.bar(x,y1,width=0.3,label="任务量")
# Second series: shifted right by one bar width (0.3) so the pairs sit side by side
plt.bar(x+0.3,y2,width=0.3,label="完成量")

# Title
plt.title("全国各分区任务量和完成量",loc="center")

# Data labels for the first series
for a,b in zip(x,y1):
    plt.text(a,b,b,ha="center",va="bottom",fontsize =12)

# Data labels for the second series, using the shifted positions
for a,b in zip(x+0.3,y2):
    plt.text(a,b,b,ha="center",va="bottom",fontsize=12)

# Axis names
plt.xlabel("区域")
plt.ylabel("任务情况")

# Put each region name midway between its two bars (x + half a bar width)
plt.xticks(x+0.15,["东区","南区","西区","北区"])

# Grid lines off
plt.grid(False)

# Legend
plt.legend()

# Save the figure to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/bars.png")
1276 | 
1277 | 
## "Stacked" bar chart: completed volume drawn over task volume
# Import matplotlib's pyplot module under the alias plt
import matplotlib.pyplot as plt
# Show figures inline when running under IPython/Jupyter. The `%matplotlib`
# magic is not valid Python syntax in a .py file, so the equivalent API call
# is used and skipped when no IPython shell exists.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
# Use a font that contains Chinese glyphs so the labels are not garbled
plt.rcParams["font.sans-serif"]='SimHei'
import numpy as np

# Create the axes
plt.subplot(1,1,1)

# x (region names) and the two series
x = np.array(["东区","南区","西区","北区"])
y1 = np.array([8566,5335,7310,6482])
y2 = np.array([4283,3241,2667,3655])

# First series: bars of width 0.3
plt.bar(x,y1,width=0.3,label="任务量")
# Second series at the SAME x positions and without `bottom=`: these bars are
# drawn over the first series from the baseline, not stacked on top of it.
plt.bar(x,y2,width=0.3,label="完成量")

# Title
plt.title("全国各分区任务量和完成量",loc="center")

# Data labels: task volume above its bar ...
for a,b in zip(x,y1):
    plt.text(a,b,b,ha="center",va="bottom",fontsize =12)

# ... and completed volume just below the top edge of its bar
for a,b in zip(x,y2):
    plt.text(a,b,b,ha="center",va="top",fontsize=12)

# Axis names
plt.xlabel("区域")
plt.ylabel("任务情况")

# Grid lines off
plt.grid(False)

# Legend: upper centre, two columns
plt.legend(loc="upper center",ncol=2)

# Save the figure to a hard-coded local path
plt.savefig(r"C:/Users/Administrator/Desktop/bars.png")
1322 | 
1323 | '''
1324 | 绘制条形图
1325 | 使用plt库中的barh方法参数如下:
1326 | plt.barh(y,width,height,align,color,edgecolor)
1327 | y表示在什么地方限制柱子,即纵坐标
1328 | width表示柱子在横向的宽度,即横坐标 height表示柱子在纵坐标向的高度,即柱子的实际宽度
1329 | align表示柱子的对齐方式
1330 | color表示柱子的颜色
1331 | edgecolor表示柱子边缘的颜色
1332 | '''
# Horizontal bar chart of task volume per region.

# Create the axes
plt.subplot(1,1,1)

# x holds the region names (used as bar positions on the vertical axis),
# y holds the task volumes (used as bar lengths)
x = np.array(["东区","南区","西区","北区"])
y = np.array([8566,5335,7310,6482])

# Plot: for barh, `width` is the bar length and `height` its thickness
plt.barh(x,height=0.5,width=y,align="center")

# Title
plt.title("全国各分区任务量",loc="center")

# Data labels: the value b is the horizontal coordinate, the region a the vertical one
for a,b in zip(x,y):
    plt.text(b,a,b, ha="center",va="center",fontsize=12)

# Axis names
plt.xlabel("任务量")
plt.ylabel("区域")

# Grid lines off
plt.grid(False)
1356 | 
1357 | '''
1358 | 绘制散点图
1359 | 使用plt库中的scatter方法参数如下:
1360 | plt.scatter(x,y,s,c,marker,linewidths,edgecolors)
1361 | x,y 表示散点的位置
1362 | s 表示每个点的面积,即散点的大小.如果是一个具体的值时,则是由的点大小都一样.也可以呈现多个值,让每个点的大小都不一样,这时候就成了气泡图了.
1363 | c 表示每个点的颜色,如果做只有一种颜色时,则所有的点颜色相同,也可以呈现多哦颜色值,让不同的颜色不同
1364 | marker 表示每个点的标记和折线图的中的marker一致
1365 | linewidths 表四每个散点的宽度
1366 | edgecolors 表示每个散点轮廓的颜色
1367 | '''
# Scatter plot of average temperature against beer sales.
# The pasted original carried invisible U+200B characters (a SyntaxError in
# Python) and a duplicated grid/savefig tail; both are removed here.

# Create the axes
plt.subplot(1,1,1)

# x (average temperature) and y (beer sales) values
x = [5.5, 6.6, 8.1, 15.8, 19.5, 22.4, 28.3, 28.9]
y = [2.38, 3.85, 4.41, 5.67, 5.44, 6.03, 8.15, 6.87]

# Plot: circle markers, every point with the same area (s=100)
plt.scatter(x,y,marker="o",s=100)

# Title
plt.title("1-8月份平均气温与啤酒销量关系图",loc = "center")

# Axis names
plt.xlabel("平均气温")
plt.ylabel("啤酒销量")

# Grid lines off
plt.grid(False)

# Save to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/scatter01.jpg")
1395 | 
# Bubble chart: same temperature/sales data, with marker colour and size
# both derived from the sales value.

# Create the axes
plt.subplot(1,1,1)

# x (average temperature) and y (beer sales) values
x = np.array([5.5, 6.6, 8.1, 15.8, 19.5, 22.4, 28.3, 28.9])
y = np.array([2.38, 3.85, 4.41, 5.67, 5.44, 6.03, 8.15, 6.87])

# Plot: colour values and marker areas are scaled copies of y
colors = y*10
area = y*100
plt.scatter(x,y, c = colors,marker="o",s=area)

# Title
plt.title("1-8月份平均气温与啤酒销量关系图",loc = "center")

# Axis names
plt.xlabel("平均气温")
plt.ylabel("啤酒销量")

# Data labels: write each y value in white, centred on its bubble
for a,b in zip(x,y):
    plt.text(a,b,b,ha="center",va="center",fontsize=10,color="white")
# Grid lines off
plt.grid(False)

# Save to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/scatter02.jpg")
1423 | '''
1424 | 绘制面积图
1425 | 使用plt库中的stackplot方法参数如下: plt.stackplot(x,y,labels,color)
1426 | x,y 表示x和y坐标数值
1427 | labels 不同系列图标的图例名
1428 | color 不同系列图标的颜色
1429 | '''
# Stacked area chart of registrations and activations per month.

# Create the axes
plt.subplot(1,1,1)

# x (month) and the two series
x = np.array([1,2,3,4,5,6,7,8,9])
y1 = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])
y2 = np.array([433,1167,2855,3241,3060,802,1906,2214,2315])

# Plot
labels =["注册人数","激活人数"] # one legend label per series
plt.stackplot(x,y1,y2,labels = labels)

# Title
plt.title("XXX公司1-9月注册与激活人数",loc ="center")

# Axis names
plt.xlabel("月份")
plt.ylabel("注册与激活人数")

# Grid lines off
plt.grid(False)

# Legend
plt.legend()
1454 | '''
1455 | 绘制面积图
1456 | 使用plt库中的stackplot方法参数如下: plt.stackplot(x,y,labels,color)
1457 | x,y 表示x和y坐标数值
1458 | labels 不同系列图标的图例名
1459 | color 不同系列图标的颜色
1460 | '''
# Stacked area chart of registrations and activations per month, saved to disk.
# The "blank" lines of the pasted original contained invisible U+200B
# characters, which are a SyntaxError in Python; they are real blank lines here.

# Create the axes
plt.subplot(1,1,1)

# x (month) and the two series
x = np.array([1,2,3,4,5,6,7,8,9])
y1 = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])
y2 = np.array([433,1167,2855,3241,3060,802,1906,2214,2315])

# Plot
labels =["注册人数","激活人数"] # one legend label per series
plt.stackplot(x,y1,y2,labels = labels)

# Title
plt.title("XXX公司1-9月注册与激活人数",loc ="center")

# Axis names
plt.xlabel("月份")
plt.ylabel("注册与激活人数")

# Grid lines off
plt.grid(False)

# Legend
plt.legend()

# Save the figure to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/stackplot.jpg")
1488 | '''
1489 | 绘制树地图
1490 | 树地图常用来表示同一级中不同列别的占比关系,使用squarify库,具体参数如下:
1491 | squarify.plot(size,label,color,value,edgecolor,linewidth)
1492 | size 待绘图数据
1493 | label 不同列别的图例标签
1494 | color 不同列别的颜色
1495 | value 不同列别的数据标签
1496 | edgecolor 不同列别之间边框的颜色
1497 | linewidth 边框线宽
1498 | '''
# Treemap of a zodiac-sign distribution, drawn with squarify.

# Import matplotlib's pyplot module under the alias plt
import matplotlib.pyplot as plt
# Show figures inline when running under IPython/Jupyter. The `%matplotlib`
# magic is not valid Python syntax in a .py file, so the equivalent API call
# is used and skipped when no IPython shell exists.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
# Use a font that contains Chinese glyphs so the labels are not garbled
plt.rcParams["font.sans-serif"]='SimHei'

import squarify
import numpy as np
# Size of each rectangle
size = np.array([3.4,0.693,0.585,0.570,0.562,0.531,0.530,0.524,0.501,0.478,0.468,0.436])

# Text label of each rectangle
xingzuo = np.array(["未知","摩羯座","天秤座","双鱼座","天蝎座","金牛座","处女座","双子座","射手座","狮子座","水瓶座","白羊座"])
# Value label of each rectangle
rate = np.array(["34%","6.93%","5.85%","5.70%","5.62%","5.31%","5.30%","5.24%","5.01%","4.78%","4.68%","4.36%"])

# Rectangle colours (7 colours are supplied for 12 rectangles)
colors = ["steelblue","#9999ff","red","indianred","green","yellow","orange"]

# Plot: white borders of width 3 between rectangles
plot = squarify.plot(sizes= size,label= xingzuo,color = colors,value = rate, edgecolor = 'white',linewidth =3)

# Title
plt.title("菊粉星座分布",fontdict={'fontsize':12})

# Hide the axes
plt.axis('off')

# Hide the tick marks on the top and right edges
plt.tick_params(top=False,right=False)
1530 | 
1531 | '''
1532 | 绘制雷达
1533 | 雷达图使用的是plt库中的polar方法,polar是用来建立极坐标系的,其实雷达图就是先将各点展示在极坐标系中,然后用线将各点连接起来,具体参数如下:
1534 | plt.polar(theta,r,color,marker,linewidth)
1535 | theta 每一点在极坐标系中的角度
1536 | r 每一点在极坐标系中的半径
1537 | color 连接各点之间线的颜色
1538 | marker 每点的标记物
1539 | linewidth 连接线的宽度
1540 | '''
# Radar chart of five skill ratings on a polar axis.
# Changes from the pasted original: invisible U+200B characters (a SyntaxError
# in Python) are removed, and the tick positions passed to plt.xticks now match
# the number of labels.

# Create the axes; polar=True makes it a polar coordinate system
plt.subplot(111,polar = True)

dataLenth = 5 # split the circle into 5 equal parts
# np.linspace returns evenly spaced values over the interval; endpoint=False
# leaves out 2*pi, which would coincide with angle 0
angles = np.linspace(0,2*np.pi,dataLenth,endpoint = False)
labels = ["沟通能力","业务理解能力","逻辑思维能力","快速学习能力","工具使用能力"]
data = [2,3.5,4,4.5,5]
# Repeat the first point at the end so the outline closes
data = np.concatenate((data,[data[0]]))
angles = np.concatenate((angles,[angles[0]]))

# Plot the closed outline in red with circle markers
plt.polar(angles,data,color='r',marker="o")

# Label the angular axis. `angles` now has 6 entries (the closing point is a
# duplicate of the first) but there are only 5 labels, so the duplicate is
# dropped; matplotlib rejects a tick/label count mismatch.
plt.xticks(angles[:-1],labels)

# Title
plt.title("某数据分析师纵隔评级")

# Save to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/polarplot.jpg")
1563 | '''
1564 | 绘制箱形图
1565 | 箱形图用来反映一组数据的离散情况,使用plt库中的boxplot方法具体参数如下:
1566 | plt.boxplot(x,vert,widths,labels)
1567 | x 待绘图源数据
1568 | vert 箱形图方向,如果为True则表示纵向；如果是False则表示横向，默认为True
1569 | widths 箱形图的宽度
1570 | labels 箱形图的标签
1571 | '''
# Box plot comparing the spread of registrations and activations.
# Changes from the pasted original: the IPython magics are issued through the
# IPython API (magic syntax is not valid Python in a .py file) and invisible
# U+200B characters are removed.
import numpy as np
# Import matplotlib's pyplot module under the alias plt
import matplotlib.pyplot as plt
# Show figures inline when running under IPython/Jupyter; skipped otherwise
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
# Use a font that contains Chinese glyphs so the labels are not garbled
plt.rcParams["font.sans-serif"]='SimHei'
# Keep the minus sign rendering correctly with that font
plt.rcParams["axes.unicode_minus"]= False
# Render inline figures as vector graphics (SVG); skipped outside IPython
try:
    get_ipython().run_line_magic("config", "InlineBackend.figure_format = 'svg'")
except NameError:
    pass

# Create the axes
plt.subplot(1,1,1)

# Data: one array per box
y1 = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])
y2 = np.array([433,1167,2855,3241,3060,802,1906,2214,2315])
x = [y1,y2]

# Plot: vertical boxes with individual widths of 0.2 and 0.5
labels = ["注册人数","激活人数"]
plt.boxplot(x,labels = labels,vert = True,widths = [0.2,0.5])

# Title
plt.title("XXX公司1-9月份注册于激活人数",loc = "center")

# Grid lines off
plt.grid(False)

# Save to a hard-coded local path
plt.savefig(r"C:\Users\Administrator\Desktop\boxplot.jpg")
1604 | 
1605 | '''
1606 | 绘制饼图
1607 | 饼图也常用来表示一等级中不同类别的占比情况，使用的方法是plt库中的pie方法具体参数如下:
1608 | plt.pie(x,explode,labels,colors,autopct,pctdistance,shadow,labeldistance,startangle,radius,counterclock,wedgeprops,textprops,center,frame)
1609 | x 待绘图的数据
1610 | explode 饼图找哦就能够每一块离心圆的距离
1611 | labels 饼图中每一块的标签 color 饼图中每一块的颜色
1612 | autopct 控制饼图内数值百分比的格式
pctdistance 数据标签距中心的距离
1614 | shadow 饼图是否有阴影
1615 | labeldistance 每一块饼图距离中心的距离 startangle 饼图初始角度
1616 | radius 饼图的半径 counterclock 是否让饼图逆时针显示
1617 | wedgeprops 饼图内外边缘属性
1618 | textprops 饼图中文本相关属性
1619 | center 饼图中心位置
1620 | frame 是否显示饼图背后的图框
1621 | '''
# Pie chart of the share of each region.
# The "blank" lines of the pasted original contained invisible U+200B
# characters, which are a SyntaxError in Python; they are real blank lines here.

# Create the axes
plt.subplot(1,1,1)

# Values
x = np.array([8566,5335,7310,6482])

# Plot
labels = ["东区","北区","南区","西区"]
# Pull the first wedge slightly away from the centre
explode = [0.05,0,0,0]
labeldistance = 1.1
# autopct prints each share as a whole-number percentage
plt.pie(x,labels=labels ,autopct='%.0f%%',shadow = True,explode = explode,radius = 1.0 ,labeldistance = labeldistance)

# Title
plt.title("全国各区域人数占比",loc="center")

# Save the chart to a hard-coded local path
plt.savefig(r"C:\Users\Administrator\Desktop\pie.jpg")
1640 | 
# Two concentric ring charts: task volume outside, completed volume inside.

# Create the axes
plt.subplot(1,1,1)

# Values for the outer (x1) and inner (x2) ring
x1 = np.array([8566,5335,7310,6482])
x2=  np.array([4283,3241,2667,3655])

# Plot
labels = ["东区","北区","南区","西区"]

# wedgeprops width=0.3 turns each pie into a ring 0.3 wide; the inner ring has
# radius 0.7, so it ends where the outer ring (radius 1.0, width 0.3) begins
plt.pie(x1,labels=labels,radius = 1.0 ,wedgeprops=dict(width=0.3,edgecolor="w"))
plt.pie(x2,radius = 0.7 ,wedgeprops=dict(width=0.3,edgecolor="w"))

# Annotations naming the two rings, each with an arrow pointing at its ring
plt.annotate("完成量",xy=(0.35,0.35),xytext =(0.7,0.45),arrowprops=dict(facecolor="black",arrowstyle="->"))
plt.annotate("任务量",xy=(0.75,0.20),xytext =(1.1,0.2),arrowprops=dict(facecolor="black",arrowstyle="->"))
# Title
plt.title("全国各区域人数占比",loc="center")

# Save the chart to a hard-coded local path
plt.savefig(r"C:\Users\Administrator\Desktop\pie01.jpg")
1662 | '''
1663 | 绘制热力图
1664 | 热力图是将某一事物的响应度反映在图表上，可以快速发现需要重点关注的区域，适应plt库中的imshow方法,具体参数如下:
1665 | plt.imshow(x,cmap)
1666 | x 表示待绘图的数据，需要矩阵形式
1667 | cmap 配色方案，用来避阿明图表渐变的主题色
1668 | cmap的所有可选值都是封装在plt.cm里面
1669 | '''
# Heat map of a 4x4 matrix of correlations with the value written in each cell.
# The "blank" lines of the pasted original contained invisible U+200B
# characters, which are a SyntaxError in Python; they are real blank lines here.
import itertools
# Correlations between a few related indicators
cm = np.array([[1,0.082,0.031,-0.0086],
              [0.082,1,-0.063,0.062],
              [0.031,-0.09,1,0.026],
              [-0.0086,0.062,0.026,1]])
cmap = plt.cm.cool # colour scheme
plt.imshow(cm,cmap = cmap)
plt.colorbar() # show the colour bar beside the image

# Tick labels for the x and y axes: one per row/column of the matrix
classes = ["负债率","信贷数量","年龄","家庭数量"]
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks,classes)
plt.yticks(tick_marks,classes)

# Write each value into its cell; imshow puts the column index on the x axis
# and the row index on the y axis, hence text(j, i, ...)
for i,j in itertools.product(range(cm.shape[0]),range(cm.shape[1])):
    plt.text(j,i,cm[i,j],horizontalalignment="center")

plt.grid(False)

# Save the chart to a hard-coded local path
plt.savefig(r"C:\Users\Administrator\Desktop\imshow.jpg")
1694 | 
1695 | '''
1696 | 绘制水平线和垂直线
1697 | 水平线和垂直线主要用啦做对比参考，使用plt库中的axhline和axvline方法具体参数如下:
1698 | plt.axhline(y,xmin,xmax)
1699 | plt.axvline(x,ymin,ymax)
1700 | x/y 画水平/垂直线上和的横纵坐标 xmin/xmax 水平线起点和终点
1701 | ymin/ymax 垂直线起点和终点
1702 | '''
1703 | 
# Horizontal and vertical reference lines, one per subplot.
# The "blank" lines of the pasted original contained invisible U+200B
# characters, which are a SyntaxError in Python; they are real blank lines here.

# Import matplotlib's pyplot module under the alias plt
import matplotlib.pyplot as plt

# Left cell of a 1x2 subplot grid
plt.subplot(1,2,1)

# Horizontal line at y=2; xmin/xmax are fractions of the axes width
# (0 = left edge, 1 = right edge), so the line spans 20%-60% of it
plt.axhline(y=2,xmin=0.2,xmax=0.6)

# Right cell of the grid
plt.subplot(1,2,2)

# Vertical line at x=2 spanning 20%-60% of the axes height
plt.axvline(x=2,ymin=0.2,ymax=0.6)
1717 | 
1718 | '''
1719 | 绘制组合图表
1720 | 折线图+折线图
1721 | '''
# Combination chart: two line series (registrations and activations).
# Changes from the pasted original: invisible U+200B characters (a SyntaxError
# in Python) are removed, and the month tick labels are placed at the data's
# own x positions.

# Create the axes
plt.subplot(1,1,1)

# x (month) and the two series
x = np.array([1,2,3,4,5,6,7,8,9])
y1 = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])
y2 = np.array([433,1167,2855,3241,3060,802,1906,2214,2315])

# Draw both lines on the same axes: solid for registrations, dash-dot for activations
plt.plot(x,y1,color="k",linestyle="solid",linewidth=1,marker="o",markersize=3,label="注册人数")
plt.plot(x,y2,color="k",linestyle="dashdot",linewidth=1,marker="o",markersize=3,label="激活人数")

# Title
plt.title("XXX公司1-9月注册与激活人数",loc ="center")

# Data labels for both series
for a,b in zip(x,y1):
    plt.text(a,b,b,ha="center",va="bottom",fontsize=11)

for a,b in zip(x,y2):
    plt.text(a,b,b,ha="center",va="bottom",fontsize=11)

# Axis names
plt.xlabel("月份")
plt.ylabel("注册与激活人数")

# Axis ticks. The data sits at x = 1..9, so the month labels go at x itself;
# np.arange(9) (0..8) would put every label one position left of its point.
plt.xticks(x,["1月份","2月份","3月份","4月份","5月份","6月份","7月份","8月份","9月份"])
plt.yticks(np.arange(1000,7000,1000),["1000人","2000人","3000人","4000人","5000人","6000人"])
# Grid lines off
plt.grid(False)

# Legend
plt.legend()

# Save the figure to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/plot01.jpg")
1761 | 
# Combination chart: registrations as a line over activations as bars.

# Create the axes
plt.subplot(1,1,1)

# x (month) and the two series
x = np.array([1,2,3,4,5,6,7,8,9])
y1 = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])
y2 = np.array([433,1167,2855,3241,3060,802,1906,2214,2315])
plt.plot(x,y1,color="r",linestyle="solid",linewidth=1,marker="o",markersize=3,label="注册人数")
plt.bar(x,y2,color="g",label="激活人数")

# Title, centred
plt.title("XXX公司1-9月注册与激活人数",loc ="center")

# Data labels for both series
for a,b in zip(x,y1):
    plt.text(a,b,b,ha="center",va="bottom",fontsize=11)

for a,b in zip(x,y2):
    plt.text(a,b,b,ha="center",va="bottom",fontsize=11)

# Axis names
plt.xlabel("月份")
plt.ylabel("注册与激活人数")


# Axis ticks. The data sits at x = 1..9, so the month labels go at x itself;
# np.arange(9) (0..8) would put every label one position left of its bar.
plt.xticks(x,["1月份","2月份","3月份","4月份","5月份","6月份","7月份","8月份","9月份"])
plt.yticks(np.arange(1000,7000,1000),["1000人","2000人","3000人","4000人","5000人","6000人"])
# Grid lines off
plt.grid(False)

# Legend
plt.legend()

# Save the figure to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/bar02.jpg")
1798 | '''
1799 | 绘制双坐标轴图表
1800 | 双坐标轴图表就是既有主坐标轴又有次坐标轴图表，两个不同量级的指标放在同一个坐标系中时，就需要开启双坐标轴，比如任务量和我完成率就是连个不同量级的指标
1801 | 
1802 | 绘制双y轴图表
1803 | 使用plt库中的twinx方法，绘制流程为：建立坐标系，然后绘制主坐标轴上的图表，再调用plt.twinx方法，最后绘制次坐标轴的图表
1804 | '''
# Dual y-axis chart: registrations on the primary axis and activation rate
# on a secondary axis created with plt.twinx().

# Create the axes
plt.subplot(1,1,1)

# x (month), registrations (y1) and activation rate (y2)
x = np.array([1,2,3,4,5,6,7,8,9])
y1 = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])
y2 = np.array([0.54459448,0.32392354,0.39002751,
              0.41121879,0.31063077,0.33152276,
              0.92226226,0.02950071,0.15716906])

# Plot the series that belongs to the primary axis
plt.plot(x,y1,color="g",linestyle="solid",linewidth=1,marker="o",markersize=3,label="注册人数")

# Axis names for the primary axes
plt.xlabel("月份")
plt.ylabel("注册量")

# Legend of the primary axis, pinned to the upper left
plt.legend(loc ="upper left")

# Create the secondary y axis; pyplot calls after this line act on it
plt.twinx()

# Plot the series that belongs to the secondary axis
plt.plot(x,y2,color="r",linestyle="dashdot",linewidth=1,marker="o",markersize=3,label="激活率")

# Axis names for the secondary axes
plt.xlabel("月份")
plt.ylabel("激活率")

# Legend of the secondary axis (location chosen by matplotlib)
plt.legend()

# Title, centred
plt.title("XXX公司1-9月注册量与激活率",loc ="center")

# Save the figure to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/twinx.jpg")
# Dual y-axis chart: registrations on the primary axis and activation rate
# on a secondary axis created with plt.twinx().
# The "blank" lines of the pasted original contained invisible U+200B
# characters, which are a SyntaxError in Python; they are real blank lines here.

# Create the axes
plt.subplot(1,1,1)

# x (month), registrations (y1) and activation rate (y2)
x = np.array([1,2,3,4,5,6,7,8,9])
y1 = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])
y2 = np.array([0.54459448,0.32392354,0.39002751,
              0.41121879,0.31063077,0.33152276,
              0.92226226,0.02950071,0.15716906])

# Plot the series that belongs to the primary axis
plt.plot(x,y1,color="g",linestyle="solid",linewidth=1,marker="o",markersize=3,label="注册人数")

# Axis names for the primary axes
plt.xlabel("月份")
plt.ylabel("注册量")

# Legend of the primary axis, pinned to the upper left
plt.legend(loc ="upper left")

# Create the secondary y axis; pyplot calls after this line act on it
plt.twinx()

# Plot the series that belongs to the secondary axis
plt.plot(x,y2,color="r",linestyle="dashdot",linewidth=1,marker="o",markersize=3,label="激活率")

# Axis names for the secondary axes
plt.xlabel("月份")
plt.ylabel("激活率")

# Legend of the secondary axis (location chosen by matplotlib)
plt.legend()

# Title, centred
plt.title("XXX公司1-9月注册量与激活率",loc ="center")

# Save the figure to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/twinx.jpg")
1881 | 
1882 | '''
1883 | 绘制双x轴图表
1884 | 使用plt库中的twiny方法，流程与x双y轴的方法一样。
1885 | 
1886 | 绘图样式设置
1887 | matplotlib库默认样式不是很好看，使用plt.style.available即可查看matplotlib库支持的所有样式
1888 | 如果需要使用某种样式在程序开头如下代码:plt.style.use(样式名)
1889 | '''
# Style sheets: list the available styles, then draw a chart with "ggplot".

# Evaluates to the list of style names (displayed when run as a notebook cell).
plt.style.available
# Output pasted from the author's session (the names vary between matplotlib
# versions):
#   bmh, classic, dark_background, fast, fivethirtyeight, ggplot, grayscale,
#   seaborn-bright, seaborn-colorblind, seaborn-dark-palette, seaborn-dark,
#   seaborn-darkgrid, seaborn-deep, seaborn-muted, seaborn-notebook,
#   seaborn-paper, seaborn-pastel, seaborn-poster, seaborn-talk, seaborn-ticks,
#   seaborn-white, seaborn-whitegrid, seaborn, Solarize_Light2,
#   tableau-colorblind10, _classic_test

# Select the style BEFORE creating the axes: a style only changes the defaults
# used for artists created afterwards, so calling it after plotting (as the
# original did) left this chart in the previous style.
plt.style.use("ggplot")
# Create the axes
plt.subplot(1,1,1)
# x (month) and the two series
x = np.array([1,2,3,4,5,6,7,8,9])
y1 = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])
y2 = np.array([433,1167,2855,3241,3060,802,1906,2214,2315])
plt.plot(x,y1,color="r",linestyle="solid",linewidth=1,marker="o",markersize=3,label="注册人数")
plt.bar(x,y2,color="g",label="激活人数")
1926 | 
# Treemap of a zodiac-sign distribution, drawn with squarify and saved to disk.

# Import matplotlib's pyplot module under the alias plt
import matplotlib.pyplot as plt
# Show figures inline when running under IPython/Jupyter. The `%matplotlib`
# magic is not valid Python syntax in a .py file, so the equivalent API call
# is used and skipped when no IPython shell exists.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
# Use a font that contains Chinese glyphs so the labels are not garbled
plt.rcParams["font.sans-serif"]='SimHei'

import squarify
import numpy as np
# Size of each rectangle
size = np.array([3.4,0.693,0.585,0.570,0.562,0.531,0.530,0.524,0.501,0.478,0.468,0.436])

# Text label of each rectangle
xingzuo = np.array(["未知","摩羯座","天秤座","双鱼座","天蝎座","金牛座","处女座","双子座","射手座","狮子座","水瓶座","白羊座"])
# Value label of each rectangle
rate = np.array(["34%","6.93%","5.85%","5.70%","5.62%","5.31%","5.30%","5.24%","5.01%","4.78%","4.68%","4.36%"])

# Rectangle colours (7 colours are supplied for 12 rectangles)
colors = ["steelblue","#9999ff","red","indianred","green","yellow","orange"]

# Plot: white borders of width 3 between rectangles
plot = squarify.plot(sizes= size,label= xingzuo,color = colors,value = rate, edgecolor = 'white',linewidth =3)

# Title
plt.title("菊粉星座分布",fontdict={'fontsize':12})

# Hide the axes
plt.axis('off')

# Hide the tick marks on the top and right edges
plt.tick_params(top=False,right=False)

# Save to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/squarify.jpg")
1961 | 
# Combination chart: registrations as a line over activations as bars,
# with the months placed at x = 0..8.

# Create the axes
plt.subplot(1,1,1)

# x positions start at 0 here, so they coincide with the np.arange(9) tick
# positions used for the month labels below
x = np.array([0,1,2,3,4,5,6,7,8])
y1 = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])
y2 = np.array([433,1167,2855,3241,3060,802,1906,2214,2315])
plt.plot(x,y1,color="r",linestyle="solid",linewidth=1,marker="o",markersize=3,label="注册人数")
plt.bar(x,y2,color="g",label="激活人数")

# Title, centred
plt.title("XXX公司1-9月注册与激活人数",loc ="center")

# Data labels for both series
for a,b in zip(x,y1):
    plt.text(a,b,b,ha="center",va="bottom",fontsize=11)
    
for a,b in zip(x,y2):
    plt.text(a,b,b,ha="center",va="bottom",fontsize=11)    

# Axis names
plt.xlabel("月份")
plt.ylabel("注册与激活人数")


# Axis ticks: month names at 0..8, y labels every 1000 from 1000 to 6000
plt.xticks(np.arange(9),["1月份","2月份","3月份","4月份","5月份","6月份","7月份","8月份","9月份"])
plt.yticks(np.arange(1000,7000,1000),["1000人","2000人","3000人","4000人","5000人","6000人"])
# Grid lines off
plt.grid(False)

# Legend
plt.legend()

# Save the figure to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/bar02.jpg")
1998 | 
1999 | 
## Dual y-axis chart (author's note: Excel does not offer this kind of chart)
# Create the axes
plt.subplot(1,1,1)

# x (month), registrations (y1) and activation rate (y2)
x = np.array([1,2,3,4,5,6,7,8,9])
y1 = np.array([866,2335,5710,6482,6120,1605,3813,4428,4631])
y2 = np.array([0.54459448,0.32392354,0.39002751,
              0.41121879,0.31063077,0.33152276,
              0.92226226,0.02950071,0.15716906])

# Plot the series that belongs to the primary axis
plt.plot(x,y1,color="g",linestyle="solid",linewidth=1,marker="o",markersize=3,label="注册人数")

# Axis names for the primary axes
plt.xlabel("月份")
plt.ylabel("注册量")

# Legend of the primary axis, pinned to the upper left
plt.legend(loc ="upper left")

# Create the secondary y axis; pyplot calls after this line act on it
plt.twinx()

# Plot the series that belongs to the secondary axis
plt.plot(x,y2,color="r",linestyle="dashdot",linewidth=1,marker="o",markersize=3,label="激活率")

# Axis names for the secondary axes
plt.xlabel("月份")
plt.ylabel("激活率")

# Legend of the secondary axis (location chosen by matplotlib)
plt.legend()

# Title, centred
plt.title("XXX公司1-9月注册量与激活率",loc ="center")

# Save the figure to a hard-coded local path
plt.savefig("C:/Users/Administrator/Desktop/twinx.jpg")
2039 | 
2040 | 
#### Report automation (author's note: study this part closely)
import pandas as pd
from datetime import datetime
# Load the order data; the "成交时间" (transaction time) column is parsed as datetimes.
data = pd.read_csv(r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/order-14.1.csv", sep=",",engine = "python",encoding="gbk",parse_dates=["成交时间"])
data.head()
data.info()
# Slice out the three months being compared. Each month is a half-open range
# [first day, first day of the next month): an upper bound of
# `<= datetime(y, m, last_day)` means midnight at the START of the last day and
# would drop every order carrying a time of day on that date.
# Current month: February 2018
this_month = data[(data["成交时间"]>= datetime(2018,2,1)) & (data["成交时间"]< datetime(2018,3,1))]
# Previous month: January 2018
last_month = data[(data["成交时间"]>= datetime(2018,1,1)) & (data["成交时间"]< datetime(2018,2,1))]
# Same month of the previous year: February 2017
same_month = data[(data["成交时间"]>= datetime(2017,2,1)) & (data["成交时间"]< datetime(2017,3,1))]
2050 | 
def get_month_data(data):
    """Summarise one period of order data.

    Args:
        data: DataFrame with the columns "单价" (unit price), "销量"
            (quantity sold) and "订单ID" (order id).

    Returns:
        A tuple ``(sale, traffic, s_t)``: total sales amount, number of
        distinct non-null order ids, and sales amount per order.
    """
    # Sales amount: unit price times quantity, summed over all rows.
    revenue = data["单价"].mul(data["销量"]).sum()
    # Customer traffic: each order id is counted once.
    order_count = data["订单ID"].drop_duplicates().count()
    return (revenue, order_count, revenue / order_count)
# Summarise the three windows in order: this month, last month, same month last year.
(
    (sale_1, traffic_1, s_t_1),
    (sale_2, traffic_2, s_t_2),
    (sale_3, traffic_3, s_t_3),
) = (get_month_data(frame) for frame in (this_month, last_month, same_month))

# Assemble the report column by column: one column per window, one row per metric
# (sales amount, customer traffic, average spend per order).
report = pd.DataFrame(
    {
        "本月累计": [sale_1, traffic_1, s_t_1],
        "上月同期": [sale_2, traffic_2, s_t_2],
        "去年同期": [sale_3, traffic_3, s_t_3],
    },
    index=["销售额", "客流量", "客单价"],
)
report
2066 | 
# Add the growth-rate columns: "环比" compares this month with last month,
# "同比" compares this month with the same month last year (ratio minus 1).
report["环比"] = report["本月累计"].div(report["上月同期"]).sub(1)
report["同比"] = report["本月累计"].div(report["去年同期"]).sub(1)
report

# Export the report; "utf-8-sig" writes a BOM at the start of the file.
report.to_csv(
    r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/order.csv",
    encoding="utf-8-sig",
)
2072 | 
2073 | 
2074 | import pandas as pd
2075 | from datetime import datetime
# Load the second order export, parsing "成交时间" (transaction time) as datetimes.
data = pd.read_csv(
    r"C:/Users/Administrator/Desktop/Excel-Python-master/Data/order-14.3.csv",
    sep=",",
    engine="python",
    encoding="gbk",
    parse_dates=["成交时间"],
)
data.head()

# Top 10 category IDs by total quantity sold.
(
    data.groupby("类别ID")["销量"]
    .sum()
    .reset_index()
    .sort_values(by="销量", ascending=False)
    .head(10)
)

# Top 10 product IDs by total quantity sold, this time via a pivot table.
(
    data.pivot_table(index="商品ID", values="销量", aggfunc="sum")
    .reset_index()
    .sort_values(by="销量", ascending=False)
    .head(10)
)

# Revenue per row = quantity sold * unit price.
data["销售额"] = data["销量"].mul(data["单价"])

# Revenue per store, then each store's share of total revenue.
data.groupby("门店编号")["销售额"].sum()
data.groupby("门店编号")["销售额"].sum().div(data["销售额"].sum())
2083 | 
# Draw a pie chart of each store's share of total revenue.
# Import pyplot from matplotlib under the name plt.
import matplotlib.pyplot as plt

# "%matplotlib inline" and "%config ..." are IPython magics, not Python syntax:
# written as bare lines they stop this .py file from compiling. Issue them through
# the IPython shell object when one exists, and skip them in a plain interpreter.
try:
    _shell = get_ipython()  # only defined inside an IPython session
except NameError:
    _shell = None

# Show figures inline in the notebook/console.
if _shell is not None:
    _shell.run_line_magic("matplotlib", "inline")
# Use the SimHei font so Chinese text is not garbled.
plt.rcParams["font.sans-serif"]='SimHei'
# Make the minus sign display correctly.
plt.rcParams["axes.unicode_minus"]= False
# Render inline figures as vector graphics (SVG).
if _shell is not None:
    _shell.run_line_magic("config", "InlineBackend.figure_format = 'svg'")
# Create the figure.
fig = plt.figure()
(data.groupby("门店编号")["销售额"].sum()/data["销售额"].sum()).plot.pie()
2098 | 
# Extract the hour of day from each transaction timestamp. The vectorised
# .dt.hour accessor replaces a per-row strftime("%H") + int() round trip and
# yields a missing value for NaT instead of raising.
data["小时"] = data["成交时间"].dt.hour
# De-duplicate on (hour, order ID) so each order counts once per hour.
tracffic = data[["小时","订单ID"]].drop_duplicates()
# Number of orders (customer traffic) in each hour; computed once and reused.
hourly_traffic = tracffic.groupby("小时")["订单ID"].count()
hourly_traffic
# Plot traffic by hour as a line chart.
hourly_traffic.plot()
2107 | 
2108 | 
2109 | 
2110 | 
2111 | 
2112 | 
2113 | 
2114 | 


--------------------------------------------------------------------------------