└── README.md


/README.md:
--------------------------------------------------------------------------------
   1 | # 【2021】Python数据清洗方法汇总
   2 | 
   3 | 
   4 | 
   5 | ## Tips 
   6 | 
   7 | ```python
   8 | #公式用法
   9 | np.random.randint?
  10 | help(np)
  11 | 
  12 | #可以加哪些后缀
  13 | dir(np)
  14 | 
  15 | #包导入
  16 | import numpy as np
  17 | import pandas as pd
  18 | import matplotlib.pyplot as plt
  19 | import re
  20 | import os
  21 | import statsmodels.api as sm
  22 | import statsmodels.formula.api as smf
  23 | import time
  24 | import scipy.stats as stats
  25 | 
  26 | #快速查看文件内容
  27 | !type ex5.csv
  28 | 
  29 | #查看dataframe属性
  30 | data.info()
  31 | 
  32 | plt.rcParams['font.sans-serif']=['SimHei']#中文乱码
  33 | plt.rcParams['axes.unicode_minus']=False#中文乱码
  34 | ```
  35 | 
  36 | ## 基础运算
  37 | 
  38 | | 运算符 | 描述                                                         |
  39 | | ------ | ------------------------------------------------------------ |
  40 | | +      | 加 - 两个对象相加                                            |
  41 | | -      | 减 - 得到负数或是一个数减去另一个数                          |
  42 | | *      | 乘 - 两个数相乘或是返回一个被重复若干次的字符串              |
  43 | | /      | 除 - x除以y                                                  |
  44 | | %      | 取模 - 返回除法的余数                                        |
  45 | | **     | 幂 - 返回x的y次幂                                            |
  46 | | //     | 取整除 - 返回商的整数部分（向下取整）                        |
  47 | | ==     | 等于 - 比较对象是否相等                                      |
  48 | | !=     | 不等于 - 比较两个对象是否不相等                              |
  49 | | >      | 大于 - 返回x是否大于y                                        |
  50 | | <      | 小于 - 返回x是否小于y。所有比较运算符返回1表示真，返回0表示假。这分别与特殊的变量True和False等价。 |
  51 | | >=     | 大于等于 - 返回x是否大于等于y。                              |
  52 | | <=     | 小于等于 - 返回x是否小于等于y。                              |
  53 | | &      | 按位与运算符：参与运算的两个值,如果两个相应位都为1,则该位的结果为1,否则为0 |
  54 | | \|     | 按位或运算符：只要对应的二个二进位有一个为1时，结果位就为1。 |
  55 | | ^      | 按位异或运算符：当两对应的二进位相异时，结果为1              |
  56 | | ~      | 按位取反运算符：对数据的每个二进制位取反,即把1变为0,把0变为1 。~x 类似于 -x-1 |
  57 | | <<     | 左移动运算符：运算数的各二进位全部左移若干位，由 << 右边的数字指定了移动的位数，高位丢弃，低位补0。 |
  58 | | >>     | 右移动运算符：把">>"左边的运算数的各二进位全部右移若干位，>> 右边的数字指定了移动的位数 |
  59 | 
  60 | | =    | 简单的赋值运算符 | c = a + b 将 a + b 的运算结果赋值为 c |
  61 | | ---- | ---------------- | ------------------------------------- |
  62 | | +=   | 加法赋值运算符   | c += a 等效于 c = c + a               |
  63 | | -=   | 减法赋值运算符   | c -= a 等效于 c = c - a               |
  64 | | *=   | 乘法赋值运算符   | c *= a 等效于 c = c * a               |
  65 | | /=   | 除法赋值运算符   | c /= a 等效于 c = c / a               |
  66 | | %=   | 取模赋值运算符   | c %= a 等效于 c = c % a               |
  67 | | **=  | 幂赋值运算符     | `c **= a` 等效于 `c = c ** a`         |
  68 | | //=  | 取整除赋值运算符 | c //= a 等效于 c = c // a             |
  69 | 
  70 | | 运算符 | 逻辑表达式 | 描述                                                         |
  71 | | ------ | ---------- | ------------------------------------------------------------ |
  72 | | and    | x and y    | 布尔"与" - 如果 x 为 False，x and y 返回 False，否则它返回 y 的计算值 |
  73 | | or     | x or y     | 布尔"或" - 如果 x 是非 0，它返回 x 的计算值，否则它返回 y 的计算值。 |
  74 | | not    | not x      | 布尔"非" - 如果 x 为 True，返回 False 。如果 x 为 False，它返回 True。 |
  75 | 
  76 | | 运算符 | 描述                                                    | 实例                                            |
  77 | | ------ | ------------------------------------------------------- | ----------------------------------------------- |
  78 | | in     | 如果在指定的序列中找到值返回 True，否则返回 False。     | x 在 y 序列中 , 如果 x 在 y 序列中返回 True。   |
  79 | | not in | 如果在指定的序列中没有找到值返回 True，否则返回 False。 | x 不在 y 序列中 , 如果 x 不在 y 序列中返回 True |
  80 | 
  81 | | 运算符优先级                           | 描述                                                   |
  82 | | -------------------------------------- | ------------------------------------------------------ |
  83 | | **                                     | 指数 (最高优先级)                                      |
  84 | | ~ 、+、 -                              | 按位翻转, 一元加号和减号 (最后两个的方法名为 +@ 和 -@) |
  85 | | * 、/、 %、 //                         | 乘，除，取模和取整除                                   |
  86 | | + 、-                                  | 加法减法                                               |
  87 | | >> 、<<                                | 右移，左移运算符                                       |
  88 | | &                                      | 位 'AND'                                               |
  89 | | ^、 \|                                 | 位运算符                                               |
  90 | | <=、 <、 >、 >=                        | 比较运算符                                             |
  91 | | ==、 !=                                | 等于运算符                                             |
  92 | | = 、%=、 /=、 //=、 -=、 +=、 *=、 **= | 赋值运算符                                             |
  93 | | is、 is not                            | 身份运算符                                             |
  94 | | in 、not in                            | 成员运算符                                             |
  95 | | not、 and、 or                         | 逻辑运算符                                             |
  96 | 
  97 | ## 数据类型
  98 | 
  99 | ### 字符串
 100 | 
 101 | ```python
 102 | #创建一个字符串
 103 | a='a'
 104 | #以切的方式截取成员，切片区间是左闭右开
 105 | text[1:3]
 106 | 
 107 | #大小写转换
 108 | s.upper()                               #全部大写
 109 | s.lower()                               #全部小写
 110 | s.swapcase()                            #大小写互换
 111 | s.capitalize()                          #字符串的首字母大写,其余小写
 112 | s.title()                               #每個詞的首字母大写
 113 | 
 114 | #填充
 115 | s.ljust(12,'0')                  #获取固定长度的字符串,左对齐,右边不够用指定字符(此处为'0',默认为空格)补齐
 116 | s.rjust(width [, fillchar])      #获取固定长度,右对齐,左边不够用空格(或fillchar)补齐
 117 | s.center(width [, fillchar])     #获取固定长度,中间对齐,两边不够用空格(或fillchar)补齐
 118 | s.zfill(width)                   #获取固定长度,原字符串右对齐，前面填充0
 119 | s.expandtabs([tabsize=8])        #把字符串中的 tab 符号('\t')转为空格，tab 符号('\t')默认的空格数是 8。
 120 | 
 121 | #查找
 122 | s.find(str, beg= 0,end=len(string))     #搜索指定字符串,找到返回位置索引，没有返回-1
 123 | s.rfind(str, beg= 0,end=len(string))    #返回字符串最后一次出现的位置(从右向左查询)，如果没有匹配项则返回-1
 124 | #注意：str 没有 lfind() 方法；从左向右查找第一次出现的位置请直接使用 s.find()
 125 | s.index(str, beg= 0,end=len(string))    #跟find()方法一样，只不过如果str不在字符串中会报一个异常
 126 | s.count(str, beg= 0,end=len(string))    #统计指定的字符串出现的次数
 127 | 
 128 | #替换
 129 | s.replace(old, new [, max])             #把将字符串中的 old 替换成 new,如果 max 指定，则替换不超过 max 次。
 130 | s.translate(table, deletechars="")      #根据 str 给出的表(包含 256 个字符)转换 string 的字符, 要过滤掉的字符放到 deletechars 参数中
 131 | 
 132 | #删减
 133 | s.strip()                                  #去两边空格或指定字符
 134 | s.lstrip()                                 #截掉字符串左边的空格或指定字符
 135 | s.rstrip()                                 #截掉字符串右边的空格或指定字符
 136 | s.split(str="", num=string.count(str))     #以 str 为分隔符截取字符串，如果 num 有指定值，则仅截取 num+1 个子字符串. 切位列表
 137 | s.splitlines([keepends])                   #按照行('\r', '\r\n', \n')分隔，返回一个包含各行作为元素的列表，如果参数 keepends 为 False，不包含换行符，如果为 True，则保留换行符。
 138 | 
 139 | #布尔判断
 140 | s.startswith()                    #判斷是否以指定字符开头
 141 | text.endswith()                   #判斷是否以指定字符结尾
 142 | text.isalpha()                    #判斷是否是否全字母
 143 | text.islower()                    #判斷是否全小写
 144 | text.isupper()                    #判斷是否全大写
 145 | text.istitle()                    #判斷是否每個詞都是首字母大写、其余字母小写
 146 | text.isspace()                    #判斷字符是否全为空格
 147 | 
 148 | #连接
 149 | seperator = "-"                   #分隔符，可以為空:
 150 | sequence = ['what','a','good','day','！']    #要連結的元素: str, list, tuple, dict 都可以
 151 | print (seperator.join(sequence))  #返回一個以分隔符號sep連接多個元素的字符串
 152 | ```
 153 | 
 154 | ## 条件语句
 155 | 
 156 | ```python
 157 | #if
 158 | #if - else
 159 | #if - elif - else
 160 | if x > 5:
 161 |     print('x is greater than 5')
 162 | elif x < 5:
 163 |     print('x is less than 5')
 164 | else:
 165 |     print('x equals 5')
 166 |     
 167 | # and/or 
 168 | if (x > 3) and (x < 10):
 169 |     print('x is between 3 and 10')
 170 | else:
 171 |     print('x is either less than or equal to 3 or greater than or equal to 10')
 172 | 
 173 | # try/except 如果代码可运行，则跳过except，反之亦然
 174 | xxx = '123'
 175 | try:
 176 |     yyy = int(xxx)
 177 |     print('try works')
 178 | except:
 179 |     yyy = -1
 180 |     print('except works')
 181 | print(yyy)
 182 | 
 183 | #while loop 条件循环
 184 | n = 5
 185 | while n > 0:
 186 |     print(n)
 187 |     n = n - 1
 188 | print('finished!')
 189 | 
 190 | #continue break
 191 | while True:
 192 |     ch = input('please enter anything: ')
 193 |     if ch == 'no print':
 194 |         continue                            #跳过本次循环剩余语句，回到循环开头重新输入
 195 |     if ch == 'done':
 196 |         break                               #跳出循环
 197 |     print(ch)
 198 | print('Done!')
 199 | 
 200 | #for loop 依次输出目标数据中的内容
 201 | li = [1, 2, 5, 8]
 202 | for i in li:
 203 |     print(i)
 204 |     
 205 | #for zip 同时循环XY
 206 | for x,y in zip(X,Y):
 207 |     print(x, y)
 208 | ```
 209 | 
 210 | 
 211 | 
 212 | ## 时间
 213 | 
 214 | ```python
 215 | #时间
 216 | import datetime 
 217 | datetime.date.today() 
 218 | #datetime.date(2021, 12, 15)
 219 | 
 220 | datetime.date.today().day                      #输出日
 221 | datetime.date.today().month                    #输出月
 222 | datetime.date.today().year                     #输出年
 223 | 
 224 | datetime.date.isoformat(datetime.date.today())
 225 | #'2021-12-15'
 226 | 
 227 | time.strftime("%a:%H:%M")  #time.strftime()函数:返回以可读字符串表示的当地时间
 228 | #'Wed:11:06'
 229 | 
 230 | #多时间序列
 231 | pd.date_range('1/1/2000', periods=7)
 232 | 
 233 | #时间序列resample汇总
 234 | rng = pd.date_range('1/1/2012', periods=100, freq='S')
 235 | ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
 236 | s3=ts.resample('3s').sum()                      #三秒建立一个集
 237 | 
 238 | #加时区
 239 | ts_utc = ts.tz_localize('UTC')                 #设置为UTC时间
 240 | ts_utc.tz_convert('US/Eastern')                #转变为US/Eastern
 241 | 
 242 | #建立月数据
 243 | rng = pd.date_range('1/1/2012', periods=5, freq='M')
 244 | ps = ts.to_period()                            #只保留月份
 245 | ps.to_timestamp()                              #设置为每月第一天
 246 | 
 247 | #建立季度数据
 248 | prng = pd.period_range('1990Q1', '2000Q4', freq='Q')
 249 | 
 250 | #添加时间数据 由年季度--年月日时（季度）
 251 | ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9
 252 | ```
 253 | 
 254 | ## 列表
 255 | 
 256 | ```python
 257 | #创建列表
 258 | s1=[]                                          #創建了一個空列表
 259 | s2=[32.0,212.0,0.0,81.8,100.0,45.3]            #創建了一個浮點數列表
 260 | s3=['toyota','rav4',2.2,]                      #創建了一個含不同類型數據的列表
 261 | list_of_list=[temps, car_details]              #創建了一個所含對象為列表的列表       
 262 | list('asda')                                   #字符串转成list
 263 | 
 264 | #元素
 265 | len(list) #列表元素个数
 266 | max(list) #返回列表元素最大值
 267 | min(list) #返回列表元素最小值
 268 | list(seq) #将元组转换为列表
 269 | list.count(obj) #统计某个元素在列表中出现的次数
 270 | 
 271 | #编辑列表
 272 | found.append('a')                              #添加单个
 273 | nums.remove(2)                                 #移除第一个值为2的元素
 274 | nums.pop(2)                                    #移除并返回索引为2的元素（即第三个元素）
 275 | nums.extend([3,4])                             #添加多个
 276 | nums.insert(2,'hello')                         #在索引2的位置（第三个元素之前）插入
 277 | nums.reverse()                                 #顺序反转
 278 | third=first.copy()                             #复制
 279 | letter[start:end:step]                         #列表切片
 280 | list.clear()                                   #清空列表
 281 | list.sort( key=None, reverse=False)            #对原列表进行排序
 282 | ```
 283 | 
 284 | | 列表运算表达式                        | 结果                         | 描述                 |
 285 | | ------------------------------------- | ---------------------------- | -------------------- |
 286 | | len([1, 2, 3])                        | 3                            | 长度                 |
 287 | | [1, 2, 3] + [4, 5, 6]                 | [1, 2, 3, 4, 5, 6]           | 组合                 |
 288 | | ['Hi!'] * 4                           | ['Hi!', 'Hi!', 'Hi!', 'Hi!'] | 重复                 |
 289 | | 3 in [1, 2, 3]                        | True                         | 元素是否存在于列表中 |
 290 | | for x in [1, 2, 3]: print(x, end=" ") | 1 2 3                        | 迭代                 |
 291 | 
 292 | ## 元组
 293 | 
 294 | 不可修改性
 295 | 
 296 | ```python
 297 | #创建元组
 298 | vowels2=('a','e','i','o','u')                  #建立元组
 299 | T = tuple([1,2,3,4])                           #list转成tuple
 300 | tuple('spam')                                  #字符串转成tuple
 301 | 
 302 | #元组查找
 303 | list/tuple.index(x, start, end) 
 304 | T = (1, 3, 2, 4, 2, 5, 2)
 305 | T.index(4), T.index(2,3), T.count(2), T.index(2, T.count(2))
 306 | #(3, 4, 3, 4)
 307 | ```
 308 | 
 309 | ## 集合
 310 | 
 311 | ```python
 312 | #创建集合
 313 | vowels={"a",'e','e','i','o','u','u'}            #建立集合
 314 | set([1,2,3,4,5,2,3])  					      #list转set
 315 | vowels2=set("aeeiou")					      #字符串转set
 316 | 
 317 | #增加删除元素
 318 | set.add(obj)
 319 | set.update(obj)                                 #添加元素，且参数可以是列表，元组，字典等
 320 | set.remove(item)                                #移除指定元素
 321 | set.remove( obj )                               #如果元素不存在，则会发生错误
 322 | set.discard( obj )
 323 | set.pop()                                       #随机删除集合中的一个元素
 324 | 
 325 | #并集
 326 | u=v1.union(set(v2))
 327 | set.symmetric_difference(set)                   #返回两个集合中不重复的元素集合
 328 | 
 329 | #差集
 330 | d=v1.difference(set(v2))
 331 | set.symmetric_difference_update(set)            #移除当前集合中在另外一个指定集合相同的元素，并将另外一个指定集合中不同的元素插入到当前集合中。
 332 | 
 333 | #交集
 334 | i=v1.intersection(set(v2))
 335 | set.intersection(set1, set2 ... etc)           #返回多个集合的交集，返回一个新的集合
 336 | set.intersection_update(set1, set2 ... etc)    #返回多个集合的交集，移除不重叠的元素
 337 | 
 338 | #布尔判别
 339 | set.issubset(set)                             #判断指定集合是否为该方法参数集合的子集
 340 | set.issuperset(set)                           #判断该方法的参数集合是否为指定集合的子集,倒过来
 341 | set.isdisjoint(set)                           #判断两个集合是否没有相同的元素，没有则返回True
 342 | ```
 343 | 
 344 | ## 字典
 345 | 
 346 | ```python
 347 | #建立字典
 348 | Dict1={}                                         #建立空词典
 349 | Dict2={'name':'Allen','age':21,'gender':'male'}  #建立词典
 350 | Dict3['gender']='male'                           #赋值法/增加新值
 351 | dict(name='Allen', age=21, gender='male')        #dict内建函数
 352 | dict([('name','Allen'),('age',21),('gender','male')])       #键值对列表（k，v）
 353 | 
 354 | #属性
 355 | person3['Name']								  #获取对应值
 356 | person3.keys()                                   #获取键
 357 | person3.values()                                 #获取值
 358 | person3.items()                                  #获取对应键值对
 359 | 
 360 | #编辑
 361 | del person3['age']                               #删除
 362 | D.pop("ham")                                     #删除
 363 | D.update(D2)                                     #将新值更新到旧字典中
 364 | 
 365 | #元素查找
 366 | dict.get(key, default=None)                      #返回指定键的值，如果值不在字典中返回default值
 367 | key in dict                                      #如果键在字典dict里返回True，否则返回False（Python 3 已移除 dict.has_key()）
 368 | 
 369 | #检查成员关系
 370 | vowels=['a','e','i','o','u']
 371 | word=input("Provide a word to search for vowels:")
 372 | found={}
 373 | for letter in word:
 374 |     if letter in vowels:
 375 |         found.setdefault(letter,0)               #初始化
 376 |         found[letter] +=1
 377 | print(found)
 378 | 
 379 | #遍历取值
 380 | for k, v in sorted(found.items()):               #前一个取Keys；后一个取Values
 381 |     print(k, 'was found', v, 'time(s).')
 382 | 
 383 | #合并数据集
 384 | import pprint                                    # 优化数据结构的形式
 385 | pprint.pprint(people)
 386 | 
 387 | #读取数据
 388 | people['003']                                    #单层
 389 | people['003']['gender']                          #多层
 390 | ```
 391 | 
 392 | ## 函数定义
 393 | 
 394 | ```python
 395 | #无参数函数，需要输入
 396 | def search4vowels():                            
 397 |     vowels=set('aeiou')
 398 |     word=input('Provide a word to search for vowels:')
 399 |     found=vowels.intersection(set(word))
 400 |     for vowel in found:
 401 |         print(vowel)
 402 |         
 403 | #参数函数
 404 | def search4vowels(word):
 405 |     vowels=set('aeiou')
 406 |     found=vowels.intersection(set(word))
 407 |     for vowel in found:
 408 |         print(vowel)
 409 |         
 410 | #多参数函数
 411 | def F1 ( x, y ):                                #計算並返回x 除以 y 的余数与商的函数
 412 |     a = x % y
 413 |     b = (x-a) / y
 414 |     return  (a,b)                               # 也可以写作 return a, b
 415 | (c,d) = F1(9, 4)                                # 也可以写作 c ,d = F1 ( 9, 4 )
 416 | print (c,d)
 417 | 
 418 | #进阶函数
 419 | def search4letters(phrase:str, letters:str)->set:   
 420 |    return set(phrase).intersection(set(letters))
 421 | 
 422 | #可变参数
 423 | def sum_my(*n):                                 #输入时由([1, 2, 3])简化为(1, 2, 3)
 424 |     d = 0
 425 |     for n in n:
 426 |         d = d + n 
 427 |     return d
 428 | 
 429 | #函数默认
 430 | def number(a, b=0, *c, **d):                    #a必须定义，b默认为0，c可多定义，d为小字典，可额外补充。
 431 |     pass
 432 | #Global Variables & Local Variables
 433 | #--local variables are inside the function
 434 | #--global variables are in the main program
 435 | ```
 436 | 
 437 | ## 文件读写
 438 | 
 439 | ```python
 440 | #写入文件
 441 | todos=open('todos.txt','w')                     #清空原来的文件，写一个新文件
 442 | todos.writelines(["put out the trash.","Feed the cat.", "Prepare tax return."]) #写入数据, 不會自動增加换行符（\n）。
 443 | todos.close()  
 444 | 
 445 | 
 446 | #读取文件
 447 | #read
 448 | tasks=open('todos.txt')                         #打开一个文件，返回文件对象tasks
 449 | tasks_content=tasks.read()
 450 | print(type(tasks_content))
 451 | print(tasks_content)
 452 | tasks.close()
 453 | #readline
 454 | tasks=open('todos.txt')     
 455 | tasks_line=tasks.readline()
 456 | print(type(tasks_line))
 457 | print(tasks_line)
 458 | tasks.close()
 459 | #readlines
 460 | tasks=open('todos2.txt')     
 461 | tasks_lines=tasks.readlines()
 462 | print(tasks_lines)
 463 | tasks.close()
 464 | #一站式读取
 465 | with open('todos.txt') as tasks:
 466 |     tasks_content=tasks.read()
 467 |     print(tasks_content)
 468 |     
 469 | #多文件对比是否一致 输出True/False
 470 | import filecmp
 471 | r = filecmp.cmp('lecture3-cmp1.txt', 'lecture3-cmp2.txt')
 472 | print(r)
 473 | ```
 474 | 
 475 | ## CSV文件读写
 476 | 
 477 | ```python
 478 | #路径设置
 479 | import os 
 480 | #os库是Python标准库，提供通用的、基本的操作系统交互功能，包含几百个函数，常用的有路径操作、进程管理、环境参数等。
 481 | os.getcwd() 
 482 | os.chdir("") 
 483 | 
 484 | #导出数据
 485 | df.to_csv('foo.csv')                  #将df导出到foo.csv为csv格式
 486 | df.to_hdf('foo.h5','df')              #将df导出到foo.h5为hdf格式
 487 | df.to_excel('foo.xlsx', sheet_name='Sheet1')      #将df导出到foo.xlsx为xlsx格式 
 488 | 
 489 | 
 490 | #读取CSV文件 
 491 | #pd.read_csv(filepath_or_buffer, sep=',', header='infer', names=None, index_col=None, encoding=None, skiprows=[0, 2, 3], nrows=5)
 492 | data = pd.read_csv("7.1.csv")
 493 | data = pd.read_table('ex1.csv', sep=',') 
 494 | !type ex2.csv
 495 | 
 496 | #读取Excel文件  
 497 | #pd.read_excel(io, sheet_name=0, header=0, index_col=None, names=None)
 498 | data = pd.read_excel("ex1.xlsx",sheet_name = 0)
 499 | 
 500 | #读取JSON
 501 | data = pd.read_json('example.json')
 502 | 
 503 | #写入数据
 504 | data.to_csv('out.csv', sep='|', index=False, header=False, columns=['a', 'b', 'c'])#分隔符默认为逗号；index/header控制是否写入行索引和列名；columns指定要写入的列
 505 | ```
 506 | 
 507 | ## NumPy
 508 | 
 509 | ```python
 510 | #建立array
 511 | arr = np.array([0, 1, 2, 3, 4, 5, 6, 7])
 512 | 
 513 | #特殊数组
 514 | np.zeros((2,5))                #零数组
 515 | np.ones((6,7))                 #1数组
 516 | np.empty((3,3))                #空数组
 517 | np.arange(1,100,5)             #序列数组
 518 | np.linspace(1,100,5)           #等差序列数组
 519 | np.full((3,4), 1.0)            #size为（3，4）全为1.0的数
 520 | 
 521 | #等差数量
 522 | np.linspace(-1.4, 1.4, 30)     #（min，max，bin）
 523 | 
 524 | #正态分布
 525 | #randn：从标准正态分布中生成8个随机数
 526 | x = np.random.randn(8)
 527 | #array([-1.10095156, -0.68793958, -0.44880688,  0.15520171,  0.85216375,-0.55215269, -0.90874604,  0.98619793])
 528 | 
 529 | #生成8个[0,1)区间内均匀分布的随机数
 530 | x = np.random.rand(8)
 531 | #array([0.63663356, 0.74770339, 0.94942448, 0.25861892, 0.13224901,0.34169378, 0.19222976, 0.57935704])
 532 | 
 533 | #生成一个随机数
 534 | x = np.random.randint(8)
 535 | #5
 536 | 
 537 | #生成array，限制区间
 538 | x = np.random.randint(low=0,high=10,size=(2,2))
 539 | #array([[8, 0],[4, 9]])
 540 | 
 541 | #choice  随机选择一个数
 542 | a = ['a','b']
 543 | np.random.choice(a)
 544 | 
 545 | 
 546 | #取两个array中大的数
 547 | np.maximum(x, y)
 548 | #array([0.73945297, 0.99010878, 0.10009918, 0.84977493, 0.43784594,0.04473167, 1.056885  , 0.85558624])
 549 | 
 550 | #分离数的整数和小数部分
 551 | remainder, whole_part = np.modf(arr)
 552 | remainder
 553 | #array([ 0.32326675, -0.77144607, -0.15537755, -0.70869285,  0.9733596 ,-0.64643437, -0.50897606])
 554 | whole_part
 555 | #array([ 6., -1., -4., -2.,  6., -4., -7])
 556 | 
 557 | #累加
 558 | arr.cumsum(axis=0)                               # 沿着列方向累加
 559 | 
 560 | #累乘
 561 | arr.cumprod(axis=1)                              # 沿着行方向累乘
 562 | 
 563 | #排序
 564 | arr.sort(axis=0)
 565 | df1.sort_values(by="Name")
 566 | 
 567 | #set
 568 | np.unique(arr)
 569 | 
 570 | #测试values里的数值是否在[2,3,6]内
 571 | np.in1d(values, [2, 3, 6])
 572 | 
 573 | #多维数据变为一维
 574 | g.ravel()
 575 | 
 576 | #a，b大小比较
 577 | np.greater(a, b)
 578 | 
 579 | #a,b取大值
 580 | np.maximum(a, b)
 581 | 
 582 | #数据结构转变
 583 | b.reshape(4,2,6)                                  #4组，2行，6列
 584 | t.transpose((1,2,0))                              # (depth→width, height→depth, width→height)
 585 | ```
 586 | 
 587 | ### Array矩阵运算
 588 | 
 589 | ```python
 590 | #矩阵点乘
 591 | a*b 
 592 | 
 593 | #矩阵点积
 594 | np.vdot(a,b)
 595 | 
 596 | #逆矩阵
 597 | b = np.linalg.inv(a)
 598 | 
 599 | #矩阵乘法
 600 | np.dot(a,b)
 601 | 
 602 | #矩阵行列式
 603 | np.linalg.det(a)
 604 | 
 605 | #矩阵求解方程
 606 | np.linalg.solve(A,B)
 607 | 
 608 | #矩阵QR分解
 609 | q, r = np.linalg.qr(m3)
 610 | ```
 611 | 
 612 | ## Pandas
 613 | 
 614 | ### Series
 615 | 
 616 | ```python
 617 | #输入列index和元素values
 618 | s2=Series([5,'Research Methodology','Python'],index=['coursecode', 'course name', 'software'])
 619 | 
 620 | #判断缺失值
 621 | s3.isnull()
 622 | s3.notnull()
 623 | 
 624 | #布尔
 625 | s3.isnull().any()
 626 | s3.isnull().all()
 627 | s3.isnull().sum()
 628 | 
 629 | #元素查看
 630 | s5.index
 631 | s5.values
 632 | s5.dtype
 633 | s5.size
 634 | s5.items
 635 | s[[True,True,False]]
 636 | 
 637 | #索引
 638 | s['a']
 639 | s['a':'c']
 640 | s[['a','c']]
 641 | s[[True,True,False]]
 642 | a = np.array([1, 5, 3, 19, 13, 7, 3])
 643 | a[2::2]                                   #从第2个开始依次2跳跃
 644 | a[::-1]                                   #从后往前
 645 | 
 646 | #loc
 647 | s.loc['a']
 648 | s.loc['a':'c']
 649 | s.loc[['a','c']]
 650 | s.loc[[True,True,False]]
 651 | 
 652 | #iloc
 653 | s.iloc[0] 
 654 | s.iloc[0:2]
 655 | s.iloc[[0,2]] 
 656 | s.iloc[[True,True,False]]
 657 | 
 658 | #布尔索引
 659 | s[s <= 10]                                # 选取所有 value 小于等于 10 的 s
 660 | s[s.between(10, 20)]                      # 选取所有 value 大于等于 10，小于 等于20 的 s
 661 | s[s.isin([10, 30])]                       # 选取所有 value 等于 10 或 30 的 s
 662 | #等效的三种表达方法
 663 | s.iloc[(s>10).values]
 664 | s.loc[(s.values>10)]
 665 | s.loc[(s>10).values]
 666 | #查看空缺行
 667 | df1[df1['score'].isnull()== True]
 668 | 
 669 | #替换
 670 | s1.replace(-999, np.nan)                  #单对象替换
 671 | s1.replace([-999, -1000], np.nan)         #多对象替换
 672 | s0.replace(to_replace=[12,23], value=[10, 20], inplace=False)        #多对象指定替换
 673 | s1.replace({-999: np.nan, -1000: 0})
 674 | 
 675 | #删除
 676 | del s0['d']
 677 | #df6.drop(index=df6.loc['D':'f'].index,columns=None, axis=0, inplace=True)
 678 | s0.drop(['d', 'c'])
 679 | 
 680 | #排序
 681 | s7.sort_index()
 682 | ```
 683 | 
 684 | ### DataFrame
 685 | 
 686 | ```python
 687 | #建立DataFrame
 688 | d = np.random.rand(5,3)                               #2D ndarray     
 689 | indexd = ['2001', '2002', '2003', '2004', '2005']
 690 | columnd = ['CN', 'US', 'UK']
 691 | df0 = pd.DataFrame(d, index=indexd, columns=columnd)
 692 | 
 693 | a = [[1,np.nan,2],[NA,4,None]]                        #array法
 694 | data = pd.DataFrame(a)
 695 | 
 696 | #新行列增加
 697 | funddata['rank'] = funddata[c]*100
 698 | funddata[c+' rank'] = funddata[c].rank(ascending = False)
 699 | 
 700 | #查看属性
 701 | df0.index
 702 | df0.columns
 703 | df0.shape
 704 | df0.size
 705 | 
 706 | #文件合并
 707 | df1+df2
 708 | pd.concat([df4, df4_2], axis=0)                       #axis： 需要合并链接的轴，0是行，1是列
 709 | #Series与DataFrame相加
 710 | df4+df4.iloc[0]
 711 | 
 712 | #文件相减
 713 | #DataFrame.sub(other, axis='columns', level=None, fill_value=None) 获取DataFrame和其他元素的减法
 714 | df4.sub(df4.iloc[:,0],axis=0)
 715 | 
 716 | #Dataframe读取
 717 | pd.options.display.max_rows = 10
 718 | 
 719 | #行列重新命名
 720 | df4.rename(index=str.title, columns=str.upper)
 721 | 
 722 | #数据标签
 723 | df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})  #源文件
 724 | df['grade'] = df['raw_grade'].astype("category")      #转类型为分类
 725 | df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])       #重命名分类（与原分类一一对应）
 726 | df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"])                                         #当出现一对多时，则按照自身性质顺序对应
 727 | df.groupby("grade").size()                            #对每一类进行统计
 728 | 
 729 | data1['alpha'].idxmax()          #最大值的位置
 730 | data1.loc[data1['alpha'].idxmax()]     #最大值所在位置的行
 731 | ```
 732 | 
 733 | ## 数据处理
 734 | 
 735 | ```python
 736 | #缺失值处理
 737 | #查看缺失值
 738 | df1[df1['score'].isnull()== True]                    #某列有
 739 | data[data.isnull().values== True]                    #但凡有
 740 | 
 741 | #Series的缺失值过滤
 742 | s[s.notnull()]
 743 | s.dropna() 
 744 | df1.dropna(axis=1)
 745 | df1.dropna(inplace=True) 
 746 | df2.dropna(how="all")                                #当整一行都是na时删除
 747 | 
 748 | #过滤特定列的缺失值
 749 | housing.dropna(subset=["total_bedrooms"])
 750 | 
 751 | #保留NA
 752 | df2.dropna(thresh=2)                                 #传入thresh=n保留至少有n个非NaN数据的行
 753 | 
 754 | #把部分值转成缺失值
 755 | result = pd.read_csv('ex5.csv', na_values=['one'])
 756 | df.replace(0,np.nan)
 757 | 
 758 | #把缺失值赋值
 759 | df3.fillna(0)  
 760 | df3.fillna({1: 0.5, 2: 0.2})                         #不同的列使用不同的缺失值
 761 | df4.fillna(method='ffill')                           #前向填充，即使用上一行的值进行填充
 762 | df4.fillna(method='ffill',axis=1)                    #列进行填充；前向填充，即使用左边一列的值进行填充
 763 | df4.fillna(method='ffill', limit=2)                  #limit : int, 默认值 None； 指定连续的前向/后向填充的最大数量。
 764 | df4.fillna(method="bfill")                           #后向填充，使用下一行的值,不存在的时候就不填充
 765 | df5.fillna(df5.mean())                               #平均值填补
 766 | df5.fillna(df5.median(axis=1))                       #中位数（行中位数）填补
 767 | 
 768 | #检测重复值
 769 | df1.duplicated()
 770 | 
 771 | #删除重复值
 772 | df1.drop_duplicates(['k1']) 
 773 | df1.drop_duplicates(['k1', 'k2'], keep='last')        #保留最后一个值
 774 | 
 775 | #数据映射map
 776 | #map()函数会对一个序列对象中的每一个元素应用被传入的函数，并且返回一个包含了所有函数调用结果的一个列表。
 777 | df2['animal'] = lowercasedF.map(meat_to_animal)       #LowercasedF为映射的x，meat_to_animal为映射标准;df2为目标映射区
 778 | 
 779 | #匿名函数
 780 | a = map(lambda x: x ** 2, [1, 2, 3, 4, 5, 6, 7, 8, 9])
 781 | df2['food'].map(lambda x: meat_to_animal[x.lower()])  #返回字典data中food列作為meat_to_anmiaml字典中的鍵值所被映射到的value.
 782 | 
 783 | #离散&箱化
 784 | ages = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]
 785 | bins = [18, 25, 35, 60, 100]                          #通过给定端点，定义分组的区间
 786 | cats = pd.cut(ages, bins)
 787 | pd.cut(ages, [18, 26, 36, 61, 100], right=False)
 788 | group_names = ['Youth', 'YoungAdult', 'MiddleAged', 'Senior']
 789 | pd.cut(ages, bins, labels=group_names)
 790 | pd.cut(data, 4, precision=2)
 791 | 
 792 | #属性
 793 | cats.categories                                       #輸出category的label
 794 | cats.codes                                            #輸出数据归于第幾個category
 795 | pd.value_counts(cats)                                 #輸出每一類別的頻數分佈表
 796 | 
 797 | #检测过滤异常值
 798 | df6.describe()
 799 | col = df6[2]#标准差
 800 | df6[np.abs(col) > 3]     
 801 | df6[(np.abs(df6) > 3).any(axis=1)]
 802 | funddata[(funddata['growth_overall_income_growth_ratio rank'] < int(len(indexstock))*0.3) & (funddata['growth_np_atsopc_dnrgal_yoy rank'] < int(len(indexstock))*0.3)]                             #过滤条件
 803 | 
 804 | #随机抽样
 805 | sample1 = np.random.permutation(5) 
 806 | df7.take(sample1,axis=0)
 807 | 
 808 | #抽样个数
 809 | df7.sample(n=3)
 810 | 
 811 | #字符串分割
 812 | val.split(',')
 813 | pieces = [x.strip() for x in val.split(',')]          #先分割再移除字符串头尾的空格
 814 | 
 815 | #字符串合并
 816 | '，'.join(pieces)
 817 | 
 818 | #np array合并
 819 | np.vstack((q1, q2, q3))                               #纵向合并
 820 | np.hstack((q1, q3))                                   #横向合并
 821 | np.concatenate((q1, q2, q3), axis=0)                  #调整axis实现合并
 822 | merge(left,right)
 823 | np.stack((q1, q3))                                    #创建新的axis
 824 | 
 825 | #np array拆分
 826 | np.vsplit(r, 3)                                       #纵向拆分，沿行方向axis=0(目标数据，分成几组)
 827 | np.hsplit(r, 2)                                       #横向拆分，沿列方向axis=1(目标数据，分成几组)
 828 | 
 829 | #字符串查找                      
 830 | print('a' in info)                                    #返回True False
 831 | print(info.find('a'))                                 #返回位置，若没有返回-1
 832 | print(info.index('a'))                                #返回位置，若没有报错
 833 | print(info.rfind('a'))                                #从末尾开始找，同上
 834 | print(info.rindex('a'))                               #从末尾开始找，若没有报错
 835 | 
 836 | #字符串统计
 837 | info.count('a')
 838 | 
 839 | #字符串替换
 840 | #replace()： 把字符串中的 old（旧字符串） 替换成 new(新字符串)，如果指定第三个参数max，则替换不超过 max 次。
 841 | str.replace("is", "was", 2)
 842 | 
 843 | #双中括号得出为dataframe不是series
 844 | housing_ocean_proximity = housing[["ocean_proximity"]] # one bracket vs two brackets
 845 | ```
 846 | 
 847 | 
 848 | ## JSON DATA
 849 | 
 850 | ```python
 851 | #解码
 852 | result = json.loads(obj)   
 853 | 
 854 | #编码
 855 | asjson = json.dumps(result)
 856 | 
 857 | #输出
 858 | data.to_json()
 859 | data.to_json(orient='records')                       #输出的数据结构是 列:变量名->值 的形式
 860 | 
 861 | #写文件
 862 | with open("outputjson.json","w") as f:
 863 |     json.dump(data.to_json(), f)                     #将数据转换成json 格式，再使用 json.dump() 输出文件
 864 | !type outputjson.json
 865 | 
 866 | # 读文件
 867 | with open('outputjson.json', 'r') as f:
 868 |     data = json.load(f)                              #使用json.load() 读取JSON数据
 869 | print(data)
 870 | type(data)
 871 | ```
 872 | 
 873 | ## 虚拟变量
 874 | 
 875 | ```python
 876 | #生成虚拟变量
 877 | pd.get_dummies(df8['Gender'])
 878 | dummies = pd.get_dummies(df8['Gender'], prefix='Gender')         #虚拟变量加前缀
 879 | df8_with_dummy = df8[['Age']].join(dummies)
 880 | ```
 881 | 
 882 | 
 883 | 
 884 | ## 正则表达式
 885 | 
 886 | | 正则表达式 | 匹配对象              | 正则表达式                              | 匹配对象                                       |
 887 | | ---------- | --------------------- | --------------------------------------- | ---------------------------------------------- |
 888 | | '\d'       | 数字                  | [0-9a-zA-Z_]                            | 匹配一个数字、字母或者下划线                   |
 889 | | '\w'       | 字母、数字或下划线    | [0-9a-zA-Z_]+                           | 匹配至少由一个数字、字母或者下划线组成的字符串 |
 890 | | '\s'       | 空格                  | A\|B                                    | 匹配A或B                                       |
 891 | | .          | 任意字符              | ^                                       | ^\d表示行必须以数字开头                        |
 892 | | *          | 任意个字符（包括0个） | $                                       | \d$表示行必须以数字结束                        |
 893 | | +          | 至少一个字符          |                                         |                                                |
 894 | | ？         | 0个或1个字符          |                                         |                                                |
 895 | | {n}        | n个字符               |                                         |                                                |
 896 | | {n,m}      | n-m个字符             |                                         |                                                |
 897 | 
 898 | ```python
 899 | import re
 900 | 
 901 | #用re.compile编译，节省CPU
 902 | regex = re.compile('\s+')
 903 | regex = re.compile(r'A\d+', flags=re.IGNORECASE)       #flag是匹配标志位，是否区分大小写，是否多行匹配
 904 | regex.split(text)
 905 | regex.findall(text)
 906 | 
 907 | #查找
 908 | re.search(pattern,str1)                                #扫描整个字符串并返回第一个成功的匹配
 909 | re.findall(pattern,str1)                               #返回string中所有与pattern相匹配的全部字串，返回形式为数组
 910 | re.finditer(pattern,str1)                              #返回string中所有与pattern相匹配的全部字串，返回形式为迭代器
 911 | 
 912 | #案例
 913 | text = """Dave dave@google.com 
 914 |           Steve steve@gmail.com 
 915 |           Rob rob@gmail.com 
 916 |           Ryan ryan@yahoo.com """
 917 | pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}'
 918 | regex = re.compile(pattern, flags=re.IGNORECASE)
 919 | m = regex.findall(text)
 920 | print(m)
 921 | 
 922 | ```
 923 | 
 924 | 
 925 | 
## Matplotlib
 927 | 
- 颜色字符：'b' 蓝色，'m' 洋红色，'g' 绿色，'y' 黄色，'r' 红色，'k' 黑色，'w' 白色，'c' 青绿色，'#008000' RGB 颜色字符串。多条曲线不指定颜色时，会自动选择不同颜色。
- 线型参数：'-' 实线，'--' 破折线，'-.' 点划线，':' 虚线。
 930 | - 标记字符：'.' 点标记，',' 像素标记(极小点)，'o' 实心圈标记，'v' 倒三角标记，'^' 上三角标记，'>' 右三角标记，'<' 左三角标记...等等。
 931 | 
 932 | ```python
 933 | import matplotlib.pyplot as plt
 934 | 
 935 | #创建画板
 936 | fig1 = plt.figure(1)                        #创建第一个画板
 937 | figure = plt.figure(figsize=(15,6))         #画板尺寸
 938 | ax = fig1.add_axes([0,0,1,1])               #在画板上添加一个轴域,即坐标系。0,0表示轴域的坐标点，1,1表示轴域的宽和高，这个是与画板的比例值，这里该轴域与画板一样大。
 939 | ax1 = fig1.add_subplot(2, 2, 1)             #figure是2x2（这样一共有4幅子图），而且我们选中4个subplots（数字从1到4）中的第2个。
 940 | 
 941 | #创建一个pyplot
 942 | xpoints = np.array([0, 6])
 943 | ypoints = np.array([0, 100])
 944 | plt.plot(xpoints, ypoints，'b')              #b蓝色# 创建 y 中数据与 x 中对应值的二维线图，使用默认样式
 945 | plt.show()                                   #plt.show() 默认是在新窗口打开一幅图像
 946 | 
 947 | #属性
 948 | plt.xlabel("x - label")                      #横坐标
 949 | plt.ylabel("y - label")                      #纵坐标
 950 | plt.axis([-1,5,-1,20])                       #坐标轴范围plt.axis([xmin, xmax, ymin, ymax])
 951 | plt.suptitle("RUNOOB subplot Test")          #子标题
 952 | plt.title("RUNOOB TEST TITLE")               #标题
 953 | plt.legend(['First series'])                 #图示
 954 | plt.grid(True)                               #网格线
 955 | plt.show()
 956 | 
 957 | #subplot
 958 | x = np.linspace(-1.4, 1.4, 30)
 959 | plt.subplot(2, 2, 1)  # 2 rows, 2 columns, 1st subplot = top left
 960 | plt.plot(x, x)
 961 | plt.subplot(2, 2, 2)  # 2 rows, 2 columns, 2nd subplot = top right
 962 | plt.plot(x, x**2)
 963 | plt.subplot(2, 2, 3)  # 2 rows, 2 columns, 3rd subplot = bottow left
 964 | plt.plot(x, x**3)
 965 | plt.subplot(2, 2, 4)  # 2 rows, 2 columns, 4th subplot = bottom right
 966 | plt.plot(x, x**4)
 967 | plt.show()
 968 | 
 969 | #subplot子图位置组合
 970 | plt.subplot(2, 2, 1)  # 2 rows, 2 columns, 1st subplot = top left
 971 | plt.plot(x, x)
 972 | plt.subplot(2, 2, 2)  # 2 rows, 2 columns, 2nd subplot = top right
 973 | plt.plot(x, x**2)
 974 | plt.subplot(2, 1, 2)  # 2 rows, *1* column, 2nd subplot = bottom
 975 | plt.plot(x, x**3)
 976 | plt.show()
 977 | 
 978 | #用subplot2grid来更复杂的确定图表位置（左上角开始为（0，0）
 979 | plt.subplot2grid((3,3), (0, 0), rowspan=2, colspan=2)
 980 | plt.plot(x, x**2)
 981 | plt.subplot2grid((3,3), (0, 2))
 982 | plt.plot(x, x**3)
 983 | plt.subplot2grid((3,3), (1, 2), rowspan=2)
 984 | plt.plot(x, x**4)
 985 | plt.subplot2grid((3,3), (2, 0), colspan=2)
 986 | plt.plot(x, x**5)
 987 | plt.show()
 988 | ```
 989 | 
 990 | ### xy轴设置
 991 | 
 992 | ```python
 993 | axes.set_xticks([0,125,250])
 994 | axes.set_xticklabels(['2020-01-14','2020-06-11','2021-01-22'].fontsize=14)
 995 | ```
 996 | 
 997 | ### 密度图
 998 | 
 999 | ```python
1000 | #密度图
1001 | a.plot(kind = 'density', subplots = True, layout=(2,4), sharex = False , figsize = (20,10), fontsize = 15)  #kind图片的类型；subplot=两行四列；sharex是自适应坐标轴函数默认False；figsize图片的大小；
1002 | ```
1003 | 
1004 | ### 直方图
1005 | 
1006 | - data:必选参数，绘图数据
1007 | - bins:直方图的长条形数目，可选项，默认为10
- normed:是否将得到的直方图向量归一化，可选项，默认为0，代表不归一化，显示频数。normed=1，表示归一化，显示频率。（注意：新版 Matplotlib 已移除 normed 参数，请改用 density=True。）
1009 | - color:长条形的颜色
1010 | - edgecolor:长条形边框的颜色
1011 | - alpha:透明度
1012 | 
1013 | ```python
# Signature, for reference only (kept as a comment because a bare `*` is not valid in a call).
# `normed` is no longer accepted by current Matplotlib; use `density` instead.
# matplotlib.pyplot.hist(x, bins=None, range=None, density=False, weights=None, cumulative=False,
#                        bottom=None, histtype='bar', align='mid', orientation='vertical', rwidth=None,
#                        log=False, color=None, label=None, stacked=False, *, data=None, **kwargs)

# Example
data = np.random.randn(100)
plt.hist(data,bins=20,orientation='horizontal',color='black',alpha=0.3,)
1019 | ```
1020 | 
1021 | ### 散点图
1022 | 
1023 | - x：指定 X 轴数据。
1024 | - y：指定 Y 轴数据。
1025 | - s：指定散点的大小。
1026 | - c：指定散点的颜色。
1027 | - alpha：指定散点的透明度。
1028 | - linewidths：指定散点边框线的宽度。
1029 | - edgecolors：指定散点边框的颜色。
- marker：指定散点的图形样式。该参数支持'.'（点标记）、','（像素标记）、'o'（圆形标记）、'v'（向下三角形标记）、'^'（向上三角形标记）、'<'（向左三角形标记）、'>'（向右三角形标记）、'1'（向下三叉标记）、'2'（向上三叉标记）、'3'（向左三叉标记）、'4'（向右三叉标记）、's'（正方形标记）、'p'（五边形标记）、'*'（星形标记）、'h'（六边形标记）、'H'（另一种六边形标记）、'+'（加号标记）、'x'（x标记）、'D'（菱形标记）、'd'（尖菱形标记）、'|'（竖线标记）、'_'（横线标记）等值。
- cmap：指定散点的颜色映射，会使用不同的颜色来区分散点的值。
1032 | 
1033 | ```python
# Signature, for reference only (kept as a comment; inline notes after default values are not valid Python):
# matplotlib.pyplot.scatter(x, y, s=None, c=None, marker=None, cmap=None, norm=None, vmin=None, vmax=None,
#                           alpha=None, linewidths=None, edgecolors=None, *, plotnonfinite=False,
#                           data=None, **kwargs)
#   s = marker size, c = marker colour, alpha = transparency

# Example
x =[5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6]  
y =[99, 86, 87, 88, 100, 86, 103, 87, 94, 78, 77, 85, 86] 
plt.scatter(x, y, c ="blue") 
1040 | ```
1041 | 
1042 | 
1043 | ### 热力图
1044 | 
1045 | ```python
import seaborn as sns

# Heatmap of the correlation matrix of period-over-period changes;
# annot=True is a keyword argument of heatmap() and writes each value into its cell
sns.heatmap(indexdf.pct_change().corr(), annot=True)
1049 | ```
1050 | 
1051 | [seaborn.heatmap-Seaborn 0.9 中文文档 (cntofu.com)](https://cntofu.com/book/172/docs/30.md)
1052 | 
1053 | ### Polar环状图
1054 | 
1055 | ```python
1056 | radius = 1
1057 | theta = np.linspace(0, 2*np.pi*radius, 1000)
1058 | 
1059 | plt.subplot(111, projection='polar')
1060 | plt.plot(theta, np.sin(5*theta), "g-")
1061 | plt.plot(theta, 0.5*np.cos(20*theta), "b-")
1062 | plt.show()
1063 | ```
1064 | 
1065 | ### 3D projection
1066 | 
1067 | ```python
from mpl_toolkits.mplot3d import Axes3D   # presumably needed to register the '3d' projection on older Matplotlib -- confirm for your version

# Build a grid and evaluate z = sin(sqrt(x^2 + y^2)) on it
x = np.linspace(-5, 5, 50)
y = np.linspace(-5, 5, 50)
X, Y = np.meshgrid(x, y)
R = np.sqrt(X**2 + Y**2)
Z = np.sin(R)

figure = plt.figure(1, figsize = (12, 4))
subplot3d = plt.subplot(111, projection='3d')
# Use plt.cm for the colormap: only `matplotlib.pyplot as plt` is imported in these notes,
# so the bare name `matplotlib` would raise a NameError
surface = subplot3d.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=plt.cm.coolwarm, linewidth=0.1)
plt.show()
1080 | ```
1081 | 
1082 | 
1083 | ```python
# Filled contour plot of Z over the X/Y grid; plt.cm avoids needing the bare `matplotlib` name in scope
plt.contourf(X, Y, Z, cmap=plt.cm.coolwarm)
plt.colorbar()   # colour scale legend
plt.show()
1087 | ```
1088 | 
1089 | ### 注释
1090 | 
1091 | axes.annotate(s, xy, *args, **kwargs)用于在图形上给数据添加文本注解，而且支持带箭头的划线工具，方便我们在合适的位置添加描述信息。
1092 | 
1093 | - s：注释文本的内容
1094 | - xy：被注释的坐标点，二维元组形如(x,y)
1095 | - xytext：注释文本的坐标点，也是二维元组，默认与xy相同
1096 | 
1097 | ```python
# Text placement, for reference (the method is `text`):
# ax.text(x, y, string, ...) / plt.text(x, y, string, fontsize=15, verticalalignment="top", horizontalalignment="right")

# Examples
# Arrow
ax = plt.axes() 
ax.arrow(0, 0, 0.6, 0.7, head_width = 0.05, head_length = 0.1)
ax.set_title('matplotlib.axes.Axes.arrow() Example',fontsize = 14, fontweight ='bold')   
plt.show()

# Text annotation with an arrow pointing from xytext to xy
ax1.annotate('Starting', xy =(3.3, 1), xytext =(3, 1.8), arrowprops = dict(facecolor ='green', shrink = 1),   ) 

# Saving the figure
plt.savefig('figpath.png', dpi=400, bbox_inches='tight')
# dpi: resolution in dots per inch
# bbox_inches='tight': trims the blank margin around the figure

# Label a point: x, y are the label coordinates (raised by 0.01 to avoid overlapping the mark),
# '{}%' is filled with y rounded to 2 decimals, ha centres horizontally, va anchors the text bottom, font size 15
plt.text(x,y+0.01,'{}%'.format(round(y,2)), ha ='center',va='bottom',fontsize = 15)
1116 | ```
1117 | 
1118 | 
1119 | 
1120 | 
1121 | 
1122 | ### 全局变量
1123 | 
1124 | - matplotlib很多默认的设置是可以自己定义的，通过修改一些全局设定，比如图大小，subplot间隔，颜色，字体大小，网格样式等等。
1125 | 
- pyplot可以使用rc配置文件来自定义图形的各种默认属性，被称为rc参数。rc参数可以动态修改，在修改后，绘图使用的参数就会发生改变。例如，想要设置全局的图大小为10 x 10，键入：
1127 | 
1128 |   plt.rc('figure', figsize=(10, 10))
1129 | 
1130 |   rc中的第一个参数是我们想要自定义的组件，比如'figure', 'axes', 'xtick', 'ytick', 'grid', 'legend'，或其他。然后添加一个关键字来设定新的参数。
1131 | 
1132 | - 一个比较方便的写法是把所有的设定写成一个dict：
1133 | 
1134 |   
1135 | 
  ```python
  font_options = {'family': 'monospace','weight': 'bold','size'  : 'small'}
  plt.rc('font', **font_options)
  ```
1141 | 
1142 | 更详细的设定可以去看一下文档。
1143 | 
1144 | 
1145 | 
1146 | ## 统计
1147 | 
1148 | ```python
1149 | #线性回归模型拟合
1150 | import statsmodels.api as sm
1151 | import statsmodels.formula.api as smf
1152 | 
1153 | #拟合回归模型
1154 | results = smf.ols('Sales ~ Price + Advertising', data=df1).fit()       #使用statsmodels庫中 OLS对象的 fit() 方法来进行模型拟合
1155 | results.params                          #輸出计算出的回归系数
1156 | results.summary()                       #輸出回归拟合的統計摘要
1157 | df6.describe()
1158 | 
1159 | #计算预测值
1160 | x = df1[['Price', 'Advertising']]
1161 | y_pred = results.predict(x)
1162 | df1['sales_pred'] = y_pred
1163 | df1
1164 | #wls
1165 | from statsmodels.sandbox.regression.predstd import wls_prediction_std
1166 | prstd, iv_l, iv_u = wls_prediction_std(results)
1167 | df1['pred_STD'] = prstd
1168 | df1['pred_LowerLimit'] = iv_l
1169 | df1['pred_UpperLimit'] = iv_u
1170 | df1
1171 | 
# One-way analysis of variance (ANOVA)
import scipy.stats as stats             # SciPy statistics functions
from statsmodels.formula.api import ols           # formula interface for ordinary least squares
from statsmodels.stats.anova import anova_lm      # ANOVA table for one or more fitted linear models

# The loaded data must be bound to the same name that ols() reads below
dat1 = pd.read_csv("7.1.csv")
model = ols('y ~ C(Variety)',dat1).fit()          # treat Variety as the categorical factor and fit by OLS
anovat = anova_lm(model)                # ANOVA table for the fitted model
print(anovat)
1181 | ```
1182 | 
1183 | ## 机器学习scikit-learn
1184 | 
1185 | scikit-learn是一个Python第三方提供的非常强力的机器学习库，它包含了从数据预处理到训练模型的各个方面。使用scikit-learn可以极大的节省我们编写代码的时间以及减少我们的代码量，使我们有更多的精力去分析数据分布，调整模型和修改超参。
1186 | 
1187 | scikit-learn 包含了很多监督式学习和非监督式学习的模型，可以实现分类，聚类，预测等任务。
1188 | 
1189 | 这里用一个经典的kaggle（Kaggle是一个数据建模和数据分析竞赛平台。企业和研究者可在其上发布数据，统计学者和数据挖掘专家可在其上进行竞赛以产生最好的模型）比赛数据集来做例子，泰坦尼克生还者数据集。
1190 | 
1191 | 对于这样的数据集，通常的任务是预测一个乘客最后是否生还。在训练集上训练模型，在测试集上验证效果。
1192 | 
1193 | ```python
1194 | #加载训练集和测试集
1195 | train = pd.read_csv('train.csv')
1196 | test = pd.read_csv('test.csv')
1197 | 
1198 | #中位数填补缺失值
1199 | impute_value = train['Age'].median()
1200 | train['Age'] = train['Age'].fillna(impute_value)
1201 | test['Age'] = test['Age'].fillna(impute_value)
1202 | impute_value
1203 | 
1204 | #建立虚拟变量
1205 | train['IsFemale'] = (train['Sex'] == 'female').astype(int)
1206 | test['IsFemale'] = (test['Sex'] == 'female').astype(int)
1207 | train.head()
1208 | 
1209 | #确定X变量有哪些
1210 | predictors = ['Pclass', 'IsFemale', 'Age']    
1211 | predictors
1212 | 
# Extract the feature matrices and the target vector
X_train = train[predictors].values          # training features; required by model.fit() below
X_test = test[predictors].values            # test features
X_test[:5]                                  # peek at the first five rows
y_train = train['Survived'].values          # training target
y_train[:5]

# Logistic regression
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()                # the estimator must be instantiated before it can be fitted

# Fit on the training set
model.fit(X_train, y_train)

# Predict on the test set
y_predict = model.predict(X_test)
y_predict[:10]

# Accuracy on the test set
# NOTE(review): this needs a 'Survived' column in test.csv; the stock Kaggle test file has none -- confirm the data source
y_true= pd.read_csv('test.csv')
y_true = y_true['Survived'].values
y_true
(y_true == y_predict).mean()

# Predicted class probabilities for the first training sample
model.predict_proba(X_train[:1])
1237 | ```
1238 | 
1239 | 实际过程中，训练模型的时候，经常用到交叉验证（cross-validation），用于调参，防止过度拟合。这样得到的预测效果会更好，穩健性更强。
1240 | 
1241 | 交叉验证是把训练集分为几份，每一份上又取出一部分作为测试样本，这些被取出来的测试样本不被用于训练，但我们可以在这些测试样本上验证当前模型的准确率或均方误差（mean squared error），而且还可以在模型参数上进行网格搜索（grid search）。一些模型，比如逻辑回归，自带一个有交叉验证的类。LogisticRegressionCV类可以用于模型调参，使用的时候需要指定正则化项C，来控制网格搜索的密集程度：
1242 | 
1243 | ```python
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV

# Tune the regularisation strength by built-in cross-validation.
# Cs must be passed by keyword in current scikit-learn; an int asks for a grid of that many values.
model_cv = LogisticRegressionCV(Cs=10)
model_cv.fit(X_train, y_train)

# Manual cross-validation
# cross_val_score splits the training data for you, here into 4 non-overlapping folds.
from sklearn.model_selection import cross_val_score
model = LogisticRegression(C=10)         # C is the inverse of the regularisation strength; smaller values specify stronger regularization
# Score the plain model built above; passing model_cv would nest a second cross-validation inside every fold
scores = cross_val_score(model, X_train, y_train, cv=4)
scores

# Each estimator has its own default scoring metric, but a scoring function can be specified explicitly.
# Cross-validation takes longer to train but usually yields a better model.
1258 | ```
1259 | 
1260 | ## 分组
1261 | 
1262 | ```python
# Sample data
df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'foo', 'foo'],
                   'B' : ['one', 'one', 'two', 'three',
                           'two', 'two', 'one', 'three'],
                   'C' : np.random.randn(8),
                   'D' : np.random.randn(8)})
# Group by A and sum the numeric columns (numeric_only skips the text column B)
df.groupby('A').sum(numeric_only=True)

# Group by A and B together
df.groupby(['A', 'B']).sum()

# Count column C within each (A, B) group -- column labels are case-sensitive, so it is 'C', not 'c'
df.groupby(['A', 'B'])['C'].count()

# Same count, flattened back into a regular table
df.groupby(['A', 'B'])['C'].count().reset_index()
1281 | 
1282 | #groupby行业排名
1283 | adf['ROE-RANK'] = adf.groupby('所属行业')['ROE'].rank(ascending = False)
1284 | 
1285 | #stack函数 类似转置
1286 | #原文档
1287 | tuples = list(zip(*[['bar', 'bar', 'baz', 'baz',
1288 |                      'foo', 'foo', 'qux', 'qux'],
1289 |                    ['one', 'two', 'one', 'two',
1290 |                     'one', 'two', 'one', 'two']]))
1291 | index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])   #组合为复合索引
1292 | df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])  #建立dataframe
1293 | stacked = df.stack()                     #序列转置
1294 | stacked.unstack()                        #恢复原有序列
1295 | stacked.unstack(1)                       #第二列index变为values
1296 | stacked.unstack(0)                       #第一列index变为values
1297 | 
1298 | #数据透视表
1299 | #原文档
1300 | df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
1301 |                    'B' : ['A', 'B', 'C'] * 4,
1302 |                    'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
1303 |                    'D' : np.random.randn(12),
1304 |                    'E' : np.random.randn(12)})
1305 | 
# Pivot table: aggregate D for each (A, B) row pair, with one output column per value of C
df.pivot_table(values='D', index=['A', 'B'], columns='C')

# Reshape with pivot() and fill missing combinations with 0 (arguments need an ASCII comma).
# NOTE(review): '所属板块' / '上市年份' are not columns of the demo df above -- presumably this
# line is meant for the stock-info DataFrame built elsewhere; confirm before running.
df.pivot(columns='所属板块', index='上市年份').fillna(0)
1310 | ```
1311 | 
1312 | ## 分类Class
1313 | 
1314 | ```python
1315 | #建立属性元素
class Pet:
    """A pet described by its type, name, age, color and gender."""

    def __init__(self, t, n, a, c, g):
        """Store the five constructor arguments as instance attributes."""
        self.type, self.name, self.age = t, n, a
        self.color, self.gender = c, g

    def Greeting(self):
        """Print a greeting with the pet's name, then its sound for dogs and cats."""
        print('hello, my name is '+self.name)

        # Only these two types have a sound; any other type prints just the greeting
        for kind, sound in (('dog', 'woof woof'), ('cat', 'meow meow')):
            if self.type == kind:
                print(sound)
                break
1331 |             
p1 = Pet('dog', 'Lucky', 3, 'brown', 'male')
p1.Greeting()           
# Output:
# hello, my name is Lucky
# woof woof

# Attribute helpers, applied to the instance created above
getattr(p1, 'age')                      # read an attribute's value
hasattr(p1, 'name')                     # check whether an attribute exists
delattr(p1, 'age')                      # delete an attribute
setattr(p1, 'age', 2)                   # set (or re-create) an attribute

print(p1.__dict__)                      # show all instance attributes
1345 | 
1346 | #Data Structure: Stack
class Stack:
    """A last-in, first-out stack backed by a Python list."""

    def __init__(self):
        """Start with no items."""
        self.items = list()

    def is_empty(self):
        """Return True when the stack holds no items."""
        return len(self.items) == 0

    def push(self, item):
        """Place item on top of the stack."""
        self.items.append(item)

    def pop(self):
        """Remove and return the top item; IndexError if the stack is empty."""
        return self.items.pop()

    def get_size(self):
        """Return how many items are on the stack."""
        return len(self.items)

    def top(self):
        """Return the top item without removing it; IndexError if the stack is empty."""
        return self.items[-1]

    def get_stack(self):
        """Return the underlying list itself (not a copy), bottom item first."""
        return self.items
# Usage
s1 = Stack()
for value in (1, 3, 5):
    s1.push(value)
print(s1.get_stack())
print(s1.top())
s1.pop()
print(s1.top())
print(s1.get_stack())
print(s1.is_empty())
print(s1.get_size())
# Output:
# [1, 3, 5]
# 5
# 3
# [1, 3]
# False
# 2
1387 | 
1388 | ```
1389 | 
1390 | ------
1391 | 
1392 | # **量化**
1393 | 
1394 | ### 量化包
1395 | 
1396 | ```python
1397 | import talib #技术指标包
1398 | ```
1399 | 
1400 | ### 同花顺数据接口
1401 | 
1402 | ```python
1403 | from iFinDPy import*
1404 | THS_iFinDLogin('账号','密码')
1405 | ```
1406 | 
1407 | ### 成分股获取
1408 | 
1409 | ```python
1410 | stock = get_industry_stocks(indexnum_sw1, date)
1411 | get_all_security('stock','2020-07-13')
1412 | ```
1413 | 
1414 | ### 绩效反馈
1415 | 
1416 | ```python
1417 | #数据回测表现
1418 | 
def get_performance_analysis(T, year_day=252, risk_free_pct=3.0):
    """Summarise the back-test performance of a net-value series.

    Parameters
    ----------
    T : pandas.Series
        Net-value series, one observation per trading period.
        NOTE(review): the geometric return treats the final value as the
        cumulative growth factor, i.e. assumes the series starts at 1 -- confirm.
    year_day : int
        Number of trading periods per year used for annualisation.
    risk_free_pct : float
        Annual risk-free rate, in percent, subtracted in the Sharpe ratio.

    Returns
    -------
    list
        [final net value, geometric annualised return (%), maximum drawdown (%),
        Sharpe ratio, annualised volatility (%), share of new-high days (%)]

    Raises
    ------
    ValueError
        If T holds fewer than two observations.
    """
    n_obs = len(T)
    if n_obs < 2:
        raise ValueError("T must contain at least two observations")

    # New-high days: a value counts when it exceeds the running peak of all
    # earlier values. The first value has no predecessor (NaN compares False),
    # and the last value is included.
    previous_peak = T.cummax().shift(1)
    max_T = int((T > previous_peak).sum())
    max_day_rate = round(max_T / (n_obs - 1) * 100, 2)

    # Final net value; .iloc is positional, so this works for any index type
    final_value = T.iloc[-1]
    net_values = round(final_value, 4)

    period_returns = T.pct_change().dropna()

    # Arithmetic annualised return, in percent
    year_ret_mean = round(period_returns.mean() * year_day * 100, 2)

    # Geometric annualised return, in percent: growth ** (periods per year / periods) - 1
    year_ret_sqrt = final_value ** (year_day / n_obs) - 1
    year_ret_sqrt = round(year_ret_sqrt * 100, 2)

    # Annualised volatility, in percent
    vol = round(period_returns.std() * np.sqrt(year_day) * 100, 2)

    # Sharpe ratio; return, risk-free rate and volatility are all in percent
    sharpe = round((year_ret_mean - risk_free_pct) / vol, 2)

    # Maximum drawdown, in percent; a plain array keeps the helper's indexing positional
    downlow = round(maxdrawdown(np.asarray(T)) * 100, 2)

    return [net_values, year_ret_sqrt, downlow, sharpe, vol, max_day_rate]
1463 | 
1464 | #最大回测
1465 | # 再次定义函数：计算最大回撤
def maxdrawdown(arr):
    """Return the maximum drawdown of a net-value series as a fraction.

    Parameters
    ----------
    arr : array-like of numbers (list, numpy array or pandas Series)
        Net-value series in chronological order.

    Returns
    -------
    float
        1 - trough / peak for the largest peak-to-trough decline, or 0.0 when
        the series never falls below an earlier value.
    """
    # Work on a plain array so every index below is positional, whatever the input's own index is
    values = np.asarray(arr, dtype=float)
    running_peak = np.maximum.accumulate(values)

    # End of the worst drawdown: the point furthest below its running peak
    i = int(np.argmax((running_peak - values) / running_peak))
    if i == 0:
        # No drawdown at all; values[:0] would be empty and argmax would raise
        return 0.0

    # Start of that drawdown: the highest point before the trough
    j = int(np.argmax(values[:i]))
    return float(1 - values[i] / values[j])
1477 |     
1478 | ```
1479 | 
1480 | ### 指标打分
1481 | 
1482 | ```python
1483 | #打分法
def get_pct(x):
    """Min-max scale x onto a 0-100 score, rounded to 2 decimals.

    x must provide .min() and .max() and support arithmetic with their results.
    """
    lowest = x.min()
    span = x.max() - lowest
    return round((x - lowest) / span * 100, 2)
1487 | ```
1488 | 
1489 | ### 净值计算
1490 | 
1491 | ```python
def get_net(stock,start,end):
    """Return the cumulative product of (mean 'quote_rate' / 100 + 1) over time.

    Fetches the 'quote_rate' field via get_price() for `stock` between `start`
    and `end` at '1d' frequency, averages it across axis 1, and compounds it.
    NOTE(review): presumably 'quote_rate' is a daily percentage change and the
    result is an equal-weighted net-value curve -- confirm against the data API.
    """
    quotes = get_price(stock,start,end,'1d',['quote_rate'],is_panel =1)
    mean_rate = quotes['quote_rate'].mean(axis =1)
    growth_factor = mean_rate/100+1
    return growth_factor.cumprod()
1496 | 
1497 | indexdf = indexdf / indexdf.iloc[0] 
1498 | ```
1499 | 
1500 | ### 标准化处理
1501 | 
1502 | ```python
1503 | # 去极值+标准化
1504 | # MAD:中位数去极值
def filter_extreme_MAD(series,n): 
    """Clip series to median +/- n * MAD.

    MAD is the median of the absolute deviations from the series median.
    Values outside the band are replaced by the nearest band edge.
    """
    center = series.quantile(0.5)
    mad = (series - center).abs().quantile(0.50)
    upper = center + n*mad
    lower = center - n*mad
    return np.clip(series, lower, upper)
1511 | # 标准化
def standardize_series(series):
    """Return the z-score of series: (series - mean) / std, using series.std()."""
    centered = series - series.mean()
    return centered / series.std()
1516 | ```
1517 | 
1518 | ### 财务指标
1519 | 
1520 | ```python
1521 | #### 获取历年财务报告的重要财务指标
# Collect key financial indicators for each report year into one DataFrame.
# query / get_fundamentals and the asharevalue, ashareprofit, ashareoperate, asharedebt,
# growth and valuation tables are supplied by the data platform, not by this file.
finace_datadf = pd.DataFrame()
for d in ['2015','2016','2017','2018','2019']:
    '''
    四大维度,18项指标
     '''
    # NOTE(review): the string above says four dimensions / 18 indicators, but 17 indicator fields are listed below -- confirm
    q = query(asharevalue.symbol, # stock code
              asharevalue.date, # date
              
              # Profitability
              ashareprofit.roe_ttm, # ROE
              ashareprofit.roa_ttm, # ROA
              ashareprofit.net_sales_rate_ttm, # net profit margin on sales
              ashareprofit.operating_profit_rate_ttm, # operating profit margin (TTM)
              ashareprofit.cost_to_revenue_ttm, # total operating cost / total operating revenue
              
              # Operating efficiency
              ashareoperate.total_capital_turnover_ttm,# total asset turnover (TTM)
              ashareoperate.float_asset_turnover_ttm,# current asset turnover (TTM)
              ashareoperate.inventory_turnover_ttm, # inventory turnover (TTM)
              ashareoperate.account_receive_turnover_ttm,# accounts receivable turnover (TTM)
              
              # Solvency
              asharedebt.quick_ratio_mrq,# quick ratio (MRQ)
              asharedebt.equity_ratio_mrq,# equity ratio (MRQ)
              asharedebt.current_ratio_mrq,# current ratio (MRQ)
              
              # Growth
              growth.basic_eps_year_growth_ratio,# basic EPS growth rate
              growth.np_atsopc_dnrgal_yoy,# growth of net profit attributable to parent shareholders, excluding non-recurring items
              growth.opt_income_growth_ratio,# operating revenue growth rate
              growth.ncf_of_oa_yoy,# growth of net cash flow from operating activities
              growth.diluted_net_asset_growth_ratio,# growth of diluted return on net assets
              
              # NOTE(review): the filter uses valuation.symbol while the selected columns come from asharevalue -- confirm this is intended
              ).filter(valuation.symbol.in_(stock))
    # Fetch the indicators for report year d and append them to the running table
    df = get_fundamentals(q, statDate = d)
    finace_datadf = pd.concat([finace_datadf,df],axis =0)
finace_datadf
1559 | ```
1560 | 
1561 | ### 信息添加
1562 | 
1563 | ```python
1564 | # 根据股票代码，使用apply函数求各个字段
1565 | finacedf['股票名称'] = finacedf['股票代码'].apply(lambda x : get_security_info(x).display_name)
1566 | finacedf['上市时间'] = finacedf['股票代码'].apply(lambda x : get_security_info(x).start_date)
1567 | finacedf['所属板块'] = finacedf['股票代码'].apply(lambda x : get_stockindex(x))
1568 | finacedf['是否ST'] = finacedf['股票名称'].apply(lambda x : True if 'ST' in x else False)
1569 | finacedf['上市年份'] = finacedf['上市时间'].apply(lambda x : x.strftime('%Y'))
1570 | finacedf['所属申万3级行业'] = finacedf['股票代码'].apply(lambda x : inddict[get_symbol_industry(x,date).s_industryid3])
1571 | 
1572 | #根据股票代码，获取股票所属版块
def get_stockindex(x):
    """Return the board label for stock code x, or None if no rule matches.

    Rules are checked in order: a '688' prefix, a leading '3', then the
    'SH' / 'SZ' suffix.
    """
    if x[:3] == '688':
        return '科创版'
    if x[0] == '3':
        return '创业板'
    suffix = x[-2:]
    if suffix == 'SH':
        return '上证'
    if suffix == 'SZ':
        return '深证'
    return None
1582 | ```
1583 | 
1584 | ### 统计学检验
1585 | 
1586 | ```python
1587 | #正态分布
1588 | stats.kstest(data1,'norm')
1589 | 
1590 | #皮尔森相关系数检验
1591 | stats.pearsonr(data1,data2)
1592 | 
1593 | #T检验：检验两个独立样本的均值是否有差异
1594 | stats.ttest_ind(data1,data2)
1595 | 
1596 | #方差齐性检验：检验两组或多组数据与其均值偏离程度是否存在差异，是很多检验和算法的先决条件
1597 | stats.levene(data1,data2)
1598 | 
1599 | #单因素方差分析：检验有单一因素影响的多组样本因某变量的均值是否有显著差异
1600 | stats.f_oneway(data1,data2)
1601 | 
# Chi-square test of independence between two variables.
# Returns the test statistic, the p-value, the degrees of freedom and the array of expected frequencies.
# chi2_contingency() takes ONE contingency table, so the two columns are cross-tabulated first.
# NOTE(review): raw prices are continuous -- presumably they should be binned or
# converted to categories (e.g. up/down) before cross-tabulating; confirm the intent.
stats.chi2_contingency(pd.crosstab(price['000001.SZ'], price['601398.SH']))
1604 | 
1605 | #线性回归
1606 | #x , y 两组数据
1607 | X = sm.add_constant(x)
1608 | model = sm.OLS(y,X)
1609 | results = model.fit()
1610 | results.summary()
1611 | # β = str(results.params[1])
1612 | 
1613 | ```
1614 | 
1615 | 
1616 | 


--------------------------------------------------------------------------------