├── L1 ├── homework │ ├── action1.py │ ├── action2.py │ └── action3.py ├── t1.py ├── t10.py ├── t2.py ├── t3.py ├── t4.py ├── t5.py ├── t6.py ├── t7.py ├── t8.py ├── t9.py └── topn.py ├── L2 ├── dataframe │ ├── dataframe1.py │ ├── dataframe2.py │ ├── dataframe3.py │ ├── dataframe4.py │ ├── dataframe5.py │ ├── heros.csv │ ├── heros.xlsx │ ├── heros2.csv │ ├── result.csv │ └── result.xlsx ├── file_read │ ├── file_read1.py │ ├── file_read2.py │ ├── imagenet_class.csv │ └── temp.txt ├── homework │ └── Action.md ├── map │ ├── map1.py │ └── map2.py ├── regression │ ├── 000001.csv │ ├── 300005.csv │ ├── diabetes_regression.py │ ├── random_regression.py │ └── stock_regression.py ├── series │ └── series1.py ├── show │ ├── bar_show.py │ ├── shanghai_index_1990_12_19_to_2020_03_12.csv │ └── stock_show.py ├── syntax_tree │ ├── expr_syntax.py │ └── hero_syntax.py └── time │ └── time1.py ├── L3 ├── MarketBasket │ └── Market_Basket_Optimisation.csv ├── PythonEDA │ ├── dash1.py │ ├── flask1.py │ ├── python_eda.py │ ├── subplot1.py │ ├── test.csv │ ├── titanic_eda.py │ └── train.csv ├── echarts │ ├── bar1.html │ ├── bar1.py │ ├── line.py │ ├── page.py │ ├── pie.py │ ├── radar.py │ ├── render.html │ ├── rose.py │ └── temp.html ├── feiyan_data │ ├── country_data.csv │ ├── download_dxy_foreign_data_csv.py │ ├── download_qq_foreign_data_csv.py │ └── foreign_country_data.csv ├── feiyan_flask │ ├── App │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── ext.cpython-36.pyc │ │ │ ├── ext.cpython-37.pyc │ │ │ ├── ext.cpython-38.pyc │ │ │ ├── models.cpython-36.pyc │ │ │ └── models.cpython-37.pyc │ │ ├── ext.py │ │ ├── models.py │ │ ├── static │ │ │ ├── common │ │ │ │ ├── china.js │ │ │ │ ├── echarts.js │ │ │ │ ├── echarts.min.js │ │ │ │ └── jq.js │ │ │ ├── js │ │ │ │ ├── axis.js │ │ │ │ └── map.js │ │ │ └── json │ │ │ │ ├── china.json │ │ │ │ ├── italy.json │ │ │ │ ├── japan.json │ │ │ │ ├── korea.json │ │ │ │ ├── name.json │ │ │ │ └── world.json │ │ ├── templates │ │ │ └── feiyan.html │ │ └── views │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── api.cpython-36.pyc │ │ │ └── api.cpython-37.pyc │ │ │ ├── api.py │ │ │ ├── country_data.csv │ │ │ └── foreign_country_data.csv │ ├── __pycache__ │ │ ├── manage.cpython-36.pyc │ │ ├── manage.cpython-37.pyc │ │ └── manage.cpython-38.pyc │ ├── manage.py │ ├── nohup.out │ ├── uwsgi.ini │ ├── uwsgi.log │ └── uwsgi.pid ├── find_median.py ├── homework │ ├── Action1.md │ ├── Market_Basket_Optimisation.csv │ ├── word_cloud1.py │ └── wordcloud.jpg └── wordcloud │ ├── movies.csv │ └── movies_word_cloud.py └── L4 ├── anjuke ├── anjuke_bs.py ├── anjuke_selenium.py ├── auto_login.py └── bs_demo.py ├── pagerank ├── Aliases.csv ├── Emails.csv ├── Persons.csv ├── email_pagerank.py ├── networkx_pagerank.py └── pagerank_simulation.py ├── scc └── scc1.py ├── team_cluster ├── team_cluster.py └── team_cluster_data.csv ├── textrank ├── news.txt ├── news_textrank.py ├── news_textrank_snownlp.py └── sentence_textrank.py └── valid_tree ├── valid_tree.py └── valid_tree2.py /L1/homework/action1.py: -------------------------------------------------------------------------------- 1 | # A+B Problem 2 | while True: 3 | try: 4 | line = input() 5 | a = line.split() 6 | print(int(a[0])+int(a[1])) 7 | except: 8 | break -------------------------------------------------------------------------------- /L1/homework/action2.py: -------------------------------------------------------------------------------- 1 | # 求2+4+6+8+...+100的求和 2 | sum = 0 3 | for i in range(2, 100+1, 2): 4 | sum = sum + i 5 | print(sum) 6 | -------------------------------------------------------------------------------- /L1/homework/action3.py: -------------------------------------------------------------------------------- 1 | # Action3: 统计全班的成绩 2 | import numpy as np 3 | 4 | scoretype = np.dtype({'names': ['name', 'chinese', 'math', 'english'], 5 | 'formats': ['U32', 'i', 'i', 'i']}) 6 | peoples = np.array( 7 | [ 8 | ("张飞", 68, 65, 30), 9 | ("关羽", 95, 76, 98), 10 | ("刘备", 98, 86, 88), 11 | ("典韦", 90, 88, 77), 12 | ("许褚", 80, 90, 90) 13 | ], dtype=scoretype) 14 | print("科目 | 平均成绩 | 最小成绩 | 最大成绩 | 方差 | 标准差") 15 | courses = {'语文': peoples[:]['chinese'], 16 | '英文': peoples[:]['english'], '数学': peoples[:]['math']} 17 | for course, scores in courses.items(): 18 | print(course, np.mean(scores), np.amin(scores), np.amax(scores), np.std(scores), 19 | np.var(scores)) 20 | print('Ranking:') 21 | ranking = sorted(peoples, key=lambda x: x[1]+x[2]+x[3], reverse=True) 22 | print(ranking) -------------------------------------------------------------------------------- /L1/t1.py: -------------------------------------------------------------------------------- 1 | # 输入与输出 2 | name = input("What's your name?") 3 | sum = 100+100 4 | print ('hello,%s' %name) 5 | print ('sum = %d' %sum) 6 | -------------------------------------------------------------------------------- /L1/t10.py: -------------------------------------------------------------------------------- 1 | # numpy中统计函数的使用 2 | import numpy as np 3 | # 最大、最小值 4 | def work1(): 5 | a = np.array([[1,2,3], [4,5,6], [7,8,9]]) 6 | print(np.min(a)) 7 | print(np.min(a,0)) 8 | print(np.min(a,1)) 9 | print(np.max(a)) 10 | print(np.max(a,0)) 11 | print(np.max(a,1)) 12 | 13 | # 统计百分位数 14 | def work2(): 15 | a = np.array([[1,2,3], [4,5,6], [7,8,9]]) 16 | print(np.percentile(a, 50)) 17 | print(np.percentile(a, 50, axis=0)) 18 | print(np.percentile(a, 50, axis=1)) 19 | 20 | # 中位数、平均数 21 | def work3(): 22 | a = np.array([[1,2,3], [4,5,6], [7,8,9]]) 23 | #求中位数 24 | print(np.median(a)) 25 | print(np.median(a, axis=0)) 26 | print(np.median(a, axis=1)) 27 | #求平均数 28 | print(np.mean(a)) 29 | print(np.mean(a, axis=0)) 30 | print(np.mean(a, axis=1)) 31 | 32 | # 加权平均值 33 | def work4(): 34 | a = np.array([1,2,3,4]) 35 | weights = np.array([1,2,3,4]) 36 | print(np.average(a)) 37 | print(np.average(a,weights=weights)) 38 | 39 | # 标准差、方差 40 | def work5(): 41 | a = np.array([1,2,3,4]) 42 | print(np.std(a)) 43 | print(np.var(a)) 44 | 45 | # 对数组进行排序 46 | def work6(): 47 | a = np.array([[4,3,2],[2,4,1]]) 48 | print(np.sort(a)) 49 | print(np.sort(a, axis=None)) 50 | print(np.sort(a, axis=0)) 51 | print(np.sort(a, axis=1)) 52 | print(type(a)) 53 | 54 | # 对数组进行排序 55 | def work7() 56 | # 使用List进行排序 57 | a = [4,3,2,2,4,1] 58 | print(type(a)) 59 | a.sort() 60 | print(a) 61 | a.sort(reverse=True) 62 | print(a) 63 | 64 | #work1() 65 | #work2() 66 | #work3() 67 | #work4() 68 | #work5() 69 | work6() 70 | -------------------------------------------------------------------------------- /L1/t2.py: -------------------------------------------------------------------------------- 1 | # 条件判断 2 | score = 95 3 | if score>= 90: 4 | print('Excellent') 5 | else: 6 | if xscore < 60: 7 | print('Fail') 8 | else: 9 | print('Good Job') -------------------------------------------------------------------------------- /L1/t3.py: -------------------------------------------------------------------------------- 1 | # for循环 2 | sum = 0 3 | for number in range(11): 4 | sum = sum + number 5 | print(sum) 6 | 7 | 8 | # while循环 9 | sum = 0 10 | number = 1 11 | while number < 11: 12 | sum = sum + number 13 | number = number + 1 14 | print(sum) -------------------------------------------------------------------------------- /L1/t4.py: -------------------------------------------------------------------------------- 1 | # 列表 2 | lists = ['zhangfei','guanyu','liubei'] 3 | # 列表中添加元素 4 | lists.append('dianwei') 5 | print(lists) 6 | print(len(lists)) 7 | # 在指定位置添加元素 8 | lists.insert(0,'diaochan') 9 | # 删除末尾元素 10 | lists.pop() 11 | print(lists) -------------------------------------------------------------------------------- /L1/t5.py: -------------------------------------------------------------------------------- 1 | # 元组的使用 2 | tuples = ('zhangfei','65') 3 | print(tuples[0]) 4 | 5 | # 返回字典key组成的元组 6 | print(tuple({'zhangfei':65, 'guanyu':99})) 7 | 8 | # 列表转化为元组 9 | temp_list = [123, 'zhangfei', 'guanyu', 'liubei']; 10 | temp_tuple = tuple(temp_list) 11 | print(temp_tuple) 12 | -------------------------------------------------------------------------------- /L1/t6.py: -------------------------------------------------------------------------------- 1 | # 字典的使用 2 | #定义一个dictionary 3 | score = {'guanyu':96,'zhangfei':95} 4 | 5 | #添加一个元素 6 | score['zhaoyun'] = 98 7 | print(score) 8 | 9 | #删除一个元素 10 | score.pop('zhangfei') 11 | 12 | #查看key是否存在 13 | print('zhangfei' in score) 14 | 15 | #查看一个key对应的值 16 | print(score.get('zhangfei')) 17 | print(score.get('dianwei',99)) -------------------------------------------------------------------------------- /L1/t7.py: -------------------------------------------------------------------------------- 1 | # 集合的使用 2 | s = set(['zhangfei', 'guanyu', 'liubei']) 3 | s.add('diaowei') 4 | s.remove('zhangfei') 5 | print(s) 6 | print('liubei' in s) -------------------------------------------------------------------------------- /L1/t8.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # ndarray使用 3 | def work1(): 4 | a = np.array([1, 2, 3]) 5 | b = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) 6 | b[1,1]=10 7 | print(a.shape) 8 | print(b.shape) 9 | print(a.dtype) 10 | print(b) 11 | 12 | # 结构化数组的使用 13 | def work2(): 14 | persontype = np.dtype({'names':['name', 'age', 'chinese', 'math', 'english'], \ 15 | 'formats':['S32','i', 'i', 'i', 'f']}) 16 | peoples = np.array([("ZhangFei",32,75,100, 90),("GuanYu",24,85,96,88.5), \ 17 | ("ZhaoYun",28,85,92,96.5),("HuangZhong",29,65,85,100)], dtype=persontype) 18 | ages = peoples['age'] 19 | chineses = peoples['chinese'] 20 | maths = peoples['math'] 21 | englishs = peoples['english'] 22 | print(np.mean(ages)) 23 | print(np.mean(chineses)) 24 | print(np.mean(maths)) 25 | print(np.mean(englishs)) 26 | 27 | #work1() 28 | work2() 29 | 30 | 31 | -------------------------------------------------------------------------------- /L1/t9.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # 连续数组的创建:arange 或 linspace 4 | x1 = np.arange(1,11,2) 5 | x2 = np.linspace(1,9,5) 6 | print('x1=', x1) 7 | print('x2=', x2) 8 | 9 | print(np.add(x1, x2)) 10 | print(np.subtract(x1, x2)) 11 | print(np.multiply(x1, x2)) 12 | print(np.divide(x1, x2)) 13 | print(np.power(x1, x2)) 14 | print(np.remainder(x1, x2)) -------------------------------------------------------------------------------- /L1/topn.py: -------------------------------------------------------------------------------- 1 | """ 2 | 算法面试:10亿个数中取TOP-1000个数 3 | 堆的性质:每一个节点比它的左右子节点小, 4 | 先取前N个数,构成小顶堆,即在内存中维护一个1000数的小顶堆 5 | 然后对文件中读取数据,和堆顶比较: 6 | if 比堆顶小,则丢弃 7 | if 比堆顶大,替换根节点,并且调整堆,保持小顶堆的性质 8 | 所有数据处理完,得到的即是Top-N 9 | """ 10 | 11 | class TopN: 12 | # 父节点下标 13 | def parent(self, n): 14 | return int((n - 1) / 2) 15 | 16 | # 左节点下标 17 | def left(self, n): 18 | return 2 * n + 1 19 | 20 | # 右节点下标 21 | def right(self, n): 22 | return 2 * n + 2 23 | 24 | # 构建小顶堆,保证父节点小于左右子节点 25 | def buildHeap(self, n, data): 26 | for i in range(1, n): 27 | t = i 28 | # 调整堆,如果节点比父亲节点小,则交换 29 | while t != 0 and data[t] < data[self.parent(t)]: 30 | temp = data[t] 31 | data[t] = data[self.parent(t)] 32 | data[self.parent(t)] = temp 33 | t = self.parent(t) 34 | print(data) 35 | 36 | # 调整data[i] 37 | def adjust(self, i, n, data): 38 | # 小于堆的根节点,不调整 39 | if data[i] <= data[0]: 40 | return 41 | 42 | # 置换堆顶 43 | temp = data[i] 44 | data[i] = data[0] 45 | data[0] = temp 46 | # 调整堆顶 47 | t = 0 48 | while (self.left(t) < n and data[self.left(t)] < data[t]) or (self.right(t) < n and data[self.right(t)] < data[t]): 49 | if self.right(t) < n and data[self.right(t)] < data[self.left(t)]: 50 | # 右孩子更小,置换右孩子 51 | temp = data[t] 52 | data[t] = data[self.right(t)] 53 | data[self.right(t)] = temp 54 | t = self.right(t) 55 | else: 56 | # 否则置换左孩子 57 | temp = data[t] 58 | data[t] = data[self.left(t)] 59 | data[self.left(t)] = temp 60 | t = self.left(t) 61 | 62 | # 寻找topN,调整data,将topN排到最前面 63 | def findTopN(self, n, data): 64 | # 先构建n个数的小顶堆 65 | self.buildHeap(n, data); 66 | # n往后的数进行调整 67 | for i in range(n, len(data)): 68 | self.adjust(i, n, data) 69 | return data 70 | 71 | 72 | # 第一组测试 12个 73 | arr1 = [58, 26, 45, 18, 22, 39, 96, 75, 80, 65, 63, 28] 74 | print("原数组:" + str(arr1)) 75 | topn = TopN() 76 | result = topn.findTopN(5, arr1) 77 | print("数组进行Top-N调整:" + str(result)) 78 | 79 | # 第二组测试 随机100个 80 | """ 81 | import random 82 | tempList = [] 83 | for i in range(100): 84 | temp = random.randint(0, 1000) 85 | tempList.append(temp) 86 | print("原数组:" + str(tempList)) 87 | topn = TopN() 88 | result = topn.findTopN(5, tempList) 89 | print("数组进行Top-N调整:" + str(result)) 90 | 91 | """ -------------------------------------------------------------------------------- /L2/dataframe/dataframe1.py: -------------------------------------------------------------------------------- 1 | # dataframe使用 2 | from pandas import Series, DataFrame 3 | data = {'Chinese': [66, 95, 93, 90,80], 'Math': [30, 98, 96, 77, 90], 'English': [65, 85, 92, 88, 90]} 4 | df1 = DataFrame(data) 5 | df2 = DataFrame(data, index=['ZhangFei', 'GuanYu', 'LiuBei', 'DianWei', 'XuChu'], columns=['Chinese', 'Math', 'English']) 6 | print(df1) 7 | print(df2) 8 | 9 | # 对列名进行更换 10 | df2.rename(columns={'Chinese': '语文', 'English': '英语', 'Math': '数学'}, inplace = True) 11 | print(df2.isnull()) 12 | # 输出df2的概要 13 | print(df2.describe()) 14 | 15 | -------------------------------------------------------------------------------- /L2/dataframe/dataframe2.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pandas import Series, DataFrame 3 | 4 | # 读取xlsx文件 5 | score = DataFrame(pd.read_excel('heros.xlsx')) 6 | score.to_excel('result.xlsx') 7 | print(score) 8 | # 读取csv文件 9 | score = pd.read_csv('heros.csv') 10 | score.to_csv('result.csv') 11 | -------------------------------------------------------------------------------- /L2/dataframe/dataframe3.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pandas import DataFrame 3 | 4 | df1 = DataFrame({'name':['ZhangFei', 'GuanYu', 'a', 'b', 'c'], 'data1':range(1,6)}) 5 | df2 = DataFrame({'name':['ZhangFei', 'GuanYu', 'A', 'B', 'C'], 'data2':range(1,6)}) 6 | #print(df1) 7 | #print(df2) 8 | df3 = pd.merge(df1, df2, on='name') 9 | #print(df3) 10 | df3 = pd.merge(df1, df2, how='inner') 11 | #print(df3) 12 | df3 = pd.merge(df1, df2, how='left') 13 | df3 = pd.merge(df1, df2, how='right') 14 | #print(df3) 15 | df3 = pd.merge(df1, df2, how='outer') 16 | print(df3) 17 | -------------------------------------------------------------------------------- /L2/dataframe/dataframe4.py: -------------------------------------------------------------------------------- 1 | # DataFrame练习 2 | from pandas import Series, DataFrame 3 | data = {'Chinese': [66, 95, 93, 90,80], 'Math': [30, 98, 96, 77, 90], 'English': [65, 85, 92, 88, 90]} 4 | df = DataFrame(data, index=['ZhangFei', 'GuanYu', 'LiuBei', 'DianWei', 'XuChu'], columns=['Chinese', 'Math', 'English']) 5 | 6 | # 提取Index为ZhangFei的行 7 | print(df.loc['ZhangFei']) 8 | # 提取第0行 9 | print(df.iloc[0]) 10 | 11 | # 提取列为English的所有行 12 | print(df.loc[:,['English']]) 13 | # 提取第2列的所有行 14 | print(df.iloc[:,2]) 15 | 16 | # 查看ZhangFei, GuanYu的Chinese Math成绩 17 | print(df.loc[['ZhangFei','GuanYu'], ['Chinese','Math']]) 18 | print(df.iloc[[0,1],[0,1]]) -------------------------------------------------------------------------------- /L2/dataframe/dataframe5.py: -------------------------------------------------------------------------------- 1 | # 读取heros2.csv数据表,按照role进行groupby 2 | import numpy as np 3 | import pandas as pd 4 | # 因为文件中有中文,所以采用gbk编码读取 5 | data = pd.read_csv('heros2.csv', encoding='gbk') 6 | 7 | result = data.groupby('role').agg([np.sum, np.mean]) 8 | print(result) 9 | 10 | -------------------------------------------------------------------------------- /L2/dataframe/heros.csv: -------------------------------------------------------------------------------- 1 | ,Chinese,Maths,English 2 | ZhangFei,66,30,65 3 | GuanYu,95,98,85 4 | LiuBei,93,96,92 5 | DianWei,90,77,88 6 | XuChu,80,90,90 7 | -------------------------------------------------------------------------------- /L2/dataframe/heros.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cystanford/Data_Analysis_with_Python/a33b965d98612031a0effe17a9c6da3a1eaefb5e/L2/dataframe/heros.xlsx -------------------------------------------------------------------------------- /L2/dataframe/heros2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cystanford/Data_Analysis_with_Python/a33b965d98612031a0effe17a9c6da3a1eaefb5e/L2/dataframe/heros2.csv -------------------------------------------------------------------------------- /L2/dataframe/result.csv: -------------------------------------------------------------------------------- 1 | ,Unnamed: 0,Chinese,Maths,English 2 | 0,ZhangFei,66,30,65 3 | 1,GuanYu,95,98,85 4 | 2,LiuBei,93,96,92 5 | 3,DianWei,90,77,88 6 | 4,XuChu,80,90,90 7 | -------------------------------------------------------------------------------- /L2/dataframe/result.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cystanford/Data_Analysis_with_Python/a33b965d98612031a0effe17a9c6da3a1eaefb5e/L2/dataframe/result.xlsx -------------------------------------------------------------------------------- /L2/file_read/file_read1.py: -------------------------------------------------------------------------------- 1 | # 读取imagenet的分类ID说明 2 | 3 | # 读文件 4 | f = open('imagenet_class.csv') 5 | text = f.read() 6 | #print(text) 7 | 8 | # 读文件,按行来读取 9 | f = open('imagenet_class.csv') 10 | lines = f.readlines() 11 | #print(lines) 12 | print(len(lines)) 13 | print(lines[0]) 14 | 15 | # 写文件 16 | f = open('temp.txt', 'w') 17 | f.write('hello world!') 18 | f.close() 19 | -------------------------------------------------------------------------------- /L2/file_read/file_read2.py: -------------------------------------------------------------------------------- 1 | # pandas读取imagenet classID表示 2 | import pandas as pd 3 | classes = pd.read_csv('imagenet_class.csv', sep=',', header=None) 4 | #print(classes) 5 | # 查看id=207的含义 6 | print(classes[classes[0]==207]) 7 | 8 | -------------------------------------------------------------------------------- /L2/file_read/temp.txt: -------------------------------------------------------------------------------- 1 | hello world! -------------------------------------------------------------------------------- /L2/homework/Action.md: -------------------------------------------------------------------------------- 1 | Action1:参考syntax_tree目录中的两个例子 2 | Action2:参考regression目录 3 | -------------------------------------------------------------------------------- /L2/map/map1.py: -------------------------------------------------------------------------------- 1 | # 计算平方数 2 | def square(x): 3 | return x * x 4 | # 计算列表各个元素的平方 5 | print(list(map(square, [1,2,3,4,5]))) 6 | 7 | # lambda函数 8 | add = lambda x, y: x + y 9 | print(add(5, 6)) 10 | 11 | # 按照x[1]进行列表排序 12 | a = [(2, 56), (3, 12), (6, 10), (9, 13)] 13 | a.sort(key=lambda x: x[1]) 14 | print(a) 15 | -------------------------------------------------------------------------------- /L2/map/map2.py: -------------------------------------------------------------------------------- 1 | """ 2 | 新列表 = 原列表的平方 3 | 使用map, lambda两种方法完成 4 | """ 5 | 6 | numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9] 7 | # 计算平方数 8 | def square(x): 9 | return x * x 10 | print(list(map(square, numbers))) 11 | 12 | # 使用lambda定义函数 13 | print(list(map(lambda x: x*x, numbers))) 14 | 15 | -------------------------------------------------------------------------------- /L2/regression/diabetes_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | 使用sklearn自带的糖尿病数据集,进行回归分析 3 | Diabetes:包含442个患者的10个生理特征(年龄,性别、体重、血压)和一年以后疾病级数指标 4 | """ 5 | from sklearn import datasets 6 | from sklearn import linear_model 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.metrics import mean_squared_error 9 | 10 | # 加载数据 11 | diabetes = datasets.load_diabetes() 12 | data = diabetes.data 13 | # 数据探索 14 | print(data.shape) 15 | print(data[0]) 16 | 17 | # 训练集 70%,测试集30% 18 | train_x, test_x, train_y, test_y = train_test_split(diabetes.data, diabetes.target, test_size=0.3, random_state=14) 19 | print(len(train_x)) 20 | 21 | #回归训练及预测 22 | clf = linear_model.LinearRegression() 23 | clf.fit(train_x, train_y) 24 | 25 | print(clf.coef_) 26 | #print(train_x.shape) 27 | #print(clf.score(test_x, test_y)) 28 | pred_y = clf.predict(test_x) 29 | print(mean_squared_error(test_y, pred_y)) 30 | -------------------------------------------------------------------------------- /L2/regression/random_regression.py: -------------------------------------------------------------------------------- 1 | # 回归分析 2 | import random 3 | from sklearn import linear_model 4 | reg = linear_model.LinearRegression() 5 | 6 | def generate(x): 7 | y = 2*x+10+random.random() 8 | return y 9 | 10 | train_x = [] 11 | train_y = [] 12 | for x in range(1000): 13 | train_x.append([x]) 14 | y = generate(x) 15 | train_y.append([y]) 16 | 17 | reg.fit (train_x, train_y) 18 | # coef_ 保存线性模型的系数w 19 | print(reg.coef_) 20 | print(reg.intercept_) -------------------------------------------------------------------------------- /L2/regression/stock_regression.py: -------------------------------------------------------------------------------- 1 | import statsmodels.api as sm 2 | import statsmodels.formula.api as smf 3 | import statsmodels.graphics.api as smg 4 | import patsy 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import pandas as pd 8 | from pandas import Series,DataFrame 9 | from scipy import stats 10 | import seaborn as sns 11 | import datetime, os, warnings 12 | 13 | warnings.filterwarnings('ignore') 14 | plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签 15 | plt.rcParams['axes.unicode_minus'] = False #可以显示负号 16 | 17 | # 设置起始时间 18 | start = datetime.datetime(2019,1,1) 19 | end = datetime.datetime(2019,12,31) 20 | #print(start) 21 | 22 | from pandas_datareader.data import DataReader 23 | # 读取上证综指 及 探路者数据 24 | def load_data(): 25 | if os.path.exists('000001.csv'): 26 | data_ss = pd.read_csv('000001.csv') 27 | data_tlz = pd.read_csv('300005.csv') 28 | else: 29 | # 上证综指 30 | data_ss = DataReader("000001.SS", "yahoo",start,end) 31 | # 300005 探路者股票 深证 32 | data_tlz = DataReader("300005.SZ", "yahoo",start,end) 33 | data_ss.to_csv('000001.csv') 34 | data_tlz.to_csv('300005.csv') 35 | return data_ss, data_tlz 36 | 37 | data_ss, data_tlz = load_data() 38 | 39 | print(data_ss.head()) 40 | print(data_tlz.head()) 41 | 42 | # 探路者与上证综指 43 | close_ss = data_ss["Close"] 44 | close_tlz = data_tlz["Close"] 45 | 46 | # 将探路者与上证综指进行数据合并 47 | stock = pd.merge(data_ss, data_tlz, left_index = True, right_index = True) 48 | stock = stock[["Close_x","Close_y"]] 49 | stock.columns = ["上证综指","探路者"] 50 | 51 | # 统计每日收益率 52 | daily_return = (stock.diff()/stock.shift(periods = 1)).dropna() 53 | print(daily_return.head()) 54 | # 找出当天收益率大于10%的,应该是没有,因为涨停为10% 55 | print(daily_return[daily_return["探路者"] > 0.1]) 56 | 57 | # 每日收益率可视化 58 | fig,ax = plt.subplots(nrows=1,ncols=2,figsize=(15,6)) 59 | daily_return["上证综指"].plot(ax=ax[0]) 60 | ax[0].set_title("上证综指") 61 | daily_return["探路者"].plot(ax=ax[1]) 62 | ax[1].set_title("探路者") 63 | plt.show() 64 | 65 | 66 | # 散点图 67 | fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(12,6)) 68 | plt.scatter(daily_return["探路者"],daily_return["上证综指"]) 69 | plt.title("每日收益率散点图 from 探路者 & 上证综指") 70 | plt.show() 71 | 72 | # 回归分析 73 | import statsmodels.api as sm 74 | # 加入截距项 75 | daily_return["intercept"]=1.0 76 | model = sm.OLS(daily_return["探路者"],daily_return[["上证综指","intercept"]]) 77 | results = model.fit() 78 | print(results.summary()) 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /L2/series/series1.py: -------------------------------------------------------------------------------- 1 | # series使用 2 | from pandas import Series, DataFrame 3 | x1 = Series([1,2,3,4]) 4 | x2 = Series(data=[1,2,3,4], index=['a', 'b', 'c', 'd']) 5 | # 使用字典来进行创建 6 | d = {'a':1, 'b':2, 'c':3, 'd':4} 7 | x3 = Series(d) 8 | 9 | print(x1) 10 | print(x2) 11 | print(x3) -------------------------------------------------------------------------------- /L2/show/bar_show.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | df = pd.DataFrame(np.random.rand(10,4), columns=['a','b','c','d']) 6 | # 使用bar()生成直方图,barh()生成水平条形图(要生成一个堆积条形图,需要指定stacked=True) 7 | df.plot.bar() 8 | df.plot.bar(stacked=True) 9 | df.plot.barh(stacked=True) 10 | plt.show() -------------------------------------------------------------------------------- /L2/show/stock_show.py: -------------------------------------------------------------------------------- 1 | # 沪市指数回归分析 2 | import numpy as np 3 | import pandas as pd 4 | import matplotlib.pyplot as plt 5 | from statsmodels.tsa.arima_model import ARIMA 6 | import statsmodels.api as sm 7 | import warnings 8 | from itertools import product 9 | from datetime import datetime, timedelta 10 | import calendar 11 | 12 | warnings.filterwarnings('ignore') 13 | plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签 14 | # 数据加载 15 | df = pd.read_csv('./shanghai_index_1990_12_19_to_2020_03_12.csv') 16 | df = df[['Timestamp', 'Price']] 17 | 18 | # 将时间作为df的索引 19 | df.Timestamp = pd.to_datetime(df.Timestamp) 20 | df.index = df.Timestamp 21 | # 数据探索 22 | print(df.head()) 23 | # 按照月,季度,年来统计 24 | df_month = df.resample('M').mean() 25 | df_Q = df.resample('Q-DEC').mean() 26 | df_year = df.resample('A-DEC').mean() 27 | print(df_month) 28 | 29 | # 按照天,月,季度,年来显示沪市指数的走势 30 | fig = plt.figure(figsize=[15, 7]) 31 | plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签 32 | plt.suptitle('沪市指数', fontsize=20) 33 | plt.subplot(221) 34 | plt.plot(df.Price, '-', label='按天') 35 | plt.legend() 36 | plt.subplot(222) 37 | plt.plot(df_month.Price, '-', label='按月') 38 | plt.legend() 39 | plt.subplot(223) 40 | plt.plot(df_Q.Price, '-', label='按季度') 41 | plt.legend() 42 | plt.subplot(224) 43 | plt.plot(df_year.Price, '-', label='按年') 44 | plt.legend() 45 | plt.show() 46 | -------------------------------------------------------------------------------- /L2/syntax_tree/expr_syntax.py: -------------------------------------------------------------------------------- 1 | # 每个函数,都有env参数 2 | Num = lambda env, n: n 3 | Var = lambda env, x: env[x] 4 | Add = lambda env, a, b:_eval(env, a) + _eval(env, b) 5 | Mul = lambda env, a, b:_eval(env, a) * _eval(env, b) 6 | # 对表达式进行处理,expr[0]为符号,*expr[1:]为传入的参数 7 | _eval = lambda env, expr:expr[0](env, *expr[1:]) 8 | 9 | env = {'a':3, 'b':6} 10 | tree = (Add, (Var, 'a'), 11 | (Mul, (Num, 5), (Var, 'b')) 12 | ) 13 | 14 | print(_eval(env, (Var, 'a'))) 15 | print(_eval(env, (Num, 5))) 16 | print(Num(env, 5)) 17 | print(_eval(env, tree)) 18 | -------------------------------------------------------------------------------- /L2/syntax_tree/hero_syntax.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | # 定语从句语法 4 | grammar = ''' 5 | 战斗 => 施法 , 结果 。 6 | 施法 => 主语 动作 技能 7 | 结果 => 主语 获得 效果 8 | 主语 => 张飞 | 关羽 | 赵云 | 典韦 | 许褚 | 刘备 | 黄忠 | 曹操 | 鲁班七号 | 貂蝉 9 | 动作 => 施放 | 使用 | 召唤 10 | 技能 => 一骑当千 | 单刀赴会 | 青龙偃月 | 刀锋铁骑 | 黑暗潜能 | 画地为牢 | 守护机关 | 狂兽血性 | 龙鸣 | 惊雷之龙 | 破云之龙 | 天翔之龙 11 | 获得 => 损失 | 获得 12 | 效果 => 数值 状态 13 | 数值 => 1 | 1000 |5000 | 100 14 | 状态 => 法力 | 生命 15 | ''' 16 | 17 | # 得到语法字典 18 | def getGrammarDict(gram, linesplit = "\n", gramsplit = "=>"): 19 | #定义字典 20 | result = {} 21 | 22 | for line in gram.split(linesplit): 23 | # 去掉首尾空格后,如果为空则退出 24 | if not line.strip(): 25 | continue 26 | expr, statement = line.split(gramsplit) 27 | result[expr.strip()] = [i.split() for i in statement.split("|")] 28 | #print(result) 29 | return result 30 | 31 | # 生成句子 32 | def generate(gramdict, target, isEng = False): 33 | if target not in gramdict: 34 | return target 35 | find = random.choice(gramdict[target]) 36 | #print(find) 37 | blank = '' 38 | # 如果是英文中间间隔为空格 39 | if isEng: 40 | blank = ' ' 41 | return blank.join(generate(gramdict, t, isEng) for t in find) 42 | 43 | gramdict = getGrammarDict(grammar) 44 | print(generate(gramdict,"战斗")) 45 | print(generate(gramdict,"战斗", True)) 46 | 47 | 48 | -------------------------------------------------------------------------------- /L2/time/time1.py: -------------------------------------------------------------------------------- 1 | # 使用time处理时间 2 | import time 3 | timestamp = time.time() 4 | print("当前时间戳为:", timestamp) 5 | # 转化为struct_time类型 6 | localtime = time.localtime(timestamp) 7 | print("本地时间为 :", localtime) 8 | print(type(localtime)) 9 | 10 | 11 | import datetime 12 | date = datetime.date(2020, 3, 1) 13 | print(date) 14 | 15 | time_now = datetime.datetime.now() 16 | delta1 = datetime.timedelta(days=30) 17 | print(time_now) 18 | print(time_now + delta1) 19 | -------------------------------------------------------------------------------- /L3/PythonEDA/dash1.py: -------------------------------------------------------------------------------- 1 | # 使用dash进行股票可视化交互 2 | import dash 3 | from dash.dependencies import Input, Output 4 | import dash_core_components as dcc 5 | import dash_html_components as html 6 | from datetime import datetime as dt 7 | from pandas_datareader.data import DataReader 8 | import tushare as ts 9 | 10 | # 创建一个应用 11 | app = dash.Dash() 12 | # 设置layout 13 | app.layout = html.Div([ 14 | html.H1('k线图'), 15 | dcc.Dropdown( 16 | id='my-dropdown', 17 | options=[ 18 | {'label': '探路者', 'value':'300005'}, 19 | {'label': '莱美药业', 'value':'300006'}, 20 | {'label': '汉威科技', 'value':'300007'}, 21 | {'label': '天海防务', 'value':'300008'}, 22 | {'label': '安科生物', 'value':'300009'}, 23 | ], 24 | value='300005' 25 | ), 26 | dcc.Graph(id='my-graph') 27 | ]) 28 | 29 | @app.callback(Output('my-graph', 'figure'), [Input('my-dropdown', 'value')]) 30 | def update_graph(selected_dropdown_value): 31 | df = ts.get_k_data(selected_dropdown_value, ktype='30') 32 | return { 33 | 'data':[ 34 | { 35 | 'x': df.index, 36 | 'y':df.close 37 | } 38 | ] 39 | } 40 | 41 | if __name__ == '__main__': 42 | app.run_server(host="0.0.0.0") -------------------------------------------------------------------------------- /L3/PythonEDA/flask1.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | app = Flask(__name__) 3 | 4 | @app.route('/') 5 | def hello_world(): 6 | return '你好,世界!' 7 | 8 | if __name__ == "__main__": 9 | app.run(host='127.0.0.1', port=8080) 10 | -------------------------------------------------------------------------------- /L3/PythonEDA/python_eda.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import numpy as np 4 | import pandas as pd 5 | import os 6 | from matplotlib.font_manager import FontProperties 7 | 8 | # 散点图 9 | def scatter(): 10 | # 数据准备 11 | N = 500 12 | x = np.random.randn(N) 13 | y = np.random.randn(N) 14 | # 用Matplotlib画散点图 15 | plt.scatter(x, y,marker='x') 16 | plt.show() 17 | # 用Seaborn画散点图 18 | df = pd.DataFrame({'x': x, 'y': y}) 19 | sns.jointplot(x="x", y="y", data=df, kind='scatter'); 20 | plt.show() 21 | 22 | # 折线图 23 | def line_chart(): 24 | # 数据准备 25 | x = [1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910] 26 | y = [265, 323, 136, 220, 305, 350, 419, 450, 560, 720, 830] 27 | # 使用Matplotlib画折线图 28 | plt.plot(x, y) 29 | plt.show() 30 | # 使用Seaborn画折线图 31 | df = pd.DataFrame({'x': x, 'y': y}) 32 | sns.lineplot(x="x", y="y", data=df) 33 | plt.show() 34 | 35 | # 条形图 36 | def bar_chart(): 37 | # 数据准备 38 | x = ['c1', 'c2', 'c3', 'c4'] 39 | y = [15, 18, 5, 26] 40 | # 用Matplotlib画条形图 41 | plt.bar(x, y) 42 | plt.show() 43 | # 用Seaborn画条形图 44 | sns.barplot(x, y) 45 | plt.show() 46 | 47 | # 箱线图 48 | def box_plots(): 49 | # 数据准备 50 | # 生成0-1之间的20*4维度数据 51 | data=np.random.normal(size=(10,4)) 52 | lables = ['A','B','C','D'] 53 | # 用Matplotlib画箱线图 54 | plt.boxplot(data,labels=lables) 55 | plt.show() 56 | # 用Seaborn画箱线图 57 | df = pd.DataFrame(data, columns=lables) 58 | sns.boxplot(data=df) 59 | plt.show() 60 | 61 | # 饼图 62 | def pie_chart(): 63 | # 数据准备 64 | nums = [25, 33, 37] 65 | # 射手adc:法师apc:坦克tk 66 | labels = ['ADC','APC', 'TK'] 67 | # 用Matplotlib画饼图 68 | plt.pie(x = nums, labels=labels) 69 | plt.show() 70 | 71 | # 饼图 72 | def pie_chart2(): 73 | # 数据准备 74 | data = {} 75 | data['ADC'] = 25 76 | data['APC'] = 33 77 | data['TK'] = 37 78 | data = pd.Series(data) 79 | data.plot(kind = "pie", label='heros') 80 | plt.show() 81 | 82 | # 热力图 83 | def thermodynamic(): 84 | # 数据准备 85 | np.random.seed(33) 86 | data = np.random.rand(3, 3) 87 | heatmap = sns.heatmap(data) 88 | plt.show() 89 | 90 | # 蜘蛛图 91 | def spider_chart(): 92 | # 数据准备 93 | labels=np.array([u"推进","KDA",u"生存",u"团战",u"发育",u"输出"]) 94 | stats=[76, 58, 67, 97, 86, 58] 95 | # 画图数据准备,角度、状态值 96 | angles=np.linspace(0, 2*np.pi, len(labels), endpoint=False) 97 | stats=np.concatenate((stats,[stats[0]])) 98 | angles=np.concatenate((angles,[angles[0]])) 99 | # 用Matplotlib画蜘蛛图 100 | fig = plt.figure() 101 | ax = fig.add_subplot(111, polar=True) 102 | ax.plot(angles, stats, 'o-', linewidth=2) 103 | ax.fill(angles, stats, alpha=0.25) 104 | # 设置中文字体 105 | font = FontProperties(fname=r"C:\Windows\Fonts\simhei.ttf", size=14) 106 | ax.set_thetagrids(angles * 180/np.pi, labels, FontProperties=font) 107 | plt.show() 108 | 109 | # 二元变量分布图 110 | def jointplot(): 111 | # 数据准备 112 | flights = sns.load_dataset("flights") 113 | # 用Seaborn画二元变量分布图(散点图,核密度图,Hexbin图) 114 | sns.jointplot(x="year", y="passengers", data=flights, kind='scatter') 115 | sns.jointplot(x="year", y="passengers", data=flights, kind='kde') 116 | sns.jointplot(x="year", y="passengers", data=flights, kind='hex') 117 | plt.show() 118 | 119 | # 成对关系图 120 | def pairplot(): 121 | # 数据准备 122 | flights = sns.load_dataset('flights') 123 | # 用Seaborn画成对关系 124 | sns.pairplot(flights) 125 | plt.show() 126 | 127 | def thermodynamic2(): 128 | flights = sns.load_dataset('flights') 129 | print(flights) 130 | flights=flights.pivot('month','year','passengers') #pivot函数重要 131 | print(flights) 132 | sns.heatmap(flights) #注意这里是直接传入数据集即可,不需要再单独传入x和y了 133 | sns.heatmap(flights,linewidth=.5,annot=True,fmt='d') 134 | plt.show() 135 | 136 | 137 | # 散点图 138 | #scatter() 139 | # 折线图 140 | #line_chart() 141 | # 条形图 142 | bar_chart() 143 | # 箱线图 144 | #box_plots() 145 | # 饼图 146 | #pie_chart() 147 | #pie_chart2() 148 | # 热力图 149 | #thermodynamic() 150 | #thermodynamic2() 151 | # 蜘蛛图 152 | #spider_chart() 153 | # 二元变量分布图 154 | #jointplot() 155 | # 成对关系图 156 | #pairplot() -------------------------------------------------------------------------------- /L3/PythonEDA/subplot1.py: -------------------------------------------------------------------------------- 1 | # subplot使用 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | def f(t): 6 | return np.exp(-t) * np.cos(2 * np.pi * t) 7 | 8 | t1 = np.arange(0, 5, 0.1) 9 | t2 = np.arange(0, 5, 0.02) 10 | 11 | plt.figure() 12 | plt.subplot(221) 13 | plt.plot(t1, f(t1), 'r--') 14 | 15 | plt.subplot(222) 16 | plt.plot(t2, np.cos(2 * np.pi * t2), 'r--') 17 | 18 | plt.subplot(212) 19 | plt.plot([1, 2, 3, 4], [1, 4, 9, 16]) 20 | 21 | plt.show() -------------------------------------------------------------------------------- /L3/PythonEDA/titanic_eda.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.tree import DecisionTreeClassifier 4 | from sklearn.feature_extraction import DictVectorizer 5 | from sklearn.model_selection import cross_val_score 6 | from sklearn import metrics 7 | from sklearn.feature_extraction import DictVectorizer 8 | import matplotlib.pyplot as plt 9 | import seaborn as sns 10 | 11 | # 数据加载 12 | train_data = pd.read_csv('./train.csv') 13 | test_data = pd.read_csv('./test.csv') 14 | # 使用平均年龄来填充年龄中的nan值 15 | train_data['Age'].fillna(train_data['Age'].mean(), inplace=True) 16 | test_data['Age'].fillna(test_data['Age'].mean(),inplace=True) 17 | # 使用票价的均值填充票价中的nan值 18 | train_data['Fare'].fillna(train_data['Fare'].mean(), inplace=True) 19 | test_data['Fare'].fillna(test_data['Fare'].mean(),inplace=True) 20 | #print(train_data['Embarked'].value_counts()) 21 | # 使用登录最多的港口来填充登录港口的nan值 22 | train_data['Embarked'].fillna('S', inplace=True) 23 | test_data['Embarked'].fillna('S',inplace=True) 24 | # 特征选择 25 | features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'] 26 | train_features = train_data[features] 27 | 28 | # 显示特征之间的相关系数 29 | plt.figure(figsize=(10, 10)) 30 | plt.title('Pearson Correlation between Features',y=1.05,size=15) 31 | train_data_hot_encoded = train_features.drop('Embarked',1).join(train_features.Embarked.str.get_dummies()) 32 | train_data_hot_encoded = train_data_hot_encoded.drop('Sex',1).join(train_data_hot_encoded.Sex.str.get_dummies()) 33 | # 计算特征之间的Pearson系数,即相似度 34 | sns.heatmap(train_data_hot_encoded.corr(),linewidths=0.1,vmax=1.0, fmt= '.2f', square=True,linecolor='white',annot=True) 35 | plt.show() 36 | 37 | 38 | # 使用饼图来进行Survived取值的可视化 39 | #print(type(train_data["Survived"].value_counts())) 40 | train_data["Survived"].value_counts().plot(kind = "pie", label='Survived') 41 | plt.show() 42 | 43 | # 不同的Pclass,幸存人数(条形图) 44 | sns.barplot(x = 'Pclass', y = "Survived", data = train_data); 45 | plt.show() 46 | 47 | # 不同的Embarked,幸存人数(条形图) 48 | sns.barplot(x = 'Embarked', y = "Survived", data = train_data); 49 | plt.show() 50 | 51 | 52 | # 训练并显示特征向量的重要程度 53 | def train(train_features, train_labels): 54 | # 构造CART决策树 55 | clf = DecisionTreeClassifier() 56 | # 决策树训练 57 | clf.fit(train_features, train_labels) 58 | # 显示特征向量的重要程度 59 | coeffs = clf.feature_importances_ 60 | #print(coeffs) 61 | df_co = pd.DataFrame(coeffs, columns=["importance_"]) 62 | # 下标设置为Feature Name 63 | df_co.index = train_features.columns 64 | #print(df_co.index) 65 | df_co.sort_values("importance_", ascending=True, inplace=True) 66 | df_co.importance_.plot(kind="barh") 67 | 68 | plt.title("Feature Importance") 69 | plt.show() 70 | return clf 71 | 72 | clf = train(train_data_hot_encoded, train_data["Survived"]) 73 | 74 | # 决策树可视化 75 | import pydotplus 76 | from sklearn.externals.six import StringIO 77 | from sklearn.tree import export_graphviz 78 | 79 | def show_tree(clf): 80 | dot_data = StringIO() 81 | export_graphviz(clf, out_file=dot_data) 82 | graph = pydotplus.graph_from_dot_data(dot_data.getvalue()) 83 | graph.write_pdf("titanic_tree.pdf") 84 | 85 | show_tree(clf) -------------------------------------------------------------------------------- /L3/echarts/bar1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |22 | 累计确诊: 23 | ${params.value}人
24 |113 | 累计确诊: 114 | ${params.value}人
115 |200 | 累计确诊: 201 | ${params.value}人
202 |287 | 累计确诊: 288 | ${params.value}人
289 |12 | 13 | Here is the content 14 | 15 |
16 |Python爬虫有很多优秀的工具 17 | BeautifulSoup, 18 | Selenium 19 | and 20 | Scrapy 21 | 他们都可以帮你爬虫想要的页面内容 22 |
23 |...
24 | """ 25 | 26 | # 通过html字符串创建BeautifulSoup对象 27 | soup = BeautifulSoup(html_doc, 'html.parser', from_encoding='utf-8') 28 | 29 | #输出第一个 title 标签 30 | print(soup.title) 31 | #输出第一个 title 标签的标签名称 32 | print(soup.title.name) 33 | #输出第一个 title 标签的包含内容 34 | print(soup.title.string) 35 | #输出第一个 title 标签的父标签的标签名称 36 | print(soup.title.parent.name) 37 | 38 | 39 | #输出第一个p标签 40 | print(soup.p) 41 | #输出第一个 p 标签的 class 属性内容 42 | print(soup.p['class']) 43 | 44 | #输出第一个 a 标签的 href 属性内容 45 | print(soup.a['href']) 46 | # soup的属性操作方法与字典一样,可以被添加,删除或修改. 47 | # 修改第一个 a 标签的href属性为 http://www.baidu.com/ 48 | soup.a['href'] = 'http://www.baidu.com/' 49 | #给第一个 a 标签添加 name 属性 50 | soup.a['name'] = u'百度' 51 | print(soup.a) 52 | #删除第一个 a 标签的 class 属性为 53 | del soup.a['class'] 54 | print(soup.a) 55 | -------------------------------------------------------------------------------- /L4/pagerank/Aliases.csv: -------------------------------------------------------------------------------- 1 | Id,Alias,PersonId 2 | 1,111th congress,1 3 | 2,agna usemb kabul afghanistan,2 4 | 3,ap,3 5 | 4,asuncion,4 6 | 5,alec,5 7 | 6,dupuy alex,6 8 | 7,american beverage association,7 9 | 8,mayock andrew,8 10 | 9,shapiro andrew j,9 11 | 10,shapiroa@state.gov,9 12 | 11,slaughter annmarie,10 13 | 12,slaughter anne marie,10 14 | 13,slaughter annemarie,10 15 | 14,slaughtera@state.gov,10 16 | 15,lake anthony,11 17 | 16,valenzuela arturo a,12 18 | 17,valenzuelaaa@state.gov,12 19 | 18,kimoon ban,13 20 | 19,obama barack,14 21 | 20,president,14 22 | 21,bam@mikulski.senate.gov,15 23 | 22,mikulski bam,15 24 | 23,mikulski bam (mikulski),15 25 | 24,mikulski bam (mitkulski),15 26 | 25,mikulskibam (mikulski),15 27 | 26,betsy.ebeling,16 28 | 27,ebeling betsy,16 29 | 28,betsyebeling,16 30 | 29,betsyebeling1050,16 31 | 30,clinton william j,17 32 | 31,dad,17 33 | 32,biography,18 34 | 33,klehr bonnie,19 35 | 34,brian,20 36 | 35,bstrider,21 37 | 36,strider burns,21 38 | 37,capricia marshall,22 39 | 38,marshall capricia,22 40 | 39,marshall capricia p,22 41 | 40,capriciamarshall,22 42 | 41,capriciamarshall@,22 43 | 42,cmarshall,22 44 | 43,marshallcp@state.gov,22 45 | 44,pascual carlos,23 46 | 45,adler caroline e,24 47 | 46,button case,25 48 | 47,richards cecile,26 49 | 48,eur/rus:weson chad,27 50 | 49,nea/pi/ce:kiamie charles,28 51 | 50,chelsea,29 52 | 51,blair cherie,30 53 | 52,cherie blair,30 54 | 53,cb,30 55 | 54,cherieblair,30 56 | 55,cheryl,31 57 | 56,c:mills cheryl,32 58 | 57,cheryl mills,32 59 | 58,cheryl mills cos,32 60 | 59,mill cheryl,32 61 | 60,mills cherlyl d,32 62 | 61,mills chery d,32 63 | 62,mills cheryl,32 64 | 63,mills cheryl d,32 65 | 64,millscheryl d,32 66 | 65,mills. cherl d,32 67 | 66,mills. cheryl d,32 68 | 67,millscd@state.gov,32 69 | 68,cheryl.mills,32 70 | 69,cheryl.mills@,32 71 | 70,crocker chester a,33 72 | 71,butzgy christopher h,34 73 | 72,edwards christopher,35 74 | 73,eap/j:green christopher,36 75 | 74,hill christopher r (baghdad),37 76 | 75,hillcr@state.gov,37 77 | 76,coleman claire l,38 78 | 77,colin powell,39 79 | 78,council on foreign relations,40 80 | 79,beale courtney a karamer,41 81 | 80,beale courtney a kramer,41 82 | 81,bealeca@state.gov,41 83 | 82,kelly craig a,42 84 | 83,daily sun,43 85 | 84,hyde dana,44 86 | 85,daniel,45 87 | 86,baer daniel,46 88 | 87,baer daniel b,46 89 | 88,baer daniel d,46 90 | 89,baer.daniel,46 91 | 90,daniel.baer,46 92 | 91,inonye daniel,47 93 | 92,schwerin daniel b,48 94 | 93,schwerindb@state.gov,48 95 | 94,brian danielle,49 96 | 95,axelrod david m,50 97 | 96,axelrod_d,50 98 | 97,brock david,51 99 | 98,david_garten@lautenberg.senate.gov,52 100 | 99,garten david (lautenberg),52 101 | 100,inl:johnson david t,53 102 | 101,d.gunners2010,54 103 | 102,department of state,55 104 | 103,chollet derek h,56 105 | 104,cholletdh@state.gov,56 106 | 105,reynolds diane,57 107 | 106,donald,58 108 | 107,band doug,59 109 | 108,hattaway doug,60 110 | 109,doug,60 111 | 110,pelton e,61 112 | 111,politico drew elizabeth,62 113 | 112,tauscher ellen o,63 114 | 113,tauschereo@state.gov,63 115 | 114,faleomavaega eni f,64 116 | 115,woodard eric w,65 117 | 116,brimmer esther d,66 118 | 117,brimmere@state.gov,66 119 | 118,finca international,67 120 | 119,foreign affairs magazine,68 121 | 120,prm/mce:wills g,69 122 | 121,lou de bac g/tip,70 123 | 122,prm/mce:willsg,71 124 | 123,mitchell george,72 125 | 124,glantz gina,73 126 | 125,glantz.,73 127 | 126,govenman etazini,74 128 | 127,haiti,75 129 | 128,duksoo han,76 130 | 129,koh harold hongju,77 131 | 130,kohhh@state.gov,77 132 | 131,heintz,78 133 | 132,hill,79 134 | 133,hillary clinton,80 135 | 134,clinton,80 136 | 135,clinton hillary r,80 137 | 136,clinton hillary,80 138 | 137,clinton hillary rodham,80 139 | 138,h,80 140 | 139,h2,80 141 | 140,hrc,80 142 | 141,hillary,80 143 | 142,hillary rodham clinton,80 144 | 143,madam secretary,80 145 | 144,secretary,80 146 | 145,secretary clinton,80 147 | 146,secretary of state,80 148 | 147,hr15@mycingular.blackberry.net,80 149 | 148,hrod17@clintonemail.com,80 150 | 149,the honorable hillary rodham clinton secretary of state,80 151 | 150,hdr22@clintonemail.com,80 152 | 151,abedin huma,81 153 | 152,abedinh@state.gov,81 154 | 153,abein huma,81 155 | 154,abendin huma,81 156 | 155,adedin huma,81 157 | 156,huma abedin,81 158 | 157,huma@clintonemail.com,81 159 | 158,abedin@state.gov,81 160 | 159,abendinh@state.gov,81 161 | 160,adedinh@state.gov,81 162 | 161,kelly ian,82 163 | 162,ds/pa:finkle j,83 164 | 163,jama,84 165 | 164,newmyer jackie,85 166 | 165,newmyer,85 167 | 166,lew,86 168 | 167,lew jacob,86 169 | 168,lew jacob j,86 170 | 169,jacobjlew,86 171 | 170,lewjj@state.gov,86 172 | 171,sullivan jj@state.gov,87 173 | 172,sullivan jacbo j,87 174 | 173,sullivan jack,87 175 | 174,sullivan jacob,87 176 | 175,sullivan jacob h,87 177 | 176,sullivan jacob j,87 178 | 177,sullivan jake,87 179 | 178,sullivan jake j,87 180 | 179,sullivanjj@state.gov,87 181 | 180,jake. sullivan,87 182 | 181,jake.sullivan,87 183 | 182,jake.sullivan@,87 184 | 183,sulllivanjj@state.gov,87 185 | 184,sullivanil@state.gov,87 186 | 185,sullivann@state.gov.,87 187 | 186,mcgovern james p,88 188 | 187,smith james e,89 189 | 188,steinberg james b,90 190 | 189,steinbergjb@state.gov,90 191 | 190,steinbertjb@state.gov,90 192 | 191,jpiercy,91 193 | 192,jacobs janice l,92 194 | 193,farrow jeffrey,93 195 | 194,farrow jeffrey l,93 196 | 195,jfarrow,93 197 | 196,feltman jeffrey d,94 198 | 197,feltmanjd@state.gov,94 199 | 198,robinson jennifer,95 200 | 199,hoagland jim,96 201 | 200,kennedy jim,97 202 | 201,laszczych joanne,98 203 | 202,olver john,99 204 | 203,podesta john,100 205 | 204,jpodesta,100 206 | 205,carson johnnie,101 207 | 206,carsonj@state.gov,101 208 | 207,jonathan prince,102 209 | 208,daniel joshua j,103 210 | 209,kieffer judith,104 211 | 210,mchale judith,105 212 | 211,mchale judith a,105 213 | 212,mchaleja@state.gov,105 214 | 213,cooper justin,106 215 | 214,kpk,107 216 | 215,kabul lgf request,108 217 | 216,eikenberry karl w,109 218 | 217,kellyc@state.gov,110 219 | 218,conrad kent,111 220 | 219,baldersonkm@state.gov,112 221 | 220,balderston kris m,112 222 | 221,balderstone kris m,112 223 | 222,balderstonkm@state.gov,112 224 | 223,campbell kurt m,113 225 | 224,campbellkm@state.gov,113 226 | 225,eap/k:rosenberger l,114 227 | 226,lgraham,115 228 | 227,jilloty lauren c,116 229 | 228,jiloty cheryl d,116 230 | 229,jiloty lauren,116 231 | 230,jiloty lauren c,116 232 | 231,jiloty lauren cd,116 233 | 232,jiloty. lauren c,116 234 | 233,jilotylc@state.gov,116 235 | 234,jjiloty lauren c,116 236 | 235,jjilotylc@state.gov,116 237 | 236,lauren jiloty,116 238 | 237,rubiner laurie,117 239 | 238,brown lee r,118 240 | 239,feinstein lee,119 241 | 240,dewan linda l,120 242 | 241,captuol,121 243 | 242,caputol,121 244 | 243,imuscatine,122 245 | 244,lissa muscatine,122 246 | 245,muscantine lissa,122 247 | 246,muscatine lissa,122 248 | 247,muscatine lisa,122 249 | 248,muscatinelissa,122 250 | 249,muscatine. lissa,122 251 | 250,muscatinel@state.gov,122 252 | 251,lmuscatine,122 253 | 252,muscantinel@state.gov,122 254 | 253,quam lois,123 255 | 254,valmmorolj@state.gov,124 256 | 255,valmoro lona,124 257 | 256,valmoro lona j,124 258 | 257,valmoroli@state.gov,124 259 | 258,valmorolj@state.gov,124 260 | 259,walmoro lona j,124 261 | 260,long term strategy group,125 262 | 261,diamond louise,126 263 | 262,cue lourdes c,127 264 | 263,g/tip:cdebaca luis,128 265 | 264,luzzatto,129 266 | 265,lynn,130 267 | 266,albright m k,131 268 | 267,albright madeleine,132 269 | 268,williamsbarrett,133 270 | 269,calivis maria,134 271 | 270,otero maria,135 272 | 271,oterom2@state.gov,135 273 | 272,af/pdpa:scott marianne,136 274 | 273,mark,137 275 | 274,markjpenn,138 276 | 275,mtorrey,139 277 | 276,eca:pally maura,140 278 | 277,pally maura m,140 279 | 278,baucus max,141 280 | 279,rooney megan,142 281 | 280,pverveer,143 282 | 281,vereer melanne s,143 283 | 282,verveer melanne e,143 284 | 283,verveer melanne s,143 285 | 284,verveerms@state.gov,143 286 | 285,fuchs michael h,144 287 | 286,fuchsmh@state.gov,144 288 | 287,posner michael h,145 289 | 288,posnermh@state.gov,145 290 | 289,bond michele t,146 291 | 290,michele.flournoy,147 292 | 291,rodriguez miguel e,148 293 | 292,mike,149 294 | 293,hanley monica r,150 295 | 294,hanleymr@state.gov,150 296 | 295,nhla,151 297 | 296,nancy,152 298 | 297,neera tanden,153 299 | 298,tandem neera,153 300 | 299,tanden neera,153 301 | 300,ntanden,153 302 | 301,new york times,154 303 | 302,norman nicholas,155 304 | 303,toiv nora f,156 305 | 304,toivnf@state.gov,156 306 | 305,tov nora f,157 307 | 306,opinion research,158 308 | 307,sanchez oscar arias,159 309 | 308,flores oscar,160 310 | 309,lores oscar,161 311 | 310,pvervee,162 312 | 311,kennedy patrick f,163 313 | 312,m:kennedy patrick f,163 314 | 313,collier paul,164 315 | 314,jones paul w,165 316 | 315,nea/ipa:knopf payton,166 317 | 316,robinson peter,167 318 | 317,crowley philip,168 319 | 318,crowley philip j,168 320 | 319,gordon philip h,169 321 | 320,gordon. philip h,169 322 | 321,gordonph@state.gov,169 323 | 322,pir,170 324 | 323,reines philipe i,170 325 | 324,reines philippe,170 326 | 325,reines philippe i,170 327 | 326,reines phillippe i,170 328 | 327,rines philippe i,170 329 | 328,preines,170 330 | 329,reines@state.gov,170 331 | 330,reinesp@state.gov,170 332 | 331,crowley phillip j,171 333 | 332,crowleypj@state.gov,171 334 | 333,campbell piper,172 335 | 334,prime minister,173 336 | 335,shah rajiv,174 337 | 336,rshah,174 338 | 337,recos,175 339 | 338,philippe reines,176 340 | 339,preval rene,177 341 | 340,lewis reta jo,178 342 | 341,holbrookerc@state.gov,179 343 | 342,verma richard,180 344 | 343,verma richard r,180 345 | 344,vermarr@state.gov,180 346 | 345,rsloan,181 347 | 346,blake robert o,182 348 | 347,danford robert a,183 349 | 348,hormats robert d,184 350 | 349,hormatsrd@state.gov,184 351 | 350,russo robert v,185 352 | 351,russo robert v,185 353 | 352,russorv@state.gov,185 354 | 353,rodriguezme@state.gov,186 355 | 354,howe rosemarie,187 356 | 355,rosemarie.howe,187 357 | 356,zaidi s akbar,188 358 | 357,s/srgia,189 359 | 358,state,190 360 | 359,berger samuel r,191 361 | 360,sberger,191 362 | 361,sandy,191 363 | 362,berger samuel,192 364 | 363,sgration,193 365 | 364,blumenthal sidney,194 366 | 365,sid,194 367 | 366,sbwhoeop,194 368 | 367,sbwhoeop@,194 369 | 368,sbwhoop,194 370 | 369,sir,195 371 | 370,talbott strobe,196 372 | 371,stalbott,196 373 | 372,stallbott,196 374 | 373,rice susan e,197 375 | 374,rice susan e.,197 376 | 375,grantham suzanne l,198 377 | 376,terry.duffy,199 378 | 377,donilon thomas e,200 379 | 378,nides thomas r,201 380 | 379,nidestr@state.gov,201 381 | 380,shannon thomas a,202 382 | 381,wha: shannon thomas a,202 383 | 382,shannonta@state.gov,202 384 | 383,tflourno,203 385 | 384,stern todd,204 386 | 385,stern todd d,204 387 | 386,stern todd d (s/secc),204 388 | 387,sterntd@state.gov,204 389 | 388,tillemann tomicah,205 390 | 389,tillemann tomicah s,205 391 | 390,tillemannts@state.gov,205 392 | 391,elbegdori tsakina,206 393 | 392,u.s. global leadership coalition,207 394 | 393,estados unidos da america,208 395 | 394,estados unidos de norteamerica,208 396 | 395,etatunis damerique,208 397 | 396,etatsunis damerique,208 398 | 397,united states of america,208 399 | 398,nuland victoria j,209 400 | 399,ebeling voda,210 401 | 400,ebelling voda,210 402 | 401,whadp,211 403 | 402,washington post,212 404 | 403,sherman wendy,213 405 | 404,sherman wendy r,213 406 | 405,wsherman,213 407 | 406,ilic werner x,214 408 | 407,white house,215 409 | 408,burns william j,216 410 | 409,burnswj@state.gov,216 411 | 410,wburns,216 412 | 411,wburns66,216 413 | 412,wburns66@,216 414 | 413,hubbard william,217 415 | 414,iscol zachary,218 416 | 415,aclb,219 417 | 416,alcb,220 418 | 417,l,221 419 | 418,latimes.com,222 420 | 419,mh.interiors,223 421 | 420,mhcaleja@state.gov,224 422 | 421,postmaster@state.gov,225 423 | 422,rooneym@state.gov,226 424 | 423,rrh.interiors,227 425 | 424,b6,228 426 | 425,sullivan jacob 1,87 427 | 426,mills cheryl 0,32 428 | 427,nuiand victoria j,229 429 | 428,reines philippe f,230 430 | 429,sullivan jacob j nuland victoria 1,231 431 | 430,sullivan jacobi,87 432 | 431,russorv@stategov,232 433 | 432,sullivan jacob .1 ‹sullivann@state.gov>,87 434 | 433,sullivan jacob i,87 435 | 434,burns strider,21 436 | 435,mcdonough denis r. (mailto: b6,87 437 | 436,sullivan jacob .1,87 438 | 437,abeclin huma,81 439 | 438,oscar flores,233 440 | 439,milts cheryl 0,32 441 | 440,sidney blumenthal,194 442 | 441,su ii iva gll@state.gov.,234 443 | 442,annemarie slaughter,10 444 | 443,sullivahu@state.gov,235 445 | 444,barton rick (cso),32 446 | 445,mills cheryl id,32 447 | 446,russoiv@state.gov,236 448 | 447,miliscd@stategov,237 449 | 448,sullivan jacob 3,87 450 | 449,abedin hume,81 451 | 450,hanky monica r,150 452 | 451,h ehrod17@clintonemailcom>,80 453 | 452,abedinh@stategov,238 454 | 453,lvlills cheryl d,32 455 | 454,tomlinson christina b3 cia pers/org,32 456 | 455,h ‹hrod17@clintonernail.com>,80 457 | 456,tanleyrnr@state.gov,239 458 | 457,hanleymr@stategov,240 459 | 458,hanley monica r .,150 460 | 459,sullivan jacob.),87 461 | 460,hemmen chris,32 462 | 461,rnillscd@state.gov,241 463 | 462,sullivan jacob .‹suilivan1j©st.ate.gov>,87 464 | 463,rnillscd@stategov.,242 465 | 464,sullivanjj@state.gov.,243 466 | 465,mills chey 0,32 467 | 466,reiriesp@state.gov,244 468 | 467,sidney blumenthal b6,194 469 | 468,suilivanii@stategov,245 470 | 469,sullivanj@state.gov,246 471 | 470,suilivanij@state.gok,247 472 | 471,hanleymr@state.gov.,248 473 | 472,reines philippe t,249 474 | 473,hanleyrnr@state.gov,250 475 | 474,h 1,80 476 | 475,hanieymr@state.gov,251 477 | 476,mills. cheryl 0,32 478 | 477,hanleymrgastategov,252 479 | 478,sulliyanfostate.gott,253 480 | 479,aliilscd@state.gov,254 481 | 480,sullivan jacob j esullivanii@stategov>,87 482 | 481,mills cheryl d :miliscd@stategov>,32 483 | 482,mills cheryl d [mailto:millscd@stategovi,80 484 | 483,mills cheryl d [mailto:millscd©state.gov],80 485 | 484,valmoro lona j ‹valmorou c stategov>,124 486 | 485,valnnoro lona j,124 487 | 486,mills cheryl 0emillscd@state.gov>,32 488 | 487,nidesth@stategoy,255 489 | 488,bums william j,216 490 | 489,h ‹hrod17@clintonemailcom>,80 491 | 490,millscd@state.gov.,256 492 | 491,jacob j sullivan,257 493 | 492,reines philippe),170 494 | 493,sullivan jacobj,87 495 | 494,jake sullivan _,87 496 | 495,sullivanji@state.gov,258 497 | 496,stevens john c,87 498 | 497,sullivanjj@state.golt,259 499 | 498,millscd@state.goy,260 500 | 499,nulandyj@state.goy,261 501 | 500,sulliyanij@state.goy,262 502 | 501,rnillscd@state.govs,263 503 | 502,nuland victoria 3,209 504 | 503,jake.sullivar,264 505 | 504,jilotylc@state.gov.,265 506 | 505,jake.sulliyan,266 507 | 506,michele.fl,267 508 | 507,cheryimills millscd@state.gov,268 509 | 508,jake.sulliva,269 510 | 509,valmoroll@state.gov.,270 511 | 510,habedin b6,271 512 | 511,valmorou@state.gov,272 513 | 512,filotylc@state.gov,273 514 | 513,habedin,274 515 | 514,cheryl.mills jake.sullivan,275 516 | 515,abedinh@state.gov.,276 517 | 516,millscd@state.aov,277 518 | 517,illotylc@state.gov,278 519 | 518,millscd@state ov,279 520 | 519,habedin(,280 521 | 520,doug band,281 522 | 521,sullivanij@state.gov.,282 523 | 522,preines@,283 524 | 523,abedinh state ov,284 525 | 524,cheryl.mills abedinh@state.gov,285 526 | 525,cheryl.mill,286 527 | 526,briar,287 528 | 527,abedinh@state.goy,288 529 | 528,a bedinh@state.gov,289 530 | 529,preine,290 531 | 530,valmorol.1@state.gov,291 532 | 531,sullivanij@state.gov,292 533 | 532,preines sullivanjj@state.gov b6,293 534 | 533,valmorolj@state.gov.,294 535 | 534,leltmanjd@state.gov,295 536 | 535,ullivanjj@state.gov,296 537 | 536,sta i bott,297 538 | 537,sullivanjj©state ov,298 539 | 538,millscd@state.00v.,299 540 | 539,steinbergib@state.gov,300 541 | 540,cheryl.millf.,301 542 | 541,mhcaleja@state.gove,302 543 | 542,cheryl.millsi,303 544 | 543,s abedinh@state.gov,304 545 | 544,valmorou@state.gov.,305 546 | 545,chetyl.mills sullivanij@state.gov,306 547 | 546,sullivanu@state.gov.,307 548 | 547,muscatinel@state.goy,308 549 | 548,preines sullivanjj@state.gov,309 550 | 549,axelrod_,310 551 | 550,wburns6,311 552 | 551,valmorol1@state.gov.,312 553 | 552,steinberg1b@state.gov,313 554 | 553,. huma abedin,314 555 | 554,abedinh@stategovl,315 556 | 555,valmorou©state.gov,316 557 | 556,reinesp@state.goy,317 558 | 557,sulliyanjj@state.goy,318 559 | 558,emillscd@state.gov,319 560 | 559,cheryl.mill sullivanjj@state.gov,320 561 | 560,cheryl.mills millscd@state.gov.,321 562 | 561,preines verveerms@state.gov,322 563 | 562,jilotylc@state.goy,323 564 | 563,val moro u@state.gov,324 565 | 564,a bed inh@state.gov,325 566 | 565,mot lc@state.gov,326 567 | 566,jilot lc@state. ov,327 568 | 567,.1ilotylc@state.gov.,328 569 | 568,iilotylc@state.gov.,329 570 | 569,jilotylc©state.gov.,330 571 | 570,cheryl.mills sullivanjj@state.gov,331 572 | 571,iewjj@state.gov,332 573 | 572,cheryl.mills _,333 574 | 573,sulliva njj@state.g ov,334 575 | 574,pverveel,335 576 | 575,. h,80 577 | 576,preines sullivanij@state.gov.,336 578 | 577,sta ibott,337 579 | 578,balderstonkm@state.gov.,338 580 | 579,11,80 581 | 580,rossdb@state.gov,339 582 | 581,bowens,340 583 | 582,jacobjlev,341 584 | 583,yeryeerms@state.goy,342 585 | 584,preines b6,343 586 | 585,valmorou@state.gove,344 587 | 586,abedinh@state.gove,345 588 | 587,campbelikm@state.gov,346 589 | 588,nancy millscd@state.gov b6,347 590 | 589,valmorou©state.gov.,348 591 | 590,andy manatos,80 592 | 591,vanbuskirk michael 1,349 593 | 592,huma abed in,350 594 | 593,caputo,351 595 | 594,.gordonph@state.gov.,352 596 | 595,preines sullivanjj@state.gov,353 597 | 596,cheryl.mills sullivanjj@state.gov b6,354 598 | 597,cheryl.mills( sullivanjj@state.gov,355 599 | 598,huma abedin b6,356 600 | 599,mtorrey1,357 601 | 600,glantz.gina,358 602 | 601,millscd@tate.gov,359 603 | 602,cheryl.mills huma abedin,360 604 | 603,cheryl.mills millscd@state.gov,361 605 | 604,campbelikm©state.gov,362 606 | 605,jacobjlew vermarr@state.gov,363 607 | 606,sullivanjj@state.gov b6,364 608 | 607,iilotylc@state.gov,365 609 | 608,rnillscd@state.gov.,366 610 | 609,sullivanjj@state.govr,367 611 | 610,lewij@state.gov,368 612 | 611,williamsbarrett millscd@state.gov.,369 613 | 612,abedinh©state.gov,370 614 | 613,s sullivanjj@state.gov,371 615 | 614,filotylc@state.gov.,372 616 | 615,..lilotylc@state.gov.,373 617 | 616,baer.danie,374 618 | 617,reines philippe i (pace),170 619 | 618,mills.cheryl d,32 620 | 619,millscd@state.ov,375 621 | 620,cmills,376 622 | 621,newmyer on behalf of jackie newmyer,85 623 | 622,cheryl mills[,32 624 | 623,otero mildred (clinton),377 625 | 624,iewij@state.gov,378 626 | 625,abed in huma,81 627 | 626,jake sullivan,87 628 | 627,preines huma abedin,379 629 | 628,h.,80 630 | 629,ieltmanjd@state.gov.,380 631 | 630,valmoroll@state.gov,381 632 | 631,h [mailto:hdr22@clintonemail.com],32 633 | 632,ben kobren,80 634 | 633,todd stern,382 635 | 634,jonespw2@state.gov.,383 636 | 635,daniel.baer@,384 637 | 636,jonespw2@state.gov,385 638 | 637,cmarshal,386 639 | 638,sbwhoeop b6,387 640 | 639,jackie newmyer,85 641 | 640,pverveer b6,143 642 | 641,cheryl.mill abedin huma,388 643 | 642,lee feinstein,119 644 | 643,cheryl.mills b6,32 645 | 644,strobe talbott,196 646 | 645,berger samuel r.,191 647 | 646,abedin hurna,81 648 | 647,abed in hu ma,81 649 | 648,jeffrey l farrow,93 650 | 649,pvervee _,143 651 | 650,jeffrey farrow,93 652 | 651,eizenstat stuart,32 653 | 652,valmoro lona j walmorou@state.goy>,124 654 | 653,lew jacob i,389 655 | 654,steinberg james,390 656 | 655,rosemarie.howe h,391 657 | 656,abedin hu ma,81 658 | 657,sbwhoeor,194 659 | 658,esullivanjj@state.gov,392 660 | 659,cheryl.miils@ millscd@state.gov,393 661 | 660,millscd©state.gov,394 662 | 661,justin cooper,395 663 | 662,strobe talbott b6,196 664 | 663,valmoro lona 1,124 665 | 664,axelrod david m.,50 666 | 665,john podesta,100 667 | 666,eabedinh@state.gov,396 668 | 667,abedinh@state.govr,397 669 | 668,cheryl.mills@ millscd@state.gov.,398 670 | 669,williamsbarre0,399 671 | 670,lissa muscatine b6,122 672 | 671,minyon moore .,32 673 | 672,capricia penavic marshall,400 674 | 673,maggie williams,401 675 | 674,rosemarie howe,402 676 | 675,valmoro lona j walmorou@state.gov>,124 677 | 676,lewjj@state.gov.,403 678 | 677,wendy sherman,213 679 | 678,jim kennedy,404 680 | 679,jake.sullivan(,87 681 | 680,choilet derek h,56 682 | 681,cdm,405 683 | 682,axelrod david m. b6,50 684 | 683,wburns66 b6 i,216 685 | 684,donilon thomas e.,200 686 | 685,jacob lew,86 687 | 686,lou de baca g/tip,70 688 | 687,* abedin huma,81 689 | 688,pj,406 690 | 689,crowleyp state ov. preines,407 691 | 690,hume abedin,408 692 | 691,valmoro lona1,124 693 | 692,marciel scot a,409 694 | 693,0 mills cheryl d,32 695 | 694,.,410 696 | 695,smith daniel b,411 697 | 696,il,412 698 | 697,cheryl.mill1,413 699 | 698,jpier4,414 700 | 699,berger samuel r. b6,191 701 | 700,sbwhoeopc,194 702 | 701,sullivanii@state.govr,415 703 | 702,holbrooke richard c,416 704 | 703,verveer melanne s werveerms@state.gov>,143 705 | 704,cheryl.millsc,32 706 | 705,m.k albright,131 707 | 706,tauscher ellen 0,63 708 | 707,jake sullivar,87 709 | 708,gina glantz b6,73 710 | 709,voda ebeling,210 711 | 710,abedin huma eabedinh@state.gov>,81 712 | 711,lew jacob 1,86 713 | 712,sbwhoeop b61,194 714 | 713,info@mailva.evite.com,81 715 | 714,luzzatt,417 716 | 715,daniel baer,46 717 | 716,reta jo lewis special representative global intergovernmental affairs,178 718 | 717,daniel.bae,418 719 | 718,valmorou state. ov,419 720 | 719,vanbuskirk michael .3,420 721 | 720,edwards christopher (jakarta/pro),421 722 | 721,merten kenneth h,32 723 | 722,preine h,422 724 | 723,preines h,423 725 | 724,shah rajiv (aid/a),174 726 | 725,sbwhoeor.,194 727 | 726,williamsbarret,424 728 | 727,sullivanii@state.gov,425 729 | 728,hannah richert,426 730 | 729,ross alec j,427 731 | 730,sbwhoecip,194 732 | 731,sbwhoeopi,428 733 | 732,. sullivan jacobi,87 734 | 733,betsyebelin,16 735 | 734,sbwhoeor b6,194 736 | 735,doua band,429 737 | 736,daniel meron,180 738 | 737,adams david s,430 739 | 738,doug hattaway,431 740 | 739,mills; cheryl d,32 741 | 740,colby cooper,81 742 | 741,kritenbrink daniel j,432 743 | 742,cheryl mills b6,32 744 | 743,abdinh@state.gov,433 745 | 744,fareed zein,32 746 | 745,sawsanhassan1,434 747 | 746,stanton katie,435 748 | 747,h b6,436 749 | 748,blake robert 0,182 750 | 749,irussorv@state.gov,437 751 | 750,valmoro lona .1,438 752 | 751,valmorol1@state.gov,439 753 | 752,valmorou@state.goy,440 754 | 753,valmdrou@state.gov,441 755 | 754,dimartino kitty,442 756 | 755,waxman sharon l,443 757 | 756,jcooper,444 758 | 757,lgraham doug band,445 759 | 758,jai:e sullivan,446 760 | 759,crowley philip 1,168 761 | 760,crowley philip1,168 762 | 761,sullivanii@state.gov.,447 763 | 762,hume abed in,448 764 | 763,sbvvhoeop,194 765 | 764,berniertoth michelle,449 766 | 765,coley theodore r,450 767 | 766,c — cheryl mills,32 768 | 767,rice susan e (usun),197 769 | 768,bonnie klehr,451 770 | 769,ramamurthy,452 771 | 770,h i,453 772 | 771,eichensehr kristen e,454 773 | 772,hooke kathleen h,455 774 | 773,johnson clifton m,456 775 | 774,tones susan,457 776 | 775,townley stephen g,458 777 | 776,torres susan,459 778 | 777,abedinhuma,81 779 | 778,evergreen,460 780 | 779,lona valmoro,461 781 | 780,fl,462 782 | 781,elaine weiss,463 783 | 782,kevin m. okeefe,464 784 | 783,michael m. conway,465 785 | 784,lew jacobi,466 786 | 785,hai,467 787 | 786,holbrooke richard,87 788 | 787,harris jennifer m,468 789 | 788,p1r,170 790 | 789,pcharron,469 791 | 790,zachary iscol,218 792 | 791,jiloty lauren c dilotylc@state.gov>,116 793 | 792,preine5,470 794 | 793,samuel berger,192 795 | 794,laurenjiloty jilotylc@state.gov,471 796 | 795,laurenjiloty,472 797 | 796,ian1evqr@state.gov,473 798 | 797,jake sullivan b6,87 799 | 798,woodardew@state.gov,474 800 | 799,sullivan jacob),87 801 | 800,rshal,475 802 | 801,megan rooney,142 803 | 802,millscd@state. ov,476 804 | 803,jacobjle iewjj@state.gov b6,477 805 | 804,declan kelly,478 806 | 805,jake.sullivan b6,87 807 | 806,steinberg1b@state.gov.,479 808 | 807,jake.sullivan h,480 809 | 808,ogordonph@state.gov.,481 810 | 809,crowley @state. ov,482 811 | 810,sbwhoeo,483 812 | 811,steven.everts,80 813 | 812,sullivanij@state.gove,484 814 | 813,p rei n es,485 815 | 814,imuscatine huma abedin b6,486 816 | 815,tillemannts@state.gov.,487 817 | 816,sullivan jacob1,87 818 | 817,markjpenr,488 819 | 818,dad mom,489 820 | 819,vaimorou@state.gov,490 821 | 820,prein6,491 822 | 821,sullivanjj@state.goy,492 823 | 822,. vermarr@state.gov,493 824 | 823,sullivanjj@siate.gov,494 825 | 824,rina amiri,87 826 | 825,feldman daniel f,495 827 | 826,ruggiero frank 3,496 828 | 827,singh vikram,497 829 | 828,.filotylc@state.gov.,498 830 | 829,h hrod17@clintonemail.com>,80 831 | 830,bstrider mmoore,499 832 | 831,.1ilotylc@state.gov,500 833 | 832,abed inh@state.gov.,501 834 | 833,burns strider b6,21 835 | 834,jon davidson,502 836 | 835,aclb b6,220 837 | 836,iynn,503 838 | 837,a bed in h@state.gov,504 839 | 838,sullivan jacob.,87 840 | 839,scott gration,87 841 | 840,s_specialassistants,505 842 | 841,jake.sullivar preines,506 843 | 842,david brock,51 844 | 843,lanny j. davis,72 845 | 844,capriciamarshall huma abedin,507 846 | 845,jilotylc©state.gov,508 847 | 846,marshallcp@state.goy,509 848 | 847,sid blumenthal,510 849 | 848,monica.hanle,511 850 | 849,hanle mr@state.gov,512 851 | 850,ha nleym r@state.gov,513 852 | -------------------------------------------------------------------------------- /L4/pagerank/Persons.csv: -------------------------------------------------------------------------------- 1 | Id,Name 2 | 1,111th Congress 3 | 2,AGNA USEMB Kabul Afghanistan 4 | 3,AP 5 | 4,ASUNCION 6 | 5,Alec 7 | 6,Alex Dupuy 8 | 7,American Beverage Association 9 | 8,Andrew Mayock 10 | 9,Andrew Shapiro 11 | 10,Anne-Marie Slaughter 12 | 11,Anthony Lake 13 | 12,Arturo Valenzuela 14 | 13,Ban Ki-moon 15 | 14,Barack Obama 16 | 15,Barbara Mikulski 17 | 16,Betsy Ebeling 18 | 17,Bill Clinton 19 | 18,Biography 20 | 19,Bonnie Klehr 21 | 20,Brian Greenspun 22 | 21,Burns Strider 23 | 22,Capricia Marshall 24 | 23,Carlos Pascual 25 | 24,Caroline Adler 26 | 25,Case Button 27 | 26,Cecile Richards 28 | 27,Chad Weston 29 | 28,Charles Kiamie 30 | 29,Chelsea Clinton 31 | 30,Cherie Blair 32 | 31,Cheryl 33 | 32,Cheryl Mills 34 | 33,Chester Crocker 35 | 34,Christopher Butzgy 36 | 35,Christopher Edwards 37 | 36,Christopher Green 38 | 37,Christopher Hill 39 | 38,Claire Coleman 40 | 39,Colin Powell 41 | 40,Council on Foreign Relations 42 | 41,Courtney Beale 43 | 42,Craig Kelly 44 | 43,Daily Sun 45 | 44,Dana Hyde 46 | 45,Daniel 47 | 46,Daniel Baer 48 | 47,Daniel Inonye 49 | 48,Daniel Schwerin 50 | 49,Danielle Brian 51 | 50,David Axelrod 52 | 51,David Brock 53 | 52,David Garten 54 | 53,David Johnson 55 | 54,David Miliband 56 | 55,Department of State 57 | 56,Derek Chollet 58 | 57,Diane Reynolds 59 | 58,Donald 60 | 59,Doug Band 61 | 60,Doug Hattaway 62 | 61,E. Pelton 63 | 62,Elizabeth Drew 64 | 63,Ellen Tauscher 65 | 64,Eni Faleomavaega 66 | 65,Eric Woodard 67 | 66,Esther Brimmer 68 | 67,FINCA International 69 | 68,Foreign Affairs Magazine 70 | 69,G Wills 71 | 70,G. Lou de Bac 72 | 71,G. Wills 73 | 72,George Mitchell 74 | 73,Gina Glantz 75 | 74,Govenman Etazini 76 | 75,Haiti 77 | 76,Han Duk-soo 78 | 77,Harold Hongju Koh 79 | 78,Heintz 80 | 79,Hill 81 | 80,Hillary Clinton 82 | 81,Huma Abedin 83 | 82,Ian Kelly 84 | 83,J. Finkle 85 | 84,JAMA 86 | 85,Jackie Newmyer 87 | 86,Jacob Lew 88 | 87,Jake Sullivan 89 | 88,James McGovern 90 | 89,James Smith 91 | 90,James Steinberg 92 | 91,Jan Piercy 93 | 92,Janice Jacobs 94 | 93,Jeffrey Farrow 95 | 94,Jeffrey Feltman 96 | 95,Jennifer Robinson 97 | 96,Jim Hoagland 98 | 97,Jim Kennedy 99 | 98,Joanne Laszczych 100 | 99,John Olver 101 | 100,John Podesta 102 | 101,Johnnie Carson 103 | 102,Jonathan Prince 104 | 103,Joshua Daniel 105 | 104,Judith Kieffer 106 | 105,Judith McHale 107 | 106,Justin Cooper 108 | 107,KPK 109 | 108,Kabul LGF Request 110 | 109,Karl Eikenberry 111 | 110,KellyC@state.gov 112 | 111,Kent Conrad 113 | 112,Kris Balderston 114 | 113,Kurt Campbell 115 | 114,L. Rosenberger 116 | 115,LGraham 117 | 116,Lauren Jiloty 118 | 117,Laurie Rubiner 119 | 118,Lee Brown 120 | 119,Lee Feinstein 121 | 120,Linda Dewan 122 | 121,Lisa Caputo 123 | 122,Lissa Muscatine 124 | 123,Lois Quam 125 | 124,Lona Valmoro 126 | 125,Long Term Strategy Group 127 | 126,Louise Diamond 128 | 127,Lourdes Cue 129 | 128,Luis CdeBaca 130 | 129,Luzzatto 131 | 130,Lynn Forester de Rothschild 132 | 131,M. Albright 133 | 132,Madeleine Albright 134 | 133,Maggie Williams 135 | 134,Maria Calivis 136 | 135,Maria Otero 137 | 136,Marianne Scott 138 | 137,Mark Hyman 139 | 138,Mark Penn 140 | 139,Marty Torrey 141 | 140,Maura Pally 142 | 141,Max Baucus 143 | 142,Megan Rooney 144 | 143,Melanne Verveer 145 | 144,Michael Fuchs 146 | 145,Michael Posner 147 | 146,Michele Bond 148 | 147,Michele Flournoy 149 | 148,Miguel Rodriguez 150 | 149,Mike 151 | 150,Monica Hanley 152 | 151,NHLA 153 | 152,Nancy Parrish 154 | 153,Neera Tanden 155 | 154,New York Times 156 | 155,Nicholas Norman 157 | 156,Nora Toiv 158 | 157,Nora Tov 159 | 158,Opinion Research 160 | 159,Oscar Arias Sanchez 161 | 160,Oscar Flores 162 | 161,Oscar Lores 163 | 162,PVervee 164 | 163,Patrick Kennedy 165 | 164,Paul Collier 166 | 165,Paul Jones 167 | 166,Payton Knopf 168 | 167,Peter Robinson 169 | 168,Philip Crowley 170 | 169,Philip Gordon 171 | 170,Philippe Reines 172 | 171,Phillip Crowley 173 | 172,Piper Campbell 174 | 173,Prime Minister 175 | 174,Rajiv Shah 176 | 175,Recos 177 | 176,Reines Philippe 178 | 177,Rene Preval 179 | 178,Reta Jo Lewis 180 | 179,Richard Holbrooke 181 | 180,Richard Verma 182 | 181,Rick Sloan 183 | 182,Robert Blake 184 | 183,Robert Danford 185 | 184,Robert Hormats 186 | 185,Robert Russo 187 | 186,Rodriguez Miguel 188 | 187,Rosemarie Howe 189 | 188,S. Akbar Zaidi 190 | 189,SRGIA 191 | 190,STATE 192 | 191,"Samuel (""Sandy"") Berger" 193 | 192,Samuel Berger 194 | 193,Scott Gration 195 | 194,Sidney Blumenthal 196 | 195,Sir 197 | 196,Strobe Talbott 198 | 197,Susan Rice 199 | 198,Suzanne Grantham 200 | 199,Terry Duffy 201 | 200,Thomas Donilon 202 | 201,Thomas Nides 203 | 202,Thomas Shannon 204 | 203,Tina Flournoy 205 | 204,Todd Stern 206 | 205,Tomicah Tillemann 207 | 206,Tsakina Elbegdori 208 | 207,U.S. Global Leadership Coalition 209 | 208,United States of America 210 | 209,Victoria Nuland 211 | 210,Voda Ebeling 212 | 211,WHADP 213 | 212,Washington Post 214 | 213,Wendy Sherman 215 | 214,Werner Ilic 216 | 215,White House 217 | 216,William Burns 218 | 217,William Hubbard 219 | 218,Zachary Iscol 220 | 219,aclb 221 | 220,alcb 222 | 221,l 223 | 222,latimes.com 224 | 223,mh.interiors 225 | 224,mhcaleja@state.gov 226 | 225,postmaster@state.gov 227 | 226,rooneym@state.gov 228 | 227,rrh.interiors 229 | 228,b6 230 | 229,nuiand victoria j 231 | 230,reines philippe f 232 | 231,sullivan jacob j nuland victoria 1 233 | 232,russorv@stategov 234 | 233,oscar flores 235 | 234,su ii iva gll@state.gov. 236 | 235,sullivahu@state.gov 237 | 236,russoiv@state.gov 238 | 237,miliscd@stategov 239 | 238,abedinh@stategov 240 | 239,tanleyrnr@state.gov 241 | 240,hanleymr@stategov 242 | 241,rnillscd@state.gov 243 | 242,rnillscd@stategov. 244 | 243,sullivanjj@state.gov. 245 | 244,reiriesp@state.gov 246 | 245,suilivanii@stategov 247 | 246,sullivanj@state.gov 248 | 247,suilivanij@state.gok 249 | 248,hanleymr@state.gov. 250 | 249,reines philippe t 251 | 250,hanleyrnr@state.gov 252 | 251,hanieymr@state.gov 253 | 252,hanleymrgastategov 254 | 253,sulliyanfostate.gott 255 | 254,aliilscd@state.gov 256 | 255,nidesth@stategoy 257 | 256,millscd@state.gov. 258 | 257,jacob j sullivan 259 | 258,sullivanji@state.gov 260 | 259,sullivanjj@state.golt 261 | 260,millscd@state.goy 262 | 261,nulandyj@state.goy 263 | 262,sulliyanij@state.goy 264 | 263,rnillscd@state.govs 265 | 264,jake.sullivar 266 | 265,jilotylc@state.gov. 267 | 266,jake.sulliyan 268 | 267,michele.fl 269 | 268,cheryimills millscd@state.gov 270 | 269,jake.sulliva 271 | 270,valmoroll@state.gov. 272 | 271,habedin b6 273 | 272,valmorou@state.gov 274 | 273,filotylc@state.gov 275 | 274,habedin 276 | 275,cheryl.mills jake.sullivan 277 | 276,abedinh@state.gov. 278 | 277,millscd@state.aov 279 | 278,illotylc@state.gov 280 | 279,millscd@state ov 281 | 280,habedin( 282 | 281,doug band 283 | 282,sullivanij@state.gov. 284 | 283,preines@ 285 | 284,abedinh state ov 286 | 285,cheryl.mills abedinh@state.gov 287 | 286,cheryl.mill 288 | 287,briar 289 | 288,abedinh@state.goy 290 | 289,a bedinh@state.gov 291 | 290,preine 292 | 291,valmorol.1@state.gov 293 | 292,sullivanij@state.gov 294 | 293,preines sullivanjj@state.gov b6 295 | 294,valmorolj@state.gov. 296 | 295,leltmanjd@state.gov 297 | 296,ullivanjj@state.gov 298 | 297,sta i bott 299 | 298,sullivanjj©state ov 300 | 299,millscd@state.00v. 301 | 300,steinbergib@state.gov 302 | 301,cheryl.millf. 303 | 302,mhcaleja@state.gove 304 | 303,cheryl.millsi 305 | 304,s abedinh@state.gov 306 | 305,valmorou@state.gov. 307 | 306,chetyl.mills sullivanij@state.gov 308 | 307,sullivanu@state.gov. 309 | 308,muscatinel@state.goy 310 | 309,preines sullivanjj@state.gov 311 | 310,axelrod_ 312 | 311,wburns6 313 | 312,valmorol1@state.gov. 314 | 313,steinberg1b@state.gov 315 | 314,. huma abedin 316 | 315,abedinh@stategovl 317 | 316,valmorou©state.gov 318 | 317,reinesp@state.goy 319 | 318,sulliyanjj@state.goy 320 | 319,emillscd@state.gov 321 | 320,cheryl.mill sullivanjj@state.gov 322 | 321,cheryl.mills millscd@state.gov. 323 | 322,preines verveerms@state.gov 324 | 323,jilotylc@state.goy 325 | 324,val moro u@state.gov 326 | 325,a bed inh@state.gov 327 | 326,mot lc@state.gov 328 | 327,jilot lc@state. ov 329 | 328,.1ilotylc@state.gov. 330 | 329,iilotylc@state.gov. 331 | 330,jilotylc©state.gov. 332 | 331,cheryl.mills sullivanjj@state.gov 333 | 332,iewjj@state.gov 334 | 333,cheryl.mills _ 335 | 334,sulliva njj@state.g ov 336 | 335,pverveel 337 | 336,preines sullivanij@state.gov. 338 | 337,sta ibott 339 | 338,balderstonkm@state.gov. 340 | 339,rossdb@state.gov 341 | 340,bowens 342 | 341,jacobjlev 343 | 342,yeryeerms@state.goy 344 | 343,preines b6 345 | 344,valmorou@state.gove 346 | 345,abedinh@state.gove 347 | 346,campbelikm@state.gov 348 | 347,nancy millscd@state.gov b6 349 | 348,valmorou©state.gov. 350 | 349,vanbuskirk michael 1 351 | 350,huma abed in 352 | 351,caputo 353 | 352,.gordonph@state.gov. 354 | 353,preines sullivanjj@state.gov 355 | 354,cheryl.mills sullivanjj@state.gov b6 356 | 355,cheryl.mills( sullivanjj@state.gov 357 | 356,huma abedin b6 358 | 357,mtorrey1 359 | 358,glantz.gina 360 | 359,millscd@tate.gov 361 | 360,cheryl.mills huma abedin 362 | 361,cheryl.mills millscd@state.gov 363 | 362,campbelikm©state.gov 364 | 363,jacobjlew vermarr@state.gov 365 | 364,sullivanjj@state.gov b6 366 | 365,iilotylc@state.gov 367 | 366,rnillscd@state.gov. 368 | 367,sullivanjj@state.govr 369 | 368,lewij@state.gov 370 | 369,williamsbarrett millscd@state.gov. 371 | 370,abedinh©state.gov 372 | 371,s sullivanjj@state.gov 373 | 372,filotylc@state.gov. 374 | 373,..lilotylc@state.gov. 375 | 374,baer.danie 376 | 375,millscd@state.ov 377 | 376,cmills 378 | 377,otero mildred (clinton) 379 | 378,iewij@state.gov 380 | 379,preines huma abedin 381 | 380,ieltmanjd@state.gov. 382 | 381,valmoroll@state.gov 383 | 382,todd stern 384 | 383,jonespw2@state.gov. 385 | 384,daniel.baer@ 386 | 385,jonespw2@state.gov 387 | 386,cmarshal 388 | 387,sbwhoeop b6 389 | 388,cheryl.mill abedin huma 390 | 389,lew jacob i 391 | 390,steinberg james 392 | 391,rosemarie.howe h 393 | 392,esullivanjj@state.gov 394 | 393,cheryl.miils@ millscd@state.gov 395 | 394,millscd©state.gov 396 | 395,justin cooper 397 | 396,eabedinh@state.gov 398 | 397,abedinh@state.govr 399 | 398,cheryl.mills@ millscd@state.gov. 400 | 399,williamsbarre0 401 | 400,capricia penavic marshall 402 | 401,maggie williams 403 | 402,rosemarie howe 404 | 403,lewjj@state.gov. 405 | 404,jim kennedy 406 | 405,cdm 407 | 406,pj 408 | 407,crowleyp state ov. preines 409 | 408,hume abedin 410 | 409,marciel scot a 411 | 410,. 412 | 411,smith daniel b 413 | 412,il 414 | 413,cheryl.mill1 415 | 414,jpier4 416 | 415,sullivanii@state.govr 417 | 416,holbrooke richard c 418 | 417,luzzatt 419 | 418,daniel.bae 420 | 419,valmorou state. ov 421 | 420,vanbuskirk michael .3 422 | 421,edwards christopher (jakarta/pro) 423 | 422,preine h 424 | 423,preines h 425 | 424,williamsbarret 426 | 425,sullivanii@state.gov 427 | 426,hannah richert 428 | 427,ross alec j 429 | 428,sbwhoeopi 430 | 429,doua band 431 | 430,adams david s 432 | 431,doug hattaway 433 | 432,kritenbrink daniel j 434 | 433,abdinh@state.gov 435 | 434,sawsanhassan1 436 | 435,stanton katie 437 | 436,h b6 438 | 437,irussorv@state.gov 439 | 438,valmoro lona .1 440 | 439,valmorol1@state.gov 441 | 440,valmorou@state.goy 442 | 441,valmdrou@state.gov 443 | 442,dimartino kitty 444 | 443,waxman sharon l 445 | 444,jcooper 446 | 445,lgraham doug band 447 | 446,jai:e sullivan 448 | 447,sullivanii@state.gov. 449 | 448,hume abed in 450 | 449,berniertoth michelle 451 | 450,coley theodore r 452 | 451,bonnie klehr 453 | 452,ramamurthy 454 | 453,h i 455 | 454,eichensehr kristen e 456 | 455,hooke kathleen h 457 | 456,johnson clifton m 458 | 457,tones susan 459 | 458,townley stephen g 460 | 459,torres susan 461 | 460,evergreen 462 | 461,lona valmoro 463 | 462,fl 464 | 463,elaine weiss 465 | 464,kevin m. okeefe 466 | 465,michael m. conway 467 | 466,lew jacobi 468 | 467,hai 469 | 468,harris jennifer m 470 | 469,pcharron 471 | 470,preine5 472 | 471,laurenjiloty jilotylc@state.gov 473 | 472,laurenjiloty 474 | 473,ian1evqr@state.gov 475 | 474,woodardew@state.gov 476 | 475,rshal 477 | 476,millscd@state. ov 478 | 477,jacobjle iewjj@state.gov b6 479 | 478,declan kelly 480 | 479,steinberg1b@state.gov. 481 | 480,jake.sullivan h 482 | 481,ogordonph@state.gov. 483 | 482,crowley @state. ov 484 | 483,sbwhoeo 485 | 484,sullivanij@state.gove 486 | 485,p rei n es 487 | 486,imuscatine huma abedin b6 488 | 487,tillemannts@state.gov. 489 | 488,markjpenr 490 | 489,dad mom 491 | 490,vaimorou@state.gov 492 | 491,prein6 493 | 492,sullivanjj@state.goy 494 | 493,. vermarr@state.gov 495 | 494,sullivanjj@siate.gov 496 | 495,feldman daniel f 497 | 496,ruggiero frank 3 498 | 497,singh vikram 499 | 498,.filotylc@state.gov. 500 | 499,bstrider mmoore 501 | 500,.1ilotylc@state.gov 502 | 501,abed inh@state.gov. 503 | 502,jon davidson 504 | 503,iynn 505 | 504,a bed in h@state.gov 506 | 505,s_specialassistants 507 | 506,jake.sullivar preines 508 | 507,capriciamarshall huma abedin 509 | 508,jilotylc©state.gov 510 | 509,marshallcp@state.goy 511 | 510,sid blumenthal 512 | 511,monica.hanle 513 | 512,hanle mr@state.gov 514 | 513,ha nleym r@state.gov 515 | -------------------------------------------------------------------------------- /L4/pagerank/email_pagerank.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 用PageRank挖掘希拉里邮件中的重要任务关系 3 | import pandas as pd 4 | import networkx as nx 5 | import numpy as np 6 | from collections import defaultdict 7 | import matplotlib.pyplot as plt 8 | 9 | # 数据加载 10 | emails = pd.read_csv("./Emails.csv") 11 | # 读取别名文件 12 | file = pd.read_csv("./Aliases.csv") 13 | aliases = {} 14 | for index, row in file.iterrows(): 15 | aliases[row['Alias']] = row['PersonId'] 16 | # 读取人名文件 17 | file = pd.read_csv("./Persons.csv") 18 | persons = {} 19 | for index, row in file.iterrows(): 20 | persons[row['Id']] = row['Name'] 21 | 22 | # 针对别名进行转换 23 | def unify_name(name): 24 | # 姓名统一小写 25 | name = str(name).lower() 26 | # 去掉, 和@后面的内容 27 | name = name.replace(",","").split("@")[0] 28 | # 别名转换 29 | if name in aliases.keys(): 30 | return persons[aliases[name]] 31 | return name 32 | # 画网络图 33 | def show_graph(graph, type = 'spring_layout'): 34 | if type == 'spring_layout': 35 | # 使用Spring Layout布局,类似中心放射状 36 | positions=nx.spring_layout(graph) 37 | if type == 'circular_layout': 38 | # 使用Circular Layout布局,在一个圆环上均匀分布 39 | positions=nx.circular_layout(graph) 40 | 41 | # 设置网络图中的节点大小,大小与pagerank值相关,因为pagerank值很小所以需要*20000 42 | nodesize = [x['pagerank']*20000 for v,x in graph.nodes(data=True)] 43 | # 设置网络图中的边长度 44 | edgesize = [np.sqrt(e[2]['weight']) for e in graph.edges(data=True)] 45 | # 绘制节点 46 | nx.draw_networkx_nodes(graph, positions, node_size=nodesize, alpha=0.4) 47 | # 绘制边 48 | nx.draw_networkx_edges(graph, positions, edge_size=edgesize, alpha=0.2) 49 | # 绘制节点的label 50 | nx.draw_networkx_labels(graph, positions, font_size=10) 51 | # 输出希拉里邮件中的所有人物关系图 52 | plt.show() 53 | 54 | # 将寄件人和收件人的姓名进行规范化 55 | emails.MetadataFrom = emails.MetadataFrom.apply(unify_name) 56 | emails.MetadataTo = emails.MetadataTo.apply(unify_name) 57 | 58 | # 设置遍的权重等于发邮件的次数 59 | edges_weights_temp = defaultdict(list) 60 | for row in zip(emails.MetadataFrom, emails.MetadataTo, emails.RawText): 61 | temp = (row[0], row[1]) 62 | if temp not in edges_weights_temp: 63 | edges_weights_temp[temp] = 1 64 | else: 65 | edges_weights_temp[temp] = edges_weights_temp[temp] + 1 66 | 67 | print(edges_weights_temp) 68 | print('-'*100) 69 | # 转化格式 (from, to), weight => from, to, weight 70 | edges_weights = [(key[0], key[1], val) for key, val in edges_weights_temp.items()] 71 | 72 | # 创建一个有向图 73 | graph = nx.DiGraph() 74 | # 设置有向图中的路径及权重(from, to, weight) 75 | graph.add_weighted_edges_from(edges_weights) 76 | # 计算每个节点(人)的PR值,并作为节点的pagerank属性 77 | pagerank = nx.pagerank(graph) 78 | # 获取每个节点的pagerank数值 79 | pagerank_list = {node: rank for node, rank in pagerank.items()} 80 | # 将pagerank数值作为节点的属性 81 | nx.set_node_attributes(graph, name = 'pagerank', values=pagerank_list) 82 | # 画网络图 83 | show_graph(graph) 84 | 85 | # 将完整的图谱进行精简 86 | # 设置PR值的阈值,筛选大于阈值的重要核心节点 87 | pagerank_threshold = 0.005 88 | # 复制一份计算好的网络图 89 | small_graph = graph.copy() 90 | # 剪掉PR值小于pagerank_threshold的节点 91 | for n, p_rank in graph.nodes(data=True): 92 | if p_rank['pagerank'] < pagerank_threshold: 93 | small_graph.remove_node(n) 94 | # 画网络图 95 | show_graph(small_graph, 'circular_layout') 96 | -------------------------------------------------------------------------------- /L4/pagerank/networkx_pagerank.py: -------------------------------------------------------------------------------- 1 | # 使用networkX计算节点的pagerank 2 | import networkx as nx 3 | import matplotlib.pyplot as plt 4 | 5 | # 创建有向图 6 | G = nx.DiGraph() 7 | # 设置有向图的边集合 8 | edges = [("A", "B"), ("A", "C"), ("A", "D"), ("B", "A"), ("B", "D"), ("C", "A"), ("D", "B"), ("D", "C")] 9 | # 在有向图G中添加边集合 10 | for edge in edges: 11 | G.add_edge(edge[0], edge[1]) 12 | 13 | # 有向图可视化 14 | layout = nx.spring_layout(G) 15 | nx.draw(G, pos=layout, with_labels=True, hold=False) 16 | plt.show() 17 | 18 | # 计算简化模型的PR值 19 | pr = nx.pagerank(G, alpha=1) 20 | print("简化模型的PR值:", pr) 21 | 22 | # 计算随机模型的PR值 23 | pr = nx.pagerank(G, alpha=0.8) 24 | print("随机模型的PR值:", pr) -------------------------------------------------------------------------------- /L4/pagerank/pagerank_simulation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | a = np.array([[0, 1/2, 1, 0], 3 | [1/3, 0, 0, 1/2], 4 | [1/3, 0, 0, 1/2], 5 | [1/3, 1/2, 0, 0]]) 6 | a_leak = np.array([[0, 0, 0, 1/2], 7 | [0, 0, 0, 1/2], 8 | [0, 1, 0, 0], 9 | [0, 0, 1, 0]]) 10 | 11 | a_sink = np.array([[0, 0, 0, 0], 12 | [1/2, 0, 0, 1], 13 | [0, 1, 1, 0], 14 | [1/2, 0, 0, 0]]) 15 | 16 | b = np.array([1/4, 1/4, 1/4, 1/4]) 17 | w = b 18 | 19 | def work(a, w): 20 | for i in range(100): 21 | w = np.dot(a, w) 22 | print(w) 23 | 24 | def random_work(a, w, n): 25 | d = 0.85 26 | for i in range(100): 27 | w = (1-d)/n + d*np.dot(a, w) 28 | print(w) 29 | 30 | #work(a, w) 31 | #random_work(a, w, 4) 32 | #random_work(a_leak, w, 4) 33 | #random_work(a_sink, w, 4) -------------------------------------------------------------------------------- /L4/scc/scc1.py: -------------------------------------------------------------------------------- 1 | # 计算强连通图 2 | import networkx as nx 3 | import matplotlib.pyplot as plt 4 | 5 | # 创建有向图 6 | G = nx.DiGraph() 7 | # 在图中添加点 8 | G.add_nodes_from(['a','b','c','d','e','f','g','h']) 9 | G.add_edges_from([('a','b'),('b','c'),('b','c'),('c','d'),('d','c'),\ 10 | ('e','a'),('b','e'),('b','f'),('e','f'),('f','g'),('g','f'),\ 11 | ('c','g'),('h','g'),('d','h'),('h','d')]) 12 | 13 | # 有向图可视化 14 | layout = nx.spring_layout(G) 15 | nx.draw(G, pos=layout, with_labels=True, hold=False) 16 | plt.show() 17 | for c in nx.strongly_connected_components(G): 18 | print(c) 19 | 20 | 21 | -------------------------------------------------------------------------------- /L4/team_cluster/team_cluster.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sklearn.cluster import KMeans 3 | from sklearn import preprocessing 4 | import pandas as pd 5 | import numpy as np 6 | # 数据加载 7 | data = pd.read_csv('team_cluster_data.csv', encoding='gbk') 8 | train_x = data[["2019国际排名","2018世界杯排名","2015亚洲杯排名"]] 9 | kmeans = KMeans(n_clusters=3) 10 | # 规范化到 [0,1] 空间 11 | min_max_scaler=preprocessing.MinMaxScaler() 12 | train_x=min_max_scaler.fit_transform(train_x) 13 | #print(train_x) 14 | # kmeans 算法 15 | kmeans.fit(train_x) 16 | predict_y = kmeans.predict(train_x) 17 | # 合并聚类结果,插入到原数据中 18 | result = pd.concat((data,pd.DataFrame(predict_y)),axis=1) 19 | result.rename({0:u'聚类结果'},axis=1,inplace=True) 20 | print(result) 21 | # 将结果导出到CSV文件中 22 | #result.to_csv("team_cluster_result.csv") 23 | 24 | -------------------------------------------------------------------------------- /L4/team_cluster/team_cluster_data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cystanford/Data_Analysis_with_Python/a33b965d98612031a0effe17a9c6da3a1eaefb5e/L4/team_cluster/team_cluster_data.csv -------------------------------------------------------------------------------- /L4/textrank/news.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cystanford/Data_Analysis_with_Python/a33b965d98612031a0effe17a9c6da3a1eaefb5e/L4/textrank/news.txt -------------------------------------------------------------------------------- /L4/textrank/news_textrank.py: -------------------------------------------------------------------------------- 1 | #-*- encoding:utf-8 -*- 2 | from textrank4zh import TextRank4Keyword, TextRank4Sentence 3 | import jieba 4 | 5 | text = '王者荣耀典韦连招是使用一技能+大招+晕眩+二技能+普攻。这套连招主要用于先手强开团,当发现对面走位失误或撤退不及时,我们就可以利用一技能的加速。此外配合大招减速留住对手,协同队友完成击杀。\ 6 | 当对方站位较集中时,我们同样可以利用“一技能+大招+晕眩”进行团控和吸收伤害。\ 7 | 在吸收伤害的同时我们还可以利二技能打出不错的输出。这套连招重要的是把握时机,要有一夫当关,万夫莫开之势。\ 8 | 缺点是一技能的强化普攻和解除控制的效果会被浪费。\ 9 | 连招二:大招+晕眩+二技能+普攻+一技能+普攻。\ 10 | 这套连招用于偷袭对手后排很是好用,利用草丛埋伏。\ 11 | 大招跳到对面身上。迅速晕眩对手,接着二技能继续减速对手,二技能命中后会提升典韦到极限攻速,这时不断普攻,接下来一般会遇到两种情况,当对手继续逃跑时,我们利用一技能加速追击对手,强化普攻击杀对手。\ 12 | 当对手用技能控住我们我们可以利用一技能解除控制,追击并完成击杀。' 13 | 14 | # 输出关键词,设置文本小写,窗口为2 15 | tr4w = TextRank4Keyword() 16 | tr4w.analyze(text=text, lower=True, window=3) 17 | print('关键词:') 18 | for item in tr4w.get_keywords(20, word_min_len=2): 19 | print(item.word, item.weight) 20 | 21 | 22 | # 输出重要的句子 23 | tr4s = TextRank4Sentence() 24 | tr4s.analyze(text=text, lower=True, source = 'all_filters') 25 | print('摘要:') 26 | # 重要性较高的三个句子 27 | for item in tr4s.get_key_sentences(num=3): 28 | # index是语句在文本中位置,weight表示权重 29 | print(item.index, item.weight, item.sentence) 30 | -------------------------------------------------------------------------------- /L4/textrank/news_textrank_snownlp.py: -------------------------------------------------------------------------------- 1 | from snownlp import SnowNLP 2 | 3 | text = '王者荣耀典韦连招是使用一技能+大招+晕眩+二技能+普攻,这套连招主要用于先手强开团,当发现对面走位失误或撤退不及时,我们就可以利用一技能的加速,配合大招减速留住对手,协同队友完成击杀。\ 4 | 当对方站位较集中时,我们同样可以利用“一技能+大招+晕眩”进行团控和吸收伤害。\ 5 | 在吸收伤害的同时我们还可以利二技能打出不错的输出。这套连招重要的是把握时机,要有一夫当关,万夫莫开之势。\ 6 | 缺点是一技能的强化普攻和解除控制的效果会被浪费。\ 7 | 连招二:大招+晕眩+二技能+普攻+一技能+普攻。\ 8 | 这套连招用于偷袭对手后排很是好用,利用草丛埋伏。\ 9 | 大招跳到对面身上。迅速晕眩对手,接着二技能继续减速对手,二技能命中后会提升典韦到极限攻速,这时不断普攻,接下来一般会遇到两种情况,当对手继续逃跑时,我们利用一技能加速追击对手,强化普攻击杀对手。\ 10 | 当对手用技能控住我们我们可以利用一技能解除控制,追击并完成击杀。' 11 | snow = SnowNLP(text) 12 | # 打印关键词 13 | print(snow.keywords(20)) 14 | 15 | # TextRank算法 16 | print(snow.summary(3)) 17 | print(snow.sentiments) -------------------------------------------------------------------------------- /L4/textrank/sentence_textrank.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import jieba 3 | import jieba.analyse 4 | import jieba.posseg as pseg 5 | 6 | sentence = '王者荣耀典韦连招是使用一技能+大招+晕眩+二技能+普攻,这套连招主要用于先手强开团,当发现对面走位失误或撤退不及时,我们就可以利用一技能的加速,配合大招减速留住对手,协同队友完成击杀。当对方站位较集中时,我们同样可以利用“一技能+大招+晕眩”进行团控和吸收伤害。在吸收伤害的同时我们还可以利二技能打出不错的输出。这套连招重要的是把握时机,要有一夫当关,万夫莫开之势。缺点是一技能的强化普攻和解除控制的效果会被浪费。' 7 | # 获取分词 8 | seg_list = jieba.cut(sentence, cut_all=False) 9 | print(' '.join(seg_list)) 10 | # 获取分词和词性 11 | words = pseg.cut(sentence) 12 | for word, flag in words: 13 | print('%s, %s' % (word, flag)) 14 | 15 | 16 | # 通过TF-IDF获取关键词 17 | keywords = jieba.analyse.extract_tags(sentence, topK=20, withWeight=True, allowPOS=('n','nr','ns')) 18 | for item in keywords: 19 | print(item[0],item[1]) 20 | print('-'*100) 21 | 22 | # 基于TextRank算法的关键词抽取 23 | #keywords = jieba.analyse.extract_tags(sentence, topK=20, withWeight=True, allowPOS=('n','nr','ns')) 24 | #keywords = jieba.analyse.textrank(sentence, topK=20, withWeight=True, allowPOS=('ns', 'n', 'vn', 'v')) 25 | #keywords = jieba.analyse.textrank(sentence, topK=20, withWeight=True, allowPOS=('n', 'ns')) 26 | keywords = jieba.analyse.textrank(sentence, topK=20, withWeight=True) 27 | print(keywords) 28 | for item in keywords: 29 | print(item[0],item[1]) 30 | -------------------------------------------------------------------------------- /L4/valid_tree/valid_tree.py: -------------------------------------------------------------------------------- 1 | # 判断一个图是否为树,使用DFS 2 | def valid_tree(n, edges): 3 | # 若这个图的边数比n-1多,那么一定有环,如果比n-1小,有孤立点 4 | if len(edges) != n - 1: 5 | return False 6 | 7 | # 初始化邻接表 8 | mat =[[] for i in range(n)] 9 | for i in range(len(edges)): 10 | v, u = edges[i][0], edges[i][1] 11 | mat[v].append(u) 12 | mat[u].append(v) 13 | 14 | # 判定这棵树的连通分量是否为1 15 | count = 0 16 | visit = [0 for i in range(n)] 17 | for i in range(n): 18 | if visit[i] == 0: 19 | dfs(i, visit, mat) 20 | # 统计连通图的个数 21 | count = count + 1 22 | 23 | if count > 1: 24 | return False 25 | return True 26 | 27 | # 深度优先遍历 28 | def dfs(node, visit, mat): 29 | # 访问过的节点不再进行访问 30 | visit[node] = 1 31 | for i in range(len(mat[node])): 32 | neighbor = mat[node][i] 33 | if visit[neighbor] == 0: 34 | dfs(neighbor, visit, mat) 35 | 36 | print(valid_tree(5, [[0,1], [0,2], [0,3], [1,4]])) 37 | print(valid_tree(5, [[0,1], [1,2], [2,3], [1,3], [1,4]])) 38 | -------------------------------------------------------------------------------- /L4/valid_tree/valid_tree2.py: -------------------------------------------------------------------------------- 1 | # 判断一个图是否为树,使用并查集 2 | 3 | # 查找帮主 4 | def find(x, parent): 5 | if parent[x] == x: 6 | return x 7 | return find(parent[x], parent); 8 | 9 | # 将y作为x的父亲节点 10 | def union(x, y, parent): 11 | parent[x] = y 12 | 13 | # 判断一个图是否为树 14 | def valid_tree(n, edges): 15 | # 初始化节点的父亲节点 16 | parent = [0 for i in range(n)] 17 | for i in range(n): 18 | parent[i] = i 19 | 20 | for edge in edges: 21 | p1 = find(edge[0], parent) 22 | p2 = find(edge[1], parent) 23 | # 两个节点的帮主相等,则说明存在环 24 | if p1 == p2: 25 | return False 26 | # p1是p2的父亲 27 | union(p2, p1, parent) 28 | 29 | # 如果连通分量为1,那么这些点的帮主都是同一个 30 | p = find(0, parent) 31 | for i in range(1,n): 32 | if p != find(i, parent): 33 | return False 34 | return True 35 | 36 | print(valid_tree(5, [[0,1], [0,2], [0,3], [1,4]])) 37 | print(valid_tree(5, [[0,1], [1,2], [2,3], [1,3], [1,4]])) 38 | --------------------------------------------------------------------------------