├── README.md ├── movieType.txt ├── movieType.html ├── movie.sql ├── avgScore.html ├── movieTree.html ├── movieAgeScoreJson.txt ├── MysqlQuery.py ├── doubanTop250.py └── movieTreeJson.txt /README.md: -------------------------------------------------------------------------------- 1 | ### 博客具体介绍: 2 | 3 | * [豆瓣电影top250爬虫系列(一)--- 模拟登陆+爬取电影信息](https://www.jianshu.com/p/40adc97ac95c) 4 | 5 | * [豆瓣电影top250爬虫系列(二)--- python数据库连接](https://www.jianshu.com/p/c9b3be4cc721) 6 | 7 | * [豆瓣电影top250爬虫系列(三)--- python+Echarts数据可视化](https://www.jianshu.com/p/08a8de677887) 8 | -------------------------------------------------------------------------------- /movieType.txt: -------------------------------------------------------------------------------- 1 | {"typeNameList": ["\u5267\u60c5", "\u559c\u5267", "\u52a8\u4f5c", "\u7231\u60c5", "\u79d1\u5e7b", "\u60ac\u7591", "\u60ca\u609a", "\u6050\u6016", "\u72af\u7f6a", "\u540c\u6027", "\u97f3\u4e50", "\u6b4c\u821e", "\u4f20\u8bb0", "\u5386\u53f2", "\u6218\u4e89", "\u897f\u90e8", "\u5947\u5e7b", "\u5192\u9669", "\u707e\u96be", "\u6b66\u4fa0", "\u60c5\u8272"], "typeNumList": [191, 44, 31, 58, 24, 32, 34, 2, 43, 8, 7, 5, 15, 12, 20, 4, 32, 43, 1, 3, 1]} -------------------------------------------------------------------------------- /movieType.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 79 | 80 | -------------------------------------------------------------------------------- /movie.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Navicat MySQL Data Transfer 3 | 4 | Source Server : blog 5 | Source Server Version : 50528 6 | Source Host : 127.0.0.1:3306 7 | Source Database : movie 8 | 9 | Target Server Type : MYSQL 10 | Target Server Version : 50528 11 | File Encoding : 65001 12 | 13 | Date: 2018-05-28 22:46:44 14 | */ 15 | 16 | SET FOREIGN_KEY_CHECKS=0; 17 | 18 | -- ---------------------------- 19 | -- Table structure for `actor` 20 | -- ---------------------------- 21 | DROP TABLE IF EXISTS `actor`; 22 | CREATE TABLE `actor` ( 23 | `id` int(100) unsigned NOT NULL AUTO_INCREMENT, 24 | `movieId` int(100) NOT NULL, 25 | `name` varchar(255) COLLATE utf8mb4_bin NOT NULL, 26 | `link` varchar(255) COLLATE utf8mb4_bin NOT NULL, 27 | PRIMARY KEY (`id`) 28 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; 29 | 30 | -- ---------------------------- 31 | -- Records of actor 32 | -- ---------------------------- 33 | 34 | -- ---------------------------- 35 | -- Table structure for `award` 36 | -- ---------------------------- 37 | DROP TABLE IF EXISTS `award`; 38 | CREATE TABLE `award` ( 39 | `id` int(100) unsigned NOT NULL AUTO_INCREMENT, 40 | `movieId` int(100) NOT NULL, 41 | `name` varchar(255) COLLATE utf8mb4_bin NOT NULL, 42 | `type` varchar(255) COLLATE utf8mb4_bin NOT NULL, 43 | PRIMARY KEY (`id`) 44 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; 45 | 46 | -- ---------------------------- 47 | -- Records of award 48 | -- ---------------------------- 49 | 50 | -- ---------------------------- 51 | -- Table structure for `comment` 52 | -- ---------------------------- 53 | DROP TABLE IF EXISTS `comment`; 54 | CREATE TABLE `comment` ( 55 | `id` int(100) unsigned NOT NULL AUTO_INCREMENT, 56 | `movieId` int(100) NOT 
NULL, 57 | `content` mediumtext CHARACTER SET utf8mb4 NOT NULL, 58 | `userName` varchar(255) CHARACTER SET utf8mb4 NOT NULL, 59 | `time` varchar(255) COLLATE utf8mb4_bin NOT NULL, 60 | PRIMARY KEY (`id`) 61 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; 62 | 63 | -- ---------------------------- 64 | -- Records of comment 65 | -- ---------------------------- 66 | 67 | -- ---------------------------- 68 | -- Table structure for `movie` 69 | -- ---------------------------- 70 | DROP TABLE IF EXISTS `movie`; 71 | CREATE TABLE `movie` ( 72 | `id` int(100) unsigned NOT NULL AUTO_INCREMENT, 73 | `title` varchar(255) COLLATE utf8mb4_bin DEFAULT NULL, 74 | `director` varchar(255) COLLATE utf8mb4_bin DEFAULT NULL, 75 | `age` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL, 76 | `country` varchar(255) COLLATE utf8mb4_bin DEFAULT NULL, 77 | `type` varchar(255) COLLATE utf8mb4_bin DEFAULT NULL, 78 | `evaluationNum` varchar(255) COLLATE utf8mb4_bin DEFAULT NULL, 79 | `score` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL, 80 | `note` varchar(255) COLLATE utf8mb4_bin DEFAULT NULL, 81 | `link` mediumtext COLLATE utf8mb4_bin, 82 | `commentLink` mediumtext COLLATE utf8mb4_bin, 83 | `time` varchar(255) COLLATE utf8mb4_bin DEFAULT NULL, 84 | `otherName` tinytext COLLATE utf8mb4_bin, 85 | `movieLength` varchar(100) CHARACTER SET utf8mb4 DEFAULT NULL, 86 | `summary` mediumtext COLLATE utf8mb4_bin, 87 | `poster` mediumtext COLLATE utf8mb4_bin, 88 | `language` varchar(100) COLLATE utf8mb4_bin DEFAULT NULL, 89 | PRIMARY KEY (`id`) 90 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; 91 | 92 | -- ---------------------------- 93 | -- Records of movie 94 | -- ---------------------------- 95 | -------------------------------------------------------------------------------- /avgScore.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 123 | 124 | 
-------------------------------------------------------------------------------- /movieTree.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 109 | 110 | -------------------------------------------------------------------------------- /movieAgeScoreJson.txt: -------------------------------------------------------------------------------- 1 | {"ages": ["Growth", "2017", "2016", "2015", "2014", "2013", "2012", "2011", "2010", "2009", "2008", "2007", "2006", "2005", "2004", "2003", "2002", "2001", "2000", "1999", "1998", "1997", "1996", "1995", "1994", "1993", "1992", "1991", "1990", "1989", "1988", "1987", "1986", "1984", "1982", "1980", "1979", "1975", "1974", "1972", "1971", "1966", "1965", "1961", "1960", "1957", "1954", "1953", "1952", "1950", "1942", "1940", "1939", "1936", "1931"], "ageNames": [], "2017": [9, 0, 0, 9, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "2016": [9, 9, 0, 8, 0, 9, 9, 0, 9, 0, 0, 0, 9, 9, 9, 0, 0, 9, 0, 0, 0], "2015": [9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 9, 0, 0, 0], "2014": [9, 9, 9, 0, 9, 9, 9, 0, 9, 9, 9, 0, 9, 0, 9, 0, 0, 9, 0, 0, 0], "2013": [9, 9, 0, 9, 8, 9, 8, 0, 9, 9, 8, 0, 9, 0, 0, 0, 9, 9, 0, 0, 0], "2012": [9, 9, 9, 0, 9, 0, 9, 0, 9, 0, 0, 0, 0, 0, 0, 9, 9, 9, 0, 0, 0], "2011": [9, 9, 0, 9, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0], "2010": [9, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 8, 8, 0, 9, 9, 9, 0, 0, 0], "2009": [9, 9, 9, 9, 9, 8, 8, 0, 8, 0, 9, 9, 0, 0, 9, 0, 0, 9, 0, 0, 0], "2008": [9, 0, 9, 9, 9, 0, 9, 0, 9, 0, 0, 0, 9, 0, 9, 0, 9, 9, 0, 0, 0], "2007": [9, 8, 9, 8, 8, 9, 9, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 9, 0, 0, 0], "2006": [9, 8, 0, 9, 9, 9, 9, 0, 8, 0, 0, 9, 9, 0, 0, 0, 8, 8, 0, 0, 0], "2005": [9, 0, 9, 9, 9, 0, 9, 0, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "2004": 
[9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 9, 0, 0, 9, 9, 0, 9, 9, 0, 0, 0], "2003": [9, 8, 9, 8, 8, 9, 9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0], "2002": [9, 8, 9, 8, 0, 9, 8, 0, 9, 8, 9, 0, 9, 9, 9, 0, 9, 9, 0, 0, 0], "2001": [9, 9, 9, 9, 9, 8, 8, 0, 0, 0, 0, 0, 9, 8, 8, 0, 9, 9, 0, 0, 0], "2000": [9, 0, 0, 9, 0, 8, 8, 0, 8, 0, 0, 0, 0, 9, 9, 0, 0, 8, 0, 0, 9], "1999": [9, 9, 9, 8, 9, 9, 9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0], "1998": [9, 9, 0, 0, 9, 9, 9, 0, 9, 0, 9, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0], "1997": [9, 10, 9, 9, 9, 0, 9, 0, 8, 9, 0, 0, 0, 0, 9, 0, 9, 9, 9, 0, 0], "1996": [9, 0, 8, 9, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 8, 0, 0, 8, 0, 0, 0], "1995": [9, 9, 9, 9, 0, 9, 9, 0, 9, 0, 9, 0, 9, 9, 9, 0, 9, 9, 0, 0, 0], "1994": [9, 9, 9, 9, 0, 0, 0, 0, 9, 0, 0, 9, 0, 9, 0, 9, 0, 9, 0, 9, 0], "1993": [9, 9, 0, 9, 0, 0, 0, 0, 9, 9, 0, 0, 0, 9, 9, 0, 8, 0, 0, 0, 0], "1992": [9, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0], "1991": [9, 9, 9, 0, 9, 0, 9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1990": [9, 0, 0, 9, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0], "1989": [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 0, 0, 0], "1988": [9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 9, 0, 0, 0, 0], "1987": [9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 9, 0], "1986": [0, 0, 9, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0], "1984": [9, 0, 0, 0, 9, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0], "1982": [8, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1980": [0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1979": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0], "1975": [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1974": [9, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1972": [9, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1971": [8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1966": [0, 9, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 9, 9, 0, 9, 0, 0, 0], "1965": [9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0], "1961": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0], "1960": [0, 0, 0, 0, 0, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1957": [10, 0, 0, 0, 0, 10, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1954": [9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0], "1953": [9, 9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1952": [0, 9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1950": [9, 0, 0, 0, 0, 9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1942": [9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0], "1940": [9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0], "1939": [9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0], "1936": [9, 9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "1931": [9, 9, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "names": ["\u5267\u60c5", "\u559c\u5267", "\u52a8\u4f5c", "\u7231\u60c5", "\u79d1\u5e7b", "\u60ac\u7591", "\u60ca\u609a", "\u6050\u6016", "\u72af\u7f6a", "\u540c\u6027", "\u97f3\u4e50", "\u6b4c\u821e", "\u4f20\u8bb0", "\u5386\u53f2", "\u6218\u4e89", "\u897f\u90e8", "\u5947\u5e7b", "\u5192\u9669", "\u707e\u96be", "\u6b66\u4fa0", "\u60c5\u8272"]} -------------------------------------------------------------------------------- /MysqlQuery.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from flask import Flask, render_template, request, jsonify 3 | import pymysql 4 | import json 5 | import math 6 | 7 | 8 | conn = pymysql.connect( 9 | host = 'localhost', 10 | port = 3306, 11 | user = 'root', 12 | passwd = '123456', 13 | db = 'movie', 14 | charset = 'utf8mb4' #mysql中utf8不能存储4个字节的字符 15 | ) 16 | 17 | cursor = conn.cursor() 18 | 19 | def getJsonData(sql): 20 | cursor.execute(sql) 21 | 22 | data = cursor.fetchall() 23 | 24 | # 
def getJsonData(sql, args=None):
    """Execute *sql* on the module-level ``cursor`` and return all rows.

    Args:
        sql: SQL statement to run. When *args* is supplied the statement
            must use ``%s`` placeholders instead of interpolated values.
        args: Optional sequence of values bound to the placeholders by the
            database driver. ``None`` (the default) runs *sql* verbatim,
            which keeps existing one-argument callers working.

    Returns:
        The result of ``cursor.fetchall()`` for the executed statement.
    """
    cursor.execute(sql, args)
    return cursor.fetchall()


# Genre labels searched for (as substrings) in the ``movie.type`` column.
typeNameList = ['剧情', '喜剧', '动作', '爱情', '科幻', '悬疑', '惊悚', '恐怖', '犯罪',
                '同性', '音乐', '歌舞', '传记', '历史', '战争', '西部', '奇幻', '冒险',
                '灾难', '武侠', '情色']


def _containsPattern(text):
    """Return a SQL LIKE pattern matching any value that contains *text*."""
    return '%{}%'.format(text)


def _treeNode(name, children):
    """Return one tree node in the ``{"name", "children"}`` output shape."""
    return {"name": name, "children": children}


def getMovieTypeJson():
    """Count the movies whose ``type`` column mentions each genre.

    Returns:
        A dict with ``typeNameList`` (the genre labels) and ``typeNumList``
        (the matching movie count for each label, in the same order).
    """
    typeNumList = []
    for typeName in typeNameList:
        rows = getJsonData(
            "select count(type) from movie where type like %s",
            (_containsPattern(typeName),),
        )
        # count() yields exactly one row with one column; read it directly
        # rather than parsing the printed form of the result tuple.
        typeNumList.append(int(rows[0][0]))

    return {'typeNameList': typeNameList, 'typeNumList': typeNumList}


def getPureList(ageList):
    """Flatten single-column result rows into a list of strings.

    Example: ``(('2009',), ('2006',))`` becomes ``['2009', '2006']``.

    The first column of each row is converted with ``str()``. The value is
    taken from the row itself, so text that begins or ends with a quote,
    comma or parenthesis is returned intact.

    Args:
        ageList: Iterable of rows. A row may be a tuple/list (its first
            element is used, or ``''`` when it is empty) or a bare value.

    Returns:
        A list of strings, one per input row, in input order.
    """
    numList = []
    for row in ageList:
        if isinstance(row, (tuple, list)):
            value = row[0] if row else ''
        else:
            value = row
        numList.append(str(value))
    return numList


def getMovieTreeJson():
    """Build the genre > year > country > score > length > movie tree.

    Every level is queried from the ``movie`` table with bound parameters.
    Only the first space-separated token of the ``country`` column is used,
    and each such token appears once per (genre, year).

    Returns:
        A JSON document as ``str`` of the form
        ``{"types": [{"name": ..., "children": [...]}, ...]}``. Leaf nodes
        are ``{"name": <title>, "value": <note>}``. The text is produced by
        ``json.dumps`` so quotes and backslashes in the data are escaped and
        empty child lists stay valid JSON.
    """
    typeNodes = []
    for typeName in typeNameList:
        typePattern = _containsPattern(typeName)
        ageRows = getJsonData(
            "select distinct age from movie where type like %s"
            " order by age desc",
            (typePattern,),
        )

        ageNodes = []
        for age in getPureList(ageRows):
            countryRows = getJsonData(
                "select distinct country from movie"
                " where age = %s and type like %s",
                (age, typePattern),
            )

            countryNodes = []
            seenCountries = set()
            for country in getPureList(countryRows):
                firstCountry = country.split(" ")[0]
                if firstCountry in seenCountries:
                    continue
                seenCountries.add(firstCountry)
                countryPattern = firstCountry + '%'

                scoreRows = getJsonData(
                    "select distinct score from movie"
                    " where age = %s and type like %s and country like %s"
                    " order by score desc",
                    (age, typePattern, countryPattern),
                )

                scoreNodes = []
                for score in getPureList(scoreRows):
                    lengthRows = getJsonData(
                        "select distinct movieLength from movie"
                        " where age = %s and type like %s"
                        " and country like %s and score = %s"
                        " order by score desc",
                        (age, typePattern, countryPattern, score),
                    )

                    lengthNodes = []
                    for movieLength in getPureList(lengthRows):
                        titleRows = getJsonData(
                            "select title, note from movie"
                            " where age = %s and type like %s"
                            " and country like %s and score = %s"
                            " and movieLength = %s order by score desc",
                            (age, typePattern, countryPattern, score,
                             movieLength),
                        )
                        # Values are rendered with format() so a NULL column
                        # still becomes the text "None", as before.
                        movieNodes = [
                            {"name": '{}'.format(title),
                             "value": '{}'.format(note)}
                            for title, note in titleRows
                        ]
                        lengthNodes.append(_treeNode(
                            "时长{}".format(movieLength), movieNodes))

                    scoreNodes.append(_treeNode(
                        "分数{}".format(score), lengthNodes))

                countryNodes.append(_treeNode(firstCountry, scoreNodes))

            ageNodes.append(_treeNode(age, countryNodes))

        typeNodes.append(_treeNode(typeName, ageNodes))

    # ensure_ascii=False keeps the Chinese labels as literal characters in
    # the returned text, matching the previous hand-built string.
    return json.dumps({"types": typeNodes}, ensure_ascii=False)


def getAgeScoreJson():
    """Compute the rounded average score per genre for every release year.

    Returns:
        A dict containing:
        ``ages``     - ``['Growth', <year>, ...]`` with years in descending
                       order as returned by the database;
        ``ageNames`` - an empty list;
        ``<year>``   - one list per year holding, for each genre in
                       ``typeNameList`` order, the rounded average score
                       (``0`` when the year has no movie of that genre);
        ``names``    - ``typeNameList``.
    """
    ageScoreMap = {'ages': ['Growth'], 'ageNames': []}
    ageRows = getJsonData('select DISTINCT age from movie ORDER BY age desc')

    for age in getPureList(ageRows):
        avgScoreList = []
        for typeName in typeNameList:
            rows = getJsonData(
                "select avg(score) from movie"
                " where age = %s and type like %s",
                (age, _containsPattern(typeName)),
            )
            avgScore = rows[0][0]
            # avg() over zero rows is NULL, which arrives here as None.
            if avgScore is None:
                avgScore = 0
            avgScoreList.append(round(float(avgScore)))
        ageScoreMap[age] = avgScoreList
        ageScoreMap['ages'].append(age)

    ageScoreMap['names'] = typeNameList
    return ageScoreMap
def writeTypeJsonFile(path): 125 | with open(path, 'w') as f: 126 | json.dump(getMovieTypeJson(), f) 127 | 128 | def writeAgeScoreJsonFile(path): 129 | with open(path, 'w') as f: 130 | json.dump(getAgeScoreJson(), f) 131 | 132 | def writeTreeJsonFile(path): 133 | with open(path, 'w') as f: 134 | json.dump(getMovieTreeJson(), f) 135 | 136 | # writeTypeJsonFile(r'C:\Users\Administrator\Desktop\books\movieType.txt') 137 | # writeTreeJsonFile(r'C:\Users\Administrator\Desktop\books\movieTreeJson.txt') 138 | writeAgeScoreJsonFile(r'C:\Users\Administrator\Desktop\books\movieAgeScoreJson.txt') -------------------------------------------------------------------------------- /doubanTop250.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import requests 3 | import time 4 | import re 5 | import pymysql 6 | 7 | conn = pymysql.connect( 8 | host = 'localhost', 9 | port = 3306, 10 | user = 'root', 11 | passwd = '123456', 12 | db = 'movie', 13 | charset = 'utf8mb4' #mysql中utf8不能存储4个字节的字符 14 | ) 15 | 16 | cursor = conn.cursor() 17 | 18 | headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'} 19 | cookies = {'cookie': 'll="108288"; bid=G4oiRkK4MYo; _vwo_uuid_v2=D09E6E6F27485A2A9534CF9DAC6455E7C|6d2f5c57a592a6cee6084f1489fb46f2; gr_user_id=646ad381-40f6-40d6-af0c-e5cdb6b839b6; _ga=GA1.2.1544213960.1527212266; ps=y; ue="2287093698@qq.com"; push_doumail_num=0; __utmv=30149280.6232; ap=1; __utma=30149280.1544213960.1527212266.1527431188.1527438227.14; __utmb=30149280.0.10.1527438227; __utmc=30149280; __utmz=30149280.1527438227.14.7.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; push_noty_num=0; as="https://movie.douban.com/subject/1292052/"'} 20 | 21 | def getMovieList(start): 22 | res = requests.get('https://movie.douban.com/top250?start='+ start, headers = headers, cookies = cookies) 23 | html = res.text 24 | 25 | # print(html) 26 | reg = 
r'(.*?) .*?
.*?(.*?) / (.*?) / (.*?)