├── .gitignore ├── Beautiful Soup 爬虫 ├── 001.py ├── 002.py ├── 003.py ├── baidutieba.py ├── bocai.py ├── demo.html ├── dianying.py ├── kouhong.py ├── myiron.py ├── parse_station.py ├── qiubai.py ├── stations.py ├── trainticket.py ├── xiaoshuopaihang.py ├── yueyintai.py ├── 反爬虫.py ├── 沪铜数据.py └── 贴吧帖子结构分析.html ├── Google-Image └── spider.py ├── README.md ├── Scrapy 爬虫框架 ├── 001.py ├── biquge │ ├── biquge │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ ├── sjzh.py │ │ │ └── xsphspider.py │ └── scrapy.cfg ├── demo.xml ├── mzitu │ ├── mzitu │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ └── mezitu.py │ └── scrapy.cfg ├── proxy │ ├── proxy │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ ├── dxdlspider.py │ │ │ └── kdlspider.py │ └── scrapy.cfg ├── test_proxy.py ├── weather │ ├── scrapy.cfg │ └── weather │ │ ├── __init__.py │ │ ├── data │ │ └── weather.json │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ ├── SZtianqi.py │ │ └── __init__.py ├── xiubai │ ├── scrapy.cfg │ └── xiubai │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── middlewares │ │ ├── __init__.py │ │ ├── coustomProxy.py │ │ ├── coustomUserAgent.py │ │ └── proxy.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ ├── __init__.py │ │ └── hotspider.py └── zimuku │ ├── scrapy.cfg │ └── zimuku │ ├── __init__.py │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ ├── __init__.py │ └── demo.py ├── YHShop ├── citydict.py ├── cityid.html ├── handler.py ├── spider.py └── tools.py ├── alipay ├── alipay_v1.py ├── alipay_v2.py └── alipay_v3.py ├── doubanmovie ├── config.py ├── data.py ├── data │ ├── beautifulTop250.json │ ├── cached_douban │ │ ├── 0.html │ │ ├── 100.html │ │ ├── 125.html │ │ ├── 150.html │ │ ├── 175.html │ │ ├── 200.html │ │ ├── 225.html │ │ ├── 25.html │ │ ├── 50.html │ │ └── 75.html │ ├── cached_pansou │ │ ├── 7号房的礼物 .json │ │ ├── E.T. 
外星人 .json │ │ ├── V字仇杀队 .json │ │ ├── 一一 .json │ │ ├── 一次别离 .json │ │ ├── 七宗罪 .json │ │ ├── 七武士 .json │ │ ├── 三傻大闹宝莱坞 .json │ │ ├── 上帝之城 .json │ │ ├── 上帝也疯狂 .json │ │ ├── 与狼共舞 .json │ │ ├── 东京物语 .json │ │ ├── 东邪西毒 .json │ │ ├── 两杆大烟枪 .json │ │ ├── 乱世佳人 .json │ │ ├── 二十二 .json │ │ ├── 人工智能 .json │ │ ├── 低俗小说 .json │ │ ├── 侧耳倾听 .json │ │ ├── 借东西的小人阿莉埃蒂 .json │ │ ├── 倩女幽魂 .json │ │ ├── 偷拐抢骗 .json │ │ ├── 傲慢与偏见 .json │ │ ├── 入殓师 .json │ │ ├── 再次出发之纽约遇见你 .json │ │ ├── 冰川时代 .json │ │ ├── 初恋这件小事 .json │ │ ├── 剪刀手爱德华 .json │ │ ├── 加勒比海盗 .json │ │ ├── 勇士 .json │ │ ├── 勇敢的心 .json │ │ ├── 勇闯夺命岛 .json │ │ ├── 十二怒汉 .json │ │ ├── 千与千寻 .json │ │ ├── 千钧一发 .json │ │ ├── 卡萨布兰卡 .json │ │ ├── 卢旺达饭店 .json │ │ ├── 发条橙 .json │ │ ├── 变脸 .json │ │ ├── 叫我第一名 .json │ │ ├── 可可西里 .json │ │ ├── 告白 .json │ │ ├── 哈利·波特与死亡圣器(下) .json │ │ ├── 哈利·波特与魔法石 .json │ │ ├── 哈尔的移动城堡 .json │ │ ├── 哪吒闹海 .json │ │ ├── 唐伯虎点秋香 .json │ │ ├── 喜剧之王 .json │ │ ├── 喜宴 .json │ │ ├── 国王的演讲 .json │ │ ├── 地球上的星星 .json │ │ ├── 城市之光 .json │ │ ├── 夜访吸血鬼 .json │ │ ├── 大卫·戈尔的一生 .json │ │ ├── 大话西游之大圣娶亲 .json │ │ ├── 大话西游之月光宝盒 .json │ │ ├── 大闹天宫 .json │ │ ├── 大鱼 .json │ │ ├── 天使爱美丽 .json │ │ ├── 天堂电影院 .json │ │ ├── 天空之城 .json │ │ ├── 头脑特工队 .json │ │ ├── 完美的世界 .json │ │ ├── 寿司之神 .json │ │ ├── 射雕英雄传之东成西就 .json │ │ ├── 小森林 冬春篇 .json │ │ ├── 小森林 夏秋篇 .json │ │ ├── 小鞋子 .json │ │ ├── 少年派的奇幻漂流 .json │ │ ├── 岁月神偷 .json │ │ ├── 巴黎淘气帮 .json │ │ ├── 布达佩斯大饭店 .json │ │ ├── 幸福终点站 .json │ │ ├── 幽灵公主 .json │ │ ├── 廊桥遗梦 .json │ │ ├── 当幸福来敲门 .json │ │ ├── 彗星来的那一夜 .json │ │ ├── 心灵捕手 .json │ │ ├── 心迷宫 .json │ │ ├── 忠犬八公物语 .json │ │ ├── 忠犬八公的故事 .json │ │ ├── 怦然心动 .json │ │ ├── 怪兽电力公司 .json │ │ ├── 恋恋笔记本 .json │ │ ├── 恐怖游轮 .json │ │ ├── 恐怖直播 .json │ │ ├── 情书 .json │ │ ├── 惊魂记 .json │ │ ├── 我是山姆 .json │ │ ├── 我爱你 .json │ │ ├── 战争之王 .json │ │ ├── 房间 .json │ │ ├── 拯救大兵瑞恩 .json │ │ ├── 指环王1:魔戒再现 .json │ │ ├── 指环王2:双塔奇兵 .json │ │ ├── 指环王3:王者无敌 .json │ │ ├── 控方证人 .json │ │ ├── 搏击俱乐部 .json │ │ ├── 摩登时代 .json │ │ ├── 撞车 .json │ │ ├── 放牛班的春天.json │ │ ├── 放牛班的春天 .json │ │ ├── 教父2 .json │ │ ├── 教父3 .json │ │ ├── 教父 .json │ │ ├── 断背山 .json │ │ ├── 新龙门客栈 .json │ │ ├── 无敌破坏王 .json │ │ ├── 无耻混蛋 .json │ │ ├── 无间道 .json │ │ ├── 时空恋旅人 .json │ │ ├── 星际穿越 .json │ │ ├── 春光乍泄 .json │ │ ├── 暖暖内含光 .json │ │ ├── 曾经 .json │ │ ├── 月球 .json │ │ ├── 朗读者 .json │ │ ├── 未麻的部屋 .json │ │ ├── 末代皇帝 .json │ │ ├── 末路狂花 .json │ │ ├── 本杰明·巴顿奇事 .json │ │ ├── 机器人总动员 .json │ │ ├── 杀人回忆 .json │ │ ├── 枪火 .json │ │ ├── 梦之安魂曲 .json │ │ ├── 楚门的世界 .json │ │ ├── 模仿游戏 .json │ │ ├── 步履不停 .json │ │ ├── 死亡诗社 .json │ │ ├── 沉默的羔羊 .json │ │ ├── 泰坦尼克号 .json │ │ ├── 活着 .json │ │ ├── 浪潮 .json │ │ ├── 海上钢琴师 .json │ │ ├── 海洋 .json │ │ ├── 海盗电台 .json │ │ ├── 海豚湾 .json │ │ ├── 海边的曼彻斯特 .json │ │ ├── 消失的爱人 .json │ │ ├── 源代码 .json │ │ ├── 熔炉 .json │ │ ├── 燃情岁月 .json │ │ ├── 燕尾蝶 .json │ │ ├── 爆裂鼓手 .json │ │ ├── 爱·回家 .json │ │ ├── 爱在午夜降临前 .json │ │ ├── 爱在日落黄昏时 .json │ │ ├── 爱在黎明破晓前 .json │ │ ├── 牯岭街少年杀人事件 .json │ │ ├── 狩猎 .json │ │ ├── 狮子王 .json │ │ ├── 猜火车 .json │ │ ├── 猫鼠游戏 .json │ │ ├── 玛丽和马克思 .json │ │ ├── 玩具总动员3 .json │ │ ├── 甜蜜蜜 .json │ │ ├── 电锯惊魂 .json │ │ ├── 疯狂动物城 .json │ │ ├── 疯狂原始人 .json │ │ ├── 疯狂的石头 .json │ │ ├── 盗梦空间 .json │ │ ├── 看不见的客人 .json │ │ ├── 真爱至上 .json │ │ ├── 碧海蓝天 .json │ │ ├── 神偷奶爸 .json │ │ ├── 禁闭岛 .json │ │ ├── 秒速5厘米 .json │ │ ├── 穆赫兰道 .json │ │ ├── 穿条纹睡衣的男孩 .json │ │ ├── 穿越时空的少女 .json │ │ ├── 窃听风暴 .json │ │ ├── 第六感 .json │ │ ├── 素媛 .json │ │ ├── 红辣椒 .json │ │ ├── 纵横四海 .json │ │ ├── 终结者2:审判日 .json │ │ ├── 绿里奇迹 .json │ │ ├── 罗生门 .json │ │ ├── 罗马假日 .json │ │ ├── 美丽人生 .json │ │ ├── 美丽心灵 .json │ │ ├── 美国丽人 .json │ │ ├── 
美国往事 .json │ │ ├── 肖申克的救赎 .json │ │ ├── 致命ID .json │ │ ├── 致命魔术 .json │ │ ├── 花样年华 .json │ │ ├── 英国病人 .json │ │ ├── 英雄本色 .json │ │ ├── 荒岛余生 .json │ │ ├── 荒蛮故事 .json │ │ ├── 荒野生存 .json │ │ ├── 菊次郎的夏天 .json │ │ ├── 萤火之森 .json │ │ ├── 萤火虫之墓 .json │ │ ├── 蓝色大门 .json │ │ ├── 虎口脱险 .json │ │ ├── 蝙蝠侠:黑暗骑士 .json │ │ ├── 蝙蝠侠:黑暗骑士崛起 .json │ │ ├── 蝴蝶 .json │ │ ├── 蝴蝶效应 .json │ │ ├── 血钻 .json │ │ ├── 被嫌弃的松子的一生 .json │ │ ├── 被解救的姜戈 .json │ │ ├── 西西里的美丽传说 .json │ │ ├── 触不可及 .json │ │ ├── 让子弹飞 .json │ │ ├── 记忆碎片 .json │ │ ├── 谍影重重2 .json │ │ ├── 谍影重重3 .json │ │ ├── 谍影重重 .json │ │ ├── 贫民窟的百万富翁 .json │ │ ├── 超能陆战队 .json │ │ ├── 超脱 .json │ │ ├── 辛德勒的名单 .json │ │ ├── 辩护人 .json │ │ ├── 达拉斯买家俱乐部 .json │ │ ├── 迁徙的鸟 .json │ │ ├── 这个杀手不太冷 .json │ │ ├── 这个男人来自地球 .json │ │ ├── 追随 .json │ │ ├── 遗愿清单 .json │ │ ├── 重庆森林 .json │ │ ├── 钢琴家 .json │ │ ├── 闻香识女人 .json │ │ ├── 阳光姐妹淘 .json │ │ ├── 阳光灿烂的日子 .json │ │ ├── 阿凡达 .json │ │ ├── 阿甘正传 .json │ │ ├── 阿飞正传 .json │ │ ├── 雨中曲 .json │ │ ├── 雨人 .json │ │ ├── 霸王别姬 .json │ │ ├── 青蛇 .json │ │ ├── 非常嫌疑犯 .json │ │ ├── 音乐之声 .json │ │ ├── 风之谷 .json │ │ ├── 飞屋环游记 .json │ │ ├── 飞越疯人院 .json │ │ ├── 饮食男女 .json │ │ ├── 香水 .json │ │ ├── 驯龙高手 .json │ │ ├── 鬼子来了 .json │ │ ├── 魂断蓝桥 .json │ │ ├── 魔女宅急便 .json │ │ ├── 麦兜故事 .json │ │ ├── 黄金三镖客 .json │ │ ├── 黑天鹅 .json │ │ ├── 黑客帝国3:矩阵革命 .json │ │ ├── 黑客帝国 .json │ │ └── 龙猫 .json │ ├── doubanTop250.json │ └── 豆瓣电影250.xls ├── doubanspider.py └── panspider.py ├── douyu ├── douyu_danmu.py └── douyu_test.py ├── gamedownload ├── fcspider.py └── readme.md ├── ithome ├── apple.json ├── city_count_res.py ├── config.py ├── datahandleer.py ├── pipeline.py └── spider.py ├── mazhifu ├── config.py ├── readme.md ├── requirements.txt └── spider.py ├── p2p ├── .vscode │ └── launch.json ├── data │ ├── 1.json │ ├── 10.json │ ├── 100.json │ ├── 101.json │ ├── 102.json │ ├── 103.json │ ├── 104.json │ ├── 105.json │ ├── 106.json │ ├── 107.json │ ├── 108.json │ ├── 109.json │ ├── 11.json │ ├── 110.json │ ├── 111.json │ ├── 112.json │ ├── 113.json │ ├── 114.json │ ├── 115.json │ ├── 116.json │ ├── 117.json │ ├── 118.json │ ├── 119.json │ ├── 12.json │ ├── 120.json │ ├── 121.json │ ├── 122.json │ ├── 123.json │ ├── 124.json │ ├── 125.json │ ├── 126.json │ ├── 127.json │ ├── 128.json │ ├── 129.json │ ├── 13.json │ ├── 130.json │ ├── 131.json │ ├── 132.json │ ├── 133.json │ ├── 134.json │ ├── 135.json │ ├── 136.json │ ├── 137.json │ ├── 138.json │ ├── 139.json │ ├── 14.json │ ├── 140.json │ ├── 141.json │ ├── 142.json │ ├── 143.json │ ├── 144.json │ ├── 145.json │ ├── 146.json │ ├── 147.json │ ├── 148.json │ ├── 149.json │ ├── 15.json │ ├── 150.json │ ├── 151.json │ ├── 152.json │ ├── 153.json │ ├── 154.json │ ├── 155.json │ ├── 156.json │ ├── 157.json │ ├── 158.json │ ├── 159.json │ ├── 16.json │ ├── 160.json │ ├── 161.json │ ├── 162.json │ ├── 163.json │ ├── 164.json │ ├── 165.json │ ├── 166.json │ ├── 167.json │ ├── 168.json │ ├── 169.json │ ├── 17.json │ ├── 170.json │ ├── 171.json │ ├── 172.json │ ├── 173.json │ ├── 174.json │ ├── 175.json │ ├── 176.json │ ├── 177.json │ ├── 178.json │ ├── 179.json │ ├── 18.json │ ├── 180.json │ ├── 181.json │ ├── 182.json │ ├── 183.json │ ├── 184.json │ ├── 185.json │ ├── 186.json │ ├── 187.json │ ├── 188.json │ ├── 189.json │ ├── 19.json │ ├── 190.json │ ├── 191.json │ ├── 192.json │ ├── 193.json │ ├── 194.json │ ├── 195.json │ ├── 196.json │ ├── 197.json │ ├── 198.json │ ├── 199.json │ ├── 2.json │ ├── 20.json │ ├── 200.json │ ├── 201.json │ ├── 202.json │ ├── 203.json │ ├── 204.json │ ├── 205.json │ ├── 206.json │ ├── 207.json │ 
├── 208.json │ ├── 209.json │ ├── 21.json │ ├── 210.json │ ├── 211.json │ ├── 212.json │ ├── 213.json │ ├── 214.json │ ├── 215.json │ ├── 216.json │ ├── 217.json │ ├── 218.json │ ├── 219.json │ ├── 22.json │ ├── 220.json │ ├── 221.json │ ├── 222.json │ ├── 23.json │ ├── 24.json │ ├── 25.json │ ├── 26.json │ ├── 27.json │ ├── 28.json │ ├── 29.json │ ├── 3.json │ ├── 30.json │ ├── 31.json │ ├── 32.json │ ├── 33.json │ ├── 34.json │ ├── 35.json │ ├── 36.json │ ├── 37.json │ ├── 38.json │ ├── 39.json │ ├── 4.json │ ├── 40.json │ ├── 41.json │ ├── 42.json │ ├── 43.json │ ├── 44.json │ ├── 45.json │ ├── 46.json │ ├── 47.json │ ├── 48.json │ ├── 49.json │ ├── 5.json │ ├── 50.json │ ├── 51.json │ ├── 52.json │ ├── 53.json │ ├── 54.json │ ├── 55.json │ ├── 56.json │ ├── 57.json │ ├── 58.json │ ├── 59.json │ ├── 6.json │ ├── 60.json │ ├── 61.json │ ├── 62.json │ ├── 63.json │ ├── 64.json │ ├── 65.json │ ├── 66.json │ ├── 67.json │ ├── 68.json │ ├── 69.json │ ├── 7.json │ ├── 70.json │ ├── 71.json │ ├── 72.json │ ├── 73.json │ ├── 74.json │ ├── 75.json │ ├── 76.json │ ├── 77.json │ ├── 78.json │ ├── 79.json │ ├── 8.json │ ├── 80.json │ ├── 81.json │ ├── 82.json │ ├── 83.json │ ├── 84.json │ ├── 85.json │ ├── 86.json │ ├── 87.json │ ├── 88.json │ ├── 89.json │ ├── 9.json │ ├── 90.json │ ├── 91.json │ ├── 92.json │ ├── 93.json │ ├── 94.json │ ├── 95.json │ ├── 96.json │ ├── 97.json │ ├── 98.json │ └── 99.json ├── json │ ├── 1.json │ ├── 10.json │ ├── 100.json │ ├── 101.json │ ├── 102.json │ ├── 103.json │ ├── 104.json │ ├── 105.json │ ├── 106.json │ ├── 107.json │ ├── 108.json │ ├── 109.json │ ├── 11.json │ ├── 110.json │ ├── 111.json │ ├── 112.json │ ├── 113.json │ ├── 114.json │ ├── 115.json │ ├── 116.json │ ├── 117.json │ ├── 118.json │ ├── 119.json │ ├── 12.json │ ├── 120.json │ ├── 121.json │ ├── 122.json │ ├── 123.json │ ├── 124.json │ ├── 125.json │ ├── 126.json │ ├── 127.json │ ├── 128.json │ ├── 129.json │ ├── 13.json │ ├── 130.json │ ├── 131.json │ ├── 132.json │ ├── 133.json │ ├── 134.json │ ├── 135.json │ ├── 136.json │ ├── 137.json │ ├── 138.json │ ├── 139.json │ ├── 14.json │ ├── 140.json │ ├── 141.json │ ├── 142.json │ ├── 143.json │ ├── 144.json │ ├── 145.json │ ├── 146.json │ ├── 147.json │ ├── 148.json │ ├── 149.json │ ├── 15.json │ ├── 150.json │ ├── 151.json │ ├── 152.json │ ├── 153.json │ ├── 154.json │ ├── 155.json │ ├── 156.json │ ├── 157.json │ ├── 158.json │ ├── 159.json │ ├── 16.json │ ├── 160.json │ ├── 161.json │ ├── 162.json │ ├── 163.json │ ├── 164.json │ ├── 165.json │ ├── 166.json │ ├── 167.json │ ├── 168.json │ ├── 169.json │ ├── 17.json │ ├── 170.json │ ├── 171.json │ ├── 172.json │ ├── 173.json │ ├── 174.json │ ├── 175.json │ ├── 176.json │ ├── 177.json │ ├── 178.json │ ├── 179.json │ ├── 18.json │ ├── 180.json │ ├── 181.json │ ├── 182.json │ ├── 183.json │ ├── 184.json │ ├── 185.json │ ├── 186.json │ ├── 187.json │ ├── 188.json │ ├── 189.json │ ├── 19.json │ ├── 190.json │ ├── 191.json │ ├── 192.json │ ├── 193.json │ ├── 194.json │ ├── 195.json │ ├── 196.json │ ├── 197.json │ ├── 198.json │ ├── 199.json │ ├── 2.json │ ├── 20.json │ ├── 200.json │ ├── 201.json │ ├── 202.json │ ├── 203.json │ ├── 204.json │ ├── 205.json │ ├── 206.json │ ├── 207.json │ ├── 208.json │ ├── 209.json │ ├── 21.json │ ├── 210.json │ ├── 211.json │ ├── 212.json │ ├── 213.json │ ├── 214.json │ ├── 215.json │ ├── 216.json │ ├── 217.json │ ├── 218.json │ ├── 219.json │ ├── 22.json │ ├── 220.json │ ├── 221.json │ ├── 222.json │ ├── 23.json │ ├── 24.json │ ├── 25.json │ ├── 26.json │ ├── 27.json │ 
├── 28.json │ ├── 29.json │ ├── 3.json │ ├── 30.json │ ├── 31.json │ ├── 32.json │ ├── 33.json │ ├── 34.json │ ├── 35.json │ ├── 36.json │ ├── 37.json │ ├── 38.json │ ├── 39.json │ ├── 4.json │ ├── 40.json │ ├── 41.json │ ├── 42.json │ ├── 43.json │ ├── 44.json │ ├── 45.json │ ├── 46.json │ ├── 47.json │ ├── 48.json │ ├── 49.json │ ├── 5.json │ ├── 50.json │ ├── 51.json │ ├── 52.json │ ├── 53.json │ ├── 54.json │ ├── 55.json │ ├── 56.json │ ├── 57.json │ ├── 58.json │ ├── 59.json │ ├── 6.json │ ├── 60.json │ ├── 61.json │ ├── 62.json │ ├── 63.json │ ├── 64.json │ ├── 65.json │ ├── 66.json │ ├── 67.json │ ├── 68.json │ ├── 69.json │ ├── 7.json │ ├── 70.json │ ├── 71.json │ ├── 72.json │ ├── 73.json │ ├── 74.json │ ├── 75.json │ ├── 76.json │ ├── 77.json │ ├── 78.json │ ├── 79.json │ ├── 8.json │ ├── 80.json │ ├── 81.json │ ├── 82.json │ ├── 83.json │ ├── 84.json │ ├── 85.json │ ├── 86.json │ ├── 87.json │ ├── 88.json │ ├── 89.json │ ├── 9.json │ ├── 90.json │ ├── 91.json │ ├── 92.json │ ├── 93.json │ ├── 94.json │ ├── 95.json │ ├── 96.json │ ├── 97.json │ ├── 98.json │ └── 99.json └── p2p_spider.py ├── requestes基本使用 ├── 001.py ├── 002.py ├── login.py └── test.py ├── sougou ├── configs.py ├── jiebao.py ├── spider │ ├── log_SougouDownloader.log.20171118 │ ├── log_SougouSpider.log.20171118 │ └── spider.py ├── store_new │ ├── __init__.py │ └── stroe.py └── utils │ ├── __init__.py │ └── tools.py ├── toapi-91baby ├── .gitignore ├── app.py ├── data.sqlite ├── items │ ├── __init__.py │ ├── book.py │ ├── hotbook.py │ └── search.py ├── settings.py ├── test.py └── wsgi.py ├── wenjuanxin ├── configs.py └── spider.py ├── zhihu └── zhihu_easy │ ├── __init__.py │ ├── client.py │ ├── configs.py │ ├── db_tools.py │ ├── parse.py │ ├── playdata.py │ ├── spider.py │ └── tools.py ├── 悦美整形 └── spider.py ├── 浏览器模拟爬虫 ├── 001.py ├── baidu.py ├── kuaiproxy.py ├── manhua.py └── manhua_mutiprocessing.py ├── 美食杰 └── spider.py ├── 腾讯漫画 ├── 632784.json ├── downloder.py ├── one.json └── spider.py └── 豆瓣影评 └── 锤神3 ├── config.py ├── play_data.py ├── spider.py └── stroe.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | # mac 92 | .DS_Store 93 | 94 | # vs code 95 | settings.json 96 | -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/001.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 通过一些小例子来了解bs4库的基本使用, 3 | 本篇为lxml解析器的使用 4 | 解析的html为当前目录下的demo.html 5 | ''' 6 | import bs4 7 | 8 | 9 | #首先我们先将html文件已lxml的方式做成一锅汤 10 | soup = bs4.BeautifulSoup(open('Beautiful Soup 爬虫/demo.html'),'lxml') 11 | 12 | #我们把结果输出一下,是一个很清晰的树形结构。 13 | #print(soup.prettify()) 14 | 15 | ''' 16 | OUT: 17 | 18 | 19 | 20 | 21 | The Dormouse's story 22 | 23 | 24 | 25 |

26 | <b> 27 | The Dormouse's story 28 | </b> 29 | </p> 30 | <p class="story"> 31 | Once upon a time there were three little sisters; and their names were 32 | <a class="sister" href="http://example.com/elsie" id="link1"> 33 | Elsie 34 | </a> 35 | , 36 | <a class="sister" href="http://example.com/lacie" id="link2"> 37 | Lacie 38 | </a> 39 | and 40 | <a class="sister" href="http://example.com/tillie" id="link3"> 41 | Tillie 42 | </a> 43 | ; 44 | and they lived at the bottom of a well. 45 | </p> 46 | <p class="story"> 47 | ... 48 | </p>

49 | 50 | 51 | ''' -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/002.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 简单的bs4库的使用 3 | ''' 4 | 5 | import bs4 6 | 7 | 8 | #首先我们先将html文件已lxml的方式做成一锅汤 9 | soup = bs4.BeautifulSoup(open('Beautiful Soup 爬虫/demo.html'),'lxml') 10 | 11 | #找到head标签里的内容 12 | #print (soup.head) 13 | 14 | #找到所有的text内容 15 | #print (soup.text) 16 | 17 | #找到第一个a标签 18 | #print (soup.a) 19 | 20 | #找到所有a标签 21 | #print (soup.find_all('a')) 22 | 23 | #找到a标签下的所有子节点,一列表方式返回 24 | #print(soup.a.contents) 25 | 26 | #通过.children生成器,我们可以循环迭代出每一个子节点 27 | #tag = soup.body 28 | #for s in tag.children: 29 | # print(s) 30 | 31 | #通过迭代找到所的string 32 | for i in soup.strings: 33 | print(i) -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/003.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 通过Python来学习re库 --- 正则表达式相关内容 3 | 4 | ''' 5 | 6 | import re 7 | 8 | test = 'python is the best language , pretty good !' 9 | 10 | p = re.findall('p+',test) 11 | 12 | print(p) 13 | 14 | 15 | ''' 16 | re.search(pattern, string, flags=0) 17 | 在一个字符串中搜索匹配正则表达式的第一个位置 18 | 返回match对象 19 | ∙ pattern : 正则表达式的字符串或原生字符串表示 20 | ∙ string : 待匹配字符串 21 | ∙ flags : 正则表达式使用时的控制标记 22 | ''' 23 | 24 | str1 = 'hello , world ,life is short ,use Python .WHAT? ' 25 | 26 | a = re.search(r'\w+',str1) 27 | print(a.group()) # hello 28 | 29 | b = re.search(r'w.+D',str1,re.I) 30 | print(b.group()) 31 | 32 | 33 | ''' 34 | re.findall(pattern, string, flags=0) 35 | 搜索字符串,以列表类型返回全部能匹配的子串 36 | ∙ pattern : 正则表达式的字符串或原生字符串表示 37 | ∙ string : 待匹配字符串 38 | ∙ flags : 正则表达式使用时的控制标记 39 | ''' 40 | 41 | c = re.findall(r'\w+',str1) 42 | print (c) 43 | #['hello', 'world', 'life', 'is', 'short', 'use', 'Python', 'WHAT'] 44 | 45 | str2 = 'hssso' 46 | re1 = re.compile(r'h.{3}o') 47 | print(re1.findall(str1)) 48 | print(re1.findall(str2)) 49 | # ['hello'] 50 | #['hssso'] 51 | 52 | ''' 53 | match 对象的属性 54 | 55 | .string : 待匹配的文本 56 | .re : 匹配时使用的patter对象(正则表达式) 57 | .pos : 正则表达式搜索文本的开始位置 58 | .endpos : 正则表达式搜索文本的结束位置 59 | ''' 60 | d = re.search(r'e.+d',str1) 61 | print(d.group()) # ello , world 62 | print (d.string) # hello , world ,life is short ,use Python .WHAT? 
63 | print (d.re) # re.compile('e.+d') 64 | print (d.pos) # 0 65 | print (d.endpos) # 48 66 | -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/baidutieba.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 抓取百度贴吧---生活大爆炸吧的基本内容 3 | 爬虫线路: requests - bs4 4 | Python版本: 3.6 5 | OS: mac os 12.12.4 6 | ''' 7 | 8 | import requests 9 | import time 10 | from bs4 import BeautifulSoup 11 | 12 | # 首先我们写好抓取网页的函数 13 | 14 | 15 | def get_html(url): 16 | try: 17 | r = requests.get(url, timeout=30) 18 | r.raise_for_status() 19 | # 这里我们知道百度贴吧的编码是utf-8,所以手动设置的。爬去其他的页面时建议使用: 20 | # r.encoding = r.apparent_encoding 21 | r.encoding = 'utf-8' 22 | return r.text 23 | except: 24 | return " ERROR " 25 | 26 | 27 | def get_content(url): 28 | ''' 29 | 分析贴吧的网页文件,整理信息,保存在列表变量中 30 | ''' 31 | 32 | # 初始化一个列表来保存所有的帖子信息: 33 | comments = [] 34 | # 首先,我们把需要爬取信息的网页下载到本地 35 | html = get_html(url) 36 | 37 | # 我们来做一锅汤 38 | soup = BeautifulSoup(html, 'lxml') 39 | 40 | # 按照之前的分析,我们找到所有具有‘ j_thread_list clearfix’属性的li标签。返回一个列表类型。 41 | liTags = soup.find_all('li', attrs={'class': ' j_thread_list clearfix'}) 42 | 43 | # 通过循环找到每个帖子里的我们需要的信息: 44 | for li in liTags: 45 | # 初始化一个字典来存储文章信息 46 | comment = {} 47 | # 这里使用一个try except 防止爬虫找不到信息从而停止运行 48 | try: 49 | # 开始筛选信息,并保存到字典中 50 | comment['title'] = li.find( 51 | 'a', attrs={'class': 'j_th_tit '}).text.strip() 52 | comment['last_reply_data'] = li.find('span',attrs={'class':'threadlist_reply_date pull_right j_reply_data'}).text.strip() 53 | comments.append(comment) 54 | except: 55 | print('出了点小问题') 56 | 57 | return comments 58 | 59 | 60 | def Out2File(dict): 61 | ''' 62 | 将爬取到的文件写入到本地 63 | 保存到当前目录的 TTBT.txt文件中。 64 | 65 | ''' 66 | with open('TTBT.txt', 'a+') as f: 67 | for comment in dict: 68 | f.write('标题: {} \t 链接:{} \t 发帖人:{} \t 发帖时间:{} \t 回复数量: {} \n'.format( 69 | comment['title'], comment['last_reply_data'])) 70 | 71 | print('当前页面爬取完成') 72 | 73 | 74 | def main(base_url, deep): 75 | url_list = [] 76 | # 将所有需要爬去的url存入列表 77 | for i in range(0, deep): 78 | url_list.append(base_url + '&pn=' + str(50 * i)) 79 | print('所有的网页已经下载到本地! 开始筛选信息。。。。') 80 | 81 | # 循环写入所有的数据 82 | for url in url_list: 83 | content = get_content(url) 84 | Out2File(content) 85 | print('所有的信息都已经保存完毕!') 86 | 87 | 88 | base_url = 'http://tieba.baidu.com/f?kw=%E7%94%9F%E6%B4%BB%E5%A4%A7%E7%88%86%E7%82%B8&ie=utf-8' 89 | # 设置需要爬取的页码数量 90 | deep = 1 91 | 92 | if __name__ == '__main__': 93 | main(base_url, deep) 94 | -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/bocai.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 爬取Dota菠菜结果信息 3 | 使用 requests --- bs4 线路 4 | Python版本: 3.6 5 | OS: mac os 12.12.4 6 | ''' 7 | 8 | import requests 9 | import bs4 10 | 11 | def get_html(url): 12 | try: 13 | r = requests.get(url, timeout=30) 14 | r.raise_for_status() 15 | r.encoding = r.apparent_encoding 16 | return r.text 17 | except: 18 | return " ERROR " 19 | 20 | def print_result(url): 21 | ''' 22 | 查询比赛结果,并格式化输出! 
23 | ''' 24 | html = get_html(url) 25 | soup = bs4.BeautifulSoup(html,'lxml') 26 | match_list = soup.find_all('div', attrs={'class': 'matchmain bisai_qukuai'}) 27 | for match in match_list: 28 | time = match.find('div', attrs={'class': 'whenm'}).text.strip() 29 | teamname = match.find_all('span', attrs={'class': 'team_name'}) 30 | 31 | 32 | #由于网站的构造问题,队名有的时候会不显示,所以我们需要过滤掉一些注释,方法如下: 33 | if teamname[0].string[0:3] == 'php': 34 | team1_name = "暂无队名" 35 | else: 36 | team1_name = teamname[0].string 37 | 38 | # 这里我们采用了css选择器:比原来的属性选择更加方便 39 | team1_support_level = match.find('span', class_='team_number_green').string 40 | 41 | team2_name = teamname[1].string 42 | team2_support_level = match.find('span', class_='team_number_red').string 43 | 44 | print('比赛时间:{},\n 队伍一:{} 胜率 {}\n 队伍二:{} 胜率 {} \n'.format(time,team1_name,team1_support_level,team2_name,team2_support_level)) 45 | 46 | 47 | 48 | def main(): 49 | url= 'http://dota2bocai.com/match' 50 | print_result(url) 51 | 52 | if __name__ == '__main__': 53 | main() -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/demo.html: -------------------------------------------------------------------------------- 1 | 2 | The Dormouse's story 3 | 4 |

<p class="title"><b>The Dormouse's story</b></p> 5 | 6 | <p class="story">Once upon a time there were three little sisters; and their names were 7 | <a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>, 8 | <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and 9 | <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>; 10 | and they lived at the bottom of a well.</p> 11 | 12 | <p class="story">...</p>

13 | -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/dianying.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 爬取最新电影排行榜单 3 | url:http://dianying.2345.com/top/ 4 | 使用 requests --- bs4 线路 5 | Python版本: 3.6 6 | OS: mac os 12.12.4 7 | ''' 8 | 9 | import requests 10 | import bs4 11 | 12 | 13 | def get_html(url): 14 | try: 15 | r = requests.get(url, timeout=30) 16 | r.raise_for_status 17 | # 该网站采用gbk编码! 18 | r.encoding = 'gbk' 19 | return r.text 20 | except: 21 | return "someting wrong" 22 | 23 | 24 | def get_content(url): 25 | html = get_html(url) 26 | soup = bs4.BeautifulSoup(html, 'lxml') 27 | 28 | # 找到电影排行榜的ul列表 29 | movies_list = soup.find('ul', class_='picList clearfix') 30 | movies = movies_list.find_all('li') 31 | 32 | for top in movies: 33 | #找到图片连接, 34 | img_url=top.find('img')['src'] 35 | 36 | 37 | name = top.find('span',class_='sTit').a.text 38 | #这里做一个异常捕获,防止没有上映时间的出现 39 | try: 40 | time = top.find('span',class_='sIntro').text 41 | except: 42 | time = "暂无上映时间" 43 | 44 | #这里用bs4库迭代找出“pACtor”的所有子孙节点,即每一位演员解决了名字分割的问题 45 | actors = top.find('p',class_='pActor') 46 | actor= '' 47 | for act in actors.contents: 48 | actor = actor + act.string +' ' 49 | #找到影片简介 50 | intro = top.find('p',class_='pTxt pIntroShow').text 51 | 52 | print("片名:{}\t{}\n{}\n{} \n \n ".format(name,time,actor,intro) ) 53 | 54 | #我们来吧图片下载下来: 55 | with open('/Users/ehco/Desktop/img/'+name+'.png','wb+') as f: 56 | f.write(requests.get(img_url).content) 57 | 58 | 59 | def main(): 60 | url = 'http://dianying.2345.com/top/' 61 | get_content(url) 62 | 63 | if __name__ == "__main__": 64 | main() -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/kouhong.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Author Ehco1996 3 | Time 2017-11-10 4 | 5 | 如何暗示男朋友给自己买火红 6 | ''' 7 | 8 | from bs4 import BeautifulSoup 9 | import requests 10 | import os 11 | 12 | 13 | def get_html_text(url): 14 | try: 15 | r = requests.get(url, timeout=10) 16 | r.raise_for_status 17 | return r.text 18 | except: 19 | return -1 20 | 21 | 22 | def parse_img(html): 23 | data = [] 24 | soup = BeautifulSoup(html, 'lxml') 25 | img_list = soup.find_all('img') 26 | for img in img_list: 27 | data.append({ 28 | 'src': img['src'], 29 | 'name': img['alt'].replace(' ', '').replace('/', '') 30 | }) 31 | return data 32 | 33 | 34 | def get_img_response(url): 35 | try: 36 | r = requests.get(url, stream=True) 37 | r.raise_for_status 38 | return r.content 39 | except: 40 | return -1 41 | 42 | 43 | def download_img(data): 44 | curr_dir = os.path.dirname(os.path.realpath(__file__)) + '/img/' 45 | if not os.path.exists('img'): 46 | os.mkdir('img') 47 | for img in data: 48 | path = os.path.join(curr_dir, img['name'] + '.jpg') 49 | with open(path, 'wb') as f: 50 | f.write(get_img_response(img['src'])) 51 | 52 | 53 | def main(): 54 | html = get_html_text( 55 | 'https://www.1688.com/pic/-.html?spm=a261b.8768355.searchbar.5.oUjRZK&keywords=%BF%DA%BA%EC') 56 | if html != -1: 57 | img_data = parse_img(html) 58 | download_img(img_data) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/parse_station.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | 4 | #关闭https证书验证警告 5 | requests.packages.urllib3.disable_warnings() 6 
| 7 | url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9018' 8 | r = requests.get(url,verify=False) 9 | pattern = u'([\u4e00-\u9fa5]+)\|([A-Z]+)' 10 | result = re.findall(pattern,r.text) 11 | station = dict(result) 12 | 13 | print(station) -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/qiubai.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | 4 | 5 | def get_html_text(url): 6 | try: 7 | r = requests.get(url, timeout=3) 8 | r.raise_for_status() 9 | r.encoding = r.apparent_encoding 10 | return r.text 11 | except: 12 | return 'something wrong' 13 | 14 | 15 | def get_jokes(url): 16 | ''' 17 | 返回当前url页面的糗百的 18 | 段子作者,主体,热评 19 | 返回类型:列表 20 | ''' 21 | joke_list = [] 22 | 23 | html = get_html_text(url) 24 | soup = BeautifulSoup(html, 'lxml') 25 | 26 | articles = soup.find_all('div', class_='article block untagged mb15') 27 | 28 | for article in articles: 29 | body = article.find('span').text 30 | author = article.find('img')['alt'] 31 | try: 32 | comment = article.find( 33 | 'div', class_='main-text').contents[0].replace('\n', '') 34 | except: 35 | comment = '暂时没有热评' 36 | 37 | joke = '作者:{}\n{}\n\n热评{}'.format(author, body, comment) 38 | joke_list.append(joke) 39 | 40 | return joke_list 41 | 42 | 43 | # test: 44 | 45 | url = 'https://www.qiushibaike.com/8hr' 46 | 47 | a = get_jokes(url) 48 | print(a[1]) 49 | -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/反爬虫.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import random 3 | 4 | 5 | def get_html(url): 6 | try: 7 | r = requests.get(url, timeout=30) 8 | print(r.headers) 9 | r.raise_for_status 10 | r.encoding = r.apparent_encoding 11 | 12 | return r.status_code 13 | except: 14 | return "Someting Wrong!" 
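# 补充示意(假定沿用本文件的函数名):把下面 get_agent() 伪造的 User-Agent
# 和 get_proxy() 随机挑出的代理直接传给 requests 即可生效,例如:
#     r = requests.get(url, headers=get_agent(), proxies=get_proxy(), timeout=30)
# 另外 random.randint(0, len(seq)) 两端都可能取到,偶尔会下标越界,
# 取随机元素时用 random.choice(seq) 会更稳妥。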
15 | 16 | 17 | def get_agent(): 18 | ''' 19 | 模拟header的user-agent字段, 20 | 返回一个随机的user-agent字典类型的键值对 21 | ''' 22 | agents = ['Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;', 23 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1', 24 | 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11', 25 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11', 26 | 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)'] 27 | fakeheader = {} 28 | fakeheader['User-agent'] = agents[random.randint(0, len(agents))] 29 | return fakeheader 30 | 31 | 32 | def get_proxy(): 33 | ''' 34 | 简答模拟代理池 35 | 返回一个字典类型的键值对, 36 | ''' 37 | proxy = ["http://116.211.143.11:80", 38 | "http://183.1.86.235:8118", 39 | "http://183.32.88.244:808", 40 | "http://121.40.42.35:9999", 41 | "http://222.94.148.210:808"] 42 | fakepxs = {} 43 | fakepxs['http'] = proxy[random.randint(0, len(proxy))] 44 | return fakepxs 45 | 46 | 47 | print(get_html('https://zhuanlan.zhihu.com')) -------------------------------------------------------------------------------- /Beautiful Soup 爬虫/沪铜数据.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 获取新浪网 沪铜CUO历史交易记录 3 | 从1999-01-01 到 2017-06-15 4 | 网址:http://vip.stock.finance.sina.com.cn/q/view/vFutures_History.php?page=1&breed=CU0&start=1999-01-01&end=2017-06-15&jys=shfe&pz=CU&hy=CU0&type=inner&name=%A1%E4%A8%AE%26%23182%3B11109 5 | ''' 6 | 7 | import requests 8 | from bs4 import BeautifulSoup 9 | 10 | def get_html_text(url): 11 | try: 12 | r = requests.get(url,timeout=3) 13 | r.raise_for_status() 14 | r.encoding = r.apparent_encoding 15 | return r.text 16 | except: 17 | return 'something wrong' 18 | 19 | 20 | 21 | def get_one_data(url): 22 | data = [] 23 | html = get_html_text(url) 24 | soup = BeautifulSoup(html,'lxml') 25 | days = soup.find('div',class_='historyList') 26 | columns = days.find_all('tr') 27 | 28 | ''' 29 | test area: 30 | info = columns[2].find_all('td') 31 | date = info[0].text 32 | close_price = info[1].text 33 | print(date,close_price) 34 | ''' 35 | 36 | for i in range(2,len(columns)): 37 | try: 38 | info = columns[i].find_all('td') 39 | date = info[0].text 40 | close_price = info[1].text 41 | data.append(date+' : '+close_price) 42 | except: 43 | continue 44 | 45 | return data 46 | 47 | def W2File(data): 48 | with open('cuo_data.txt','a+') as f: 49 | for one in data: 50 | f.write(one+'\n') 51 | 52 | print('数据写入完毕!') 53 | 54 | #url = 'http://vip.stock.finance.sina.com.cn/q/view/vFutures_History.php?page=1&breed=CU0&start=1999-01-01&end=2017-06-15&jys=shfe&pz=CU&hy=CU0&type=inner&name=%A1%E4%A8%AE%26%23182%3B11109' 55 | urls = [] 56 | 57 | for i in range(1,77): 58 | urls.append('http://vip.stock.finance.sina.com.cn/q/view/vFutures_History.php?page='+str(i)+'&breed=CU0&start=1999-01-01&end=2017-06-15&jys=shfe&pz=CU&hy=CU0&type=inner&name=%A1%E4%A8%AE%26%23182%3B11109') 59 | 60 | 61 | for url in urls: 62 | data = get_one_data(url) 63 | W2File(data) -------------------------------------------------------------------------------- /Google-Image/spider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Google Image Spider 3 | 4 | ''' 5 | 6 | import requests 7 | from bs4 import BeautifulSoup 8 | 9 | 10 | SEARCHRUL = 'https://www.google.com/search?&safe=off&q={}&tbm=isch&tbs=itp:photo,isz:l' 11 | 12 | 13 | def get_html_text(url): 14 | '''获取网页的原始text''' 
15 | headers = {} 16 | headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17" 17 | try: 18 | r = requests.get(url, timeout=9, headers=headers) 19 | r.raise_for_status 20 | r.encoding = r.apparent_encoding 21 | return r.text 22 | except: 23 | return 'error' 24 | 25 | 26 | def parse_img_url(q, nums): 27 | ''' 28 | 解析返回搜索图片的原始链接 29 | q : 搜索关键词 30 | nums: 返回的结果数量 最大值20 31 | ''' 32 | links = [] 33 | # 防止越界 34 | if nums > 20 or nums > 0: 35 | num = 20 36 | 37 | url = SEARCHRUL.format(q) 38 | print(url) 39 | html = get_html_text(url) 40 | if html != 'error': 41 | soup = BeautifulSoup(html, 'lxml') 42 | content = soup.find_all('div', class_='rg_meta', limit=nums) 43 | for link in content: 44 | rec = eval(link.text) 45 | links.append(rec['ou']) 46 | return links 47 | else: 48 | return 'error' 49 | 50 | 51 | res = parse_img_url('test', 15) 52 | 53 | for url in res: 54 | print(url) 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-crawler 2 | 3 | > 由于代码是比较早之前写的,抓取的网站目录结构可能有所变动 4 | > 所以部分代码可能不能使用了,欢迎正在学习爬虫的大家给这个项目**提PR** 5 | > 让更多的代码能跑起来~ 6 | 7 | 从零开始系统化的学习写Python爬虫。 8 | 主要是记录一下自己写Python爬虫的经过与心得。 9 | 同时也是为了分享一下如何能更高效率的学习写爬虫。 10 | IDE:Vscode Python版本: 3.6 11 | 12 | * 知乎专栏:https://zhuanlan.zhihu.com/Ehco-python 13 | 14 | 详细学习路径: 15 | ### 一:Beautiful Soup 爬虫 16 | 17 | * requests库的安装与使用 https://zhuanlan.zhihu.com/p/26681429 18 | * 安装beautiful soup 爬虫环境 https://zhuanlan.zhihu.com/p/26683864 19 | * beautiful soup 的解析器 https://zhuanlan.zhihu.com/p/26691931 20 | * re库 正则表达式的使用 https://zhuanlan.zhihu.com/p/26701898 21 | * bs4 爬虫实践: 获取百度贴吧的内容 https://zhuanlan.zhihu.com/p/26722495 22 | * bs4 爬虫实践: 获取双色球中奖信息 https://zhuanlan.zhihu.com/p/26747717 23 | * bs4 爬虫实践: 排行榜小说批量下载 https://zhuanlan.zhihu.com/p/26756909 24 | * bs4 爬虫实践: 获取电影信息 https://zhuanlan.zhihu.com/p/26786056 25 | * bs4 爬虫实践: 悦音台mv排行榜与反爬虫技术 https://zhuanlan.zhihu.com/p/26809626 26 | ------ 27 | ### 二: Scrapy 爬虫框架 28 | 29 | * Scrapy 爬虫框架的安装与基本介绍 https://zhuanlan.zhihu.com/p/26832971 30 | * Scrapy 选择器和基本使用 https://zhuanlan.zhihu.com/p/26854842 31 | * Scrapy 爬虫实践:天气预报&数据存储 https://zhuanlan.zhihu.com/p/26885412 32 | * Scrapy 爬虫实践:代理的爬取和验证 https://zhuanlan.zhihu.com/p/26939527 33 | * Scrapy 爬虫实践:糗事百科&爬虫攻防 https://zhuanlan.zhihu.com/p/26980300 34 | * Scrapy 爬虫实践:重构排行榜小说爬虫&Mysql数据库 https://zhuanlan.zhihu.com/p/27027200 35 | ------ 36 | ### 三: 浏览器模拟爬虫 37 | 38 | * Selenium模拟浏览器 https://zhuanlan.zhihu.com/p/27115580 39 | * 爬虫实践:获取快代理 https://zhuanlan.zhihu.com/p/27150025 40 | * 爬虫实践:漫画批量下载 https://zhuanlan.zhihu.com/p/27155429 41 | 42 | ### 四: 练手项目 43 | 44 | * 爬虫实践:螺纹钢数据&Cookies https://zhuanlan.zhihu.com/p/27232687 45 | * 爬虫实践:登录正方教务系统 https://zhuanlan.zhihu.com/p/27256315 46 | * 爬虫应用: requests+django实现微信公众号后台 https://zhuanlan.zhihu.com/p/27625233 47 | * 爬虫应用: 12306火车票信息查询 https://zhuanlan.zhihu.com/p/27969976 48 | * 爬虫应用: 利用斗鱼Api抓取弹幕 https://zhuanlan.zhihu.com/p/28164017 49 | * 爬虫应用: 获取支付宝账单信息 https://zhuanlan.zhihu.com/p/28537306 50 | * 爬虫应用:IT之家热门段子(评论)爬取 https://zhuanlan.zhihu.com/p/28806210 51 | * 爬虫应用:一号店 商品信息查询程序 https://zhuanlan.zhihu.com/p/28982497 52 | * 爬虫应用:搜狗输入法词库抓取 https://zhuanlan.zhihu.com/p/31186373 53 | * 爬虫应用:复古网盘游戏抓取 https://zhuanlan.zhihu.com/p/32420131 54 | * 爬虫应用:自动填写问卷星 https://zhuanlan.zhihu.com/p/36224375 55 | * 爬虫应用:腾讯漫画下载~ https://zhuanlan.zhihu.com/p/39578774 56 | 57 | 58 | 59 | 
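附:仓库里大多数 bs4 脚本都复用同一个「requests 抓取 + BeautifulSoup 解析」的骨架,下面给出一个最小示意(其中的 URL 只是占位符,实际的选择器需要按目标页面调整):

```python
import requests
from bs4 import BeautifulSoup


def get_html(url):
    # 与仓库各脚本的写法一致:请求失败时返回错误标记而不是抛出异常
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return 'ERROR'


if __name__ == '__main__':
    html = get_html('http://example.com')  # 占位 URL
    if html != 'ERROR':
        soup = BeautifulSoup(html, 'lxml')
        # 只演示最基本的解析:取出页面标题
        print(soup.title.string if soup.title else 'no title')
```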
-------------------------------------------------------------------------------- /Scrapy 爬虫框架/001.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 简单介绍scrapycXPATH选择器的使用 3 | ''' 4 | from scrapy.selector import Selector 5 | 6 | xml = ''' 7 | 8 | 9 | 10 | 王尼玛 11 | 12 | 80 13 | 开车 14 | 15 | 16 | 陈一发 17 | 18 | 28 19 | 开che 20 | 21 | 22 | 狗贼叔叔 23 | 24 | 18 25 | 土豪战 26 | 27 | 28 | 29 | ''' 30 | 31 | a = Selector(text=xml).xpath('/html/body/class[1]').extract() 32 | 33 | print(a) -------------------------------------------------------------------------------- /Scrapy 爬虫框架/biquge/biquge/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/biquge/biquge/__init__.py -------------------------------------------------------------------------------- /Scrapy 爬虫框架/biquge/biquge/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class BiqugeItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | # 小说名字 15 | bookname = scrapy.Field() 16 | #章节名 17 | title = scrapy.Field() 18 | #正文 19 | body = scrapy.Field() 20 | #排序用id 21 | order_id = scrapy.Field() -------------------------------------------------------------------------------- /Scrapy 爬虫框架/biquge/biquge/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class BiqugeSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 
52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/biquge/biquge/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | import pymysql 8 | 9 | 10 | class BiqugePipeline(object): 11 | def process_item(self, item, spider): 12 | ''' 13 | 将爬到的小数写入数据库 14 | ''' 15 | 16 | # 首先从items里取出数据 17 | name = item['bookname'] 18 | order_id = item['order_id'] 19 | body = item['body'] 20 | title = item['title'] 21 | 22 | # 与本地数据库建立联系 23 | # 和本地的scrapyDB数据库建立连接 24 | connection = pymysql.connect( 25 | host='localhost', # 连接的是本地数据库 26 | user='root', # 自己的mysql用户名 27 | passwd='********', # 自己的密码 28 | db='bqgxiaoshuo', # 数据库的名字 29 | charset='utf8mb4', # 默认的编码方式: 30 | cursorclass=pymysql.cursors.DictCursor) 31 | 32 | try: 33 | with connection.cursor() as cursor: 34 | # 数据库表的sql 35 | sql1 = 'Create Table If Not Exists %s(id int,zjm varchar(20),body text)' % name 36 | # 单章小说的写入 37 | sql = 'Insert into %s values (%d ,\'%s\',\'%s\')' % ( 38 | name, order_id, title, body) 39 | cursor.execute(sql1) 40 | cursor.execute(sql) 41 | 42 | # 提交本次插入的记录 43 | connection.commit() 44 | finally: 45 | # 关闭连接 46 | connection.close() 47 | return item 48 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/biquge/biquge/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
5 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/biquge/biquge/spiders/sjzh.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 实现了中文向阿拉伯数字转换 3 | 用于从小说章节名提取id来排序 4 | ''' 5 | 6 | 7 | 8 | chs_arabic_map = {'零': 0, '一': 1, '二': 2, '三': 3, '四': 4, 9 | '五': 5, '六': 6, '七': 7, '八': 8, '九': 9, 10 | '十': 10, '百': 100, '千': 10 ** 3, '万': 10 ** 4, 11 | '〇': 0, '壹': 1, '贰': 2, '叁': 3, '肆': 4, 12 | '伍': 5, '陆': 6, '柒': 7, '捌': 8, '玖': 9, 13 | '拾': 10, '佰': 100, '仟': 10 ** 3, '萬': 10 ** 4, 14 | '亿': 10 ** 8, '億': 10 ** 8, '幺': 1, 15 | '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, 16 | '7': 7, '8': 8, '9': 9} 17 | 18 | num_list = ['1','2','4','5','6','7','8','9','0','一','二','三','四','五','六','七','八','九','十','零','千','百',] 19 | 20 | def get_tit_num(title): 21 | result ='' 22 | for char in title: 23 | if char in num_list: 24 | result+=char 25 | return result 26 | 27 | 28 | def Cn2An(chinese_digits): 29 | 30 | result = 0 31 | tmp = 0 32 | hnd_mln = 0 33 | for count in range(len(chinese_digits)): 34 | curr_char = chinese_digits[count] 35 | curr_digit = chs_arabic_map[curr_char] 36 | # meet 「亿」 or 「億」 37 | if curr_digit == 10 ** 8: 38 | result = result + tmp 39 | result = result * curr_digit 40 | # get result before 「亿」 and store it into hnd_mln 41 | # reset `result` 42 | hnd_mln = hnd_mln * 10 ** 8 + result 43 | result = 0 44 | tmp = 0 45 | # meet 「万」 or 「萬」 46 | elif curr_digit == 10 ** 4: 47 | result = result + tmp 48 | result = result * curr_digit 49 | tmp = 0 50 | # meet 「十」, 「百」, 「千」 or their traditional version 51 | elif curr_digit >= 10: 52 | tmp = 1 if tmp == 0 else tmp 53 | result = result + curr_digit * tmp 54 | tmp = 0 55 | # meet single digit 56 | elif curr_digit is not None: 57 | tmp = tmp * 10 + curr_digit 58 | else: 59 | return result 60 | result = result + tmp 61 | result = result + hnd_mln 62 | return result 63 | 64 | 65 | # test 66 | print (Cn2An(get_tit_num('第一千三百九十一章 你妹妹被我咬了!'))) -------------------------------------------------------------------------------- /Scrapy 爬虫框架/biquge/biquge/spiders/xsphspider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from biquge.items import BiqugeItem 4 | from .sjzh import Cn2An,get_tit_num 5 | 6 | 7 | class XsphspiderSpider(scrapy.Spider): 8 | name = "xsphspider" 9 | allowed_domains = ["qu.la"] 10 | start_urls = ['http://www.qu.la/paihangbang/'] 11 | novel_list = [] 12 | 13 | def parse(self, response): 14 | 15 | # 找到各类小说排行榜名单 16 | books = response.xpath('.//div[@class="index_toplist mright mbottom"]') 17 | 18 | # 找到每一类小说排行榜的每一本小说的下载链接 19 | for book in books: 20 | links = book.xpath('.//div[2]/div[2]/ul/li') 21 | for link in links: 22 | url = 'http://www.qu.la' + \ 23 | link.xpath('.//a/@href').extract()[0] 24 | self.novel_list.append(url) 25 | 26 | # 简单的去重 27 | self.novel_list = list(set(self.novel_list)) 28 | 29 | for novel in self.novel_list: 30 | yield scrapy.Request(novel, callback=self.get_page_url) 31 | 32 | def get_page_url(self, response): 33 | ''' 34 | 找到章节链接 35 | ''' 36 | page_urls = response.xpath('.//dd/a/@href').extract() 37 | 38 | for url in page_urls: 39 | yield scrapy.Request('http://www.qu.la' + url,callback=self.get_text) 40 | 41 | def get_text(self, response): 42 | ''' 43 | 找到每一章小说的标题和正文 44 | 并自动生成id字段,用于表的排序 45 | ''' 46 | item = BiqugeItem() 47 | 48 | # 小说名 49 | item['bookname'] = response.xpath( 50 | './/div[@class="con_top"]/a[2]/text()').extract()[0] 
51 | 52 | # 章节名 ,将title单独找出来,为了提取章节中的数字 53 | title = response.xpath('.//h1/text()').extract()[0] 54 | item['title'] = title 55 | 56 | # 找到用于排序的id值 57 | item['order_id'] = Cn2An(get_tit_num(title)) 58 | 59 | # 正文部分需要特殊处理 60 | body = response.xpath('.//div[@id="content"]/text()').extract() 61 | 62 | # 将抓到的body转换成字符串,接着去掉\t之类的排版符号, 63 | text = ''.join(body).strip().replace('\u3000', '') 64 | 65 | item['body'] = text 66 | 67 | return item 68 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/biquge/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = biquge.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = biquge 12 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/demo.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 王尼玛 5 | 6 | 80 7 | 开车 8 | 9 | 10 | 陈一发 11 | 12 | 28 13 | 开che 14 | 15 | 16 | 狗贼叔叔 17 | 18 | 18 19 | 土豪战 20 | 21 | 22 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/mzitu/mzitu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/mzitu/mzitu/__init__.py -------------------------------------------------------------------------------- /Scrapy 爬虫框架/mzitu/mzitu/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class MzituItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | img_urls = scrapy.Field() 15 | name = scrapy.Field() 16 | 17 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/mzitu/mzitu/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class MzituSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 
35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/mzitu/mzitu/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | import os 8 | import requests 9 | 10 | class MzituPipeline(object): 11 | def process_item(self, item, spider): 12 | 13 | base_dir = '/Users/ehco/Desktop/mezitu/' 14 | # 防止目录不存在! 15 | if not os.path.exists(base_dir+item['name']): 16 | os.makedirs(base_dir+item['name']) 17 | 18 | # 生成图片下载列表: 19 | open(base_dir+item['name']+'/'+item['img_urls'][-6:],'wb').write(requests.get(item['img_urls']).content) 20 | return item 21 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/mzitu/mzitu/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
5 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/mzitu/mzitu/spiders/mezitu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from mzitu.items import MzituItem 4 | 5 | 6 | class MezituSpider(scrapy.Spider): 7 | name = "mezitu" 8 | allowed_domains = ["mzitu.com"] 9 | start_urls = [] 10 | 11 | for i in range(2,144): 12 | start_urls.append('http://www.mzitu.com/page/'+str(i)) 13 | 14 | 15 | 16 | def parse(self, response): 17 | 18 | # 找到首页的每个妹子图的li列表 19 | main = response.xpath('//ul[@id="pins"]/li') 20 | 21 | for li in main: 22 | # 找到每个妹子图包的baseurl 23 | mzurl = li.xpath('./a/@href').extract()[0] 24 | # 找到每个妹子图的名字,传回去做dirname 25 | name = li.xpath('.//img/@alt').extract()[0] 26 | 27 | # 进入套图页面,抓取每一张图 28 | yield scrapy.Request(mzurl,meta={'name':name}, callback=self.get_page_url) 29 | 30 | def get_page_url(self, response): 31 | ''' 32 | 找到套图的最大页码,并且生成每一页的url连接 page_url 33 | ''' 34 | max_num = response.xpath( 35 | '//div[@class="pagenavi"]/a[last()-1]/span/text()').extract()[0] 36 | for i in range(2, int(max_num) +1): 37 | page_url = response.url + '/' + str(i) 38 | # 这是一个生成器,用来回调img_url函数来抓套图的url链接 39 | yield scrapy.Request(page_url, meta={'name': response.meta['name']}, callback=self.get_img_url) 40 | 41 | def get_img_url(self, response): 42 | ''' 43 | 从page_url的response里 44 | 找到图片的下载连接 45 | ''' 46 | item = MzituItem() 47 | item['name'] = response.meta['name'] 48 | 49 | # 找到图片的下载地址,注意有可能一页有两张图 50 | pic = response.xpath('//div[@class="main-image"]//img/@src').extract() 51 | 52 | for url in pic: 53 | item['img_urls'] = url 54 | yield item 55 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/mzitu/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = mzitu.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = mzitu 12 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/proxy/proxy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/proxy/proxy/__init__.py -------------------------------------------------------------------------------- /Scrapy 爬虫框架/proxy/proxy/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class ProxyItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | 15 | #这个爬虫十分简单,我们要ip+端口,所以一个字段就够用了! 
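    # 补充说明:kdlspider 会把 addr 拼成 "ip:端口" 的形式,
    # dxdlspider 则直接存放代理 api 返回的整段文本,
    # 两者最终都交给 pipelines.py 按 spider.name 分别写入本地文件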
16 | addr = scrapy.Field() 17 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/proxy/proxy/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class ProxySpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/proxy/proxy/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class ProxyPipeline(object): 10 | ''' 11 | 这里我们通过对spider name的判断 12 | 来分清楚item是哪一个spider传来的 13 | 从而做出不同的处理方式 14 | ''' 15 | 16 | def process_item(self, item, spider): 17 | if spider.name == 'dxdlspider': 18 | content = item['addr'].split('\r\n') 19 | for line in content: 20 | open('/Users/ehco/Desktop/result/dx_proxy.txt','a').write(line+'\n') 21 | 22 | 23 | elif spider.name=='kdlspider': 24 | #我们直接将传来的addr写入文本 25 | open('/Users/ehco/Desktop/result/kdl_proxy.txt','a').write(item['addr']+'\n') 26 | 27 | return item 28 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/proxy/proxy/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
5 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/proxy/proxy/spiders/dxdlspider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from proxy.items import ProxyItem 4 | 5 | class DxdlspiderSpider(scrapy.Spider): 6 | name = "dxdlspider" 7 | allowed_domains = ["xicidaili.com"] 8 | start_urls = ['http://api.xicidaili.com/free2016.txt'] 9 | 10 | def parse(self, response): 11 | item = ProxyItem() 12 | # Since we call the site's api directly, the GET response is already a plain text body, 13 | # so we simply hand that text to the item and let the pipeline deal with it 14 | item['addr'] = response.text 15 | return item 16 | 17 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/proxy/proxy/spiders/kdlspider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from proxy.items import ProxyItem 4 | 5 | 6 | class KdlspiderSpider(scrapy.Spider): 7 | name = "kdlspider" 8 | allowed_domains = ["kuaidaili.com"] 9 | start_urls = [] 10 | 11 | # Build the list of pages to crawl with a simple loop 12 | # here we crawl pages 1~5 13 | for i in range(1, 6): 14 | start_urls.append('http://www.kuaidaili.com/free/inha/' + str(i) + '/') 15 | 16 | def parse(self, response): 17 | # First instantiate an item 18 | item = ProxyItem() 19 | 20 | # Use XPath to locate each proxy entry 21 | main = response.xpath( 22 | '//table[@class="table table-bordered table-striped"]/tbody/tr') 23 | 24 | for li in main: 25 | # the ip address 26 | ip = li.xpath('td/text()').extract()[0] 27 | # the port: 28 | port = li.xpath('td/text()').extract()[1] 29 | # join the two and hand the result to the item 30 | item['addr'] = ip+':'+port 31 | yield item -------------------------------------------------------------------------------- /Scrapy 爬虫框架/proxy/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = proxy.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = proxy 12 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/test_proxy.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A little script to check whether 3 | the scraped ip proxies actually work! 4 | 5 | Still implemented with the requests library I know best, 6 | plus a bit of multithreading that I am less familiar with 7 | ''' 8 | 9 | import requests 10 | 11 | # import this to get a concurrent version of the map function 12 | from multiprocessing.dummy import Pool as ThreadPool 13 | 14 | # global variables 15 | dir_path = '/Users/ehco/Desktop/result/' 16 | alive_ip = [] 17 | 18 | # make map concurrent: instantiate the pool object 19 | pool = ThreadPool() 20 | # set the number of concurrent workers!
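# note: the ThreadPool() created just above is immediately replaced by the
# ThreadPool(20) on the next line, so only the 20-worker pool is ever used
# and the first instantiation is redundant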
21 | pool = ThreadPool(20) 22 | 23 | 24 | def test_alive(proxy): 25 | ''' 26 | 一个简单的函数, 27 | 来判断通过代理访问百度 28 | 筛选通过的代理保存到alive_ip中 29 | ''' 30 | global alive_ip 31 | proxies = {'http': proxy} 32 | print('正在测试:{}'.format(proxies)) 33 | try: 34 | r = requests.get('http://www.baidu.com', proxies=proxies, timeout=3) 35 | if r.status_code == 200: 36 | print('该代理:{}成功存活'.format(proxy)) 37 | alive_ip.append(proxy) 38 | except: 39 | print('该代理{}失效!'.format(proxies)) 40 | 41 | 42 | def Out_file(alive_ip=[]): 43 | global dir_path 44 | with open(dir_path + 'alive_ip.txt', 'a+') as f: 45 | for ip in alive_ip: 46 | f.write(ip + '\n') 47 | print('所有存活ip都已经写入文件!') 48 | 49 | 50 | def test(filename='blank.txt'): 51 | # 循环处理每行文件 52 | with open(dir_path + filename, 'r') as f: 53 | lines = f.readlines() 54 | # 我们去掉lines每一项后面的\n\r之类的空格 55 | # 生成一个新的列表! 56 | proxys = list(map(lambda x: x.strip(), [y for y in lines])) 57 | 58 | #一行代码解决多线程! 59 | pool.map(test_alive,proxys) 60 | pool.close() 61 | pool.join() 62 | 63 | # 将存活的ip写入文件 64 | Out_file(alive_ip) 65 | 66 | 67 | #调用函数! 68 | test('kdl_proxy.txt') 69 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/weather/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = weather.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = weather 12 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/weather/weather/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/weather/weather/__init__.py -------------------------------------------------------------------------------- /Scrapy 爬虫框架/weather/weather/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class WeatherItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | date = scrapy.Field() 15 | week = scrapy.Field() 16 | img = scrapy.Field() 17 | temperature = scrapy.Field() 18 | weather = scrapy.Field() 19 | wind = scrapy.Field() 20 | 21 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/weather/weather/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class WeatherSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 
19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/weather/weather/spiders/SZtianqi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from weather.items import WeatherItem 4 | 5 | 6 | class SztianqiSpider(scrapy.Spider): 7 | name = "SZtianqi" 8 | # 我们修改一下host,使得Scrapy可以爬取除了苏州之外的天气 9 | allowed_domains = ["tianqi.com"] 10 | 11 | # 建立需要爬取信息的url列表 12 | start_urls = [] 13 | 14 | # 需要爬的城市名称 15 | citys = ['nanjing', 'suzhou', 'shanghai'] 16 | 17 | # 用一个很简答的循环来生成需要爬的链接: 18 | for city in citys: 19 | start_urls.append('http://' + city + '.tianqi.com') 20 | 21 | def parse(self, response): 22 | ''' 23 | 筛选信息的函数: 24 | date = 今日日期 25 | week = 星期几 26 | img = 表示天气的图标 27 | temperature = 当天的温度 28 | weather = 当天的天气 29 | wind = 当天的风向 30 | ''' 31 | 32 | # 先建立一个列表,用来保存每天的信息 33 | items = [] 34 | 35 | # 找到包裹着每天天气信息的div 36 | sixday = response.xpath('//div[@class="tqshow1"]') 37 | 38 | # 循环筛选出每天的信息: 39 | for day in sixday: 40 | # 先申请一个weatheritem 的类型来保存结果 41 | item = WeatherItem() 42 | 43 | # 观察网页,知道h3标签下的不单单是一行str,我们用trick的方式将它连接起来 44 | date = '' 45 | for datetitle in day.xpath('./h3//text()').extract(): 46 | date += datetitle 47 | 48 | item['date'] = date 49 | 50 | item['week'] = day.xpath('./p//text()').extract()[0] 51 | item['img'] = day.xpath( 52 | './ul/li[@class="tqpng"]/img/@src').extract()[0] 53 | tq = day.xpath('./ul/li[2]//text()').extract() 54 | # 我们用第二种取巧的方式,将tq里找到的str连接 55 | item['temperature'] = ''.join(tq) 56 | item['weather'] = day.xpath('./ul/li[3]/text()').extract()[0] 57 | item['wind'] = day.xpath('./ul/li[4]/text()').extract()[0] 58 | items.append(item) 59 | return items 60 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/weather/weather/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
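The SZtianqi spider above returns its WeatherItems to the project's pipeline, and the tree contains weather/data/weather.json, which suggests the items end up serialized in that file. The real weather/pipelines.py is not part of this excerpt, so the snippet below is only an illustrative sketch of one way such a pipeline could write the items out (the class name and output path are assumptions):

# illustrative sketch only -- not the repository's actual weather/pipelines.py
import json
import codecs

class WeatherJsonPipeline(object):
    def open_spider(self, spider):
        # assumed output location, mirroring weather/data/weather.json in the tree
        self.file = codecs.open('data/weather.json', 'a', encoding='utf-8')

    def process_item(self, item, spider):
        # one JSON object per line, keeping the Chinese text readable
        self.file.write(json.dumps(dict(item), ensure_ascii=False) + '\n')
        return item

    def close_spider(self, spider):
        self.file.close()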
5 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = xiubai.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = xiubai 12 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/xiubai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/xiubai/xiubai/__init__.py -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/xiubai/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class XiubaiItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | 15 | author = scrapy.Field() 16 | body = scrapy.Field() 17 | funNum = scrapy.Field() 18 | comNum = scrapy.Field() 19 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/xiubai/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class XiubaiSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 
52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/xiubai/middlewares/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/xiubai/xiubai/middlewares/__init__.py -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/xiubai/middlewares/coustomProxy.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 从本地文件proxy.py中 3 | 读取可以用的代理列表 4 | 并从中随机选择一个代理 5 | 供给spider使用 6 | ''' 7 | 8 | 9 | from xiubai.middlewares.proxy import proxies 10 | import random 11 | 12 | class RandomProxy(object): 13 | def process_request(self,request,spider): 14 | # 从文件中随机选择一个代理 15 | proxy = random.choice(proxies) 16 | 17 | request.meta['proxy'] = 'http://{}'.format(proxy) -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/xiubai/middlewares/coustomUserAgent.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 自定义scrapy框架的 3 | user-agent头 4 | 从一个被良好维护的user-agent列表里 5 | 随机筛选合适的user-agent 6 | 防止封锁 7 | ''' 8 | from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware 9 | 10 | import random 11 | 12 | 13 | 14 | #一个不容易被封锁的user-agent列表 15 | agents = ['Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;', 16 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1', 17 | 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11', 18 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11', 19 | 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)'] 20 | 21 | class RandomUserAgent(UserAgentMiddleware): 22 | def process_request(self,request,spider): 23 | ''' 24 | 定义下载中间件, 25 | 必须要写这个函数, 26 | 这是scrapy数据流转的一个环节 27 | 具体可以看文档: 28 | http://scrapy-chs.readthedocs.io/zh_CN/0.24/topics/downloader-middleware.html 29 | ''' 30 | ua = random.choice(agents) 31 | request.headers.setdefault('User-agent',ua) -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/xiubai/middlewares/proxy.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 通过爬取可用的免费代理, 3 | 进行测试可用度之后 4 | 筛选获得的代理 5 | 6 | 将静态的资源文件写在功能文件之外 7 | 方便及时更新维护。 8 | ''' 9 | 10 | 11 | # 可以自行添加更多代理 12 | 13 | proxies = [ 14 | '89.36.215.72:1189', 15 | '94.177.203.123:1189', 16 | '110.73.11.227:8123', 17 | '180.183.176.189:8080', 18 | '109.62.247.81:8080', 19 | ] 20 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/xiubai/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class XiubaiPipeline(object): 10 | def process_item(self, item, spider): 11 | 12 | with open("/Users/ehco/Desktop/result/qiubai.txt",'a+') as f: 13 | f.write('作者:{} \n{}\n点赞:{}\t评论数:{}\n\n'.format( 14 | item['author'], item["body"], 
item['funNum'], item["comNum"])) 15 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/xiubai/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/xiubai/xiubai/spiders/hotspider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from xiubai.items import XiubaiItem 4 | 5 | class HotspiderSpider(scrapy.Spider): 6 | name = "hotspider" 7 | allowed_domains = ["qiushibaike.com"] 8 | start_urls = [] 9 | # 我们爬取35页的全部热门段子 10 | for i in range(1,3): 11 | start_urls.append('http://www.qiushibaike.com/8hr/page/'+str(i)+'/') 12 | 13 | 14 | def parse(self, response): 15 | item = XiubaiItem() 16 | 17 | # 找到热门段子主体 18 | main = response.xpath('//div[@id="content-left"]/div') 19 | 20 | 21 | for div in main: 22 | #段子作者 23 | item['author'] =div.xpath('.//h2/text()').extract()[0] 24 | #段子主体: 25 | item['body'] = ''.join( div.xpath('a[@class="contentHerf"]/div/span[1]/text()').extract()) 26 | #段子footer 27 | item['funNum']= div.xpath('.//span[@class="stats-vote"]/i/text()').extract()[0] 28 | item['comNum']= div.xpath('.//span[@class="stats-comments"]/a/i/text()').extract()[0] 29 | yield item 30 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/zimuku/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = zimuku.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = zimuku 12 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/zimuku/zimuku/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/zimuku/zimuku/__init__.py -------------------------------------------------------------------------------- /Scrapy 爬虫框架/zimuku/zimuku/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class ZimukuItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | subname = scrapy.Field() #字母的名字 15 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/zimuku/zimuku/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class ZimukuSpiderMiddleware(object): 12 | # Not all methods need to be defined. 
If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/zimuku/zimuku/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class ZimukuPipeline(object): 10 | def process_item(self, item, spider): 11 | 12 | # 只要求简单的话, 13 | # 我们把爬到的结果打印一下吧 14 | print(item) 15 | 16 | return item 17 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/zimuku/zimuku/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
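Further back in this listing, the xiubai project defines two custom downloader middlewares (RandomUserAgent in coustomUserAgent.py and RandomProxy in coustomProxy.py). They only take effect once registered in xiubai/settings.py, which is not included here; the entry below is a hedged sketch of such a registration (the priority numbers are arbitrary placeholders):

# sketch -- xiubai/settings.py is not shown in this listing
DOWNLOADER_MIDDLEWARES = {
    'xiubai.middlewares.coustomUserAgent.RandomUserAgent': 543,
    'xiubai.middlewares.coustomProxy.RandomProxy': 544,
}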
5 | -------------------------------------------------------------------------------- /Scrapy 爬虫框架/zimuku/zimuku/spiders/demo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | 4 | # 将我们需要爬的项目引入进来 5 | from zimuku.items import ZimukuItem 6 | 7 | class DemoSpider(scrapy.Spider): 8 | 9 | #该爬虫的名字 10 | name = "demo" 11 | 12 | #规定爬虫爬取网页的域名 13 | allowed_domains = ["zimuku.net"] 14 | 15 | #开始爬取的url链接 16 | start_urls = ['http://zimuku.net/'] 17 | 18 | def parse(self, response): 19 | ''' 20 | parse()函数接收Response参数,就是网页爬取后返回的数据 21 | 用于处理响应,他负责解析爬取的内容 22 | 生成解析结果的字典,并返回新的需要爬取的请求 23 | ''' 24 | 25 | #由于是demo 我们不做完全的功能, 26 | #只要求爬取出第一个字幕的名字 27 | #xpath规则可以通过查看网页源文件得出 28 | name = response.xpath('//b/text()').extract()[1] 29 | 30 | #建立一个items字典,用于保存我们爬到的结果,并返回给pipline处理 31 | items = {} 32 | items['第一个']= name 33 | 34 | return items 35 | -------------------------------------------------------------------------------- /YHShop/handler.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 一号店商品信息查询 3 | 4 | ''' 5 | 6 | # 导入城市省份资源文件 7 | from citydict import CITY_MAP 8 | 9 | # 导入爬虫程序 10 | from spider import parse_goods_info 11 | import time 12 | 13 | 14 | def main(): 15 | good = input('请输入需要查询的商品:\t') 16 | city = input('请输入查询城市:\t') 17 | provinceId = CITY_MAP[city]['provinceId'] 18 | cityid = CITY_MAP[city]['cityid'] 19 | searc_url = 'http://search.yhd.com/c0-0/k' + good 20 | 21 | print('正在搜索相关商品') 22 | res = parse_goods_info(searc_url, provinceId, cityid) 23 | print('搜索完毕.....正在处理数据') 24 | 25 | for rec in res: 26 | print('型号: {}\t价格: {}\t库存: {}\t地址: {}'.format( 27 | rec['name'], rec['price'], rec['stock'], rec['url'])) 28 | time.sleep(0.5) 29 | 30 | 31 | if __name__ == '__main__': 32 | main() 33 | -------------------------------------------------------------------------------- /YHShop/spider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 爬取一号店商品信息 3 | ''' 4 | 5 | import requests 6 | from bs4 import BeautifulSoup 7 | 8 | 9 | def get_html_text(url): 10 | ''' 11 | 返回网页text 12 | ''' 13 | try: 14 | r = requests.get(url, timeout=30) 15 | r.raise_for_status() 16 | r.encoding = r.apparent_encoding 17 | return r.text 18 | except: 19 | raise ValueError('errors') 20 | 21 | 22 | def parse_good_detail(pmId, provinceId=5, cityid=37): 23 | ''' 24 | 查询指定id商品的库存和价格 25 | 默认查询 江苏省 南京市 的库存 26 | ''' 27 | # 一号点的Ajax服务器请求地址 28 | # 默认使用江苏省为省份信息 29 | url = 'http://gps.yhd.com/restful/detail?mcsite=1&provinceId={}&cityId={}&pmId={}&ruleType=2&businessTagId=16'.format( 30 | provinceId, cityid, pmId) 31 | text = get_html_text(url) 32 | # 对信息进行初步格式化 删掉data无用信息 33 | content = text[text.find('{') + 1:-2] 34 | data_dict = {} 35 | # 将所有的类json数据格式化存入字典 36 | for rec in content.split(','): 37 | data_dict[rec.split(":")[0].replace( 38 | '"', '').replace('"', '')] = rec.split(':')[1] 39 | 40 | # 查找我们想要的信息 41 | price = data_dict['currentPrice'] 42 | stock = data_dict['currentStockNum'] 43 | 44 | return price, stock 45 | 46 | 47 | def parse_goods_info(url,provinceId=5, cityid=37): 48 | ''' 49 | 抓取指定url的所有商品的 50 | 51 | 商品id 52 | 价格 53 | 库存 54 | 链接 55 | 56 | returen: goods_infolist 57 | ''' 58 | 59 | goods_infolist = [] 60 | 61 | html = get_html_text(url) 62 | soup = BeautifulSoup(html, 'lxml') 63 | goods_list = soup.find_all('a', class_='mainTitle') 64 | 65 | for good in goods_list: 66 | url = good['href'][2:] 67 | title = ''.join(good['title'].split(' ')[:3]) # 对标题稍微格式化一下 68 | 
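# the 'pmid' attribute on the product link is the product id that
# parse_good_detail() needs in order to query the price/stock api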
pmId = good['pmid'] 69 | try: 70 | price, stock = parse_good_detail(pmId,provinceId,cityid) 71 | except: 72 | price, stock = '信息错误', '信息错误' 73 | 74 | goods_infolist.append( 75 | {'name': title, 'price': price, 'stock': stock, 'url': url}) 76 | 77 | return goods_infolist 78 | 79 | 80 | ''' 81 | # 一号店自营所有小米手机的商品筛选列表 82 | xiaomi_url = 'http://list.yhd.com/c23586-0-81436/b969871-3923/?tc=3.0.10.3923.3&tp=52.23586.107.0.3.LsvLUR1-10-1FRQ7&ti=G78XlK' 83 | # 测试抓取小米手机的信息 84 | xiaomiPhone = parse_goods_info(xiaomi_url) 85 | # 格式化输出一下 86 | for rec in xiaomiPhone: 87 | print('型号: {}\t价格: {}\t库存: {}\t地址: {}'.format( 88 | rec['name'], rec['price'], rec['stock'], rec['url'])) 89 | ''' 90 | -------------------------------------------------------------------------------- /YHShop/tools.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 解析1号店的所有 3 | 省份 4 | 城市信息 5 | ''' 6 | import os 7 | from bs4 import BeautifulSoup 8 | 9 | # 获取当前运行目录 10 | path = os.path.dirname(os.path.abspath(__file__)) 11 | 12 | 13 | with open(path + '/cityid.html') as f: 14 | html = f.read() 15 | 16 | 17 | def get_cityid_map(html): 18 | ''' 19 | 解析一号店省份、城市id 20 | return 21 | ''' 22 | cityid_map = {} 23 | soup = BeautifulSoup(html, 'lxml') 24 | # 找到所有的a标签 25 | citys = soup.find_all('a') 26 | # 开始解析城市名城市id 省份id 27 | for city in citys: 28 | name = city.text.replace('市','') 29 | provinceId = city['data-provinceid'] 30 | cityid = city['data-cityid'] 31 | cityid_map[name] = {'provinceId': provinceId, 'cityid': cityid, } 32 | 33 | return cityid_map 34 | 35 | 36 | print(get_cityid_map(html)) 37 | -------------------------------------------------------------------------------- /doubanmovie/config.py: -------------------------------------------------------------------------------- 1 | 2 | EHCO_DB = { 3 | 'host': '127.0.0.1', 4 | 'user': 'root', 5 | 'password': 'x', 6 | 'db': 'EhcoTestDb' 7 | } 8 | -------------------------------------------------------------------------------- /doubanmovie/data.py: -------------------------------------------------------------------------------- 1 | 2 | # 导入对数据库操作的封装和配置文件 3 | from stroe import DbToMysql 4 | import config 5 | 6 | # 初始化组件 7 | store = DbToMysql(config.EHCO_DB) 8 | 9 | # 数据查询 10 | res = store.find_by_sort('DoubanTop250', 'ranking', 10, 'ASC') 11 | 12 | for data in res: 13 | print(data['name']) 14 | -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/七宗罪 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2840039918&uk=1718950491", "des": "文件数: 1,分享时间: 2017-05-14T19:03:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2840039918&uk=1718950491", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2363395042&uk=2355940608", "des": "文件数: 1,分享时间: 2016-04-06T14:59:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2363395042&uk=2355940608", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2200895572&uk=2722415184", "des": "文件数: 1,分享时间: 2016-05-01T11:03:46, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2200895572&uk=2722415184", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2889449757&uk=4248409166", "des": "文件数: 1,分享时间: 2016-04-25T13:48:35, 文件大小: 1k", "blink": 
"https://pan.baidu.com/share/link?shareid=2889449757&uk=4248409166", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=3190490267&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-12T11:02:44, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3190490267&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2477902678&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-17T09:03:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2477902678&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=1619783786&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-02T11:34:41, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1619783786&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=1027108874&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T09:44:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1027108874&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2789979607&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-07T10:06:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2789979607&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=710919621&uk=122944454", "des": "文件数: 1,分享时间: 2017-06-19T16:15:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=710919621&uk=122944454", "host": "pan.baidu.com", "more": null}], "count": 341, "q": "七宗罪 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "七宗罪 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/人工智能 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=3231602468&uk=3794903389", "des": "文件数: 1,分享时间: 2016-09-13T16:47:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3231602468&uk=3794903389", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=4015767790&uk=3006233851", "des": "文件数: 1,分享时间: 2016-07-25T15:56:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4015767790&uk=3006233851", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=1380811175&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-23T15:34:28, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1380811175&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=594844002&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-15T10:02:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=594844002&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=928363975&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-26T21:30:39, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=928363975&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=4214111557&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-31T15:15:53, 文件大小: 1k", "blink": 
"https://pan.baidu.com/share/link?shareid=4214111557&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=1587970034&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-01T11:57:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1587970034&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=2801438492&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-04T13:20:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2801438492&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=3104916544&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-16T10:45:42, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3104916544&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=2342410367&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-13T12:15:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2342410367&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 461, "q": "人工智能 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "人工智能 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/加勒比海盗 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "加勒比海盗", "link": "https://pan.baidu.com/pcloud/album/info?uk=2101200196&album_id=8812019080850495109", "des": "专辑内文件数: 4,分享时间: 2017-05-05T11:10:23, 文件大小: 13786050.875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=2101200196&album_id=8812019080850495109", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=2410886285&uk=3464266305", "des": "文件数: 1,分享时间: 2017-05-27T12:40:51, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2410886285&uk=3464266305", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=3878401676&uk=354965675", "des": "文件数: 1,分享时间: 2017-06-03T12:08:35, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3878401676&uk=354965675", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=2255321821&uk=3332511772", "des": "文件数: 1,分享时间: 2017-01-20T01:57:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2255321821&uk=3332511772", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/pcloud/album/info?uk=1582624366&album_id=7511367003517123210", "des": "专辑内文件数: 3,分享时间: 2016-12-19T01:23:44, 文件大小: 26583250.891601562k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=1582624366&album_id=7511367003517123210", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=89008288&uk=3927300193", "des": "文件数: 1,分享时间: 2017-04-09T20:41:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=89008288&uk=3927300193", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/pcloud/album/info?uk=2101200196&album_id=8812019080850495109", "des": "专辑内文件数: 4,分享时间: 2017-05-05T11:10:23, 文件大小: 13786050.875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=2101200196&album_id=8812019080850495109", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": 
"https://pan.baidu.com/share/link?shareid=1893576492&uk=748543823", "des": "文件数: 1,分享时间: 2016-06-12T18:45:01, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1893576492&uk=748543823", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=1781833766&uk=911963550", "des": "文件数: 1,分享时间: 2017-05-23T14:04:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1781833766&uk=911963550", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "加勒比海盗 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 9, "description": "加勒比海盗 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/勇士 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=650091567&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-23T15:52:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=650091567&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=3721420131&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-12T11:24:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3721420131&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=1524622787&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T10:03:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1524622787&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=2521405922&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-06T09:45:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2521405922&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=139518705&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-10T12:45:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=139518705&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=1291684143&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-12T14:54:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1291684143&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=1718468192&uk=609021507", "des": "文件数: 1,分享时间: 2017-05-31T23:34:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1718468192&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=2655359879&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-19T12:03:54, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2655359879&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=27328534&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-23T10:42:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=27328534&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=2165667159&uk=1929670292", "des": "文件数: 1,分享时间: 2017-05-06T10:17:50, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2165667159&uk=1929670292", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "勇士 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, 
"description": "勇士 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/变脸 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "~!变脸", "link": "https://pan.baidu.com/share/link?shareid=2714528440&uk=3697255184", "des": "文件数: 1,分享时间: 2015-07-02T18:58:32, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2714528440&uk=3697255184", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=2455604325&uk=3140914463", "des": "文件数: 1,分享时间: 2015-12-08T20:00:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2455604325&uk=3140914463", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=1088535568&uk=2701181026", "des": "文件数: 1,分享时间: 2016-06-07T19:16:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1088535568&uk=2701181026", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=3162551740&uk=2701181026", "des": "文件数: 1,分享时间: 2016-06-06T10:21:14, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3162551740&uk=2701181026", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=4087228776&uk=187713544", "des": "文件数: 1,分享时间: 2016-11-03T18:49:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4087228776&uk=187713544", "host": "pan.baidu.com", "more": null}, {"title": "~!变脸", "link": "https://pan.baidu.com/share/link?shareid=4019333128&uk=1214698539", "des": "文件数: 1,分享时间: 2015-07-14T09:00:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4019333128&uk=1214698539", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=676196502&uk=2808886271", "des": "文件数: 1,分享时间: 2017-03-23T14:19:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=676196502&uk=2808886271", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=3528799&uk=3993130536", "des": "文件数: 1,分享时间: 2015-10-09T21:32:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3528799&uk=3993130536", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=3930472797&uk=2808886271", "des": "文件数: 1,分享时间: 2017-03-23T14:28:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3930472797&uk=2808886271", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=130247043&uk=2435012951", "des": "文件数: 1,分享时间: 2013-10-07T16:18:56, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=130247043&uk=2435012951", "host": "pan.baidu.com", "more": null}], "count": 289, "q": "变脸 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "变脸 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/可可西里 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=3947424960&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-23T15:26:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3947424960&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": 
"https://pan.baidu.com/share/link?shareid=912308654&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-07T08:47:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=912308654&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=892214618&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-04T13:14:01, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=892214618&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=4150464509&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-14T10:28:30, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4150464509&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=2963532857&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-18T09:46:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2963532857&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=3407281000&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-17T10:48:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3407281000&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=1824952352&uk=487823360", "des": "文件数: 1,分享时间: 2017-02-02T17:24:43, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1824952352&uk=487823360", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=1860749948&uk=487823360", "des": "文件数: 1,分享时间: 2017-05-30T02:38:22, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1860749948&uk=487823360", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=1286241839&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-15T09:54:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1286241839&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=2155337618&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-01T11:50:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2155337618&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 120, "q": "可可西里 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "可可西里 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/寿司之神 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "寿司之神", "link": "https://pan.baidu.com/share/link?shareid=239970418&uk=4051721367", "des": "文件数: 1,分享时间: 2016-02-18T15:46:04, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=239970418&uk=4051721367", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神", "link": "https://pan.baidu.com/share/link?shareid=2324921645&uk=3993130536", "des": "文件数: 1,分享时间: 2015-10-10T15:55:00, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2324921645&uk=3993130536", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神", "link": "https://pan.baidu.com/share/link?shareid=987426513&uk=1247192380", "des": "文件数: 1,分享时间: 2015-11-17T18:50:50, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=987426513&uk=1247192380", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神", "link": 
"https://pan.baidu.com/share/link?shareid=1594929984&uk=574725777", "des": "文件数: 1,分享时间: 2015-11-15T20:48:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1594929984&uk=574725777", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神720p26163723.torrent", "link": "https://pan.baidu.com/share/link?shareid=1360413848&uk=3003776623", "des": "文件数: 1,分享时间: 2013-06-13T00:04:24, 文件大小: 72.677734375k", "blink": "https://pan.baidu.com/share/link?shareid=1360413848&uk=3003776623", "host": "pan.baidu.com", "more": null}, {"title": "236 寿司之神", "link": "https://pan.baidu.com/share/link?shareid=4177675036&uk=1084372931", "des": "文件数: 1,分享时间: 2016-12-24T13:35:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4177675036&uk=1084372931", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神-中文字幕.rmvb", "link": "https://pan.baidu.com/share/link?shareid=3610643957&uk=1325694201", "des": "文件数: 1,分享时间: 2016-08-27T11:55:59, 文件大小: 925566.1962890625k", "blink": "https://pan.baidu.com/share/link?shareid=3610643957&uk=1325694201", "host": "pan.baidu.com", "more": null}, {"title": "04.寿司之神", "link": "https://pan.baidu.com/share/link?shareid=2752181390&uk=1918328979", "des": "文件数: 1,分享时间: 2015-03-28T20:30:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2752181390&uk=1918328979", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神.Jiro.Dreams.Of.Sushi.2011.720p.BluRay.x264.DTS-HDChina.torrent", "link": "https://pan.baidu.com/share/link?shareid=3417882925&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-11T12:54:35, 文件大小: 18.3564453125k", "blink": "https://pan.baidu.com/share/link?shareid=3417882925&uk=1870504545", "host": "pan.baidu.com", "more": null}], "count": 1651, "q": "寿司之神 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 9, "description": "寿司之神 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/心迷宫 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=237923182&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T09:57:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=237923182&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=3174485502&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-23T10:36:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3174485502&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=463359231&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-19T11:58:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=463359231&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=415100132&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-10T23:07:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=415100132&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=3627640785&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-01T12:41:38, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3627640785&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=3109449875&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-23T15:47:03, 文件大小: 1k", "blink": 
"https://pan.baidu.com/share/link?shareid=3109449875&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=2194039023&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-14T10:45:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2194039023&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=3090723172&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-18T10:03:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3090723172&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=4278278301&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-05T09:36:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4278278301&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=3752180819&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-06T09:40:21, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3752180819&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "心迷宫 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "心迷宫 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/恐怖游轮 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=3463913065&uk=2373288888", "des": "文件数: 1,分享时间: 2017-06-04T16:36:08, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3463913065&uk=2373288888", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=1347716781&uk=122944454", "des": "文件数: 1,分享时间: 2017-06-19T16:17:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1347716781&uk=122944454", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=172941232&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-12T10:56:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=172941232&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=3709245108&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-06T09:23:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3709245108&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=1694647796&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-05T09:19:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1694647796&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=1793017178&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T09:39:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1793017178&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=805690592&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-15T11:04:14, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=805690592&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=4117993944&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-10T10:59:37, 文件大小: 1k", "blink": 
"https://pan.baidu.com/share/link?shareid=4117993944&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=1686553054&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-16T22:59:51, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1686553054&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=3731015201&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-01T12:30:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3731015201&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "恐怖游轮 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "恐怖游轮 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/情书 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=330204841&uk=726451123", "des": "文件数: 1,分享时间: 2016-11-29T09:08:30, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=330204841&uk=726451123", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=4145414954&uk=1963275218", "des": "文件数: 1,分享时间: 2014-01-25T12:10:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4145414954&uk=1963275218", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=4179840654&uk=3243746857", "des": "文件数: 1,分享时间: 2017-02-24T15:41:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4179840654&uk=3243746857", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=2422081593&uk=2005008334", "des": "文件数: 1,分享时间: 2016-10-02T20:23:56, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2422081593&uk=2005008334", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=3112867171&uk=1208239462", "des": "文件数: 1,分享时间: 2016-11-30T09:32:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3112867171&uk=1208239462", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=3997946062&uk=1785947437", "des": "文件数: 1,分享时间: 2017-03-13T22:43:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3997946062&uk=1785947437", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=3975833307&uk=2005008334", "des": "文件数: 1,分享时间: 2016-10-02T20:24:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3975833307&uk=2005008334", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=20405339&uk=3006233851", "des": "文件数: 1,分享时间: 2016-07-26T18:08:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=20405339&uk=3006233851", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=1069563114&uk=3864380585", "des": "文件数: 1,分享时间: 2017-04-02T23:33:10, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1069563114&uk=3864380585", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=2822023406&uk=2208648241", "des": "文件数: 1,分享时间: 2016-09-18T08:31:37, 文件大小: 1k", "blink": 
"https://pan.baidu.com/share/link?shareid=2822023406&uk=2208648241", "host": "pan.baidu.com", "more": null}], "count": 1462, "q": "情书 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "情书 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/教父 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=2072868916&uk=1666264965", "des": "文件数: 1,分享时间: 2015-07-31T13:09:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2072868916&uk=1666264965", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=1534281444&uk=3730622674", "des": "文件数: 1,分享时间: 2017-05-03T21:59:04, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1534281444&uk=3730622674", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=3656944995&uk=797153046", "des": "文件数: 1,分享时间: 2016-01-17T17:46:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3656944995&uk=797153046", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=2685440415&uk=4132777198", "des": "文件数: 1,分享时间: 2016-02-03T12:42:59, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2685440415&uk=4132777198", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=1494660546&uk=4053121570", "des": "文件数: 1,分享时间: 2016-06-29T19:18:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1494660546&uk=4053121570", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=1125479804&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-17T08:56:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1125479804&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=1474475813&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-13T12:07:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1474475813&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=2110798130&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-16T10:37:20, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2110798130&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=3910379162&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-09T14:49:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3910379162&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=2779978727&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-15T11:02:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2779978727&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 904, "q": "教父 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "教父 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/断背山 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=2182085320&uk=1057283767", "des": "文件数: 1,分享时间: 
2017-04-28T21:21:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2182085320&uk=1057283767", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=4217525942&uk=1057283767", "des": "文件数: 1,分享时间: 2016-12-27T01:13:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4217525942&uk=1057283767", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=1532938314&uk=2589464121", "des": "文件数: 1,分享时间: 2016-11-04T14:55:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1532938314&uk=2589464121", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=299369933&uk=1330472352", "des": "文件数: 1,分享时间: 2017-06-07T15:23:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=299369933&uk=1330472352", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=3551503145&uk=1330472352", "des": "文件数: 1,分享时间: 2017-05-31T10:17:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3551503145&uk=1330472352", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=3416026360&uk=277159352", "des": "文件数: 1,分享时间: 2016-03-09T21:48:32, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3416026360&uk=277159352", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=3572919005&uk=1931962571", "des": "文件数: 1,分享时间: 2015-04-25T18:53:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3572919005&uk=1931962571", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=1418069092&uk=960662928", "des": "文件数: 1,分享时间: 2016-09-14T16:31:25, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1418069092&uk=960662928", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=3813826547&uk=3761711380", "des": "文件数: 1,分享时间: 2015-12-28T20:59:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3813826547&uk=3761711380", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=3291408779&uk=507444115", "des": "文件数: 1,分享时间: 2016-11-10T22:53:46, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3291408779&uk=507444115", "host": "pan.baidu.com", "more": null}], "count": 124, "q": "断背山 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "断背山 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/新龙门客栈 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "【QQ群88944035】新龙门客栈.New.Dragon.Inn.Repack.1992.720p.HDTV.x264-HQC.mkv.torrent", "link": "https://pan.baidu.com/share/link?shareid=165540377&uk=4103207533", "des": "文件数: 1,分享时间: 2017-05-26T18:34:31, 文件大小: 11.4072265625k", "blink": "https://pan.baidu.com/share/link?shareid=165540377&uk=4103207533", "host": "pan.baidu.com", "more": null}, {"title": "〖-f-〗《新龙门客栈》New.Dragon.Gate.Inn.1992.DVDRip.x264.AC3.2Audios-CMCT《之善寻正》.torrent", "link": "https://pan.baidu.com/share/link?shareid=1075517915&uk=239727952", "des": "文件数: 1,分享时间: 2016-12-27T21:38:20, 文件大小: 81.982421875k", "blink": "https://pan.baidu.com/share/link?shareid=1075517915&uk=239727952", "host": 
"pan.baidu.com", "more": null}, {"title": "马景涛版新龙门客栈电视剧全集【qq群88944035分享】.torrent", "link": "https://pan.baidu.com/share/link?shareid=4224334131&uk=4103207533", "des": "文件数: 1,分享时间: 2017-05-29T16:07:16, 文件大小: 118.5615234375k", "blink": "https://pan.baidu.com/share/link?shareid=4224334131&uk=4103207533", "host": "pan.baidu.com", "more": null}, {"title": "[新龙门客栈-新龍門客棧][1996][台视][50集全][马景涛][国语繁字][MP4][39.15G].torrent", "link": "https://pan.baidu.com/share/link?shareid=4025766048&uk=297705068", "des": "文件数: 1,分享时间: 2017-06-19T14:15:07, 文件大小: 118.5615234375k", "blink": "https://pan.baidu.com/share/link?shareid=4025766048&uk=297705068", "host": "pan.baidu.com", "more": null}, {"title": "[新龙门客栈-新龍門客棧][1996][台视][50集全][马景涛][国语繁字][MP4][39.15G].torrent", "link": "https://pan.baidu.com/share/link?shareid=3981465773&uk=297705068", "des": "文件数: 1,分享时间: 2017-06-19T14:15:06, 文件大小: 118.5615234375k", "blink": "https://pan.baidu.com/share/link?shareid=3981465773&uk=297705068", "host": "pan.baidu.com", "more": null}], "count": 5, "q": "新龙门客栈 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 5, "description": "新龙门客栈 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/无间道 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "无间道", "link": "https://pan.baidu.com/pcloud/album/info?uk=2557474713&album_id=8927497413943773151", "des": "专辑内文件数: 3,分享时间: 2015-08-11T16:41:19, 文件大小: 7578690.994140625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=2557474713&album_id=8927497413943773151", "host": "pan.baidu.com", "more": null}, {"title": "【电影】无间道", "link": "https://pan.baidu.com/share/link?shareid=3695933531&uk=2819336189", "des": "文件数: 1,分享时间: 2017-05-12T21:27:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3695933531&uk=2819336189", "host": "pan.baidu.com", "more": null}, {"title": "无间道2", "link": "https://pan.baidu.com/share/link?shareid=488937336&uk=122944454", "des": "文件数: 1,分享时间: 2017-06-19T14:50:30, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=488937336&uk=122944454", "host": "pan.baidu.com", "more": null}, {"title": "水浒无间道", "link": "https://pan.baidu.com/pcloud/album/info?uk=360058412&album_id=579701992494865629", "des": "专辑内文件数: 25,分享时间: 2017-04-02T08:31:11, 文件大小: 16731604.72265625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=360058412&album_id=579701992494865629", "host": "pan.baidu.com", "more": null}, {"title": "无间道.Infernal.Affairs.2002.BluRay.720P.x264.DTS-WiKi.torrent", "link": "https://pan.baidu.com/share/link?shareid=7039880&uk=3741861429", "des": "文件数: 1,分享时间: 2016-01-28T18:02:29, 文件大小: 34.189453125k", "blink": "https://pan.baidu.com/share/link?shareid=7039880&uk=3741861429", "host": "pan.baidu.com", "more": null}, {"title": "无间道1.torrent", "link": "https://pan.baidu.com/share/link?shareid=3206135234&uk=2003600126", "des": "文件数: 1,分享时间: 2017-04-05T00:04:39, 文件大小: 41.90625k", "blink": "https://pan.baidu.com/share/link?shareid=3206135234&uk=2003600126", "host": "pan.baidu.com", "more": null}, {"title": "水浒无间道", "link": "https://pan.baidu.com/pcloud/album/info?uk=360058412&album_id=579701992494865629", "des": "专辑内文件数: 25,分享时间: 2017-04-02T08:31:11, 文件大小: 16731604.72265625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=360058412&album_id=579701992494865629", "host": "pan.baidu.com", "more": null}], "count": 80, "q": "无间道 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 7, "description": "无间道 
相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/朗读者 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=842931339&uk=657919052", "des": "文件数: 1,分享时间: 2017-03-11T14:23:46, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=842931339&uk=657919052", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=2331131539&uk=419567710", "des": "文件数: 1,分享时间: 2017-03-28T10:09:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2331131539&uk=419567710", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=1504565040&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-27T08:15:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1504565040&uk=1870504545", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=72240367&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-17T08:12:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=72240367&uk=1870504545", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=2470421484&uk=1870504545", "des": "文件数: 1,分享时间: 2017-04-30T23:15:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2470421484&uk=1870504545", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=2982430298&uk=543647812", "des": "文件数: 1,分享时间: 2017-04-09T22:41:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2982430298&uk=543647812", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=910354290&uk=1266131168", "des": "文件数: 1,分享时间: 2017-03-09T18:46:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=910354290&uk=1266131168", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=353578939&uk=219081398", "des": "文件数: 1,分享时间: 2017-03-20T13:55:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=353578939&uk=219081398", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=3815020798&uk=3079055689", "des": "文件数: 1,分享时间: 2017-05-11T01:59:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3815020798&uk=3079055689", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=147325984&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-20T17:25:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=147325984&uk=1870504545", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "朗读者 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "朗读者 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/杀人回忆 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=3128164472&uk=1118100635", "des": "文件数: 1,分享时间: 2017-06-13T22:06:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3128164472&uk=1118100635", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": 
"https://pan.baidu.com/share/link?shareid=130011583&uk=3406855576", "des": "文件数: 1,分享时间: 2016-04-03T21:08:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=130011583&uk=3406855576", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=2361826737&uk=354761511", "des": "文件数: 1,分享时间: 2016-08-05T12:40:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2361826737&uk=354761511", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=1831872542&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-12T11:07:20, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1831872542&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=472803758&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-22T11:21:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=472803758&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=395582596&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-09T14:59:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=395582596&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=2644871282&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T09:47:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2644871282&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=1741305321&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-05T09:27:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1741305321&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=3366680072&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-02T11:38:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3366680072&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=3184355789&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-25T12:24:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3184355789&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "杀人回忆 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "杀人回忆 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/死亡诗社 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=3727305178&uk=2403808060", "des": "文件数: 1,分享时间: 2017-03-31T12:31:30, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3727305178&uk=2403808060", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=3909670611&uk=1191341100", "des": "文件数: 1,分享时间: 2017-03-01T21:34:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3909670611&uk=1191341100", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=382129627&uk=3730493845", "des": "文件数: 1,分享时间: 2016-01-04T22:25:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=382129627&uk=3730493845", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": 
"https://pan.baidu.com/share/link?shareid=4007659466&uk=3512718891", "des": "文件数: 1,分享时间: 2016-10-11T15:35:00, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4007659466&uk=3512718891", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=3809478652&uk=3123498894", "des": "文件数: 1,分享时间: 2014-09-13T14:54:14, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3809478652&uk=3123498894", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=43246281&uk=956619845", "des": "文件数: 1,分享时间: 2017-06-15T19:26:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=43246281&uk=956619845", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=1052114311&uk=2703225223", "des": "文件数: 1,分享时间: 2017-06-16T11:16:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1052114311&uk=2703225223", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=3996829556&uk=5561471", "des": "文件数: 1,分享时间: 2016-08-23T22:17:04, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3996829556&uk=5561471", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=623714176&uk=187713544", "des": "文件数: 1,分享时间: 2016-11-03T19:19:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=623714176&uk=187713544", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=2678621564&uk=740383300", "des": "文件数: 1,分享时间: 2015-12-02T21:27:32, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2678621564&uk=740383300", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "死亡诗社 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "死亡诗社 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/消失的爱人 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "消失的爱人", "link": "https://pan.baidu.com/share/link?shareid=2363510038&uk=1715898802", "des": "文件数: 1,分享时间: 2016-07-28T21:42:16, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2363510038&uk=1715898802", "host": "pan.baidu.com", "more": null}, {"title": "消失的爱人", "link": "https://pan.baidu.com/share/link?shareid=1659801409&uk=755312366", "des": "文件数: 1,分享时间: 2014-12-12T20:16:38, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1659801409&uk=755312366", "host": "pan.baidu.com", "more": null}, {"title": "消失爱人", "link": "https://pan.baidu.com/share/link?shareid=1858321086&uk=756635525", "des": "文件数: 1,分享时间: 2016-10-13T15:04:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1858321086&uk=756635525", "host": "pan.baidu.com", "more": null}, {"title": "消失的爱人", "link": "https://pan.baidu.com/share/link?shareid=1577451999&uk=2978720897", "des": "文件数: 1,分享时间: 2016-09-16T20:06:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1577451999&uk=2978720897", "host": "pan.baidu.com", "more": null}, {"title": "消失爱人", "link": "https://pan.baidu.com/share/link?shareid=1119638747&uk=327151364", "des": "文件数: 1,分享时间: 2016-08-13T20:15:59, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1119638747&uk=327151364", "host": "pan.baidu.com", "more": null}, {"title": "消失的爱人", "link": 
"https://pan.baidu.com/share/link?shareid=3490537272&uk=1048844575", "des": "文件数: 1,分享时间: 2016-11-10T21:37:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3490537272&uk=1048844575", "host": "pan.baidu.com", "more": null}, {"title": "★《消失的爱人》", "link": "https://pan.baidu.com/share/link?shareid=2727017479&uk=239727952", "des": "文件数: 1,分享时间: 2017-08-18T23:20:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2727017479&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "消失的爱人", "link": "https://pan.baidu.com/share/link?shareid=3817451827&uk=977174988", "des": "文件数: 1,分享时间: 2015-02-16T17:05:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3817451827&uk=977174988", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "消失的爱人 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 8, "description": "消失的爱人 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/熔炉 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "☆《熔炉》", "link": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "des": "专辑内文件数: 1,分享时间: 2017-03-05T21:25:38, 文件大小: 1953371.857421875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "host": "pan.baidu.com", "more": null}, {"title": "☆《熔炉》", "link": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "des": "专辑内文件数: 1,分享时间: 2017-03-05T21:25:38, 文件大小: 1953371.857421875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "host": "pan.baidu.com", "more": null}, {"title": "熔炉", "link": "https://pan.baidu.com/share/link?shareid=1817447799&uk=2537531311", "des": "文件数: 1,分享时间: 2016-07-23T21:13:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1817447799&uk=2537531311", "host": "pan.baidu.com", "more": null}, {"title": "☆《熔炉》", "link": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "des": "专辑内文件数: 1,分享时间: 2017-03-05T21:25:38, 文件大小: 1953371.857421875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "host": "pan.baidu.com", "more": null}, {"title": "☆《熔炉》", "link": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "des": "专辑内文件数: 1,分享时间: 2017-03-05T21:25:38, 文件大小: 1953371.857421875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "host": "pan.baidu.com", "more": null}, {"title": "大熔炉(41集全)", "link": "https://pan.baidu.com/share/link?shareid=1564499095&uk=474715863", "des": "文件数: 1,分享时间: 2016-09-13T14:14:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1564499095&uk=474715863", "host": "pan.baidu.com", "more": null}], "count": 12, "q": "熔炉 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 6, "description": "熔炉 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/狩猎 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=4153909107&uk=1208518303", "des": "文件数: 1,分享时间: 2015-06-14T13:32:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4153909107&uk=1208518303", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": 
"https://pan.baidu.com/share/link?shareid=3192930804&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-24T00:51:51, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3192930804&uk=1870504545", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=2316863542&uk=310496391", "des": "文件数: 1,分享时间: 2016-02-16T09:53:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2316863542&uk=310496391", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=4251650440&uk=732877314", "des": "文件数: 1,分享时间: 2017-03-07T13:05:50, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4251650440&uk=732877314", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=3226211850&uk=1027596147", "des": "文件数: 1,分享时间: 2017-03-15T15:44:10, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3226211850&uk=1027596147", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=929722113&uk=187713544", "des": "文件数: 1,分享时间: 2016-11-06T16:04:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=929722113&uk=187713544", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=3576029197&uk=4034501757", "des": "文件数: 1,分享时间: 2017-03-16T17:39:16, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3576029197&uk=4034501757", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=925880628&uk=591494033", "des": "文件数: 1,分享时间: 2015-11-24T17:42:10, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=925880628&uk=591494033", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=2362771963&uk=2258135478", "des": "文件数: 1,分享时间: 2017-01-21T11:43:22, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2362771963&uk=2258135478", "host": "pan.baidu.com", "more": null}, {"title": "《狩猎》", "link": "https://pan.baidu.com/share/link?shareid=294743764&uk=4281536321", "des": "文件数: 1,分享时间: 2015-06-13T14:53:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=294743764&uk=4281536321", "host": "pan.baidu.com", "more": null}], "count": 500, "q": "狩猎 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "狩猎 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/狮子王 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=39833076&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-11T09:44:56, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=39833076&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=4217001904&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-23T15:38:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4217001904&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=1027350028&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-21T09:09:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1027350028&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": 
"https://pan.baidu.com/share/link?shareid=2617480749&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-26T21:34:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2617480749&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=3342794784&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-07T10:11:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3342794784&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=1857047419&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-05T09:29:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1857047419&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=2013262849&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T09:50:16, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2013262849&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=3346553907&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-08T10:38:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3346553907&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=1666829238&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-08T22:52:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1666829238&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=2209933862&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-05T23:51:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2209933862&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 257, "q": "狮子王 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "狮子王 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/猜火车 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=927156313&uk=1230070530", "des": "文件数: 1,分享时间: 2017-06-14T21:12:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=927156313&uk=1230070530", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=130281073&uk=2303787462", "des": "文件数: 1,分享时间: 2017-06-15T22:10:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=130281073&uk=2303787462", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=4077759104&uk=1785947437", "des": "文件数: 1,分享时间: 2017-04-18T23:59:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4077759104&uk=1785947437", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=2231825035&uk=3927300193", "des": "文件数: 1,分享时间: 2017-05-23T18:28:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2231825035&uk=3927300193", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=1343462445&uk=236406674", "des": "文件数: 1,分享时间: 2017-05-31T20:39:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1343462445&uk=236406674", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": 
"https://pan.baidu.com/share/link?shareid=238080704&uk=3527831162", "des": "文件数: 1,分享时间: 2016-06-17T22:42:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=238080704&uk=3527831162", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=4236323493&uk=3273970976", "des": "文件数: 1,分享时间: 2017-05-22T22:16:41, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4236323493&uk=3273970976", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=2818603821&uk=727778634", "des": "文件数: 1,分享时间: 2017-05-25T14:55:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2818603821&uk=727778634", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=177290547&uk=1434767101", "des": "文件数: 1,分享时间: 2017-06-18T18:52:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=177290547&uk=1434767101", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=3202257989&uk=191691562", "des": "文件数: 1,分享时间: 2017-05-27T21:06:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3202257989&uk=191691562", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "猜火车 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "猜火车 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/甜蜜蜜 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=2154095814&uk=3295145412", "des": "文件数: 1,分享时间: 2015-04-27T02:44:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2154095814&uk=3295145412", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=2887733884&uk=639034247", "des": "文件数: 1,分享时间: 2015-06-03T11:33:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2887733884&uk=639034247", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=1933959369&uk=3123498894", "des": "文件数: 1,分享时间: 2015-02-16T23:47:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1933959369&uk=3123498894", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=2706576419&uk=503420368", "des": "文件数: 1,分享时间: 2017-04-17T09:28:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2706576419&uk=503420368", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=2516488978&uk=3993130536", "des": "文件数: 1,分享时间: 2015-10-08T16:43:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2516488978&uk=3993130536", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=1840956399&uk=4127742961", "des": "文件数: 1,分享时间: 2015-08-07T11:32:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1840956399&uk=4127742961", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=998782074&uk=732877314", "des": "文件数: 1,分享时间: 2017-03-07T13:16:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=998782074&uk=732877314", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": 
"https://pan.baidu.com/share/link?shareid=1375211056&uk=4132288377", "des": "文件数: 1,分享时间: 2015-05-22T20:57:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1375211056&uk=4132288377", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=1007844290&uk=3244143639", "des": "文件数: 1,分享时间: 2015-05-01T22:51:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1007844290&uk=3244143639", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=3908555732&uk=2785527032", "des": "文件数: 1,分享时间: 2014-07-25T10:12:08, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3908555732&uk=2785527032", "host": "pan.baidu.com", "more": null}], "count": 248, "q": "甜蜜蜜 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "甜蜜蜜 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/神偷奶爸 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "神偷奶爸3.jpg", "link": "https://pan.baidu.com/share/link?shareid=2355218483&uk=4113435587", "des": "文件数: 1,分享时间: 2017-06-22T22:26:51, 文件大小: 115.724609375k", "blink": "https://pan.baidu.com/share/link?shareid=2355218483&uk=4113435587", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸3.2017www.crys520.com.TS720P.mp4", "link": "https://pan.baidu.com/share/link?shareid=3313367249&uk=274421171", "des": "文件数: 1,分享时间: 2017-07-07T19:23:43, 文件大小: 410536.732421875k", "blink": "https://pan.baidu.com/share/link?shareid=3313367249&uk=274421171", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸3.2017www.crys520.com.TS720P.mp4", "link": "https://pan.baidu.com/share/link?shareid=3427461371&uk=4135755322", "des": "文件数: 1,分享时间: 2017-07-04T19:17:54, 文件大小: 410536.732421875k", "blink": "https://pan.baidu.com/share/link?shareid=3427461371&uk=4135755322", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸3", "link": "https://pan.baidu.com/share/link?shareid=3983479355&uk=1950209840", "des": "文件数: 1,分享时间: 2017-06-23T23:38:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3983479355&uk=1950209840", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸(1-2)", "link": "https://pan.baidu.com/share/link?shareid=286894091&uk=2703225223", "des": "文件数: 1,分享时间: 2017-07-07T16:19:20, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=286894091&uk=2703225223", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸3", "link": "https://pan.baidu.com/share/link?shareid=680806276&uk=846430438", "des": "文件数: 1,分享时间: 2017-06-22T17:35:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=680806276&uk=846430438", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸3", "link": "https://pan.baidu.com/share/link?shareid=3774972574&uk=1663767989", "des": "文件数: 1,分享时间: 2017-07-08T22:19:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3774972574&uk=1663767989", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸(1-2)", "link": "https://pan.baidu.com/share/link?shareid=1792005916&uk=3777687642", "des": "文件数: 1,分享时间: 2017-07-07T22:14:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1792005916&uk=3777687642", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "神偷奶爸 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 8, "description": "神偷奶爸 相关信息"} 
-------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/禁闭岛 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=3221347862&uk=3798768640", "des": "文件数: 1,分享时间: 2016-01-29T14:54:01, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3221347862&uk=3798768640", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=1754170918&uk=3798768640", "des": "文件数: 1,分享时间: 2015-10-09T14:28:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1754170918&uk=3798768640", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=1314811464&uk=2373288888", "des": "文件数: 1,分享时间: 2017-06-02T07:44:14, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1314811464&uk=2373288888", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=1953135446&uk=1059862740", "des": "文件数: 1,分享时间: 2014-03-04T12:43:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1953135446&uk=1059862740", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=2585195763&uk=2588099636", "des": "文件数: 1,分享时间: 2015-05-21T12:31:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2585195763&uk=2588099636", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=238865317&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-12T10:55:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=238865317&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=435902664&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-16T08:49:56, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=435902664&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=3487113811&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-21T08:57:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3487113811&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=4126350759&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-26T21:21:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4126350759&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=2392946760&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-15T11:03:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2392946760&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "禁闭岛 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "禁闭岛 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/素媛 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=1554614055&uk=1733885966", "des": "文件数: 1,分享时间: 2015-07-26T00:29:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1554614055&uk=1733885966", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": 
"https://pan.baidu.com/share/link?shareid=1867188809&uk=3818945738", "des": "文件数: 1,分享时间: 2016-10-18T22:20:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1867188809&uk=3818945738", "host": "pan.baidu.com", "more": null}, {"title": "素 媛", "link": "https://pan.baidu.com/share/link?shareid=3892511196&uk=2701350792", "des": "文件数: 1,分享时间: 2016-05-04T10:21:08, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3892511196&uk=2701350792", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=412622509&uk=4199993737", "des": "文件数: 1,分享时间: 2017-03-01T22:51:38, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=412622509&uk=4199993737", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=3476917953&uk=1396576493", "des": "文件数: 1,分享时间: 2016-08-16T14:36:08, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3476917953&uk=1396576493", "host": "pan.baidu.com", "more": null}, {"title": "素 媛", "link": "https://pan.baidu.com/share/link?shareid=241545913&uk=410113070", "des": "文件数: 1,分享时间: 2017-01-16T08:23:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=241545913&uk=410113070", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=4233566459&uk=1094277376", "des": "文件数: 1,分享时间: 2016-07-18T10:20:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4233566459&uk=1094277376", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=3546925539&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-24T00:48:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3546925539&uk=1870504545", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=665191032&uk=3647220151", "des": "文件数: 1,分享时间: 2015-12-13T14:20:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=665191032&uk=3647220151", "host": "pan.baidu.com", "more": null}, {"title": "素~媛", "link": "https://pan.baidu.com/share/link?shareid=58279743&uk=1731503722", "des": "文件数: 1,分享时间: 2016-06-09T12:40:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=58279743&uk=1731503722", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "素媛 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "素媛 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/红辣椒 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=1043860283&uk=3277541605", "des": "文件数: 1,分享时间: 2017-05-22T08:25:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1043860283&uk=3277541605", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=455372541&uk=1296098201", "des": "文件数: 1,分享时间: 2016-12-18T17:25:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=455372541&uk=1296098201", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=3052892069&uk=591494033", "des": "文件数: 1,分享时间: 2015-11-26T11:24:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3052892069&uk=591494033", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": 
"https://pan.baidu.com/share/link?shareid=140210138&uk=624035590", "des": "文件数: 1,分享时间: 2016-09-16T18:24:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=140210138&uk=624035590", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=3437813556&uk=727778634", "des": "文件数: 1,分享时间: 2016-12-06T16:00:32, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3437813556&uk=727778634", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=2941670640&uk=2384719941", "des": "文件数: 1,分享时间: 2014-09-11T12:46:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2941670640&uk=2384719941", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=2255786821&uk=1057283767", "des": "文件数: 1,分享时间: 2017-01-05T18:44:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2255786821&uk=1057283767", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=1040234356&uk=1057283767", "des": "文件数: 1,分享时间: 2017-01-05T11:33:41, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1040234356&uk=1057283767", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=3839065162&uk=2303787462", "des": "文件数: 1,分享时间: 2016-12-20T19:07:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3839065162&uk=2303787462", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=845204644&uk=1619475041", "des": "文件数: 1,分享时间: 2017-05-24T08:17:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=845204644&uk=1619475041", "host": "pan.baidu.com", "more": null}], "count": 286, "q": "红辣椒 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "红辣椒 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/荒岛余生 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=1546312720&uk=176335829", "des": "文件数: 1,分享时间: 2016-10-08T17:06:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1546312720&uk=176335829", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=3899080969&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-26T21:17:00, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3899080969&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=142744856&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-04T13:09:28, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=142744856&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=1593612154&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-14T10:23:51, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1593612154&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=1000327473&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-11T18:59:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1000327473&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": 
"https://pan.baidu.com/share/link?shareid=2034049928&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-12T09:11:21, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2034049928&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=95799309&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-08T10:23:12, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=95799309&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=2691056461&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-18T09:41:22, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2691056461&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=1098881695&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-07T22:44:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1098881695&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=3364586732&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-13T18:51:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3364586732&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 810, "q": "荒岛余生 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "荒岛余生 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/蝴蝶 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=1849672524&uk=2200117301", "des": "文件数: 1,分享时间: 2014-12-26T18:51:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1849672524&uk=2200117301", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=3888540241&uk=3819391060", "des": "文件数: 1,分享时间: 2016-12-09T22:09:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3888540241&uk=3819391060", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=114199422&uk=4051721367", "des": "文件数: 1,分享时间: 2016-01-24T18:53:43, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=114199422&uk=4051721367", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=386756287&uk=3993130536", "des": "文件数: 1,分享时间: 2015-10-09T22:36:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=386756287&uk=3993130536", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=943712282&uk=675111558", "des": "文件数: 1,分享时间: 2016-08-29T23:04:38, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=943712282&uk=675111558", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=1093344399&uk=3542931456", "des": "文件数: 1,分享时间: 2015-02-12T22:00:42, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1093344399&uk=3542931456", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=1250591644&uk=1459987268", "des": "文件数: 1,分享时间: 2016-06-30T09:14:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1250591644&uk=1459987268", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": 
"https://pan.baidu.com/share/link?shareid=16763128&uk=3849557806", "des": "文件数: 1,分享时间: 2015-09-06T14:14:01, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=16763128&uk=3849557806", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=2723737423&uk=3157223751", "des": "文件数: 1,分享时间: 2013-11-13T18:22:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2723737423&uk=3157223751", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=4217470321&uk=2938990077", "des": "文件数: 1,分享时间: 2017-03-28T02:09:30, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4217470321&uk=2938990077", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "蝴蝶 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "蝴蝶 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/血钻 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=2605659590&uk=740871509", "des": "文件数: 1,分享时间: 2016-10-05T16:25:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2605659590&uk=740871509", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=3394521028&uk=925544842", "des": "文件数: 1,分享时间: 2016-07-26T17:25:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3394521028&uk=925544842", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=2070888133&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-22T11:34:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2070888133&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=4225105640&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-09T11:58:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4225105640&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=3792085428&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-18T09:23:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3792085428&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=4126600303&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-31T15:29:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4126600303&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=525930478&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-14T10:47:43, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=525930478&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=4142596758&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-08T22:57:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4142596758&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=687994999&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-01T12:43:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=687994999&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=3086354255&uk=609021507", 
"des": "文件数: 1,分享时间: 2017-05-31T23:33:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3086354255&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "血钻 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "血钻 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/让子弹飞 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=1678312694&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-07T08:54:50, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1678312694&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=2215525919&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-25T00:00:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2215525919&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=4282197672&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-26T21:30:25, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4282197672&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=2996054638&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-06T09:29:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2996054638&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=192188661&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-02T11:36:46, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=192188661&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=317481197&uk=609021507", "des": "文件数: 1,分享时间: 2017-05-28T12:27:46, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=317481197&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=608174175&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-17T10:52:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=608174175&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=1106147397&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-07T22:50:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1106147397&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=609172849&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-15T10:02:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=609172849&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=666935341&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-17T09:06:21, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=666935341&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "让子弹飞 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "让子弹飞 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/谍影重重2 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "谍影重重", "link": 
"https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "des": "文件数: 1,分享时间: 2017-02-14T03:26:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "des": "文件数: 1,分享时间: 2017-06-01T16:20:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重", "link": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "des": "专辑内文件数: 3,分享时间: 2016-03-18T10:07:00, 文件大小: 5925812.6728515625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "host": "pan.baidu.com", "more": null}, {"title": "★《谍影重重1-5》", "link": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "des": "文件数: 1,分享时间: 2017-08-18T16:32:39, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "des": "文件数: 1,分享时间: 2017-09-06T07:21:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "des": "文件数: 1,分享时间: 2017-05-26T00:40:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重5.torrent", "link": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "des": "文件数: 1,分享时间: 2017-02-15T15:23:50, 文件大小: 82.0595703125k", "blink": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "谍影重重2 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 7, "description": "谍影重重2 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/谍影重重3 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "谍影重重", "link": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "des": "文件数: 1,分享时间: 2017-02-14T03:26:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "des": "文件数: 1,分享时间: 2017-06-01T16:20:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重", "link": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "des": "专辑内文件数: 3,分享时间: 2016-03-18T10:07:00, 文件大小: 5925812.6728515625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "host": "pan.baidu.com", "more": null}, {"title": "★《谍影重重1-5》", "link": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "des": "文件数: 1,分享时间: 2017-08-18T16:32:39, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "des": "文件数: 1,分享时间: 2017-09-06T07:21:33, 文件大小: 1k", "blink": 
"https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "des": "文件数: 1,分享时间: 2017-05-26T00:40:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重5.torrent", "link": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "des": "文件数: 1,分享时间: 2017-02-15T15:23:50, 文件大小: 82.0595703125k", "blink": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "谍影重重3 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 7, "description": "谍影重重3 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/谍影重重 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "谍影重重", "link": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "des": "文件数: 1,分享时间: 2017-02-14T03:26:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "des": "文件数: 1,分享时间: 2017-06-01T16:20:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重", "link": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "des": "专辑内文件数: 3,分享时间: 2016-03-18T10:07:00, 文件大小: 5925812.6728515625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "host": "pan.baidu.com", "more": null}, {"title": "★《谍影重重1-5》", "link": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "des": "文件数: 1,分享时间: 2017-08-18T16:32:39, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "des": "文件数: 1,分享时间: 2017-09-06T07:21:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "des": "文件数: 1,分享时间: 2017-05-26T00:40:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重5.torrent", "link": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "des": "文件数: 1,分享时间: 2017-02-15T15:23:50, 文件大小: 82.0595703125k", "blink": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "谍影重重 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 7, "description": "谍影重重 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/辛德勒的名单 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "辛德勒的名单。", "link": "https://pan.baidu.com/share/link?shareid=601527744&uk=3106137638", "des": "文件数: 1,分享时间: 2015-12-08T14:51:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=601527744&uk=3106137638", "host": "pan.baidu.com", "more": 
null}, {"title": "辛德勒名单", "link": "https://pan.baidu.com/share/link?shareid=2152393128&uk=1026789719", "des": "文件数: 1,分享时间: 2016-01-23T12:45:56, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2152393128&uk=1026789719", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒名单", "link": "https://pan.baidu.com/share/link?shareid=76126071&uk=441722153", "des": "文件数: 1,分享时间: 2016-10-26T22:06:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=76126071&uk=441722153", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒名单", "link": "https://pan.baidu.com/share/link?shareid=4147947656&uk=2768931742", "des": "文件数: 1,分享时间: 2017-04-22T21:09:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4147947656&uk=2768931742", "host": "pan.baidu.com", "more": null}, {"title": "【美国】辛德勒名单.rmvb", "link": "https://pan.baidu.com/share/link?shareid=257364283&uk=1430954013", "des": "文件数: 1,分享时间: 2015-07-14T10:28:52, 文件大小: 704213.416015625k", "blink": "https://pan.baidu.com/share/link?shareid=257364283&uk=1430954013", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒名单.mkv", "link": "https://pan.baidu.com/share/link?shareid=300218529&uk=587841182", "des": "文件数: 1,分享时间: 2017-02-17T21:49:25, 文件大小: 2793534.0146484375k", "blink": "https://pan.baidu.com/share/link?shareid=300218529&uk=587841182", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒名单.mkv", "link": "https://pan.baidu.com/share/link?shareid=253067002&uk=271449358", "des": "文件数: 1,分享时间: 2016-11-05T21:02:19, 文件大小: 2793534.0146484375k", "blink": "https://pan.baidu.com/share/link?shareid=253067002&uk=271449358", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒的名单.Schindlers List.1993", "link": "https://pan.baidu.com/share/link?shareid=3741994528&uk=306474339", "des": "文件数: 1,分享时间: 2017-04-10T07:36:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3741994528&uk=306474339", "host": "pan.baidu.com", "more": null}, {"title": "IMDbTOP250.NO.8.辛德勒名单 .mkv", "link": "https://pan.baidu.com/share/link?shareid=1623461302&uk=1093518467", "des": "文件数: 1,分享时间: 2016-08-10T21:45:51, 文件大小: 2793534.0146484375k", "blink": "https://pan.baidu.com/share/link?shareid=1623461302&uk=1093518467", "host": "pan.baidu.com", "more": null}], "count": 1353, "q": "辛德勒的名单 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 9, "description": "辛德勒的名单 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/阳光灿烂的日子 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "79 阳光灿烂d日子", "link": "https://pan.baidu.com/share/link?shareid=2134897303&uk=1084372931", "des": "文件数: 1,分享时间: 2016-12-24T14:09:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2134897303&uk=1084372931", "host": "pan.baidu.com", "more": null}, {"title": "阳光灿烂.avi", "link": "https://pan.baidu.com/share/link?shareid=3063746573&uk=1631372742", "des": "文件数: 1,分享时间: 2016-11-28T14:22:01, 文件大小: 1179606.1484375k", "blink": "https://pan.baidu.com/share/link?shareid=3063746573&uk=1631372742", "host": "pan.baidu.com", "more": null}, {"title": "〖-f-〗《阳光灿烂的日子》.In.the.Heat.of.the.Sun2004.DVDrip.x264.AC3-CMCT《之善寻正》.torrent", "link": "https://pan.baidu.com/share/link?shareid=3377085152&uk=239727952", "des": "文件数: 1,分享时间: 2016-12-27T21:37:59, 文件大小: 49.185546875k", "blink": "https://pan.baidu.com/share/link?shareid=3377085152&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "19940125_今夜阳光灿烂", "link": 
"https://pan.baidu.com/share/link?shareid=1947195049&uk=1346119633", "des": "文件数: 1,分享时间: 2017-01-19T13:42:20, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1947195049&uk=1346119633", "host": "pan.baidu.com", "more": null}, {"title": "费城永远阳光灿烂.Its.Always.Sunny.in.Philadelphia.S10E08.2015.HDTV.MiniSD-TLF.mkv等", "link": "https://pan.baidu.com/share/link?shareid=2854204889&uk=542310220", "des": "文件数: 6,分享时间: 2017-03-07T21:11:25, 文件大小: 1206457.3837890625k", "blink": "https://pan.baidu.com/share/link?shareid=2854204889&uk=542310220", "host": "pan.baidu.com", "more": null}, {"title": "让心灵阳光灿烂.pdf", "link": "https://pan.baidu.com/share/link?shareid=965101348&uk=941414814", "des": "文件数: 1,分享时间: 2014-01-05T06:07:05, 文件大小: 54.1328125k", "blink": "https://pan.baidu.com/share/link?shareid=965101348&uk=941414814", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "阳光灿烂的日子 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 6, "description": "阳光灿烂的日子 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/雨人 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=2812274482&uk=3849557806", "des": "文件数: 1,分享时间: 2015-08-30T22:58:20, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2812274482&uk=3849557806", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=793081276&uk=3998475506", "des": "文件数: 1,分享时间: 2016-02-12T18:08:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=793081276&uk=3998475506", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=2641151441&uk=3764406524", "des": "文件数: 1,分享时间: 2016-06-11T10:30:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2641151441&uk=3764406524", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=1919521195&uk=1733885966", "des": "文件数: 1,分享时间: 2015-07-25T13:34:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1919521195&uk=1733885966", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=4063364495&uk=453039291", "des": "文件数: 1,分享时间: 2013-09-02T20:10:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4063364495&uk=453039291", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=548944186&uk=3993130536", "des": "文件数: 1,分享时间: 2015-10-08T23:11:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=548944186&uk=3993130536", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=1833586273&uk=1441773", "des": "文件数: 1,分享时间: 2013-09-29T22:09:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1833586273&uk=1441773", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=1942606523&uk=1733885966", "des": "文件数: 1,分享时间: 2015-07-25T13:37:58, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1942606523&uk=1733885966", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=937189341&uk=1902651040", "des": "文件数: 1,分享时间: 2016-03-20T13:23:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=937189341&uk=1902651040", "host": 
"pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=4102932210&uk=4115559669", "des": "文件数: 1,分享时间: 2016-09-28T20:29:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4102932210&uk=4115559669", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "雨人 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "雨人 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/香水 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=3561031152&uk=207354018", "des": "文件数: 1,分享时间: 2016-07-27T21:49:42, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3561031152&uk=207354018", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=4177240851&uk=932687986", "des": "文件数: 1,分享时间: 2016-06-08T08:45:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4177240851&uk=932687986", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=4278611413&uk=2208648241", "des": "文件数: 1,分享时间: 2016-10-13T08:26:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4278611413&uk=2208648241", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=710143097&uk=1344856499", "des": "文件数: 1,分享时间: 2016-10-30T09:28:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=710143097&uk=1344856499", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=2372098753&uk=2670459141", "des": "文件数: 1,分享时间: 2015-12-02T08:40:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2372098753&uk=2670459141", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=1090100341&uk=5561471", "des": "文件数: 1,分享时间: 2016-08-01T17:09:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1090100341&uk=5561471", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=3911858411&uk=932687986", "des": "文件数: 1,分享时间: 2016-05-16T08:25:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3911858411&uk=932687986", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=153743779&uk=932687986", "des": "文件数: 1,分享时间: 2016-05-16T08:19:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=153743779&uk=932687986", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=270020814&uk=2621639807", "des": "文件数: 1,分享时间: 2017-05-24T23:10:25, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=270020814&uk=2621639807", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=3592229816&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-24T00:54:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3592229816&uk=1870504545", "host": "pan.baidu.com", "more": null}], "count": 430, "q": "香水 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "香水 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/黑天鹅 .json: 
-------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=1741557677&uk=5561471", "des": "文件数: 1,分享时间: 2016-08-14T18:45:08, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1741557677&uk=5561471", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=592631430&uk=1213194440", "des": "文件数: 1,分享时间: 2016-08-14T02:02:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=592631430&uk=1213194440", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=3269511204&uk=1644462760", "des": "文件数: 1,分享时间: 2016-04-19T15:42:00, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3269511204&uk=1644462760", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=3806622255&uk=727778634", "des": "文件数: 1,分享时间: 2017-03-11T17:18:21, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3806622255&uk=727778634", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=3779500045&uk=3644101862", "des": "文件数: 1,分享时间: 2017-05-07T21:42:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3779500045&uk=3644101862", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=1408913115&uk=1093532973", "des": "文件数: 1,分享时间: 2016-05-03T16:14:32, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1408913115&uk=1093532973", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=1116077096&uk=2183062177", "des": "文件数: 1,分享时间: 2016-12-13T15:53:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1116077096&uk=2183062177", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=114770092&uk=408227742", "des": "文件数: 1,分享时间: 2016-11-05T16:30:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=114770092&uk=408227742", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=2778801398&uk=1107820184", "des": "文件数: 1,分享时间: 2016-07-29T19:05:41, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2778801398&uk=1107820184", "host": "pan.baidu.com", "more": null}, {"title": "《黑天鹅》", "link": "https://pan.baidu.com/share/link?shareid=2819649913&uk=3255920820", "des": "文件数: 1,分享时间: 2016-12-12T09:02:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2819649913&uk=3255920820", "host": "pan.baidu.com", "more": null}], "count": 185, "q": "黑天鹅 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "黑天鹅 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/cached_pansou/龙猫 .json: -------------------------------------------------------------------------------- 1 | {"list": {"data": [{"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=1638453595&uk=139801957", "des": "文件数: 1,分享时间: 2016-01-08T01:29:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1638453595&uk=139801957", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=1771840313&uk=3073055985", "des": "文件数: 1,分享时间: 2016-02-27T18:46:17, 文件大小: 1k", "blink": 
"https://pan.baidu.com/share/link?shareid=1771840313&uk=3073055985", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=2274649232&uk=3326727685", "des": "文件数: 1,分享时间: 2016-11-08T15:26:35, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2274649232&uk=3326727685", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=3735275052&uk=878728055", "des": "文件数: 1,分享时间: 2016-03-12T10:20:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3735275052&uk=878728055", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=3855203431&uk=3762236667", "des": "文件数: 1,分享时间: 2017-03-03T22:16:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3855203431&uk=3762236667", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=2803117026&uk=756635525", "des": "文件数: 1,分享时间: 2016-10-18T09:05:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2803117026&uk=756635525", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=2480638763&uk=475630542", "des": "文件数: 1,分享时间: 2016-07-03T17:21:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2480638763&uk=475630542", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=526505602&uk=3379484742", "des": "文件数: 1,分享时间: 2015-08-12T19:07:35, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=526505602&uk=3379484742", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=3315656731&uk=2373288888", "des": "文件数: 1,分享时间: 2017-06-02T07:44:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3315656731&uk=2373288888", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=185407033&uk=3966223021", "des": "文件数: 1,分享时间: 2016-10-01T11:44:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=185407033&uk=3966223021", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "龙猫 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "龙猫 相关信息"} -------------------------------------------------------------------------------- /doubanmovie/data/豆瓣电影250.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/doubanmovie/data/豆瓣电影250.xls -------------------------------------------------------------------------------- /doubanmovie/panspider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 根据关键字搜索对应的百度云下载链接 3 | 4 | 搜索地址:http://pansou.com/ 5 | ''' 6 | 7 | 8 | import os 9 | import json 10 | 11 | import requests 12 | from bs4 import BeautifulSoup 13 | 14 | api_url = 'http://api.pansou.com/search_new.php' 15 | 16 | 17 | def cached_json(keyword): 18 | '''缓存下载过的json数据''' 19 | folder = 'cached_pansou' 20 | filename = keyword + '.json' 21 | # 关联目录和文件名生成绝对路劲 22 | path = os.path.join(folder, filename) 23 | 24 | # 当该文件被下载过了,直接从内存读取文件并返回 25 | if os.path.exists(path): 26 | with open(path, 'r') as f: 27 | return json.load(f) 28 | else: 29 | # 建立 cached 文件夹 30 | if not os.path.exists(folder): 31 | os.makedirs(folder) 32 | # 发送网络请求,把结果/json写入文件 33 | data = { 34 | 'q': keyword, 35 | 'p': 1, 36 | } 
37 | r = requests.post(api_url, data=data).json() 38 | with open(path, 'a') as f: 39 | json.dump(r, f, ensure_ascii=False) 40 | return r 41 | 42 | 43 | def parse_link(name): 44 | '''解析对应的下载连接''' 45 | j = cached_json(name) 46 | link = j['list']['data'][0]['link'] 47 | return link 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /douyu/douyu_test.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 利用第三方模块:danmu 3 | 抓取斗鱼弹幕 4 | ''' 5 | import time, sys 6 | 7 | from danmu import DanMuClient 8 | 9 | def pp(msg): 10 | print(msg.encode(sys.stdin.encoding, 'ignore'). 11 | decode(sys.stdin.encoding)) 12 | 13 | dmc = DanMuClient('https://www.douyu.com/208114') 14 | if not dmc.isValid(): print('Url not valid') 15 | 16 | @dmc.danmu 17 | def danmu_fn(msg): 18 | pp('[%s] %s' % (msg['NickName'], msg['Content'])) 19 | ''' 20 | @dmc.gift 21 | def gift_fn(msg): 22 | pp('[%s] sent a gift!' % msg['NickName']) 23 | 24 | @dmc.other 25 | def other_fn(msg): 26 | pp('Other message received') 27 | ''' 28 | dmc.start(blockThread = True) -------------------------------------------------------------------------------- /gamedownload/readme.md: -------------------------------------------------------------------------------- 1 | ys168网盘的文件地址是动态的 2 | 所以要下载文件的话需要清空cached文件夹下的缓存网页 3 | 这样才能获取到最新的动态地址 -------------------------------------------------------------------------------- /ithome/config.py: -------------------------------------------------------------------------------- 1 | ''' 2 | mongodb的配置文件 3 | ''' 4 | # 数据库url 5 | MONGO_URL = 'localhost' 6 | # 数据库名 7 | MONGO_DB = 'ithome' 8 | # 数据库表 9 | MONGO_TABLE = 'hotcomment_it' -------------------------------------------------------------------------------- /ithome/datahandleer.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 热评数据处理 3 | 4 | 数据: apple.json 苹果分类下的新闻热评 共3672条数据 5 | 字段: 6 | ''' 7 | 8 | import json 9 | 10 | # 读取json数据 11 | with open('apple.json', 'r') as f: 12 | data = json.load(f) 13 | 14 | 15 | def city_count(data): 16 | ''' 17 | 统计城市出现次数 18 | return city 19 | ''' 20 | 21 | city = {} 22 | for i in data: 23 | loc = i['loc'] 24 | if loc in city.keys(): 25 | city[loc] += 1 26 | else: 27 | city[loc] = 1 28 | return city 29 | 30 | 31 | ''' 32 | # 获取所有城市出现的次数 33 | city = city_count(data) 34 | # 找到出现最多的前10名 35 | top_city = sorted(city.items(), key=lambda d: d[1], reverse=True)[:10] 36 | 37 | # 分离数据,方便生成图片 38 | name = [k for k, v in top_city] 39 | count = [v for k, v in top_city] 40 | print(name) 41 | print(count) 42 | ''' 43 | 44 | 45 | def field_ount(data, field): 46 | ''' 47 | 统计数据中字段名出现的次数 48 | return dic 49 | ''' 50 | 51 | dic = {} 52 | 53 | for i in data: 54 | f = i[field] 55 | if f in dic.keys(): 56 | dic[f] += 1 57 | else: 58 | dic[f] = 1 59 | return dic 60 | 61 | 62 | def find_top10(dic): 63 | ''' 64 | 找到传进字典的前10名 65 | 并返回对应的 key value list 66 | ''' 67 | top = sorted(dic.items(), key=lambda d: d[1], reverse=True)[:10] 68 | name = [k for k, v in top] 69 | count = [v for k, v in top] 70 | return name, count 71 | 72 | 73 | ''' 74 | # 获取所有手机厂商 75 | phone_com = field_ount(data,'phone_com') 76 | name,count = find_top10(phone_com) 77 | print(name,count) 78 | ''' 79 | 80 | ''' 81 | # 获取所有手机型号 82 | phone_model = field_ount(data,'phone_model') 83 | name,count = find_top10(phone_model) 84 | print(name,count) 85 | ''' 86 | 87 | 88 | def field_ount_time(data, field): 89 | ''' 90 | 统计数据中字段名出现的次数 91 | return dic 92 | 对于时间特殊处理 93 | 
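The counting helpers in ithome/datahandleer.py above (city_count, field_ount, find_top10) hand-roll a frequency count followed by a top-10 selection. The standard library's collections.Counter covers the same ground; a minimal equivalent sketch, assuming `data` is the list of comment dicts loaded from apple.json at the top of that file:

```python
from collections import Counter

def top10(data, field):
    """Return the 10 most common values of `field` as (value, count) pairs."""
    return Counter(item[field] for item in data).most_common(10)

# top10(data, 'loc') matches city_count + find_top10;
# top10(data, 'phone_com') matches the phone-vendor ranking done above.
```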
''' 94 | 95 | dic = {} 96 | 97 | for i in data: 98 | f = i[field].split(':')[0] + '点' 99 | if f in dic.keys(): 100 | dic[f] += 1 101 | else: 102 | dic[f] = 1 103 | return dic 104 | 105 | 106 | ''' 107 | # 获取所有发帖时间 108 | time = field_ount_time(data,'time') 109 | name,count = find_top10(time) 110 | print(name,count) 111 | ''' 112 | 113 | ''' 114 | # 获取热评大佬 115 | people = field_ount(data,'name') 116 | name,count = find_top10(people) 117 | print(name,count) 118 | ''' 119 | 120 | # 检测一下有没重复的段子也能上热评? 121 | p = field_ount(data, 'content') 122 | name, count = find_top10(p) 123 | 124 | for duanzi in name: 125 | print(duanzi) 126 | print('\n') 127 | -------------------------------------------------------------------------------- /ithome/pipeline.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 处理数据 3 | 保存到mogodb 4 | ''' 5 | 6 | from pymongo import MongoClient 7 | from config import * 8 | 9 | client = MongoClient(MONGO_URL, connect=True) 10 | db = client[MONGO_DB] 11 | 12 | # 将记录写入数据库 13 | def save_to_mongo(result): 14 | if db[MONGO_TABLE].insert(result): 15 | print('存储成功', result) 16 | return True 17 | return False 18 | 19 | 20 | -------------------------------------------------------------------------------- /mazhifu/config.py: -------------------------------------------------------------------------------- 1 | # 登录用户名和密码 2 | USERNMAE = '' 3 | PASSWD = '' 4 | # mysql数据库配置 5 | TEST_DB = { 6 | 'host': '127.0.0.1', 7 | 'user': 'root', 8 | 'password': '', 9 | 'db': '' 10 | } 11 | -------------------------------------------------------------------------------- /mazhifu/readme.md: -------------------------------------------------------------------------------- 1 | ### 第三方支付平台 码支付账单 csv爬取 2 | 3 | 网站地址:https://codepay.fateqq.com/ 4 | 5 | 6 | 7 | ### 使用步骤 8 | 9 | **安装前置依赖** 10 | 11 | * `brew/yum/apt-get install python3` # 选择你喜欢的方式安装Python3 12 | * `pip install -r requirements.txt` # 安装第三方库 13 | 14 | 15 | **配置个人信息** 16 | 17 | * 打开`config.py`并配置好自己的mysql数据库信息,码支付的账号和密码 18 | * 保证数据库里有一张名为`91pay`的表 19 | 20 | 创建的sql语句如下: 21 | 22 | ```sql 23 | SET NAMES utf8mb4; 24 | SET FOREIGN_KEY_CHECKS = 0; 25 | 26 | -- ---------------------------- 27 | -- Table structure for cmf_pay_orders 28 | -- ---------------------------- 29 | DROP TABLE IF EXISTS `cmf_pay_orders`; 30 | CREATE TABLE `cmf_pay_orders` ( 31 | `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'ID', 32 | `user_id` int(11) DEFAULT NULL COMMENT '用户ID', 33 | `username` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '用户名', 34 | `way` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '支付方式', 35 | `status` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '订单状态', 36 | `trade_no` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '订单号', 37 | `raw_price` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '申请价格', 38 | `pay_price` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '支付价格', 39 | `date` datetime DEFAULT NULL COMMENT '订单日期', 40 | `cash` int(2) NOT NULL DEFAULT '0' COMMENT '提现状态', 41 | PRIMARY KEY (`id`) USING BTREE 42 | ) ENGINE=InnoDB AUTO_INCREMENT=115 DEFAULT CHARSET=utf8; 43 | 44 | SET FOREIGN_KEY_CHECKS = 1; 45 | ``` 46 | 47 | 48 | **运行程序** 49 | 50 | `python3 spider.py` 51 | 52 | ok,今天的账单信息已经入库了 -------------------------------------------------------------------------------- /mazhifu/requirements.txt: -------------------------------------------------------------------------------- 1 | lazyspider==0.0.2 2 | lxml==4.1.1 3 | PyMySQL==0.8.0 4 | requests==2.18.4 5 | 
selenium==3.8.0 6 | -------------------------------------------------------------------------------- /requestes基本使用/002.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def getHtmlText(url): 5 | try: 6 | r = requests.get(url, timeout=30) 7 | # 如果状态码不是200 则应发HTTOError异常 8 | r.raise_for_status() 9 | # 设置正确的编码方式 10 | r.encoding = r.apparent_encoding() 11 | return r.text 12 | except: 13 | return "Something Wrong!" 14 | -------------------------------------------------------------------------------- /requestes基本使用/login.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import bs4 3 | import os 4 | from PIL import Image 5 | 6 | 7 | def get_post_data(url): 8 | 9 | # 首先获取到登录界面的html 10 | html = requests.get(url 11 | ) 12 | 13 | soup = bs4.BeautifulSoup(html.text, 'lxml') 14 | 15 | # 找到form的验证参数 16 | __VIEWSTATE = soup.find('input', attrs={'name': '__VIEWSTATE'})['value'] 17 | 18 | 19 | # 下载验证码图片 20 | pic = requests.get( 21 | 'http://jw.***.edu.cn/(gxv2le55n4jswm45mkv14o2n)/CheckCode.aspx').content 22 | with open('ver_pic.png', 'wb') as f: 23 | f.write(pic) 24 | 25 | # 打开验证码图片 26 | image = Image.open('{}/ver_pic.png'.format(os.getcwd())) 27 | image.show() 28 | 29 | # 构造需要post的参数表 30 | data = {'txtUserName': '', 31 | 'Textbox1': '', 32 | 'TextBox2': '', 33 | 'txtSecretCode': "", 34 | '__VIEWSTATE': '', 35 | # 这里我将radio栏--学生 encode成gbk编码,以符合数据的要求 36 | 'RadioButtonList1': '\xd1\xa7\xc9\xfa', 37 | 'Button1': '', 38 | 'lbLanguage': '', 39 | 'hidPdrs': '', 40 | 'hidsc': '', } 41 | 42 | # 构造登录的post参数 43 | data['__VIEWSTATE'] = __VIEWSTATE 44 | data['txtSecretCode'] = input('请输入图片中的验证码') 45 | data['txtUserName'] = input("请输入学号") 46 | data['TextBox2'] = input("请输入密码") 47 | 48 | return data 49 | 50 | 51 | # 登录教务系统 52 | def login(url,data): 53 | # 通过requests库构造一个浏览器session,这能帮我们自动、持久的管理cookies, 54 | s = requests.session() 55 | s.post(url, data=data) 56 | return s 57 | 58 | 59 | 60 | base_url = 'http://jw.****.edu.cn/(gxv2le55n4jswm45mkv14o2n)/default2.aspx' 61 | data = get_post_data(base_url) 62 | print(data) 63 | # 模拟登录教务系统 64 | brow = login(base_url,data) 65 | 66 | test = brow.get( 67 | 'http://jw.****.edu.cn/(gxv2le55n4jswm45mkv14o2n)/xs_main.aspx?xh=14200406101') 68 | 69 | # 测试看看是否能找到登陆后的信息 70 | soup = bs4.BeautifulSoup(test.text, 'lxml') 71 | try: 72 | name = soup.find('span', attrs={'id': 'xhxm'}).text 73 | except: 74 | name = '登录失败 ' 75 | 76 | print(name) 77 | 78 | -------------------------------------------------------------------------------- /requestes基本使用/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import bs4 3 | import re 4 | 5 | def get_html(url): 6 | try: 7 | r = requests.get(url, timeout=30) 8 | r.raise_for_status 9 | print(r.apparent_encoding) 10 | r.encoding = r.apparent_encoding 11 | return r.text 12 | except: 13 | return "Someting Wrong!" 
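002.py and the get_html helper just above follow the same fetch-and-decode pattern, but two details of the requests API are worth noting: raise_for_status is a method (the bare r.raise_for_status in test.py never actually calls it), and apparent_encoding is a property (002.py's r.apparent_encoding() would raise a TypeError). A corrected sketch of the shared helper:

```python
import requests

def get_html_text(url):
    """Fetch a page and decode it with the encoding requests detects."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()              # method call: raises HTTPError on 4xx/5xx
        r.encoding = r.apparent_encoding  # property, not a method
        return r.text
    except requests.RequestException:
        return "Something Wrong!"
```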
14 | 15 | 16 | def get_txt_url(url): 17 | ''' 18 | 获取该小说每个章节的url地址: 19 | 20 | ''' 21 | url_list = [] 22 | html = get_html(url) 23 | soup = bs4.BeautifulSoup(html, 'lxml') 24 | lista = soup.find_all('dd') 25 | txt_name = soup.find('h1').text 26 | with open('/Users/ehco/Documents/codestuff/Python-crawler/小说/{}.txt'.format(txt_name),"a+") as f: 27 | f.write('小说标题:{} \n'.format(txt_name)) 28 | for url in lista: 29 | url_list.append('http://www.qu.la/' + url.a['href']) 30 | 31 | 32 | return url_list,txt_name 33 | 34 | 35 | 36 | url = 'http://www.qu.la/book/28888/' 37 | 38 | def get_one_txt(url,txt_name): 39 | html = get_html(url).replace('
','\n') 40 | soup = bs4.BeautifulSoup(html,'lxml') 41 | try: 42 | txt = soup.find('div',id='content').text.replace('chaptererror();','') 43 | title = soup.find('title').text 44 | 45 | with open('/Users/ehco/Documents/codestuff/Python-crawler/小说/{}.txt'.format(txt_name),"a") as f: 46 | f.write(title+'\n\n') 47 | f.write(txt) 48 | print('当前章节{} 已经下载完毕'.format(title)) 49 | except: 50 | print('someting wrong') 51 | 52 | 53 | 54 | a=[1,2,3,4,5] 55 | for i in a: 56 | print(a.index(i)/len(a)*100) -------------------------------------------------------------------------------- /sougou/configs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | 3 | # 本地服务器 4 | TEST_DB = { 5 | 'host': '127.0.0.1', 6 | 'user': 'root', 7 | 'password': '19960202', 8 | 'db': 'EhcoTestDb', } 9 | 10 | -------------------------------------------------------------------------------- /sougou/store_new/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/sougou/store_new/__init__.py -------------------------------------------------------------------------------- /sougou/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/sougou/utils/__init__.py -------------------------------------------------------------------------------- /sougou/utils/tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import logging 4 | 5 | class UtilLogger(object): 6 | 7 | ''' 8 | 建立日志文件,并以特定格式输出日志 9 | Args: 10 | name:logger名字 11 | logfile_name 日志文件名 12 | level:调试级别,日志中只打印高于此级别的日志,例如logging.DEBUG、logging.info,此级别可以在set_level函数里设置 13 | ''' 14 | def __init__(self, name, logfile_name=None, level=logging.DEBUG): 15 | self.logger = logging.getLogger(name) 16 | self.logger.setLevel(level) 17 | formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s - %(message)s") 18 | ch = None 19 | if logfile_name is None: 20 | ch = logging.StreamHandler() 21 | else: 22 | logDir = os.path.dirname(logfile_name) 23 | if logDir != "" and not os.path.exists(logDir): 24 | os.mkdir(logDir) 25 | pass 26 | now = time.localtime() 27 | suffix = '.%d%02d%02d' % (now.tm_year, now.tm_mon, now.tm_mday) 28 | ch = logging.FileHandler(logfile_name+suffix) 29 | ch.setLevel(logging.DEBUG) 30 | ch.setFormatter(formatter) 31 | self.logger.addHandler(ch) 32 | 33 | def set_level(self,level): 34 | ''' 35 | 设置调试等级 36 | Args: 37 | level,字符串,可选debug、info、warning、error 38 | ''' 39 | if level.lower() == "debug": 40 | self.logger.setLevel(logging.DEBUG) 41 | elif level.lower() == "info": 42 | self.logger.setLevel(logging.INFO) 43 | elif level.lower() == "warning": 44 | self.logger.setLevel(logging.WARNING) 45 | elif level.lower() == "error": 46 | self.logger.setLevel(logging.ERROR) 47 | 48 | def debug(self, message): 49 | ''' 50 | 打印函数,最低调试级别的打印, 51 | Args: 52 | message为要打印的信息 53 | info/warn/error函数与此类似 54 | ''' 55 | self.logger.debug(message) 56 | 57 | def info(self,message): 58 | self.logger.info(message) 59 | 60 | def warn(self,message): 61 | self.logger.warn(message) 62 | 63 | def error(self,message): 64 | self.logger.error(message) 65 | 66 | 67 | # def test(): 68 | # log = UtilLogger('testname','test') 69 | # log.set_level('info') 70 | # log.debug('++++++++++++++') 71 | 
# log.info('--------------') 72 | # log.warn('==============') 73 | # log.error('_____________') 74 | # if __name__ == '__main__': 75 | # test() 76 | -------------------------------------------------------------------------------- /toapi-91baby/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | .html/ 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | env27/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | .venv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | -------------------------------------------------------------------------------- /toapi-91baby/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from toapi import Api 4 | import requests 5 | 6 | from items.hotbook import HotBook 7 | from items.book import Book 8 | from items.search import Search 9 | from settings import MySettings 10 | 11 | 12 | 13 | 14 | 15 | api = Api('', settings=MySettings) 16 | api.register(HotBook) 17 | api.register(Book) 18 | api.register(Search) 19 | 20 | 21 | @api.server.app.route('/search/') 22 | def search_page(keyword): 23 | ''' 24 | 91bay新书论坛 25 | 搜索功能 26 | ''' 27 | data = { 28 | 'searchsel': 'forum', 29 | 'mod': 'forum', 30 | 'srchtype': 'title', 31 | 'srchtxt': keyword, 32 | } 33 | r = requests.post( 34 | 'http://91baby.mama.cn/search.php?searchsubmit=yes', data) 35 | r.encoding = 'utf8' 36 | html = r.text 37 | results = {} 38 | items = [Search] 39 | # 通过toapi的方法对网页进行解析 40 | for item in items: 41 | parsed_item = api.parse_item(html, item) 42 | results[item.__name__] = parsed_item 43 | # 返回json 44 | return api.server.app.response_class( 45 | response=json.dumps(results, ensure_ascii=False), 46 | status=200, 47 | mimetype='application/json' 48 | ) 49 | 50 | 51 | if __name__ == '__main__': 52 | api.serve() 53 | -------------------------------------------------------------------------------- /toapi-91baby/data.sqlite: -------------------------------------------------------------------------------- 
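app.py above turns the parsed items into a small local HTTP API. Once `python app.py` is running, the registered routes can be exercised with plain requests; 127.0.0.1:5000 is the address the project's own test.py (further below) queries, and this sketch assumes the custom search route takes the keyword as a path segment:

```python
import requests

BASE = 'http://127.0.0.1:5000'

# First page of the hot-book list parsed by the HotBook item.
print(requests.get(BASE + '/hotbook?page=1').json())

# Keyword search handled by the search_page() route registered in app.py.
print(requests.get(BASE + '/search/三体').json())
```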
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/toapi-91baby/data.sqlite -------------------------------------------------------------------------------- /toapi-91baby/items/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/toapi-91baby/items/__init__.py -------------------------------------------------------------------------------- /toapi-91baby/items/book.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 解析91baby 小说内容页 3 | ''' 4 | 5 | from toapi import Item, XPath 6 | 7 | 8 | def strip(text): 9 | '''去除字符串里的空白字符''' 10 | blank_str = ['\u3000\u3000', '\xa0', '\r'] 11 | for i in blank_str: 12 | text = text.replace(i, '') 13 | return text 14 | 15 | 16 | def strip_list(l): 17 | ''' 18 | 删除列表中的短字符串 19 | ''' 20 | new_l = [] 21 | for ele in l: 22 | if len(ele) > 5 and '本帖最后由' not in ele: 23 | new_l.append(ele) 24 | return new_l 25 | 26 | 27 | class Book(Item): 28 | __base_url__ = 'http://91baby.mama.cn' 29 | 30 | title = XPath('//*[@id="wp"]/div[3]/text()[3]') 31 | author = XPath('//*[@id="wp"]/div[3]/text()[3]') 32 | total_page = XPath('//span[@class="pgt"]/div//a') 33 | contents = XPath('//td[@class="t_f"]') 34 | 35 | def clean_title(self, title): 36 | return title.split('《')[1].split('》')[0] 37 | 38 | def clean_author(self, author): 39 | index = author.find('作者:') + 3 40 | return author[index:] 41 | 42 | def clean_contents(self, contents): 43 | chapters = {} 44 | for index, item in enumerate(contents): 45 | content = strip(item.xpath('string(.)')) 46 | # 去掉开头废话 47 | if '当前被收藏数' not in content: 48 | chapters[index] = content 49 | book_contents = {} 50 | for k, v in chapters.items(): 51 | # 过滤超断行 52 | texts = strip_list(v.split('\n')) 53 | book_contents[k] = texts 54 | return book_contents 55 | 56 | def clean_total_page(self, total_page): 57 | try: 58 | for index, page in enumerate(total_page): 59 | num = page.xpath('./text()')[0] 60 | if num == '下一页': 61 | i = int(index) - 1 62 | break 63 | page = total_page[i].xpath('./text()')[0] 64 | if '...' in page: 65 | return int(page.replace('... 
', '')) 66 | return int(page) 67 | except: 68 | return 1 69 | 70 | class Meta: 71 | source = None 72 | route = {'/book_id=:id?page=:page': '/thread-:id-:page-1.html'} 73 | -------------------------------------------------------------------------------- /toapi-91baby/items/hotbook.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 解析91baby 新书热书列表页 3 | ''' 4 | 5 | from collections import OrderedDict 6 | from toapi import Item, XPath 7 | 8 | 9 | class MyItem(Item): 10 | @classmethod 11 | def parse(cls, html): 12 | """Parse html to json""" 13 | if cls.Meta.source is None: 14 | return cls._parse_item(html) 15 | else: 16 | sections = cls.Meta.source.parse(html, is_source=True) 17 | results = [] 18 | for section in sections: 19 | res = cls._parse_item(section) 20 | if res: 21 | results.append(res) 22 | return results 23 | 24 | @classmethod 25 | def _parse_item(cls, html): 26 | item = OrderedDict() 27 | for name, selector in cls.__selectors__.items(): 28 | try: 29 | item[name] = selector.parse(html) 30 | except Exception: 31 | item[name] = '' 32 | clean_method = getattr(cls, 'clean_%s' % name, None) 33 | if clean_method is not None: 34 | res = clean_method(cls, item[name]) 35 | if res == None: 36 | return None 37 | else: 38 | item[name] = res 39 | return item 40 | 41 | 42 | class HotBook(MyItem): 43 | __base_url__ = 'http://91baby.mama.cn' 44 | title = XPath('//a[@class="xst"]/text()[1]') 45 | author = XPath('//a[@class="xst"]/text()[1]') 46 | url = XPath('//a[@class="xst"]/@href') 47 | book_id = XPath('//a[@class="xst"]/@href') 48 | 49 | def clean_title(self, title): 50 | if '《' in title: 51 | return title[title.find('\u300a') + 1:title.find('\u300b')][:10] 52 | else: 53 | return None 54 | 55 | def clean_author(self, author): 56 | if ':' in author: 57 | return author[author.find(':') + 1:author.find('(')] 58 | elif ':' in author: 59 | return author[author.find(':') + 1:author.find('(')] 60 | else: 61 | return None 62 | 63 | def clean_book_id(self, book_id): 64 | return book_id.split('-')[1] 65 | 66 | class Meta: 67 | source = XPath('//tbody[@class="thread_tbody"]') 68 | route = {'/hotbook?page=:page': '/forum-171-:page.html'} 69 | -------------------------------------------------------------------------------- /toapi-91baby/items/search.py: -------------------------------------------------------------------------------- 1 | from toapi import Item, XPath 2 | 3 | 4 | class Search(Item): 5 | ''' 6 | 从搜索的界面解析出 7 | 书名 id 链接 简介 8 | ''' 9 | title = XPath('//h3/a/text()') 10 | book_id = XPath('//h3/a/@href') 11 | url = XPath('//h3/a/@href') 12 | content = XPath('//p[2]/text()') 13 | 14 | def clean_title(self, title): 15 | return ''.join(title) 16 | 17 | def clean_book_id(self, book_id): 18 | return book_id.split('-')[1] 19 | 20 | def clean_url(self, url): 21 | return url[:url.find('?')] 22 | 23 | class Meta: 24 | source = XPath('//li[@class="pbw"]') 25 | # 这里的route留空,防止重复注册路由 26 | route = {} 27 | -------------------------------------------------------------------------------- /toapi-91baby/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from toapi.cache import MemoryCache 4 | from toapi.settings import Settings 5 | 6 | 7 | class MySettings(Settings): 8 | """ 9 | Create custom configuration 10 | http://www.toapi.org/topics/settings/ 11 | """ 12 | 13 | cache = { 14 | 'cache_class': MemoryCache, 15 | 'cache_config': {}, 16 | 'serializer': None, 17 | 'ttl': 10, 18 | } 19 | storage = { 20 | 
"PATH": os.getcwd(), 21 | # 使用sqlite作为存储介质 22 | "DB_URL": 'sqlite:///data.sqlite', 23 | } 24 | web = { 25 | "with_ajax": False, 26 | "request_config": {}, 27 | "headers": None 28 | } 29 | -------------------------------------------------------------------------------- /toapi-91baby/test.py: -------------------------------------------------------------------------------- 1 | '''测试api的使用''' 2 | 3 | import sys 4 | import requests 5 | from prettytable import PrettyTable 6 | 7 | list_url = 'http://127.0.0.1:5000/hotbook?page={}' 8 | book_url = 'http://127.0.0.1:5000/book_id={}?page={}' 9 | 10 | 11 | def get_json_response(url): 12 | r = requests.get(url) 13 | return r.json() 14 | 15 | 16 | def print_table(header, rows): 17 | x = PrettyTable(header) 18 | for row in rows: 19 | x.add_row(row) 20 | print(x) 21 | 22 | 23 | def get_book_list(page): 24 | '''获取指定页码的书籍列表''' 25 | # 获取第一页的所有书籍信息 26 | page_json = get_json_response(list_url.format(page)) 27 | header = ['书号', '书名', '链接'] 28 | rows = [] 29 | for book in page_json['HotBook']: 30 | rows.append([book['book_id'], book['title'], book['url']]) 31 | # 打印第一页的信息 32 | print_table(header, rows) 33 | 34 | 35 | def get_book_content(book_id, page): 36 | # 获取书籍信息 37 | book_json = get_json_response(book_url.format(book_id, page)) 38 | book = book_json['Book'] 39 | # 打印书籍头 40 | header = ['书名', '作者', '总页数', '当前页'] 41 | rows = [[book['title'], book['author'], book['total_page'], page]] 42 | print_table(header, rows) 43 | # 打印书籍内容 44 | contents = book['contents'] 45 | key = input('要开始看小说么?y键开始\n\n') 46 | if key == 'y': 47 | for i in range(len(contents)): 48 | print(book['title'] + '第{}章节 \n\n'.format(i)) 49 | print(contents[i] + '\n\n') 50 | input('本章已经阅读完,任意键阅读下一章节!\n\n') 51 | 52 | key = input('本页小说已经全部阅读完毕,要看下一页么?y键确定\n\n') 53 | if key == 'y': 54 | page += 1 55 | get_book_content(book_id, page) 56 | else: 57 | sys.exit('退出程序...') 58 | 59 | 60 | def main(): 61 | while True: 62 | page = input( 63 | '想看第几页书的书? 
请在下方输入页码 按回车键确定!q键退出 \n\n') 64 | if page == 'q': 65 | sys.exit() 66 | if page == 'y': 67 | book_id = input('请输入书号阅读书籍: \n') 68 | page = 1 69 | get_book_content(book_id, page) 70 | get_book_list(page) 71 | print('找到想看的书了?想进去瞧一眼么?输入 y 进入书号输入界面!\n\n') 72 | 73 | 74 | if __name__ == '__main__': 75 | main() 76 | -------------------------------------------------------------------------------- /toapi-91baby/wsgi.py: -------------------------------------------------------------------------------- 1 | from app import api 2 | 3 | app = api.server.app 4 | -------------------------------------------------------------------------------- /wenjuanxin/configs.py: -------------------------------------------------------------------------------- 1 | QUESTION_ID = 11231 2 | 3 | QUESTION_URL = "https://www.wjx.cn/jq/{}.aspx".format(QUESTION_ID) 4 | 5 | # 提交问卷选项的url 6 | POST_URL_MAP = "https://www.wjx.cn/joinnew/processjq.ashx?submittype=1&curID={}&t={}&starttime={}&rn={}" 7 | 8 | QUESTION_INFO = ''' 9 | 题目:{} 10 | 选项:{} 11 | 12 | 随机选择结果:{} 13 | 14 | ~~~~~~~~~~~~~~~~~~~~~~ 15 | ''' 16 | 17 | # 回答次数 18 | ANSWER_TIMES = 3 19 | -------------------------------------------------------------------------------- /wenjuanxin/spider.py: -------------------------------------------------------------------------------- 1 | import time 2 | from datetime import datetime 3 | from random import randint 4 | 5 | from requests_html import HTMLSession 6 | 7 | from configs import (QUESTION_ID, QUESTION_URL, POST_URL_MAP, 8 | QUESTION_INFO, ANSWER_TIMES) 9 | 10 | 11 | def parse_post_url(resp): 12 | ''' 13 | 解析出提交问卷的url 14 | ''' 15 | # 找到rn 16 | rn = int(resp.html.search('rndnum="{}"')[0].split('.')[0]) 17 | # 提交问卷的时间 18 | raw_t = round(time.time(), 3) 19 | t = int(str(raw_t).replace('.', '')) 20 | # 模拟开始答题时间 21 | starttime = datetime.fromtimestamp( 22 | int(raw_t) - randint(1, 60 * 3)).strftime("%Y/%m/%d %H:%M:%S") 23 | 24 | url = POST_URL_MAP.format(QUESTION_ID, t, starttime, rn) 25 | return url 26 | 27 | 28 | def parse_post_data(resp): 29 | ''' 30 | 解析出问题和选项 31 | 返回post_data 32 | ''' 33 | post_data = {'submitdata': ""} 34 | questions = resp.html.find('fieldset', first=True).find('.div_question') 35 | 36 | for i, q in enumerate(questions): 37 | title = q.find('.div_title_question_all', first=True).text 38 | choices = [t.text for t in q.find('label')] 39 | random_index = randint(0, len(choices) - 1) 40 | choice = choices[random_index] 41 | post_data['submitdata'] += '{}${}}}'.format(i+1, random_index+1) 42 | print(QUESTION_INFO.format(title, choices, choice)) 43 | time.sleep(0.5) 44 | # 去除最后一个不合法的`}` 45 | post_data['submitdata'] = post_data['submitdata'][:-1] 46 | return post_data 47 | 48 | 49 | def post_answer(session, url, data): 50 | ''' 51 | 提交答案 52 | ''' 53 | r = session.post(url, data) 54 | print('提交状态:{}'.format(r.status_code)) 55 | 56 | 57 | def simulate_survey(): 58 | ''' 59 | 模拟回答问卷 60 | ''' 61 | session = HTMLSession() 62 | resp = session.get(QUESTION_URL) 63 | url = parse_post_url(resp) 64 | data = parse_post_data(resp) 65 | post_answer(session, url, data) 66 | 67 | 68 | def main(): 69 | print('开始模拟填写问卷,共模拟{}次'.format(ANSWER_TIMES)) 70 | for i in range(ANSWER_TIMES): 71 | simulate_survey() 72 | sleep_time = randint(1, 60) 73 | print('第{}次问卷填写完毕,即将沉睡{}s'.format(i+1, sleep_time)) 74 | time.sleep(sleep_time) 75 | 76 | 77 | if __name__ == '__main__': 78 | main() 79 | -------------------------------------------------------------------------------- /zhihu/zhihu_easy/__init__.py: 
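The submitdata string assembled in parse_post_data above is easy to misread because of the escaped braces in '{}${}}}': each answer becomes a question$option pair and the pairs are separated by '}'. A worked illustration with hypothetical picks for a three-question survey:

```python
# Hypothetical picks: option 2 for Q1, option 1 for Q2, option 3 for Q3 (all 1-based).
choices = [2, 1, 3]
parts = ['{}${}'.format(i + 1, c) for i, c in enumerate(choices)]
submitdata = '}'.join(parts)
print(submitdata)  # -> 1$2}2$1}3$3  (the same string the loop plus the trailing [:-1] produces)
```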
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/zhihu/zhihu_easy/__init__.py -------------------------------------------------------------------------------- /zhihu/zhihu_easy/configs.py: -------------------------------------------------------------------------------- 1 | # 知乎账号密码认证 2 | USERNAME = '' 3 | PASSWD = '' 4 | AUTH = '' 5 | 6 | # 用户动态其实api地址 7 | START_URL = 'https://www.zhihu.com/api/v4/members/excited-vczh/activities?limit=8&after_id=1518606558&desktop=True' 8 | 9 | # 数据库配置 10 | LOCAL_DB = { 11 | 'host': '127.0.0.1', 12 | 'user': 'root', 13 | 'password': '', 14 | 'db': '' 15 | } 16 | 17 | # 抓取用户的标志 18 | USER_SIG = 'vczh' 19 | -------------------------------------------------------------------------------- /zhihu/zhihu_easy/db_tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | from lazyspider.lazystore import LazyMysql 3 | 4 | from parse import parse_activities 5 | from configs import LOCAL_DB, USER_SIG 6 | 7 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 8 | 9 | 10 | def json_to_db(): 11 | ''' 12 | json->mysql 13 | ''' 14 | store = LazyMysql(LOCAL_DB) 15 | for file in os.listdir(BASE_DIR+'/data/'): 16 | file_abs_path = BASE_DIR+'/data/'+file 17 | # 解析json格式的文件,筛选我们要的数据 18 | res = parse_activities(file_abs_path) 19 | for data in res: 20 | try: 21 | data.update({'username': USER_SIG}) 22 | store.save_one_data(data, 'zhihu_activities') 23 | except: 24 | print('error !!!!!!!!!') 25 | print('所有文件导入完毕') 26 | 27 | 28 | if __name__ == '__main__': 29 | json_to_db() 30 | -------------------------------------------------------------------------------- /zhihu/zhihu_easy/parse.py: -------------------------------------------------------------------------------- 1 | import json 2 | from datetime import datetime 3 | 4 | 5 | def parse_activities(file_path): 6 | ''' 7 | 解析用户动态数据 8 | rtype: 9 | list 10 | ''' 11 | with open(file_path) as f: 12 | try: 13 | data = json.load(f).get('data') 14 | except: 15 | print('{}文件载入失败'.format(file_path)) 16 | return [] 17 | res = [] 18 | for action in data: 19 | verb = action['verb'] 20 | if verb == 'ANSWER_VOTE_UP' or verb == 'ANSWER_CREATE': # 赞同/回答的行为 21 | question_id = action['target']['question']['id'] 22 | question_api_url = action['target']['question']['url'] 23 | question_name = action['target']['question']['title'] 24 | 25 | answer_id = action['target']['id'] 26 | answer_api_url = action['target']['url'] 27 | answer_content = action['target']['excerpt'] 28 | answer_voteup_count = action['target']['voteup_count'] 29 | create_time = datetime.fromtimestamp( 30 | action['target']['created_time']) 31 | 32 | elif verb == 'QUESTION_FOLLOW': # 关注问题的行为 33 | question_id = action['target']['id'] 34 | question_api_url = action['target']['url'] 35 | question_name = action['target']['title'] 36 | 37 | answer_id = '' 38 | answer_api_url = '' 39 | answer_content = '' 40 | answer_voteup_count = 0 41 | create_time = datetime.fromtimestamp( 42 | action['target']['created']) 43 | 44 | else: 45 | continue 46 | 47 | res.append({ 48 | 'question_id': question_id, 49 | 'question_name': question_name, 50 | 'question_api_url': question_api_url, 51 | 'answer_id': answer_id, 52 | 'answer_api_url': answer_api_url, 53 | 'answer_content': answer_content, 54 | 'verb': verb, 55 | 'answer_voteup_count': answer_voteup_count, 56 | 'create_time': create_time, }) 57 | return res 58 | 
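parse_activities above flattens one page of the Zhihu activity feed into rows that db_tools.py later writes to MySQL. A minimal usage sketch, assuming it runs from the zhihu_easy directory after spider.py (below) has downloaded the feed pages into data/:

```python
from parse import parse_activities  # same import db_tools.py uses

# data/1.json is the first page saved by download_activs_json() in spider.py.
for row in parse_activities('data/1.json'):
    print(row['create_time'], row['verb'], row['question_name'], row['answer_voteup_count'])
```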
-------------------------------------------------------------------------------- /zhihu/zhihu_easy/spider.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import os 4 | 5 | from client import ZhihuClient 6 | from configs import USERNAME, PASSWD, AUTH, START_URL 7 | 8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 9 | 10 | 11 | def download_activs_json(s, url, count=1): 12 | ''' 13 | 获取用户信息的json信息 14 | ''' 15 | res = s.get(url).json() 16 | with open(BASE_DIR+'/data/{}.json'.format(count), 'w') as f: 17 | f.write(json.dumps(res, ensure_ascii=False)) 18 | print('正在下载第{}份动态'.format(count)) 19 | count += 1 20 | time.sleep(1) 21 | # 递归下载 直到动态下载完毕 22 | if res['paging']['is_end'] == False: 23 | next_url = res['paging']['next'] 24 | download_activs_json(s, next_url, count) 25 | else: 26 | print('所有动态下载完毕') 27 | 28 | 29 | def download_activs(): 30 | # 登录知乎 31 | s = ZhihuClient(USERNAME, PASSWD).get_session() 32 | # 增加权限认证 33 | s.headers.update({'authorization': AUTH}) 34 | download_activs_json(s, START_URL) 35 | 36 | 37 | if __name__ == "__main__": 38 | download_activs() 39 | -------------------------------------------------------------------------------- /zhihu/zhihu_easy/tools.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | 3 | import requests 4 | 5 | 6 | def get_image(url, path): 7 | res = requests.get(url, stream=True) 8 | with open(path, 'wb') as f: 9 | shutil.copyfileobj(res.raw, f) 10 | 11 | 12 | def save_html(text, name): 13 | with open(name, 'w') as f: 14 | f.write(text) 15 | -------------------------------------------------------------------------------- /浏览器模拟爬虫/001.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 测试selenium模拟浏览器 3 | 和phantomjs无页面浏览器 4 | 5 | 导出PhantomJs浏览器帮助文档 6 | ''' 7 | 8 | from selenium import webdriver 9 | import sys 10 | 11 | browser = webdriver.PhantomJS() 12 | out = sys.stdout 13 | 14 | sys.stdout = open('browserHelp.txt','w') 15 | help(browser) 16 | sys.stdout.close() 17 | sys.stdout = out 18 | browser.quit() 19 | exit() -------------------------------------------------------------------------------- /浏览器模拟爬虫/baidu.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 使用Selenium模拟浏览器 3 | 抓取百度查询结果 4 | ''' 5 | 6 | # 导入selenium模块中的web引擎 7 | from selenium import webdriver 8 | 9 | 10 | # 建立浏览器对象 ,通过Phantomjs 11 | browser = webdriver.PhantomJS() 12 | 13 | # 设置访问的url 14 | url = 'https://www.baidu.com' 15 | 16 | # 访问url 17 | browser.get(url) 18 | 19 | # 等待一定时间,让js脚本加载完毕 20 | browser.implicitly_wait(3) 21 | 22 | # 找到搜索框 23 | text = browser.find_element_by_id('kw') 24 | 25 | # 清空搜索框的文字 26 | text.clear() 27 | 28 | # 填写搜索框的文字 29 | text.send_keys('python') 30 | 31 | # 找到submit按钮 32 | button = browser.find_element_by_id('su') 33 | 34 | # 点击按钮 提交搜索请求 35 | button.submit() 36 | 37 | 38 | # 查看当前浏览器标题 39 | print(browser.title) 40 | 41 | # 以截图的方式查看浏览器的页面 42 | browser.save_screenshot('text.png') 43 | 44 | # 找到结果 结果保存为列表变量 45 | results = browser.find_elements_by_class_name('t') 46 | 47 | # 循环遍历找出每个结果的标题和url 48 | for result in results: 49 | print('标题:{} 超链接:{}'.format(result.text, 50 | result.find_element_by_tag_name('a').get_attribute('href'))) 51 | -------------------------------------------------------------------------------- /浏览器模拟爬虫/kuaiproxy.py: -------------------------------------------------------------------------------- 1 | ''' 2 | selenium模拟浏览器爬虫 
3 | 4 | 爬取快代理:http://www.kuaidaili.com/ 5 | ''' 6 | 7 | from selenium import webdriver 8 | 9 | 10 | class Item(object): 11 | ''' 12 | 我们模拟Scrapy框架 13 | 写一个item类出来, 14 | 用来表示每一个爬到的代理 15 | ''' 16 | 17 | ip = None # ip地址 18 | port = None # 端口 19 | anonymous = None # 是否匿名 20 | type = None # http or https 21 | local = None # 物理地址 22 | speed = None # 速度 23 | 24 | class GetProxy(object): 25 | ''' 26 | 获取代理的类 27 | ''' 28 | 29 | def __init__(self): 30 | ''' 31 | 初始化整个类 32 | ''' 33 | self.starturl = 'http://www.kuaidaili.com/free/inha/' 34 | self.urls = self.get_urls() 35 | self.proxylist = self.get_proxy_list(self.urls) 36 | self.filename = 'proxy.txt' 37 | self.saveFile(self.filename,self.proxylist) 38 | 39 | def get_urls(self): 40 | ''' 41 | 返回一个代理url的列表 42 | ''' 43 | urls = [] 44 | for i in range(1,2): 45 | url = self.starturl+str(i) 46 | urls.append(url) 47 | return urls 48 | 49 | def get_proxy_list(self,urls): 50 | ''' 51 | 返回抓取到代理的列表 52 | 整个爬虫的关键 53 | ''' 54 | 55 | browser = webdriver.PhantomJS() 56 | proxy_list = [] 57 | 58 | 59 | for url in urls: 60 | browser.get(url) 61 | browser.implicitly_wait(3) 62 | # 找到代理table的位置 63 | elements = browser.find_elements_by_xpath('//tbody/tr') 64 | for element in elements: 65 | item = Item() 66 | item.ip = element.find_element_by_xpath('./td[1]').text 67 | item.port = element.find_element_by_xpath('./td[2]').text 68 | item.anonymous = element.find_element_by_xpath('./td[3]').text 69 | item.local = element.find_element_by_xpath('./td[4]').text 70 | item.speed = element.find_element_by_xpath('./td[5]').text 71 | print(item.ip) 72 | proxy_list.append(item) 73 | 74 | browser.quit() 75 | return proxy_list 76 | 77 | def saveFile(self,filename,proxy_list): 78 | ''' 79 | 将爬取到的结果写到本地 80 | ''' 81 | with open(filename,'w') as f: 82 | for item in proxy_list: 83 | 84 | f.write(item.ip + '\t') 85 | f.write(item.port + '\t') 86 | f.write(item.anonymous + '\t') 87 | f.write(item.local + '\t') 88 | f.write(item.speed + '\n\n') 89 | 90 | 91 | if __name__ =='__main__': 92 | Get = GetProxy() -------------------------------------------------------------------------------- /美食杰/spider.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 美食节 各地小吃爬虫 3 | 主页url: http://www.meishij.net/ 4 | 排行榜url: http://top.meishi.cc/lanmu.php?cid=78 5 | ''' 6 | 7 | # 导入相关库 8 | import requests 9 | from bs4 import BeautifulSoup 10 | 11 | 12 | # 排行榜入口url 13 | Top_food_url = 'http://top.meishi.cc/lanmu.php?cid=3' 14 | 15 | # 家常菜谱入口url 16 | Home_food_url = 'http://top.meishi.cc/lanmu.php?cid=13' 17 | 18 | # 中华菜系入口url 19 | China_food_url = 'http://top.meishi.cc/lanmu.php?cid=2' 20 | 21 | # 外国菜入口url 22 | Foreign_food_url = 'http://top.meishi.cc/lanmu.php?cid=10' 23 | 24 | 25 | def get_html_text(url): 26 | '''获取html文本''' 27 | try: 28 | r = requests.get(url, timeout=3) 29 | r.raise_for_status 30 | r.encoding = r.apparent_encoding 31 | return r.text 32 | except: 33 | return 'error' 34 | 35 | 36 | def parse_city_id(url): 37 | '''解析对应的城市排行榜连接''' 38 | 39 | res = [] 40 | html = get_html_text(url) 41 | # 做一个简单的判断 42 | if html != 'error': 43 | soup = BeautifulSoup(html, 'lxml') 44 | # 定位到 全国各地特色小吃排行榜分类,
45 | cityids = soup.find('div', class_='rank_content_top') 46 | for city in cityids.find_all('a'): 47 | res.append({'name': city.text, 'url': city['href']}) 48 | return res 49 | else: 50 | print('error !!!!') 51 | 52 | 53 | def parse_food_info(url): 54 | '''解析对应的美食信息''' 55 | 56 | html = get_html_text(url) 57 | 58 | if html != 'error': 59 | soup = BeautifulSoup(html, 'lxml') 60 | # 定位到具体排行榜的位置 61 | foods = soup.find('div', class_='rank_content_top10_wraper') 62 | # 开始解析信息 63 | for food in foods.find_all('li'): 64 | # 寻找 食品名、做法链接、图片链接 65 | content = food.find('a', class_='img') 66 | name = content['title'] 67 | detial_url = content['href'] 68 | img_url = content.img['src'] 69 | print('正在解析美食:{}'.format(name)) 70 | # 构造一个生成器,分别返回 食物名,做法链接,图片链接 71 | yield name, detial_url, img_url 72 | else: 73 | print('error !!!!') 74 | 75 | 76 | def main(): 77 | '''程序入口''' 78 | # 构造所有起始url列表 79 | url_list = [Top_food_url, Home_food_url, China_food_url, Foreign_food_url] 80 | # 找到所有城市排行榜的url 81 | for url in url_list: 82 | # 找到该分类下的所有cid 83 | res = parse_city_id(url) 84 | for page in res: 85 | # 找到菜系名称 86 | name = page['name'] 87 | # 利用生成器迭代返回结果 88 | for food_name, detail_url, img_url in parse_food_info(page['url']): 89 | # save data 90 | print('菜系:{}\t名字:{}\n做法:{}\n图片:{}'.format( 91 | name, food_name, detail_url, img_url)) 92 | 93 | 94 | main() 95 | -------------------------------------------------------------------------------- /腾讯漫画/downloder.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import requests 5 | 6 | 7 | def download_img(name, url): 8 | with open(name, 'wb') as f: 9 | f.write(requests.get(url).content) 10 | 11 | 12 | def download_comic(comic_name, comic_id): 13 | 14 | # 读取漫画信息 15 | json_file_name = "{}.json".format(comic_id) 16 | with open(json_file_name, 'r') as f: 17 | data = json.load(f) 18 | 19 | # 创建漫画目录 20 | if not os.path.exists(comic_name): 21 | os.mkdir(comic_name) 22 | 23 | for k, v in data.items(): 24 | title = k + '-' + v['title'] 25 | 26 | # 创建章节目录 27 | path = os.path.join(comic_name, title) 28 | if not os.path.exists(path): 29 | os.mkdir(path) 30 | for index, v in enumerate(v['pics']): 31 | name = os.path.join(path, "{}.png".format(index)) 32 | download_img(name, v['url']) 33 | print(title, '下载完毕') 34 | 35 | 36 | def main(): 37 | comic_name = "女巫" 38 | comic_id = 632784 39 | print('开始下载漫画:', comic_name) 40 | download_comic(comic_name, comic_id) 41 | 42 | 43 | if __name__ == '__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /豆瓣影评/锤神3/config.py: -------------------------------------------------------------------------------- 1 | 2 | EHCO_DB = { 3 | 'host': '127.0.0.1', 4 | 'user': 'root', 5 | 'password': 'xxx', 6 | 'db': 'EhcoTestDb' 7 | } 8 | -------------------------------------------------------------------------------- /豆瓣影评/锤神3/play_data.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 对抓取的影评数据 3 | 进行基本的分析统计 4 | 最后并生成词云 5 | ''' 6 | 7 | ''' 8 | 01 最早时间 - - 因为国内外上映时间不一 9 | 统计哪天的评论数量最多 10 | 02 推荐程度 - - 前10000 频率统计 11 | 03 评论内容 - - 前100 评论内容词频分析,做成词云 12 | 13 | ''' 14 | # 导入配置文件和数据库支持 15 | import config 16 | from stroe import DbToMysql 17 | from datetime import datetime 18 | import jieba 19 | 20 | # 初始化数据库链接 21 | store = DbToMysql(config.EHCO_DB) 22 | 23 | 24 | def format_to_week(day): 25 | ''' 26 | 将形如这样的日期转换为周x 27 | ''' 28 | day_map = { 29 | 0: '周一', 30 | 1: '周二', 31 | 2: '周三', 32 | 3: '周四', 33 | 4: 
'周五', 34 | 5: '周六', 35 | 6: '周日', 36 | } 37 | week = datetime.strptime(day, "%Y-%m-%d").weekday() 38 | return day_map[week] 39 | 40 | 41 | 42 | # 统计评论出现的日期,不同日期下出现的评论数量 43 | date_list = store.find_all('GodOfHammer_1', 19000) 44 | 45 | # 建立统计dict 46 | dateSet = {} 47 | for data in date_list: 48 | week = format_to_week(data['time']) 49 | if week not in dateSet.keys(): 50 | dateSet[week] = 1 51 | else: 52 | dateSet[week] += 1 53 | print(dateSet) 54 | ''' 55 | 结果: 56 | {'周三': 192, '周四': 234, '周五': 4518, '周二': 109, '周六': 6219, '周日': 5441, '周一': 2287} 57 | ''' 58 | 59 | ''' 60 | # 查询点赞数量排名前10000的留言的推荐程度 61 | recommend_level_list = store.find_by_sort('GodOfHammer_1', 'vote', 10000) 62 | 63 | # 建立统计dict 64 | recommendSet = {} 65 | # 开始统计不同推荐程度出现的次数 66 | for data in recommend_level_list: 67 | if data['star'] not in recommendSet.keys(): 68 | recommendSet[data['star']] = 1 69 | else: 70 | recommendSet[data['star']] += 1 71 | print(recommendSet) 72 | ''' 73 | 74 | ''' 75 | 76 | # 截取前100条热门评论并进行分词统计 77 | comment_data = store.find_by_sort('GodOfHammer_1', 'vote', '100') 78 | comment_detail_list = [] 79 | for data in comment_data: 80 | comment_detail_list.append(data['content']) 81 | # 利用结巴分词工具分词 82 | seg_list = jieba.cut(' '.join(comment_detail_list)) 83 | for word in seg_list: 84 | print(word) 85 | ''' 86 | 87 | 88 | --------------------------------------------------------------------------------
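# --- Editor's note: hedged sketch extending 豆瓣影评/锤神3/play_data.py ---
# The docstring in play_data.py promises a word cloud, but the script stops at
# printing jieba tokens. The snippet below is one minimal way to finish that
# step, assuming the third-party `wordcloud` package is installed and that
# `font.ttf` is a locally available font able to render Chinese glyphs (both
# are assumptions, not part of the original project).
import jieba
from wordcloud import WordCloud

import config
from stroe import DbToMysql


def build_word_cloud(out_path='comments_wordcloud.png', top_n=100):
    '''Segment the top-N hottest comments and render them as a word cloud.'''
    store = DbToMysql(config.EHCO_DB)
    # reuse the same table and sort key as play_data.py
    comments = [row['content']
                for row in store.find_by_sort('GodOfHammer_1', 'vote', top_n)]
    # jieba.cut returns a generator of tokens; drop single-character noise
    words = [w for w in jieba.cut(' '.join(comments)) if len(w) > 1]
    wc = WordCloud(font_path='font.ttf', width=800, height=600,
                   background_color='white').generate(' '.join(words))
    wc.to_file(out_path)


if __name__ == '__main__':
    build_word_cloud()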