├── .gitignore
├── Beautiful Soup 爬虫
    ├── 001.py
    ├── 002.py
    ├── 003.py
    ├── baidutieba.py
    ├── bocai.py
    ├── demo.html
    ├── dianying.py
    ├── kouhong.py
    ├── myiron.py
    ├── parse_station.py
    ├── qiubai.py
    ├── stations.py
    ├── trainticket.py
    ├── xiaoshuopaihang.py
    ├── yueyintai.py
    ├── 反爬虫.py
    ├── 沪铜数据.py
    └── 贴吧帖子结构分析.html
├── Google-Image
    └── spider.py
├── README.md
├── Scrapy 爬虫框架
    ├── 001.py
    ├── biquge
    │   ├── biquge
    │   │   ├── __init__.py
    │   │   ├── items.py
    │   │   ├── middlewares.py
    │   │   ├── pipelines.py
    │   │   ├── settings.py
    │   │   └── spiders
    │   │   │   ├── __init__.py
    │   │   │   ├── sjzh.py
    │   │   │   └── xsphspider.py
    │   └── scrapy.cfg
    ├── demo.xml
    ├── mzitu
    │   ├── mzitu
    │   │   ├── __init__.py
    │   │   ├── items.py
    │   │   ├── middlewares.py
    │   │   ├── pipelines.py
    │   │   ├── settings.py
    │   │   └── spiders
    │   │   │   ├── __init__.py
    │   │   │   └── mezitu.py
    │   └── scrapy.cfg
    ├── proxy
    │   ├── proxy
    │   │   ├── __init__.py
    │   │   ├── items.py
    │   │   ├── middlewares.py
    │   │   ├── pipelines.py
    │   │   ├── settings.py
    │   │   └── spiders
    │   │   │   ├── __init__.py
    │   │   │   ├── dxdlspider.py
    │   │   │   └── kdlspider.py
    │   └── scrapy.cfg
    ├── test_proxy.py
    ├── weather
    │   ├── scrapy.cfg
    │   └── weather
    │   │   ├── __init__.py
    │   │   ├── data
    │   │       └── weather.json
    │   │   ├── items.py
    │   │   ├── middlewares.py
    │   │   ├── pipelines.py
    │   │   ├── settings.py
    │   │   └── spiders
    │   │       ├── SZtianqi.py
    │   │       └── __init__.py
    ├── xiubai
    │   ├── scrapy.cfg
    │   └── xiubai
    │   │   ├── __init__.py
    │   │   ├── items.py
    │   │   ├── middlewares.py
    │   │   ├── middlewares
    │   │       ├── __init__.py
    │   │       ├── coustomProxy.py
    │   │       ├── coustomUserAgent.py
    │   │       └── proxy.py
    │   │   ├── pipelines.py
    │   │   ├── settings.py
    │   │   └── spiders
    │   │       ├── __init__.py
    │   │       └── hotspider.py
    └── zimuku
    │   ├── scrapy.cfg
    │   └── zimuku
    │       ├── __init__.py
    │       ├── items.py
    │       ├── middlewares.py
    │       ├── pipelines.py
    │       ├── settings.py
    │       └── spiders
    │           ├── __init__.py
    │           └── demo.py
├── YHShop
    ├── citydict.py
    ├── cityid.html
    ├── handler.py
    ├── spider.py
    └── tools.py
├── alipay
    ├── alipay_v1.py
    ├── alipay_v2.py
    └── alipay_v3.py
├── doubanmovie
    ├── config.py
    ├── data.py
    ├── data
    │   ├── beautifulTop250.json
    │   ├── cached_douban
    │   │   ├── 0.html
    │   │   ├── 100.html
    │   │   ├── 125.html
    │   │   ├── 150.html
    │   │   ├── 175.html
    │   │   ├── 200.html
    │   │   ├── 225.html
    │   │   ├── 25.html
    │   │   ├── 50.html
    │   │   └── 75.html
    │   ├── cached_pansou
    │   │   ├── 7号房的礼物 .json
    │   │   ├── E.T. 外星人 .json
    │   │   ├── V字仇杀队 .json
    │   │   ├── 一一 .json
    │   │   ├── 一次别离 .json
    │   │   ├── 七宗罪 .json
    │   │   ├── 七武士 .json
    │   │   ├── 三傻大闹宝莱坞 .json
    │   │   ├── 上帝之城 .json
    │   │   ├── 上帝也疯狂 .json
    │   │   ├── 与狼共舞 .json
    │   │   ├── 东京物语 .json
    │   │   ├── 东邪西毒 .json
    │   │   ├── 两杆大烟枪 .json
    │   │   ├── 乱世佳人 .json
    │   │   ├── 二十二 .json
    │   │   ├── 人工智能 .json
    │   │   ├── 低俗小说 .json
    │   │   ├── 侧耳倾听 .json
    │   │   ├── 借东西的小人阿莉埃蒂 .json
    │   │   ├── 倩女幽魂 .json
    │   │   ├── 偷拐抢骗 .json
    │   │   ├── 傲慢与偏见 .json
    │   │   ├── 入殓师 .json
    │   │   ├── 再次出发之纽约遇见你 .json
    │   │   ├── 冰川时代 .json
    │   │   ├── 初恋这件小事 .json
    │   │   ├── 剪刀手爱德华 .json
    │   │   ├── 加勒比海盗 .json
    │   │   ├── 勇士 .json
    │   │   ├── 勇敢的心 .json
    │   │   ├── 勇闯夺命岛 .json
    │   │   ├── 十二怒汉 .json
    │   │   ├── 千与千寻 .json
    │   │   ├── 千钧一发 .json
    │   │   ├── 卡萨布兰卡 .json
    │   │   ├── 卢旺达饭店 .json
    │   │   ├── 发条橙 .json
    │   │   ├── 变脸 .json
    │   │   ├── 叫我第一名 .json
    │   │   ├── 可可西里 .json
    │   │   ├── 告白 .json
    │   │   ├── 哈利·波特与死亡圣器(下) .json
    │   │   ├── 哈利·波特与魔法石 .json
    │   │   ├── 哈尔的移动城堡 .json
    │   │   ├── 哪吒闹海 .json
    │   │   ├── 唐伯虎点秋香 .json
    │   │   ├── 喜剧之王 .json
    │   │   ├── 喜宴 .json
    │   │   ├── 国王的演讲 .json
    │   │   ├── 地球上的星星 .json
    │   │   ├── 城市之光 .json
    │   │   ├── 夜访吸血鬼 .json
    │   │   ├── 大卫·戈尔的一生 .json
    │   │   ├── 大话西游之大圣娶亲 .json
    │   │   ├── 大话西游之月光宝盒 .json
    │   │   ├── 大闹天宫 .json
    │   │   ├── 大鱼 .json
    │   │   ├── 天使爱美丽 .json
    │   │   ├── 天堂电影院 .json
    │   │   ├── 天空之城 .json
    │   │   ├── 头脑特工队 .json
    │   │   ├── 完美的世界 .json
    │   │   ├── 寿司之神 .json
    │   │   ├── 射雕英雄传之东成西就 .json
    │   │   ├── 小森林 冬春篇 .json
    │   │   ├── 小森林 夏秋篇 .json
    │   │   ├── 小鞋子 .json
    │   │   ├── 少年派的奇幻漂流 .json
    │   │   ├── 岁月神偷 .json
    │   │   ├── 巴黎淘气帮 .json
    │   │   ├── 布达佩斯大饭店 .json
    │   │   ├── 幸福终点站 .json
    │   │   ├── 幽灵公主 .json
    │   │   ├── 廊桥遗梦 .json
    │   │   ├── 当幸福来敲门 .json
    │   │   ├── 彗星来的那一夜 .json
    │   │   ├── 心灵捕手 .json
    │   │   ├── 心迷宫 .json
    │   │   ├── 忠犬八公物语 .json
    │   │   ├── 忠犬八公的故事 .json
    │   │   ├── 怦然心动 .json
    │   │   ├── 怪兽电力公司 .json
    │   │   ├── 恋恋笔记本 .json
    │   │   ├── 恐怖游轮 .json
    │   │   ├── 恐怖直播 .json
    │   │   ├── 情书 .json
    │   │   ├── 惊魂记 .json
    │   │   ├── 我是山姆 .json
    │   │   ├── 我爱你 .json
    │   │   ├── 战争之王 .json
    │   │   ├── 房间 .json
    │   │   ├── 拯救大兵瑞恩 .json
    │   │   ├── 指环王1：魔戒再现 .json
    │   │   ├── 指环王2：双塔奇兵 .json
    │   │   ├── 指环王3：王者无敌 .json
    │   │   ├── 控方证人 .json
    │   │   ├── 搏击俱乐部 .json
    │   │   ├── 摩登时代 .json
    │   │   ├── 撞车 .json
    │   │   ├── 放牛班的春天.json
    │   │   ├── 放牛班的春天 .json
    │   │   ├── 教父2 .json
    │   │   ├── 教父3 .json
    │   │   ├── 教父 .json
    │   │   ├── 断背山 .json
    │   │   ├── 新龙门客栈 .json
    │   │   ├── 无敌破坏王 .json
    │   │   ├── 无耻混蛋 .json
    │   │   ├── 无间道 .json
    │   │   ├── 时空恋旅人 .json
    │   │   ├── 星际穿越 .json
    │   │   ├── 春光乍泄 .json
    │   │   ├── 暖暖内含光 .json
    │   │   ├── 曾经 .json
    │   │   ├── 月球 .json
    │   │   ├── 朗读者 .json
    │   │   ├── 未麻的部屋 .json
    │   │   ├── 末代皇帝 .json
    │   │   ├── 末路狂花 .json
    │   │   ├── 本杰明·巴顿奇事 .json
    │   │   ├── 机器人总动员 .json
    │   │   ├── 杀人回忆 .json
    │   │   ├── 枪火 .json
    │   │   ├── 梦之安魂曲 .json
    │   │   ├── 楚门的世界 .json
    │   │   ├── 模仿游戏 .json
    │   │   ├── 步履不停 .json
    │   │   ├── 死亡诗社 .json
    │   │   ├── 沉默的羔羊 .json
    │   │   ├── 泰坦尼克号 .json
    │   │   ├── 活着 .json
    │   │   ├── 浪潮 .json
    │   │   ├── 海上钢琴师 .json
    │   │   ├── 海洋 .json
    │   │   ├── 海盗电台 .json
    │   │   ├── 海豚湾 .json
    │   │   ├── 海边的曼彻斯特 .json
    │   │   ├── 消失的爱人 .json
    │   │   ├── 源代码 .json
    │   │   ├── 熔炉 .json
    │   │   ├── 燃情岁月 .json
    │   │   ├── 燕尾蝶 .json
    │   │   ├── 爆裂鼓手 .json
    │   │   ├── 爱·回家 .json
    │   │   ├── 爱在午夜降临前 .json
    │   │   ├── 爱在日落黄昏时 .json
    │   │   ├── 爱在黎明破晓前 .json
    │   │   ├── 牯岭街少年杀人事件 .json
    │   │   ├── 狩猎 .json
    │   │   ├── 狮子王 .json
    │   │   ├── 猜火车 .json
    │   │   ├── 猫鼠游戏 .json
    │   │   ├── 玛丽和马克思 .json
    │   │   ├── 玩具总动员3 .json
    │   │   ├── 甜蜜蜜 .json
    │   │   ├── 电锯惊魂 .json
    │   │   ├── 疯狂动物城 .json
    │   │   ├── 疯狂原始人 .json
    │   │   ├── 疯狂的石头 .json
    │   │   ├── 盗梦空间 .json
    │   │   ├── 看不见的客人 .json
    │   │   ├── 真爱至上 .json
    │   │   ├── 碧海蓝天 .json
    │   │   ├── 神偷奶爸 .json
    │   │   ├── 禁闭岛 .json
    │   │   ├── 秒速5厘米 .json
    │   │   ├── 穆赫兰道 .json
    │   │   ├── 穿条纹睡衣的男孩 .json
    │   │   ├── 穿越时空的少女 .json
    │   │   ├── 窃听风暴 .json
    │   │   ├── 第六感 .json
    │   │   ├── 素媛 .json
    │   │   ├── 红辣椒 .json
    │   │   ├── 纵横四海 .json
    │   │   ├── 终结者2：审判日 .json
    │   │   ├── 绿里奇迹 .json
    │   │   ├── 罗生门 .json
    │   │   ├── 罗马假日 .json
    │   │   ├── 美丽人生 .json
    │   │   ├── 美丽心灵 .json
    │   │   ├── 美国丽人 .json
    │   │   ├── 美国往事 .json
    │   │   ├── 肖申克的救赎 .json
    │   │   ├── 致命ID .json
    │   │   ├── 致命魔术 .json
    │   │   ├── 花样年华 .json
    │   │   ├── 英国病人 .json
    │   │   ├── 英雄本色 .json
    │   │   ├── 荒岛余生 .json
    │   │   ├── 荒蛮故事 .json
    │   │   ├── 荒野生存 .json
    │   │   ├── 菊次郎的夏天 .json
    │   │   ├── 萤火之森 .json
    │   │   ├── 萤火虫之墓 .json
    │   │   ├── 蓝色大门 .json
    │   │   ├── 虎口脱险 .json
    │   │   ├── 蝙蝠侠：黑暗骑士 .json
    │   │   ├── 蝙蝠侠：黑暗骑士崛起 .json
    │   │   ├── 蝴蝶 .json
    │   │   ├── 蝴蝶效应 .json
    │   │   ├── 血钻 .json
    │   │   ├── 被嫌弃的松子的一生 .json
    │   │   ├── 被解救的姜戈 .json
    │   │   ├── 西西里的美丽传说 .json
    │   │   ├── 触不可及 .json
    │   │   ├── 让子弹飞 .json
    │   │   ├── 记忆碎片 .json
    │   │   ├── 谍影重重2 .json
    │   │   ├── 谍影重重3 .json
    │   │   ├── 谍影重重 .json
    │   │   ├── 贫民窟的百万富翁 .json
    │   │   ├── 超能陆战队 .json
    │   │   ├── 超脱 .json
    │   │   ├── 辛德勒的名单 .json
    │   │   ├── 辩护人 .json
    │   │   ├── 达拉斯买家俱乐部 .json
    │   │   ├── 迁徙的鸟 .json
    │   │   ├── 这个杀手不太冷 .json
    │   │   ├── 这个男人来自地球 .json
    │   │   ├── 追随 .json
    │   │   ├── 遗愿清单 .json
    │   │   ├── 重庆森林 .json
    │   │   ├── 钢琴家 .json
    │   │   ├── 闻香识女人 .json
    │   │   ├── 阳光姐妹淘 .json
    │   │   ├── 阳光灿烂的日子 .json
    │   │   ├── 阿凡达 .json
    │   │   ├── 阿甘正传 .json
    │   │   ├── 阿飞正传 .json
    │   │   ├── 雨中曲 .json
    │   │   ├── 雨人 .json
    │   │   ├── 霸王别姬 .json
    │   │   ├── 青蛇 .json
    │   │   ├── 非常嫌疑犯 .json
    │   │   ├── 音乐之声 .json
    │   │   ├── 风之谷 .json
    │   │   ├── 飞屋环游记 .json
    │   │   ├── 飞越疯人院 .json
    │   │   ├── 饮食男女 .json
    │   │   ├── 香水 .json
    │   │   ├── 驯龙高手 .json
    │   │   ├── 鬼子来了 .json
    │   │   ├── 魂断蓝桥 .json
    │   │   ├── 魔女宅急便 .json
    │   │   ├── 麦兜故事 .json
    │   │   ├── 黄金三镖客 .json
    │   │   ├── 黑天鹅 .json
    │   │   ├── 黑客帝国3：矩阵革命 .json
    │   │   ├── 黑客帝国 .json
    │   │   └── 龙猫 .json
    │   ├── doubanTop250.json
    │   └── 豆瓣电影250.xls
    ├── doubanspider.py
    └── panspider.py
├── douyu
    ├── douyu_danmu.py
    └── douyu_test.py
├── gamedownload
    ├── fcspider.py
    └── readme.md
├── ithome
    ├── apple.json
    ├── city_count_res.py
    ├── config.py
    ├── datahandleer.py
    ├── pipeline.py
    └── spider.py
├── mazhifu
    ├── config.py
    ├── readme.md
    ├── requirements.txt
    └── spider.py
├── p2p
    ├── .vscode
    │   └── launch.json
    ├── data
    │   ├── 1.json
    │   ├── 10.json
    │   ├── 100.json
    │   ├── 101.json
    │   ├── 102.json
    │   ├── 103.json
    │   ├── 104.json
    │   ├── 105.json
    │   ├── 106.json
    │   ├── 107.json
    │   ├── 108.json
    │   ├── 109.json
    │   ├── 11.json
    │   ├── 110.json
    │   ├── 111.json
    │   ├── 112.json
    │   ├── 113.json
    │   ├── 114.json
    │   ├── 115.json
    │   ├── 116.json
    │   ├── 117.json
    │   ├── 118.json
    │   ├── 119.json
    │   ├── 12.json
    │   ├── 120.json
    │   ├── 121.json
    │   ├── 122.json
    │   ├── 123.json
    │   ├── 124.json
    │   ├── 125.json
    │   ├── 126.json
    │   ├── 127.json
    │   ├── 128.json
    │   ├── 129.json
    │   ├── 13.json
    │   ├── 130.json
    │   ├── 131.json
    │   ├── 132.json
    │   ├── 133.json
    │   ├── 134.json
    │   ├── 135.json
    │   ├── 136.json
    │   ├── 137.json
    │   ├── 138.json
    │   ├── 139.json
    │   ├── 14.json
    │   ├── 140.json
    │   ├── 141.json
    │   ├── 142.json
    │   ├── 143.json
    │   ├── 144.json
    │   ├── 145.json
    │   ├── 146.json
    │   ├── 147.json
    │   ├── 148.json
    │   ├── 149.json
    │   ├── 15.json
    │   ├── 150.json
    │   ├── 151.json
    │   ├── 152.json
    │   ├── 153.json
    │   ├── 154.json
    │   ├── 155.json
    │   ├── 156.json
    │   ├── 157.json
    │   ├── 158.json
    │   ├── 159.json
    │   ├── 16.json
    │   ├── 160.json
    │   ├── 161.json
    │   ├── 162.json
    │   ├── 163.json
    │   ├── 164.json
    │   ├── 165.json
    │   ├── 166.json
    │   ├── 167.json
    │   ├── 168.json
    │   ├── 169.json
    │   ├── 17.json
    │   ├── 170.json
    │   ├── 171.json
    │   ├── 172.json
    │   ├── 173.json
    │   ├── 174.json
    │   ├── 175.json
    │   ├── 176.json
    │   ├── 177.json
    │   ├── 178.json
    │   ├── 179.json
    │   ├── 18.json
    │   ├── 180.json
    │   ├── 181.json
    │   ├── 182.json
    │   ├── 183.json
    │   ├── 184.json
    │   ├── 185.json
    │   ├── 186.json
    │   ├── 187.json
    │   ├── 188.json
    │   ├── 189.json
    │   ├── 19.json
    │   ├── 190.json
    │   ├── 191.json
    │   ├── 192.json
    │   ├── 193.json
    │   ├── 194.json
    │   ├── 195.json
    │   ├── 196.json
    │   ├── 197.json
    │   ├── 198.json
    │   ├── 199.json
    │   ├── 2.json
    │   ├── 20.json
    │   ├── 200.json
    │   ├── 201.json
    │   ├── 202.json
    │   ├── 203.json
    │   ├── 204.json
    │   ├── 205.json
    │   ├── 206.json
    │   ├── 207.json
    │   ├── 208.json
    │   ├── 209.json
    │   ├── 21.json
    │   ├── 210.json
    │   ├── 211.json
    │   ├── 212.json
    │   ├── 213.json
    │   ├── 214.json
    │   ├── 215.json
    │   ├── 216.json
    │   ├── 217.json
    │   ├── 218.json
    │   ├── 219.json
    │   ├── 22.json
    │   ├── 220.json
    │   ├── 221.json
    │   ├── 222.json
    │   ├── 23.json
    │   ├── 24.json
    │   ├── 25.json
    │   ├── 26.json
    │   ├── 27.json
    │   ├── 28.json
    │   ├── 29.json
    │   ├── 3.json
    │   ├── 30.json
    │   ├── 31.json
    │   ├── 32.json
    │   ├── 33.json
    │   ├── 34.json
    │   ├── 35.json
    │   ├── 36.json
    │   ├── 37.json
    │   ├── 38.json
    │   ├── 39.json
    │   ├── 4.json
    │   ├── 40.json
    │   ├── 41.json
    │   ├── 42.json
    │   ├── 43.json
    │   ├── 44.json
    │   ├── 45.json
    │   ├── 46.json
    │   ├── 47.json
    │   ├── 48.json
    │   ├── 49.json
    │   ├── 5.json
    │   ├── 50.json
    │   ├── 51.json
    │   ├── 52.json
    │   ├── 53.json
    │   ├── 54.json
    │   ├── 55.json
    │   ├── 56.json
    │   ├── 57.json
    │   ├── 58.json
    │   ├── 59.json
    │   ├── 6.json
    │   ├── 60.json
    │   ├── 61.json
    │   ├── 62.json
    │   ├── 63.json
    │   ├── 64.json
    │   ├── 65.json
    │   ├── 66.json
    │   ├── 67.json
    │   ├── 68.json
    │   ├── 69.json
    │   ├── 7.json
    │   ├── 70.json
    │   ├── 71.json
    │   ├── 72.json
    │   ├── 73.json
    │   ├── 74.json
    │   ├── 75.json
    │   ├── 76.json
    │   ├── 77.json
    │   ├── 78.json
    │   ├── 79.json
    │   ├── 8.json
    │   ├── 80.json
    │   ├── 81.json
    │   ├── 82.json
    │   ├── 83.json
    │   ├── 84.json
    │   ├── 85.json
    │   ├── 86.json
    │   ├── 87.json
    │   ├── 88.json
    │   ├── 89.json
    │   ├── 9.json
    │   ├── 90.json
    │   ├── 91.json
    │   ├── 92.json
    │   ├── 93.json
    │   ├── 94.json
    │   ├── 95.json
    │   ├── 96.json
    │   ├── 97.json
    │   ├── 98.json
    │   └── 99.json
    ├── json
    │   ├── 1.json
    │   ├── 10.json
    │   ├── 100.json
    │   ├── 101.json
    │   ├── 102.json
    │   ├── 103.json
    │   ├── 104.json
    │   ├── 105.json
    │   ├── 106.json
    │   ├── 107.json
    │   ├── 108.json
    │   ├── 109.json
    │   ├── 11.json
    │   ├── 110.json
    │   ├── 111.json
    │   ├── 112.json
    │   ├── 113.json
    │   ├── 114.json
    │   ├── 115.json
    │   ├── 116.json
    │   ├── 117.json
    │   ├── 118.json
    │   ├── 119.json
    │   ├── 12.json
    │   ├── 120.json
    │   ├── 121.json
    │   ├── 122.json
    │   ├── 123.json
    │   ├── 124.json
    │   ├── 125.json
    │   ├── 126.json
    │   ├── 127.json
    │   ├── 128.json
    │   ├── 129.json
    │   ├── 13.json
    │   ├── 130.json
    │   ├── 131.json
    │   ├── 132.json
    │   ├── 133.json
    │   ├── 134.json
    │   ├── 135.json
    │   ├── 136.json
    │   ├── 137.json
    │   ├── 138.json
    │   ├── 139.json
    │   ├── 14.json
    │   ├── 140.json
    │   ├── 141.json
    │   ├── 142.json
    │   ├── 143.json
    │   ├── 144.json
    │   ├── 145.json
    │   ├── 146.json
    │   ├── 147.json
    │   ├── 148.json
    │   ├── 149.json
    │   ├── 15.json
    │   ├── 150.json
    │   ├── 151.json
    │   ├── 152.json
    │   ├── 153.json
    │   ├── 154.json
    │   ├── 155.json
    │   ├── 156.json
    │   ├── 157.json
    │   ├── 158.json
    │   ├── 159.json
    │   ├── 16.json
    │   ├── 160.json
    │   ├── 161.json
    │   ├── 162.json
    │   ├── 163.json
    │   ├── 164.json
    │   ├── 165.json
    │   ├── 166.json
    │   ├── 167.json
    │   ├── 168.json
    │   ├── 169.json
    │   ├── 17.json
    │   ├── 170.json
    │   ├── 171.json
    │   ├── 172.json
    │   ├── 173.json
    │   ├── 174.json
    │   ├── 175.json
    │   ├── 176.json
    │   ├── 177.json
    │   ├── 178.json
    │   ├── 179.json
    │   ├── 18.json
    │   ├── 180.json
    │   ├── 181.json
    │   ├── 182.json
    │   ├── 183.json
    │   ├── 184.json
    │   ├── 185.json
    │   ├── 186.json
    │   ├── 187.json
    │   ├── 188.json
    │   ├── 189.json
    │   ├── 19.json
    │   ├── 190.json
    │   ├── 191.json
    │   ├── 192.json
    │   ├── 193.json
    │   ├── 194.json
    │   ├── 195.json
    │   ├── 196.json
    │   ├── 197.json
    │   ├── 198.json
    │   ├── 199.json
    │   ├── 2.json
    │   ├── 20.json
    │   ├── 200.json
    │   ├── 201.json
    │   ├── 202.json
    │   ├── 203.json
    │   ├── 204.json
    │   ├── 205.json
    │   ├── 206.json
    │   ├── 207.json
    │   ├── 208.json
    │   ├── 209.json
    │   ├── 21.json
    │   ├── 210.json
    │   ├── 211.json
    │   ├── 212.json
    │   ├── 213.json
    │   ├── 214.json
    │   ├── 215.json
    │   ├── 216.json
    │   ├── 217.json
    │   ├── 218.json
    │   ├── 219.json
    │   ├── 22.json
    │   ├── 220.json
    │   ├── 221.json
    │   ├── 222.json
    │   ├── 23.json
    │   ├── 24.json
    │   ├── 25.json
    │   ├── 26.json
    │   ├── 27.json
    │   ├── 28.json
    │   ├── 29.json
    │   ├── 3.json
    │   ├── 30.json
    │   ├── 31.json
    │   ├── 32.json
    │   ├── 33.json
    │   ├── 34.json
    │   ├── 35.json
    │   ├── 36.json
    │   ├── 37.json
    │   ├── 38.json
    │   ├── 39.json
    │   ├── 4.json
    │   ├── 40.json
    │   ├── 41.json
    │   ├── 42.json
    │   ├── 43.json
    │   ├── 44.json
    │   ├── 45.json
    │   ├── 46.json
    │   ├── 47.json
    │   ├── 48.json
    │   ├── 49.json
    │   ├── 5.json
    │   ├── 50.json
    │   ├── 51.json
    │   ├── 52.json
    │   ├── 53.json
    │   ├── 54.json
    │   ├── 55.json
    │   ├── 56.json
    │   ├── 57.json
    │   ├── 58.json
    │   ├── 59.json
    │   ├── 6.json
    │   ├── 60.json
    │   ├── 61.json
    │   ├── 62.json
    │   ├── 63.json
    │   ├── 64.json
    │   ├── 65.json
    │   ├── 66.json
    │   ├── 67.json
    │   ├── 68.json
    │   ├── 69.json
    │   ├── 7.json
    │   ├── 70.json
    │   ├── 71.json
    │   ├── 72.json
    │   ├── 73.json
    │   ├── 74.json
    │   ├── 75.json
    │   ├── 76.json
    │   ├── 77.json
    │   ├── 78.json
    │   ├── 79.json
    │   ├── 8.json
    │   ├── 80.json
    │   ├── 81.json
    │   ├── 82.json
    │   ├── 83.json
    │   ├── 84.json
    │   ├── 85.json
    │   ├── 86.json
    │   ├── 87.json
    │   ├── 88.json
    │   ├── 89.json
    │   ├── 9.json
    │   ├── 90.json
    │   ├── 91.json
    │   ├── 92.json
    │   ├── 93.json
    │   ├── 94.json
    │   ├── 95.json
    │   ├── 96.json
    │   ├── 97.json
    │   ├── 98.json
    │   └── 99.json
    └── p2p_spider.py
├── requestes基本使用
    ├── 001.py
    ├── 002.py
    ├── login.py
    └── test.py
├── sougou
    ├── configs.py
    ├── jiebao.py
    ├── spider
    │   ├── log_SougouDownloader.log.20171118
    │   ├── log_SougouSpider.log.20171118
    │   └── spider.py
    ├── store_new
    │   ├── __init__.py
    │   └── stroe.py
    └── utils
    │   ├── __init__.py
    │   └── tools.py
├── toapi-91baby
    ├── .gitignore
    ├── app.py
    ├── data.sqlite
    ├── items
    │   ├── __init__.py
    │   ├── book.py
    │   ├── hotbook.py
    │   └── search.py
    ├── settings.py
    ├── test.py
    └── wsgi.py
├── wenjuanxin
    ├── configs.py
    └── spider.py
├── zhihu
    └── zhihu_easy
    │   ├── __init__.py
    │   ├── client.py
    │   ├── configs.py
    │   ├── db_tools.py
    │   ├── parse.py
    │   ├── playdata.py
    │   ├── spider.py
    │   └── tools.py
├── 悦美整形
    └── spider.py
├── 浏览器模拟爬虫
    ├── 001.py
    ├── baidu.py
    ├── kuaiproxy.py
    ├── manhua.py
    └── manhua_mutiprocessing.py
├── 美食杰
    └── spider.py
├── 腾讯漫画
    ├── 632784.json
    ├── downloder.py
    ├── one.json
    └── spider.py
└── 豆瓣影评
    └── 锤神3
        ├── config.py
        ├── play_data.py
        ├── spider.py
        └── stroe.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | 
91 | # mac
92 | .DS_Store
93 | 
94 | # vs code
95 | settings.json
96 | 


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/001.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 通过一些小例子来了解bs4库的基本使用，
 3 | 本篇为lxml解析器的使用
 4 | 解析的html为当前目录下的demo.html
 5 | '''
 6 | import bs4
 7 | 
 8 | 
 9 | #首先我们先将html文件已lxml的方式做成一锅汤
10 | soup = bs4.BeautifulSoup(open('Beautiful Soup 爬虫/demo.html'),'lxml')
11 | 
12 | #我们把结果输出一下，是一个很清晰的树形结构。
13 | #print(soup.prettify())
14 | 
15 | '''
16 | OUT:
17 | 
18 | <html>
19 |  <head>
20 |   <title>
21 |    The Dormouse's story
22 |   </title>
23 |  </head>
24 |  <body>
25 |   <p class="title">
26 |    <b>
27 |     The Dormouse's story
28 |    </b>
29 |   </p>
30 |   <p class="story">
31 |    Once upon a time there were three little sisters; and their names were
32 |    <a class="sister" href="http://example.com/elsie" id="link1">
33 |     Elsie
34 |    </a>
35 |    ,
36 |    <a class="sister" href="http://example.com/lacie" id="link2">
37 |     Lacie
38 |    </a>
39 |    and
40 |    <a class="sister" href="http://example.com/tillie" id="link3">
41 |     Tillie
42 |    </a>
43 |    ;
44 | and they lived at the bottom of a well.
45 |   </p>
46 |   <p class="story">
47 |    ...
48 |   </p>
49 |  </body>
50 | </html>
51 | '''


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/002.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 简单的bs4库的使用
 3 | '''
 4 | 
 5 | import bs4
 6 | 
 7 | 
 8 | #首先我们先将html文件已lxml的方式做成一锅汤
 9 | soup = bs4.BeautifulSoup(open('Beautiful Soup 爬虫/demo.html'),'lxml')
10 | 
11 | #找到head标签里的内容
12 | #print (soup.head)
13 | 
14 | #找到所有的text内容
15 | #print (soup.text)
16 | 
17 | #找到第一个a标签
18 | #print (soup.a)
19 | 
20 | #找到所有a标签
21 | #print (soup.find_all('a'))
22 | 
23 | #找到a标签下的所有子节点，一列表方式返回
24 | #print(soup.a.contents)
25 | 
26 | #通过.children生成器，我们可以循环迭代出每一个子节点
27 | #tag = soup.body
28 | #for s in tag.children:
29 | #    print(s)
30 | 
31 | #通过迭代找到所的string
32 | for i in soup.strings:
33 |     print(i)


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/003.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Learning the re library with Python --- regular expression basics
 3 | 
 4 | '''
 5 | 
 6 | import re
 7 | 
 8 | test = 'python is the best language , pretty good !'
 9 | # findall returns every non-overlapping run of one or more 'p' characters
10 | p = re.findall('p+',test)
11 | 
12 | print(p)  # ['p', 'p']
13 | 
14 | # re.search: find the first place the pattern matches and return a match object
15 | '''
16 | re.search(pattern, string, flags=0) 
17 | 在一个字符串中搜索匹配正则表达式的第一个位置
18 | 返回match对象
19 | ∙ pattern : 正则表达式的字符串或原生字符串表示 
20 | ∙ string : 待匹配字符串
21 | ∙ flags : 正则表达式使用时的控制标记
22 | '''
23 | 
24 | str1 = 'hello , world ,life is short ,use Python .WHAT? '
25 | 
26 | a = re.search(r'\w+',str1)
27 | print(a.group())    #  hello
28 | 
29 | b = re.search(r'w.+D',str1,re.I)  # re.I makes the match case-insensitive
30 | print(b.group())  # world
31 | 
32 | # re.findall: search the string and return all matching substrings as a list
33 | '''
34 | re.findall(pattern, string, flags=0) 
35 | 搜索字符串，以列表类型返回全部能匹配的子串
36 | ∙ pattern : 正则表达式的字符串或原生字符串表示 
37 | ∙ string : 待匹配字符串
38 | ∙ flags : 正则表达式使用时的控制标记
39 | '''
40 | 
41 | c = re.findall(r'\w+',str1)
42 | print (c)
43 | #['hello', 'world', 'life', 'is', 'short', 'use', 'Python', 'WHAT']
44 | 
45 | str2 = 'hssso'
46 | re1 = re.compile(r'h.{3}o')  # 'h', then any three characters, then 'o'
47 | print(re1.findall(str1))
48 | print(re1.findall(str2))
49 | # ['hello']
50 | #['hssso']
51 | # Attributes of a match object: string, re, pos and endpos
52 | '''
53 | match 对象的属性
54 | 
55 | .string : 待匹配的文本 
56 | .re     : 匹配时使用的patter对象(正则表达式)
57 | .pos    : 正则表达式搜索文本的开始位置
58 | .endpos : 正则表达式搜索文本的结束位置   
59 | '''
60 | d = re.search(r'e.+d',str1)
61 | print(d.group()) # ello , world
62 | print (d.string) # hello , world ,life is short ,use Python .WHAT?
63 | print (d.re) # re.compile('e.+d')
64 | print (d.pos) # 0
65 | print (d.endpos) # 48
66 | 


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/baidutieba.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 抓取百度贴吧---生活大爆炸吧的基本内容
 3 | 爬虫线路： requests - bs4
 4 | Python版本： 3.6
 5 | OS： mac os 12.12.4
 6 | '''
 7 | 
 8 | import requests
 9 | import time
10 | from bs4 import BeautifulSoup
11 | 
12 | # 首先我们写好抓取网页的函数
13 | 
14 | 
15 | def get_html(url):
16 |     try:
17 |         r = requests.get(url, timeout=30)
18 |         r.raise_for_status()
19 |         # 这里我们知道百度贴吧的编码是utf-8，所以手动设置的。爬去其他的页面时建议使用：
20 |         # r.encoding = r.apparent_encoding
21 |         r.encoding = 'utf-8'
22 |         return r.text
23 |     except:
24 |         return " ERROR "
25 | 
26 | 
27 | def get_content(url):
28 |     '''
29 |     分析贴吧的网页文件，整理信息，保存在列表变量中
30 |     '''
31 | 
32 |     # 初始化一个列表来保存所有的帖子信息：
33 |     comments = []
34 |     # 首先，我们把需要爬取信息的网页下载到本地
35 |     html = get_html(url)
36 | 
37 |     # 我们来做一锅汤
38 |     soup = BeautifulSoup(html, 'lxml')
39 | 
40 |     # 按照之前的分析，我们找到所有具有‘ j_thread_list clearfix’属性的li标签。返回一个列表类型。
41 |     liTags = soup.find_all('li', attrs={'class': ' j_thread_list clearfix'})
42 | 
43 |     # 通过循环找到每个帖子里的我们需要的信息：
44 |     for li in liTags:
45 |         # 初始化一个字典来存储文章信息
46 |         comment = {}
47 |         # 这里使用一个try except 防止爬虫找不到信息从而停止运行
48 |         try:
49 |             # 开始筛选信息，并保存到字典中
50 |             comment['title'] = li.find(
51 |                 'a', attrs={'class': 'j_th_tit '}).text.strip()
52 |             comment['last_reply_data'] = li.find('span',attrs={'class':'threadlist_reply_date pull_right j_reply_data'}).text.strip()
53 |             comments.append(comment)
54 |         except:
55 |             print('出了点小问题')
56 | 
57 |     return comments
58 | 
59 | 
60 | def Out2File(dict):
61 |     '''
62 |     将爬取到的文件写入到本地
63 |     保存到当前目录的 TTBT.txt文件中。
64 | 
65 |     '''
66 |     with open('TTBT.txt', 'a+') as f:
67 |         for comment in dict:
68 |             f.write('标题： {} \t 链接：{} \t 发帖人：{} \t 发帖时间：{} \t 回复数量： {} \n'.format(
69 |                 comment['title'], comment['last_reply_data']))
70 | 
71 |         print('当前页面爬取完成')
72 | 
73 | 
74 | def main(base_url, deep):
75 |     url_list = []
76 |     # 将所有需要爬去的url存入列表
77 |     for i in range(0, deep):
78 |         url_list.append(base_url + '&pn=' + str(50 * i))
79 |     print('所有的网页已经下载到本地！ 开始筛选信息。。。。')
80 | 
81 |     # 循环写入所有的数据
82 |     for url in url_list:
83 |         content = get_content(url)
84 |         Out2File(content)
85 |     print('所有的信息都已经保存完毕！')
86 | 
87 | 
88 | base_url = 'http://tieba.baidu.com/f?kw=%E7%94%9F%E6%B4%BB%E5%A4%A7%E7%88%86%E7%82%B8&ie=utf-8'
89 | # Number of list pages to crawl
90 | deep = 1
91 | 
92 | if __name__ == '__main__':
93 |     main(base_url, deep)
94 | 


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/bocai.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 爬取Dota菠菜结果信息
 3 | 使用 requests --- bs4 线路
 4 | Python版本： 3.6
 5 | OS： mac os 12.12.4
 6 | '''
 7 | 
 8 | import requests
 9 | import bs4
10 | 
11 | def get_html(url):
12 |     try:
13 |         r = requests.get(url, timeout=30)
14 |         r.raise_for_status()
15 |         r.encoding = r.apparent_encoding
16 |         return r.text
17 |     except:
18 |         return " ERROR "
19 | 
20 | def print_result(url):
21 |     '''
22 |     查询比赛结果，并格式化输出！
23 |     '''
24 |     html = get_html(url)
25 |     soup =  bs4.BeautifulSoup(html,'lxml')
26 |     match_list = soup.find_all('div', attrs={'class': 'matchmain bisai_qukuai'})
27 |     for match in match_list:
28 |         time = match.find('div', attrs={'class': 'whenm'}).text.strip()
29 |         teamname = match.find_all('span', attrs={'class': 'team_name'})
30 |        
31 |         
32 |         #由于网站的构造问题，队名有的时候会不显示，所以我们需要过滤掉一些注释,方法如下:
33 |         if teamname[0].string[0:3] == 'php':
34 |             team1_name = "暂无队名"
35 |         else:
36 |             team1_name = teamname[0].string
37 |         
38 |         # 这里我们采用了css选择器：比原来的属性选择更加方便
39 |         team1_support_level = match.find('span', class_='team_number_green').string
40 | 
41 |         team2_name = teamname[1].string
42 |         team2_support_level = match.find('span', class_='team_number_red').string
43 | 
44 |         print('比赛时间：{}，\n 队伍一：{}      胜率 {}\n 队伍二：{}      胜率 {} \n'.format(time,team1_name,team1_support_level,team2_name,team2_support_level))
45 | 
46 | 
47 | 
48 | def main():
49 |     url= 'http://dota2bocai.com/match'
50 |     print_result(url)
51 | 
52 | if __name__ == '__main__':
53 |     main()


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/demo.html:
--------------------------------------------------------------------------------
 1 | 
 2 | <html><head><title>The Dormouse's story</title></head>
 3 | <body>
 4 | <p class="title"><b>The Dormouse's story</b></p>
 5 | 
 6 | <p class="story">Once upon a time there were three little sisters; and their names were
 7 | <a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
 8 | <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
 9 | <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
10 | and they lived at the bottom of a well.</p>
11 | 
12 | <p class="story">...</p>
13 | 


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/dianying.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 爬取最新电影排行榜单
 3 | url：http://dianying.2345.com/top/
 4 | 使用 requests --- bs4 线路
 5 | Python版本： 3.6
 6 | OS： mac os 12.12.4
 7 | '''
 8 | 
 9 | import requests
10 | import bs4
11 | 
12 | 
def get_html(url):
    """Download ``url`` and return its body decoded as GBK text.

    Returns the literal string "someting wrong" when the request fails
    (connection problem, timeout or HTTP error status), so callers can
    keep treating the result as text.
    """
    try:
        r = requests.get(url, timeout=30)
        # raise_for_status must be *called*; the bare attribute access
        # used before never raised on 4xx/5xx answers.
        r.raise_for_status()
        # The site serves GBK-encoded pages.
        r.encoding = 'gbk'
        return r.text
    except requests.RequestException:
        return "someting wrong"
22 | 
23 | 
def get_content(url, img_dir='/Users/ehco/Desktop/img/'):
    """Print the movie ranking found at ``url`` and save every poster.

    For each movie the title, release info, actors and synopsis are
    printed, then the poster is stored as ``<img_dir>/<title>.png``.

    url     -- address of the ranking page
    img_dir -- directory that receives the posters; created when missing
    """
    # Local import: this module only imports requests and bs4 at the top.
    import os

    html = get_html(url)
    soup = bs4.BeautifulSoup(html, 'lxml')

    # The <ul> that holds the ranking.
    movies_list = soup.find('ul', class_='picList clearfix')
    if movies_list is None:
        # Download failed or the page layout changed: nothing to list.
        return

    os.makedirs(img_dir, exist_ok=True)

    for top in movies_list.find_all('li'):
        # Poster address.
        img_url = top.find('img')['src']

        name = top.find('span', class_='sTit').a.text

        # Some entries carry no release information.
        intro_tag = top.find('span', class_='sIntro')
        time = intro_tag.text if intro_tag is not None else "暂无上映时间"

        # Walk the direct children of the actor paragraph so every actor
        # becomes its own piece; children without a single string
        # (``.string`` is None) are skipped instead of raising TypeError.
        actors = top.find('p', class_='pActor')
        actor = ''
        if actors is not None:
            actor = ''.join(act.string + '  '
                            for act in actors.contents
                            if act.string is not None)

        # Synopsis.
        intro = top.find('p', class_='pTxt pIntroShow').text

        print("片名：{}\t{}\n{}\n{} \n \n ".format(name, time, actor, intro))

        # Save the poster; a '/' in the title would otherwise be read as
        # a directory separator.
        path = os.path.join(img_dir, name.replace('/', '_') + '.png')
        with open(path, 'wb') as f:
            f.write(requests.get(img_url, timeout=30).content)
57 | 
58 | 
def main():
    """Scrape and print the 2345.com movie ranking."""
    get_content('http://dianying.2345.com/top/')


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/kouhong.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Author Ehco1996
 3 | Time 2017-11-10
 4 | 
 5 | 如何暗示男朋友给自己买口红
 6 | '''
 7 | 
 8 | from bs4 import BeautifulSoup
 9 | import requests
10 | import os
11 | 
12 | 
def get_html_text(url):
    """Return the text of the page at ``url``, or -1 when the request fails."""
    try:
        r = requests.get(url, timeout=10)
        # Call the method: the bare attribute access never checked the
        # HTTP status.
        r.raise_for_status()
        return r.text
    except requests.RequestException:
        return -1
20 | 
21 | 
def parse_img(html):
    """Collect address and name of every usable ``<img>`` tag in ``html``.

    Returns a list of dicts with the keys ``src`` (image address) and
    ``name`` (the ``alt`` text with blanks and slashes removed, so it
    can serve as a file name).  Tags that lack a ``src`` or an ``alt``
    attribute are skipped instead of raising KeyError.
    """
    soup = BeautifulSoup(html, 'lxml')
    return [
        {
            'src': img['src'],
            'name': img['alt'].replace(' ', '').replace('/', ''),
        }
        for img in soup.find_all('img')
        if img.has_attr('src') and img.has_attr('alt')
    ]
32 | 
33 | 
def get_img_response(url):
    """Return the raw bytes served at ``url``, or -1 when the download fails."""
    try:
        # The whole body is read right away, so streaming brings nothing;
        # the timeout keeps a dead server from blocking the run forever.
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        return r.content
    except requests.RequestException:
        return -1
41 | 
42 | 
def download_img(data):
    """Save every image described in ``data`` into ``img/`` beside this script.

    ``data`` is the list produced by ``parse_img``: dicts with the keys
    ``src`` and ``name``.  Images whose download fails are skipped.
    """
    # Create the same directory that is written to; the previous check
    # looked at ``img`` relative to the current working directory.
    img_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'img')
    os.makedirs(img_dir, exist_ok=True)

    for img in data:
        content = get_img_response(img['src'])
        if content == -1:
            # -1 is the failure marker, not image data.
            continue
        path = os.path.join(img_dir, img['name'] + '.jpg')
        with open(path, 'wb') as f:
            f.write(content)
51 | 
52 | 
def main():
    """Fetch the 1688 search result page and download its images."""
    html = get_html_text(
        'https://www.1688.com/pic/-.html?spm=a261b.8768355.searchbar.5.oUjRZK&keywords=%BF%DA%BA%EC')
    if html == -1:
        # Page could not be fetched: nothing to download.
        return
    download_img(parse_img(html))


if __name__ == '__main__':
    main()
63 | 


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/parse_station.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import re
 3 | 
 4 | #关闭https证书验证警告
 5 | requests.packages.urllib3.disable_warnings()
 6 | 
 7 | url = 'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9018'
 8 | r = requests.get(url,verify=False)
 9 | pattern = u'([\u4e00-\u9fa5]+)\|([A-Z]+)'
10 | result = re.findall(pattern,r.text)
11 | station = dict(result)
12 | 
13 | print(station)


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/qiubai.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | 
 4 | 
 5 | def get_html_text(url):
 6 |     try:
 7 |         r = requests.get(url, timeout=3)
 8 |         r.raise_for_status()
 9 |         r.encoding = r.apparent_encoding
10 |         return r.text
11 |     except:
12 |         return 'something wrong'
13 | 
14 | 
def get_jokes(url):
    """Return the jokes found on the Qiushibaike page at ``url``.

    Every list element is one formatted string holding the author, the
    joke body and the top comment (a placeholder when there is none).
    """
    joke_list = []

    html = get_html_text(url)
    soup = BeautifulSoup(html, 'lxml')

    articles = soup.find_all('div', class_='article block untagged mb15')

    for article in articles:
        body = article.find('span').text
        author = article.find('img')['alt']
        try:
            comment = article.find(
                'div', class_='main-text').contents[0].replace('\n', '')
        # No comment block (None), an empty block, or a first child that
        # is a tag rather than plain text.
        except (AttributeError, IndexError, TypeError):
            comment = '暂时没有热评'

        joke = '作者：{}\n{}\n\n热评{}'.format(author, body, comment)
        joke_list.append(joke)

    return joke_list
41 | 
42 | 
# Manual check: fetch the "8hr" listing and print its second joke.

url = 'https://www.qiushibaike.com/8hr'

a = get_jokes(url)
print(a[1])
49 | 


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/反爬虫.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import random
 3 | 
 4 | 
 5 | def get_html(url):
 6 |     try:
 7 |         r = requests.get(url, timeout=30)
 8 |         print(r.headers)
 9 |         r.raise_for_status
10 |         r.encoding = r.apparent_encoding
11 | 
12 |         return r.status_code
13 |     except:
14 |         return "Someting Wrong！"
15 | 
16 | 
def get_agent():
    """Return a header dict with a randomly picked ``User-agent`` value."""
    agents = ['Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;',
              'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1',
              'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
              'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
              'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)']
    # random.choice never leaves the list; randint(0, len(agents)) is
    # inclusive on both ends and raised IndexError now and then.
    return {'User-agent': random.choice(agents)}
30 | 
31 | 
def get_proxy():
    """Return a proxies dict whose ``http`` entry is a randomly picked proxy.

    A very small stand-in for a proxy pool.
    """
    proxy = ["http://116.211.143.11:80",
             "http://183.1.86.235:8118",
             "http://183.32.88.244:808",
             "http://121.40.42.35:9999",
             "http://222.94.148.210:808"]
    # random.choice never leaves the list; randint(0, len(proxy)) is
    # inclusive on both ends and raised IndexError now and then.
    return {'http': random.choice(proxy)}
45 | 
46 | 
47 | print(get_html('https://zhuanlan.zhihu.com'))


--------------------------------------------------------------------------------
/Beautiful Soup 爬虫/沪铜数据.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 获取新浪网 沪铜CUO历史交易记录
 3 | 从1999-01-01 到 2017-06-15
 4 | 网址：http://vip.stock.finance.sina.com.cn/q/view/vFutures_History.php?page=1&breed=CU0&start=1999-01-01&end=2017-06-15&jys=shfe&pz=CU&hy=CU0&type=inner&name=%A1%E4%A8%AE%26%23182%3B11109
 5 | '''
 6 | 
 7 | import  requests
 8 | from bs4 import BeautifulSoup
 9 | 
def get_html_text(url):
    """Return the decoded text of ``url``.

    Returns the string 'something wrong' when the request fails
    (connection problem, timeout or HTTP error status).
    """
    try:
        r = requests.get(url, timeout=3)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    # Only request failures are expected here; a bare except would also
    # hide KeyboardInterrupt and programming errors.
    except requests.RequestException:
        return 'something wrong'
18 | 
19 | 
20 | 
def get_one_data(url):
    """Return the 'date : close price' lines of one history page.

    The list is empty when the page has no ``historyList`` block, for
    example because the download failed.
    """
    data = []
    html = get_html_text(url)
    soup = BeautifulSoup(html, 'lxml')
    days = soup.find('div', class_='historyList')
    if days is None:
        return data

    # Data starts at the third row; rows with fewer than two cells are
    # skipped.
    for row in days.find_all('tr')[2:]:
        info = row.find_all('td')
        if len(info) < 2:
            continue
        date = info[0].text
        close_price = info[1].text
        data.append(date + ' : ' + close_price)

    return data
46 |     
def W2File(data):
    """Append every entry of ``data`` as its own line to cuo_data.txt."""
    with open('cuo_data.txt', 'a+') as out:
        out.writelines(entry + '\n' for entry in data)

    print('数据写入完毕！')
53 | 
# One address per result page (pages 1 to 76) of the CU0 history query.
urls = [
    'http://vip.stock.finance.sina.com.cn/q/view/vFutures_History.php?page=' + str(page) + '&breed=CU0&start=1999-01-01&end=2017-06-15&jys=shfe&pz=CU&hy=CU0&type=inner&name=%A1%E4%A8%AE%26%23182%3B11109'
    for page in range(1, 77)
]

# Scrape page after page and append its rows to the output file.
for url in urls:
    W2File(get_one_data(url))


--------------------------------------------------------------------------------
/Google-Image/spider.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Google Image Spider
 3 | 
 4 | '''
 5 | 
 6 | import requests
 7 | from bs4 import BeautifulSoup
 8 | 
 9 | 
# Google image-search address; "{}" is filled with the search keyword.
SEARCHRUL = 'https://www.google.com/search?&safe=off&q={}&tbm=isch&tbs=itp:photo,isz:l'
11 | 
12 | 
def get_html_text(url):
    """Return the decoded text of ``url``, or 'error' when the request fails.

    A browser User-Agent header is sent with the request.
    """
    headers = {}
    headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
    try:
        r = requests.get(url, timeout=9, headers=headers)
        # Call the method: the bare attribute access never checked the
        # HTTP status.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return 'error'
24 | 
25 | 
def parse_img_url(q, nums):
    """Return the original image links of a Google image search.

    q    -- search keyword
    nums -- wanted number of results; values outside 1..20 fall back to 20

    Returns a list of links, or the string 'error' when the result page
    could not be fetched.
    """
    # Local import: the module itself only imports requests and bs4.
    import json

    links = []
    # Keep the result count in range.  The former check was always true
    # for positive values and assigned to an unused name.
    if nums > 20 or nums <= 0:
        nums = 20

    url = SEARCHRUL.format(q)
    print(url)
    html = get_html_text(url)
    if html == 'error':
        return 'error'

    soup = BeautifulSoup(html, 'lxml')
    content = soup.find_all('div', class_='rg_meta', limit=nums)
    for link in content:
        # NOTE(review): this used to be eval() on text taken from a remote
        # page, which executes arbitrary code.  The text is parsed as JSON
        # instead -- confirm the rg_meta payload is always valid JSON.
        rec = json.loads(link.text)
        links.append(rec['ou'])
    return links
49 | 
50 | 
res = parse_img_url('test', 15)

if res == 'error':
    # Failure marker, not a list of links: print it once rather than
    # character by character.
    print(res)
else:
    for url in res:
        print(url)
55 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Python-crawler
 2 | 
 3 | > 由于代码是比较早之前写的，抓取的网站目录结构可能有所变动   
 4 | > 所以部分代码可能不能使用了，欢迎正在学习爬虫的大家给这个项目**提PR**  
 5 | > 让更多的代码能跑起来~
 6 | 
 7 | 从零开始系统化的学习写Python爬虫。   
 8 | 主要是记录一下自己写Python爬虫的经过与心得。   
 9 | 同时也是为了分享一下如何能更高效率的学习写爬虫。    
10 | IDE：Vscode    Python版本: 3.6   
11 | 
12 | * 知乎专栏：https://zhuanlan.zhihu.com/Ehco-python
13 | 
14 | 详细学习路径：
15 | ### 一：Beautiful Soup 爬虫
16 | 
17 | * requests库的安装与使用 https://zhuanlan.zhihu.com/p/26681429
18 | * 安装beautiful soup 爬虫环境 https://zhuanlan.zhihu.com/p/26683864
19 | * beautiful soup 的解析器 https://zhuanlan.zhihu.com/p/26691931
20 | * re库 正则表达式的使用 https://zhuanlan.zhihu.com/p/26701898
21 | * bs4 爬虫实践： 获取百度贴吧的内容 https://zhuanlan.zhihu.com/p/26722495
22 | * bs4 爬虫实践： 获取双色球中奖信息 https://zhuanlan.zhihu.com/p/26747717
23 | * bs4 爬虫实践： 排行榜小说批量下载 https://zhuanlan.zhihu.com/p/26756909
24 | * bs4 爬虫实践： 获取电影信息 https://zhuanlan.zhihu.com/p/26786056
25 | * bs4 爬虫实践： 悦音台mv排行榜与反爬虫技术 https://zhuanlan.zhihu.com/p/26809626
26 | ------
27 | ### 二： Scrapy 爬虫框架
28 | 
29 | * Scrapy 爬虫框架的安装与基本介绍 https://zhuanlan.zhihu.com/p/26832971
30 | * Scrapy 选择器和基本使用 https://zhuanlan.zhihu.com/p/26854842
31 | * Scrapy 爬虫实践：天气预报&数据存储 https://zhuanlan.zhihu.com/p/26885412
32 | * Scrapy 爬虫实践：代理的爬取和验证 https://zhuanlan.zhihu.com/p/26939527
33 | * Scrapy 爬虫实践：糗事百科&爬虫攻防 https://zhuanlan.zhihu.com/p/26980300
34 | * Scrapy 爬虫实践：重构排行榜小说爬虫&Mysql数据库 https://zhuanlan.zhihu.com/p/27027200
35 | ------
36 | ### 三： 浏览器模拟爬虫
37 | 
38 | * Selenium模拟浏览器 https://zhuanlan.zhihu.com/p/27115580
39 | * 爬虫实践：获取快代理 https://zhuanlan.zhihu.com/p/27150025
40 | * 爬虫实践：漫画批量下载 https://zhuanlan.zhihu.com/p/27155429
41 | 
42 | ### 四： 练手项目
43 | 
44 | * 爬虫实践：螺纹钢数据&Cookies https://zhuanlan.zhihu.com/p/27232687
45 | * 爬虫实践：登录正方教务系统 https://zhuanlan.zhihu.com/p/27256315
46 | * 爬虫应用： requests+django实现微信公众号后台 https://zhuanlan.zhihu.com/p/27625233
47 | * 爬虫应用： 12306火车票信息查询 https://zhuanlan.zhihu.com/p/27969976
48 | * 爬虫应用： 利用斗鱼Api抓取弹幕 https://zhuanlan.zhihu.com/p/28164017
49 | * 爬虫应用： 获取支付宝账单信息 https://zhuanlan.zhihu.com/p/28537306
50 | * 爬虫应用：IT之家热门段子（评论）爬取 https://zhuanlan.zhihu.com/p/28806210
51 | * 爬虫应用：一号店 商品信息查询程序 https://zhuanlan.zhihu.com/p/28982497
52 | * 爬虫应用：搜狗输入法词库抓取 https://zhuanlan.zhihu.com/p/31186373
53 | * 爬虫应用：复古网盘游戏抓取 https://zhuanlan.zhihu.com/p/32420131
54 | * 爬虫应用：自动填写问卷星  https://zhuanlan.zhihu.com/p/36224375
55 | * 爬虫应用：腾讯漫画下载~  https://zhuanlan.zhihu.com/p/39578774
56 | 
57 | 
58 | 
59 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/001.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 简单介绍 Scrapy XPath 选择器的使用
 3 | '''
 4 | from scrapy.selector import Selector 
 5 | 
# Sample document: three <class> records below <html>/<body>.
xml = '''
<html>
    <body>
        <class id=1>
            <name>王尼玛</name>
            <sex>男</sex>
            <age>80</age>
            <favouite>开车</favouite>
        </class>
        <class id=2>
            <name>陈一发</name>
            <sex>母</sex>
            <age>28</age>
            <favouite>开che</favouite>
        </class>
        <class id=3>
            <name>狗贼叔叔</name>
            <sex>公</sex>
            <age>18</age>
            <favouite>土豪战</favouite>
        </class>
    </body>
</html>
'''

# XPath positions are 1-based: class[1] selects the first <class> element.
a = Selector(text=xml).xpath('/html/body/class[1]').extract()

print(a)


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/biquge/biquge/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/biquge/biquge/__init__.py


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/biquge/biquge/items.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your scraped items
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/items.html
 7 | 
 8 | import scrapy
 9 | 
10 | 
class BiqugeItem(scrapy.Item):
    """One scraped chapter of a novel."""

    # Title of the novel.
    bookname = scrapy.Field()
    # Chapter heading.
    title = scrapy.Field()
    # Chapter text.
    body = scrapy.Field()
    # Number used to sort the chapters.
    order_id = scrapy.Field()


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/biquge/biquge/middlewares.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your spider middleware
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html
 7 | 
 8 | from scrapy import signals
 9 | 
10 | 
class BiqugeSpiderMiddleware(object):
    """Pass-through spider middleware generated by the project template.

    Not all methods need to be defined.  If a method is not defined,
    Scrapy acts as if the spider middleware does not modify the passed
    objects.

    The hook methods are instance methods and therefore take ``self``;
    without it the bound call shifts every argument by one position.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        """Called for each response on its way into the spider.

        Should return None or raise an exception.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Yield the spider's results for ``response`` unchanged.

        Must produce an iterable of Request, dict or Item objects.
        """
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        """Called when the spider or process_spider_input() raises.

        Should return None or an iterable of Response, dict or Item
        objects; this implementation returns None.
        """
        pass

    def process_start_requests(self, start_requests, spider):
        """Yield the spider's start requests unchanged.

        Works like process_spider_output() but has no response
        associated; must produce only requests (not items).
        """
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        """Log that ``spider`` has been opened."""
        spider.logger.info('Spider opened: %s' % spider.name)
57 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/biquge/biquge/pipelines.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define your item pipelines here
 4 | #
 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
 7 | import pymysql
 8 | 
 9 | 
class BiqugePipeline(object):
    """Store every scraped chapter in a MySQL table named after its novel."""

    def process_item(self, item, spider):
        """Write one chapter to the database and hand the item on.

        The table is created on first use.  A database error is logged
        through the spider's logger and the item is still returned, so
        the pipeline stays best-effort without hiding the failure.
        """
        # Values to store.
        name = item['bookname']
        order_id = item['order_id']
        body = item['body']
        title = item['title']

        # Connect to the local database.
        connection = pymysql.connect(
            host='localhost',      # local MySQL server
            user='root',           # MySQL user name
            passwd='********',     # MySQL password
            db='bqgxiaoshuo',      # database name
            charset='utf8mb4',     # connection character set
            cursorclass=pymysql.cursors.DictCursor)

        # A table name cannot be sent as a query parameter, so it is
        # quoted as an identifier with embedded backticks doubled.
        table = '`%s`' % name.replace('`', '``')

        try:
            with connection.cursor() as cursor:
                cursor.execute(
                    'Create Table If Not Exists %s'
                    '(id int,zjm varchar(20),body text)' % table)
                # The values go in as parameters: chapter text containing
                # quotes can neither break nor alter the statement.
                cursor.execute(
                    'Insert into %s values (%%s, %%s, %%s)' % table,
                    (order_id, title, body))

            # Make the insert permanent.
            connection.commit()
        except pymysql.MySQLError as exc:
            spider.logger.error(
                'Could not store chapter %r of %r: %s', title, name, exc)
        finally:
            connection.close()

        # Returning outside ``finally`` no longer discards other errors.
        return item
48 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/biquge/biquge/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/biquge/biquge/spiders/sjzh.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 实现了中文向阿拉伯数字转换
 3 | 用于从小说章节名提取id来排序
 4 | '''
 5 | 
 6 | 
 7 | 
 8 | chs_arabic_map = {'零': 0, '一': 1, '二': 2, '三': 3, '四': 4,
 9 |                   '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
10 |                   '十': 10, '百': 100, '千': 10 ** 3, '万': 10 ** 4,
11 |                   '〇': 0, '壹': 1, '贰': 2, '叁': 3, '肆': 4,
12 |                   '伍': 5, '陆': 6, '柒': 7, '捌': 8, '玖': 9,
13 |                   '拾': 10, '佰': 100, '仟': 10 ** 3, '萬': 10 ** 4,
14 |                   '亿': 10 ** 8, '億': 10 ** 8, '幺': 1,
15 |                   '0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5,
16 |                   '7': 7, '8': 8, '9': 9}
17 | 
18 | num_list = ['1','2','4','5','6','7','8','9','0','一','二','三','四','五','六','七','八','九','十','零','千','百',]
19 | 
20 | def get_tit_num(title):
21 |     result =''
22 |     for char in title:
23 |         if char in num_list:
24 |             result+=char
25 |     return result
26 | 
27 | 
28 | def Cn2An(chinese_digits):
29 | 
30 |     result = 0
31 |     tmp = 0
32 |     hnd_mln = 0
33 |     for count in range(len(chinese_digits)):
34 |         curr_char = chinese_digits[count]
35 |         curr_digit = chs_arabic_map[curr_char]
36 |         # meet 「亿」 or 「億」
37 |         if curr_digit == 10 ** 8:
38 |             result = result + tmp
39 |             result = result * curr_digit
40 |             # get result before 「亿」 and store it into hnd_mln
41 |             # reset `result`
42 |             hnd_mln = hnd_mln * 10 ** 8 + result
43 |             result = 0
44 |             tmp = 0
45 |         # meet 「万」 or 「萬」
46 |         elif curr_digit == 10 ** 4:
47 |             result = result + tmp
48 |             result = result * curr_digit
49 |             tmp = 0
50 |         # meet 「十」, 「百」, 「千」 or their traditional version
51 |         elif curr_digit >= 10:
52 |             tmp = 1 if tmp == 0 else tmp
53 |             result = result + curr_digit * tmp
54 |             tmp = 0
55 |         # meet single digit
56 |         elif curr_digit is not None:
57 |             tmp = tmp * 10 + curr_digit
58 |         else:
59 |             return result
60 |     result = result + tmp
61 |     result = result + hnd_mln
62 |     return result
63 |     
64 | 
# Manual check; guarded so that importing this module from the spider
# does not print anything.
if __name__ == '__main__':
    print (Cn2An(get_tit_num('第一千三百九十一章 你妹妹被我咬了！')))


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/biquge/biquge/spiders/xsphspider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import scrapy
 3 | from biquge.items import BiqugeItem
 4 | from .sjzh import Cn2An,get_tit_num
 5 | 
 6 | 
 7 | class XsphspiderSpider(scrapy.Spider):
 8 |     name = "xsphspider"
 9 |     allowed_domains = ["qu.la"]
10 |     start_urls = ['http://www.qu.la/paihangbang/']
11 |     novel_list = []
12 | 
13 |     def parse(self, response):
14 | 
15 |         # 找到各类小说排行榜名单
16 |         books = response.xpath('.//div[@class="index_toplist mright mbottom"]')
17 | 
18 |         # 找到每一类小说排行榜的每一本小说的下载链接
19 |         for book in books:
20 |             links = book.xpath('.//div[2]/div[2]/ul/li')
21 |             for link in links:
22 |                 url = 'http://www.qu.la' + \
23 |                     link.xpath('.//a/@href').extract()[0]
24 |                 self.novel_list.append(url)
25 | 
26 |         # 简单的去重
27 |         self.novel_list = list(set(self.novel_list))
28 | 
29 |         for novel in self.novel_list:
30 |            yield scrapy.Request(novel, callback=self.get_page_url)
31 | 
32 |     def get_page_url(self, response):
33 |         '''
34 |         找到章节链接
35 |         '''
36 |         page_urls = response.xpath('.//dd/a/@href').extract()
37 | 
38 |         for url in page_urls:
39 |            yield scrapy.Request('http://www.qu.la' + url,callback=self.get_text)
40 | 
41 |     def get_text(self, response):
42 |         '''
43 |         找到每一章小说的标题和正文
44 |         并自动生成id字段，用于表的排序
45 |         '''
46 |         item = BiqugeItem()
47 | 
48 |         # 小说名
49 |         item['bookname'] = response.xpath(
50 |             './/div[@class="con_top"]/a[2]/text()').extract()[0]
51 |         
52 |         # 章节名 ,将title单独找出来，为了提取章节中的数字
53 |         title = response.xpath('.//h1/text()').extract()[0]
54 |         item['title'] = title
55 |         
56 |         #  找到用于排序的id值
57 |         item['order_id'] = Cn2An(get_tit_num(title))
58 |         
59 |         # 正文部分需要特殊处理
60 |         body = response.xpath('.//div[@id="content"]/text()').extract()
61 |         
62 |         # 将抓到的body转换成字符串，接着去掉\t之类的排版符号，
63 |         text = ''.join(body).strip().replace('\u3000', '')
64 |         
65 |         item['body'] = text
66 | 
67 |         return item
68 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/biquge/scrapy.cfg:
--------------------------------------------------------------------------------
 1 | # Automatically created by: scrapy startproject
 2 | #
 3 | # For more information about the [deploy] section see:
 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
 5 | 
 6 | [settings]
 7 | default = biquge.settings
 8 | 
 9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = biquge
12 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/demo.xml:
--------------------------------------------------------------------------------
 1 | <html>
 2 |     <body>
 3 |         <class>
 4 |             <name>王尼玛</name>
 5 |             <sex>男</sex>
 6 |             <age>80</age>
 7 |             <favouite>开车</favouite>
 8 |         </class>
 9 |         <class>
10 |             <name>陈一发</name>
11 |             <sex>母</sex>
12 |             <age>28</age>
13 |             <favouite>开che</favouite>
14 |         </class>
15 |         <class>
16 |             <name>狗贼叔叔</name>
17 |             <sex>公</sex>
18 |             <age>18</age>
19 |             <favouite>土豪战</favouite>
20 |         </class>
21 |     </body>
22 | </html>


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/mzitu/mzitu/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/mzitu/mzitu/__init__.py


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/mzitu/mzitu/items.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your scraped items
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/items.html
 7 | 
 8 | import scrapy
 9 | 
10 | 
class MzituItem(scrapy.Item):
    """One image of a gallery."""

    # Address of the image.
    img_urls = scrapy.Field()
    # Gallery name.
    name = scrapy.Field()
16 |     
17 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/mzitu/mzitu/middlewares.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your spider middleware
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html
 7 | 
 8 | from scrapy import signals
 9 | 
10 | 
class MzituSpiderMiddleware(object):
    """Pass-through spider middleware generated by the project template.

    Not all methods need to be defined.  If a method is not defined,
    Scrapy acts as if the spider middleware does not modify the passed
    objects.

    The hook methods are instance methods and therefore take ``self``;
    without it the bound call shifts every argument by one position.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_spider_input(self, response, spider):
        """Called for each response on its way into the spider.

        Should return None or raise an exception.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Yield the spider's results for ``response`` unchanged.

        Must produce an iterable of Request, dict or Item objects.
        """
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        """Called when the spider or process_spider_input() raises.

        Should return None or an iterable of Response, dict or Item
        objects; this implementation returns None.
        """
        pass

    def process_start_requests(self, start_requests, spider):
        """Yield the spider's start requests unchanged.

        Works like process_spider_output() but has no response
        associated; must produce only requests (not items).
        """
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        """Log that ``spider`` has been opened."""
        spider.logger.info('Spider opened: %s' % spider.name)
57 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/mzitu/mzitu/pipelines.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define your item pipelines here
 4 | #
 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
 7 | import os
 8 | import requests
 9 | 
class MzituPipeline(object):
    """Download the image of every item into a directory per gallery."""

    def process_item(self, item, spider):
        """Save the item's image as ``<base_dir>/<name>/<last 6 chars of the address>``."""
        base_dir = '/Users/ehco/Desktop/mezitu/'

        # Make sure the gallery directory exists.
        target_dir = os.path.join(base_dir, item['name'])
        os.makedirs(target_dir, exist_ok=True)

        img_url = item['img_urls']
        # The timeout keeps one dead image server from stalling the crawl.
        content = requests.get(img_url, timeout=30).content

        # ``with`` closes the file; the former open(...).write(...) left
        # one handle open per image.
        with open(os.path.join(target_dir, img_url[-6:]), 'wb') as f:
            f.write(content)
        return item
21 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/mzitu/mzitu/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/mzitu/mzitu/spiders/mezitu.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import scrapy
 3 | from mzitu.items import MzituItem
 4 | 
 5 | 
 6 | class MezituSpider(scrapy.Spider):
 7 |     name = "mezitu"
 8 |     allowed_domains = ["mzitu.com"]
 9 |     start_urls = []
10 |     
11 |     for i in range(2,144):
12 |         start_urls.append('http://www.mzitu.com/page/'+str(i))
13 | 
14 |     
15 |     
16 |     def parse(self, response):
17 | 
18 |         # 找到首页的每个妹子图的li列表
19 |         main = response.xpath('//ul[@id="pins"]/li')
20 | 
21 |         for li in main:
22 |             # 找到每个妹子图包的baseurl
23 |             mzurl = li.xpath('./a/@href').extract()[0]
24 |             # 找到每个妹子图的名字，传回去做dirname
25 |             name = li.xpath('.//img/@alt').extract()[0]
26 | 
27 |             # 进入套图页面，抓取每一张图
28 |             yield scrapy.Request(mzurl,meta={'name':name}, callback=self.get_page_url)
29 | 
30 |     def get_page_url(self, response):
31 |         '''
32 |         找到套图的最大页码，并且生成每一页的url连接 page_url
33 |         '''
34 |         max_num = response.xpath(
35 |             '//div[@class="pagenavi"]/a[last()-1]/span/text()').extract()[0]
36 |         for i in range(2, int(max_num) +1):
37 |             page_url = response.url + '/' + str(i)
38 |             # 这是一个生成器，用来回调img_url函数来抓套图的url链接
39 |             yield scrapy.Request(page_url, meta={'name': response.meta['name']}, callback=self.get_img_url)
40 | 
41 |     def get_img_url(self, response):
42 |         '''
43 |         从page_url的response里
44 |         找到图片的下载连接
45 |         '''
46 |         item = MzituItem()
47 |         item['name'] = response.meta['name']
48 | 
49 |         # 找到图片的下载地址，注意有可能一页有两张图
50 |         pic = response.xpath('//div[@class="main-image"]//img/@src').extract()
51 | 
52 |         for url in pic:
53 |             item['img_urls'] = url
54 |             yield item
55 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/mzitu/scrapy.cfg:
--------------------------------------------------------------------------------
 1 | # Automatically created by: scrapy startproject
 2 | #
 3 | # For more information about the [deploy] section see:
 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
 5 | 
 6 | [settings]
 7 | default = mzitu.settings
 8 | 
 9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = mzitu
12 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/proxy/proxy/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/proxy/proxy/__init__.py


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/proxy/proxy/items.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your scraped items
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/items.html
 7 | 
 8 | import scrapy
 9 | 
10 | 
class ProxyItem(scrapy.Item):
    """Item carrying proxy address data scraped by the proxy spiders."""

    # This crawler is very simple: only ip + port is needed, so a single
    # field is enough.
    addr = scrapy.Field()
17 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/proxy/proxy/middlewares.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your spider middleware
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html
 7 | 
 8 | from scrapy import signals
 9 | 
10 | 
class ProxySpiderMiddleware(object):
    """Pass-through spider middleware from the Scrapy project template.

    Every hook forwards its input unchanged; the only side effect is a log
    line when a spider is opened.  Not all methods need to be defined: if a
    method is missing, Scrapy acts as if the middleware does not modify the
    passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the ``spider_opened`` signal."""
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    # The hooks below are instance methods, so each takes ``self`` first.
    # Without it, calling them on an instance raises TypeError.

    def process_spider_input(self, response, spider):
        """Called for each response entering the spider.

        Should return None or raise an exception; this one returns None.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Called with the results the spider produced for ``response``.

        Must return an iterable of Request, dict or Item objects; every
        element of ``result`` is yielded unchanged.
        """
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        """Called when a spider or ``process_spider_input`` raises.

        Should return None or an iterable of Response, dict or Item objects;
        this one returns None.
        """
        pass

    def process_start_requests(self, start_requests, spider):
        """Called with the spider's start requests.

        Must return only requests (not items); every request is yielded
        unchanged.
        """
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        spider.logger.info('Spider opened: %s' % spider.name)
57 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/proxy/proxy/pipelines.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define your item pipelines here
 4 | #
 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
 7 | 
 8 | 
 9 | class ProxyPipeline(object):
10 |     '''
11 |     这里我们通过对spider name的判断
12 |     来分清楚item是哪一个spider传来的
13 |     从而做出不同的处理方式
14 |     '''
15 |     
16 |     def process_item(self, item, spider):
17 |         if spider.name == 'dxdlspider':
18 |             content = item['addr'].split('\r\n')
19 |             for line in content:
20 |                 open('/Users/ehco/Desktop/result/dx_proxy.txt','a').write(line+'\n')
21 | 
22 | 
23 |         elif spider.name=='kdlspider':
24 |             #我们直接将传来的addr写入文本
25 |             open('/Users/ehco/Desktop/result/kdl_proxy.txt','a').write(item['addr']+'\n')
26 | 
27 |         return item
28 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/proxy/proxy/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/proxy/proxy/spiders/dxdlspider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import scrapy
 3 | from proxy.items import ProxyItem
 4 | 
 5 | class DxdlspiderSpider(scrapy.Spider):
 6 |     name = "dxdlspider"
 7 |     allowed_domains = ["xicidaili.com"]
 8 |     start_urls = ['http://api.xicidaili.com/free2016.txt']
 9 | 
10 |     def parse(self, response):
11 |         item = ProxyItem()
12 |         #因为直接调用网站的api，本身get下来的就是一个text文本，
13 |         #我们直接把文本传给item再交给pipeline处理就行
14 |         item['addr'] = response.text
15 |         return item
16 |         
17 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/proxy/proxy/spiders/kdlspider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import scrapy
 3 | from proxy.items import ProxyItem
 4 | 
 5 | 
 6 | class KdlspiderSpider(scrapy.Spider):
 7 |     name = "kdlspider"
 8 |     allowed_domains = ["kuaidaili.com"]
 9 |     start_urls = []
10 | 
11 |     # 通过简单的循环，来生成爬取页面的列表
12 |     # 这里我们爬1~5页
13 |     for i in range(1, 6):
14 |         start_urls.append('http://www.kuaidaili.com/free/inha/' + str(i) + '/')
15 | 
16 |     def parse(self, response):
17 |         # 我们先实例化一个item
18 |         item = ProxyItem()
19 | 
20 |         # 通过Xpath找到每条代理的内容
21 |         mian = response.xpath(
22 |             '//table[@class="table table-bordered table-striped"]/tbody/tr')
23 | 
24 |         for li in mian:
25 |             #找到ip地址
26 |             ip = li.xpath('td/text()').extract()[0]
27 |             #找到端口：
28 |             port =li.xpath('td/text()').extract()[1]
29 |             #将两者连接，并返回给item处理
30 |             item['addr'] = ip+':'+port
31 |             yield item


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/proxy/scrapy.cfg:
--------------------------------------------------------------------------------
 1 | # Automatically created by: scrapy startproject
 2 | #
 3 | # For more information about the [deploy] section see:
 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
 5 | 
 6 | [settings]
 7 | default = proxy.settings
 8 | 
 9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = proxy
12 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/test_proxy.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 我们通过这个小脚本来判断
 3 | 抓取到的ip代理是否可以用！
 4 | 
 5 | 还是通过我最熟悉的request库来实现
 6 | 不过这里稍微加一下我也不太熟悉的多线程
 7 | '''
 8 | 
 9 | import requests
10 | 
11 | # 引入这个库来获得map函数的并发版本
12 | from multiprocessing.dummy import Pool as ThreadPool
13 | 
# Module-level state shared by the functions below.
dir_path = '/Users/ehco/Desktop/result/'
alive_ip = []

# Thread pool that gives us a concurrent version of map(); 20 workers.
# It is created once: the original built a default-sized pool first and
# immediately replaced it, leaving that first pool unclosed.
pool = ThreadPool(20)
22 | 
23 | 
def test_alive(proxy):
    """Request Baidu through ``proxy`` and record the proxy if it answers 200.

    ``proxy`` is used as the HTTP proxy for a GET with a 3 second timeout.
    Proxies that work are appended to the module-level ``alive_ip`` list;
    any request failure is reported and otherwise ignored.
    """
    proxies = {'http': proxy}
    print('正在测试：{}'.format(proxies))
    try:
        r = requests.get('http://www.baidu.com', proxies=proxies, timeout=3)
        if r.status_code == 200:
            print('该代理：{}成功存活'.format(proxy))
            # Appending mutates the shared list in place, so no ``global``
            # statement is needed.
            alive_ip.append(proxy)
    except Exception:
        # Best-effort check: a failing proxy must not abort the whole run.
        # ``Exception`` (rather than a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        print('该代理{}失效！'.format(proxies))
40 | 
41 | 
def Out_file(alive_ip=None):
    """Append every address in ``alive_ip`` to ``alive_ip.txt`` under ``dir_path``.

    ``alive_ip`` is an iterable of strings; when omitted nothing is written,
    but the file is still opened and the completion message printed.
    """
    # ``None`` stands in for the former mutable ``[]`` default.
    addresses = [] if alive_ip is None else alive_ip
    with open(dir_path + 'alive_ip.txt', 'a+') as f:
        for ip in addresses:
            f.write(ip + '\n')
        print('所有存活ip都已经写入文件！')
48 | 
49 | 
def test(filename='blank.txt'):
    """Check every proxy listed in ``dir_path + filename`` and save the live ones.

    Each line of the file is stripped and passed to ``test_alive`` through the
    shared thread pool.  The pool is closed and joined afterwards, then the
    collected ``alive_ip`` list is written out with ``Out_file``.
    """
    with open(dir_path + filename, 'r') as f:
        # Strip the trailing newline characters and drop blank lines, which
        # are not proxy addresses.
        proxys = [line.strip() for line in f if line.strip()]

    # One call runs all the checks concurrently.
    pool.map(test_alive, proxys)
    pool.close()
    pool.join()

    # Write the proxies that responded to the output file.
    Out_file(alive_ip)
65 | 
66 | 
# Run the check only when executed as a script, so importing this module
# does not start network requests or write files.
if __name__ == '__main__':
    test('kdl_proxy.txt')
69 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/weather/scrapy.cfg:
--------------------------------------------------------------------------------
 1 | # Automatically created by: scrapy startproject
 2 | #
 3 | # For more information about the [deploy] section see:
 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
 5 | 
 6 | [settings]
 7 | default = weather.settings
 8 | 
 9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = weather
12 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/weather/weather/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/weather/weather/__init__.py


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/weather/weather/items.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your scraped items
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/items.html
 7 | 
 8 | import scrapy
 9 | 
10 | 
class WeatherItem(scrapy.Item):
    """One day's weather entry as filled in by the SZtianqi spider."""

    # Field meanings follow the docstring of the spider's parse() method.
    date = scrapy.Field()  # the date
    week = scrapy.Field()  # day of the week
    img = scrapy.Field()  # URL of the icon representing the weather
    temperature = scrapy.Field()  # temperature for the day
    weather = scrapy.Field()  # weather for the day
    wind = scrapy.Field()  # wind direction for the day
20 |     
21 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/weather/weather/middlewares.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your spider middleware
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html
 7 | 
 8 | from scrapy import signals
 9 | 
10 | 
class WeatherSpiderMiddleware(object):
    """Pass-through spider middleware from the Scrapy project template.

    Every hook forwards its input unchanged; the only side effect is a log
    line when a spider is opened.  Not all methods need to be defined: if a
    method is missing, Scrapy acts as if the middleware does not modify the
    passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the ``spider_opened`` signal."""
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    # The hooks below are instance methods, so each takes ``self`` first.
    # Without it, calling them on an instance raises TypeError.

    def process_spider_input(self, response, spider):
        """Called for each response entering the spider.

        Should return None or raise an exception; this one returns None.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Called with the results the spider produced for ``response``.

        Must return an iterable of Request, dict or Item objects; every
        element of ``result`` is yielded unchanged.
        """
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        """Called when a spider or ``process_spider_input`` raises.

        Should return None or an iterable of Response, dict or Item objects;
        this one returns None.
        """
        pass

    def process_start_requests(self, start_requests, spider):
        """Called with the spider's start requests.

        Must return only requests (not items); every request is yielded
        unchanged.
        """
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        spider.logger.info('Spider opened: %s' % spider.name)
57 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/weather/weather/spiders/SZtianqi.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import scrapy
 3 | from weather.items import WeatherItem
 4 | 
 5 | 
 6 | class SztianqiSpider(scrapy.Spider):
 7 |     name = "SZtianqi"
 8 |     # 我们修改一下host，使得Scrapy可以爬取除了苏州之外的天气
 9 |     allowed_domains = ["tianqi.com"]
10 | 
11 |     # 建立需要爬取信息的url列表
12 |     start_urls = []
13 | 
14 |     # 需要爬的城市名称
15 |     citys = ['nanjing', 'suzhou', 'shanghai']
16 | 
17 |     # 用一个很简答的循环来生成需要爬的链接：
18 |     for city in citys:
19 |         start_urls.append('http://' + city + '.tianqi.com')
20 | 
21 |     def parse(self, response):
22 |         '''
23 |         筛选信息的函数：
24 |         date = 今日日期
25 |         week = 星期几
26 |         img = 表示天气的图标
27 |         temperature = 当天的温度
28 |         weather = 当天的天气
29 |         wind = 当天的风向
30 |         '''
31 | 
32 |         # 先建立一个列表，用来保存每天的信息
33 |         items = []
34 | 
35 |         # 找到包裹着每天天气信息的div
36 |         sixday = response.xpath('//div[@class="tqshow1"]')
37 | 
38 |         # 循环筛选出每天的信息：
39 |         for day in sixday:
40 |             # 先申请一个weatheritem 的类型来保存结果
41 |             item = WeatherItem()
42 | 
43 |             # 观察网页，知道h3标签下的不单单是一行str，我们用trick的方式将它连接起来
44 |             date = ''
45 |             for datetitle in day.xpath('./h3//text()').extract():
46 |                 date += datetitle
47 |             
48 |             item['date'] = date
49 | 
50 |             item['week'] = day.xpath('./p//text()').extract()[0]
51 |             item['img'] = day.xpath(
52 |                 './ul/li[@class="tqpng"]/img/@src').extract()[0]
53 |             tq = day.xpath('./ul/li[2]//text()').extract()
54 |             # 我们用第二种取巧的方式，将tq里找到的str连接
55 |             item['temperature'] = ''.join(tq)
56 |             item['weather'] = day.xpath('./ul/li[3]/text()').extract()[0]
57 |             item['wind'] = day.xpath('./ul/li[4]/text()').extract()[0]
58 |             items.append(item)
59 |         return items
60 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/weather/weather/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/scrapy.cfg:
--------------------------------------------------------------------------------
 1 | # Automatically created by: scrapy startproject
 2 | #
 3 | # For more information about the [deploy] section see:
 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
 5 | 
 6 | [settings]
 7 | default = xiubai.settings
 8 | 
 9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = xiubai
12 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/xiubai/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/xiubai/xiubai/__init__.py


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/xiubai/items.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your scraped items
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/items.html
 7 | 
 8 | import scrapy
 9 | 
10 | 
class XiubaiItem(scrapy.Item):
    """One post scraped by the hotspider spider."""

    author = scrapy.Field()  # author of the post
    body = scrapy.Field()  # text of the post
    funNum = scrapy.Field()  # vote count (read from the "stats-vote" span)
    comNum = scrapy.Field()  # comment count (read from the "stats-comments" span)
19 |     


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/xiubai/middlewares.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your spider middleware
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html
 7 | 
 8 | from scrapy import signals
 9 | 
10 | 
class XiubaiSpiderMiddleware(object):
    """Pass-through spider middleware from the Scrapy project template.

    Every hook forwards its input unchanged; the only side effect is a log
    line when a spider is opened.  Not all methods need to be defined: if a
    method is missing, Scrapy acts as if the middleware does not modify the
    passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the ``spider_opened`` signal."""
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    # The hooks below are instance methods, so each takes ``self`` first.
    # Without it, calling them on an instance raises TypeError.

    def process_spider_input(self, response, spider):
        """Called for each response entering the spider.

        Should return None or raise an exception; this one returns None.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Called with the results the spider produced for ``response``.

        Must return an iterable of Request, dict or Item objects; every
        element of ``result`` is yielded unchanged.
        """
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        """Called when a spider or ``process_spider_input`` raises.

        Should return None or an iterable of Response, dict or Item objects;
        this one returns None.
        """
        pass

    def process_start_requests(self, start_requests, spider):
        """Called with the spider's start requests.

        Must return only requests (not items); every request is yielded
        unchanged.
        """
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        spider.logger.info('Spider opened: %s' % spider.name)
57 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/xiubai/middlewares/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/xiubai/xiubai/middlewares/__init__.py


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/xiubai/middlewares/coustomProxy.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 从本地文件proxy.py中
 3 | 读取可以用的代理列表
 4 | 并从中随机选择一个代理
 5 | 供给spider使用
 6 | '''
 7 | 
 8 | 
 9 | from xiubai.middlewares.proxy import proxies
10 | import random
11 | 
class RandomProxy(object):
    """Downloader middleware that routes each request through a random proxy."""

    def process_request(self, request, spider):
        """Set ``request.meta['proxy']`` to a randomly chosen entry of ``proxies``."""
        # Pick one address from the imported list and prefix the scheme.
        request.meta['proxy'] = 'http://{}'.format(random.choice(proxies))


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/xiubai/middlewares/coustomUserAgent.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 自定义scrapy框架的
 3 | user-agent头
 4 | 从一个被良好维护的user-agent列表里
 5 | 随机筛选合适的user-agent 
 6 | 防止封锁
 7 | '''
 8 | from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware
 9 | 
10 | import random
11 | 
12 | 
13 | 
# A list of user-agent strings that are not easily blocked;
# RandomUserAgent below picks one of them at random.
agents = ['Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;',
              'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1',
              'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
              'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
              'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)']
20 | 
class RandomUserAgent(UserAgentMiddleware):
    """Downloader middleware that supplies a random User-agent header."""

    def process_request(self, request, spider):
        """Set a default ``User-agent`` header chosen at random from ``agents``.

        A downloader middleware has to provide this method; it is one step in
        Scrapy's request flow.  See the documentation for details:
        http://scrapy-chs.readthedocs.io/zh_CN/0.24/topics/downloader-middleware.html
        """
        # setdefault leaves a header that is already present untouched.
        request.headers.setdefault('User-agent', random.choice(agents))


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/xiubai/middlewares/proxy.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 通过爬取可用的免费代理，
 3 | 进行测试可用度之后
 4 | 筛选获得的代理
 5 | 
 6 | 将静态的资源文件写在功能文件之外
 7 | 方便及时更新维护。
 8 | '''
 9 | 
10 | 
# Proxy addresses in "host:port" form; add more entries as needed.

proxies = [
    '89.36.215.72:1189',
    '94.177.203.123:1189',
    '110.73.11.227:8123',
    '180.183.176.189:8080',
    '109.62.247.81:8080',
]
20 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/xiubai/pipelines.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define your item pipelines here
 4 | #
 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
 7 | 
 8 | 
 9 | class XiubaiPipeline(object):
10 |     def process_item(self, item, spider):
11 | 
12 |         with open("/Users/ehco/Desktop/result/qiubai.txt",'a+') as f:
13 |             f.write('作者：{} \n{}\n点赞：{}\t评论数：{}\n\n'.format(
14 |                 item['author'], item["body"], item['funNum'], item["comNum"]))
15 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/xiubai/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/xiubai/xiubai/spiders/hotspider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import scrapy
 3 | from xiubai.items import XiubaiItem
 4 | 
 5 | class HotspiderSpider(scrapy.Spider):
 6 |     name = "hotspider"
 7 |     allowed_domains = ["qiushibaike.com"]
 8 |     start_urls = []
 9 |     # 我们爬取35页的全部热门段子
10 |     for i in range(1,3):
11 |         start_urls.append('http://www.qiushibaike.com/8hr/page/'+str(i)+'/')
12 |     
13 |     
14 |     def parse(self, response):
15 |         item = XiubaiItem()
16 | 
17 |         # 找到热门段子主体
18 |         main = response.xpath('//div[@id="content-left"]/div')
19 | 
20 | 
21 |         for div in main:
22 |             #段子作者
23 |             item['author'] =div.xpath('.//h2/text()').extract()[0]
24 |             #段子主体： 
25 |             item['body'] = ''.join( div.xpath('a[@class="contentHerf"]/div/span[1]/text()').extract())
26 |             #段子footer
27 |             item['funNum']= div.xpath('.//span[@class="stats-vote"]/i/text()').extract()[0]
28 |             item['comNum']= div.xpath('.//span[@class="stats-comments"]/a/i/text()').extract()[0]
29 |             yield item
30 |         


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/zimuku/scrapy.cfg:
--------------------------------------------------------------------------------
 1 | # Automatically created by: scrapy startproject
 2 | #
 3 | # For more information about the [deploy] section see:
 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
 5 | 
 6 | [settings]
 7 | default = zimuku.settings
 8 | 
 9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = zimuku
12 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/zimuku/zimuku/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/Scrapy 爬虫框架/zimuku/zimuku/__init__.py


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/zimuku/zimuku/items.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your scraped items
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/items.html
 7 | 
 8 | import scrapy
 9 | 
10 | 
class ZimukuItem(scrapy.Item):
    """Item declared for the zimuku demo project."""

    subname = scrapy.Field() # name of the subtitle
15 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/zimuku/zimuku/middlewares.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define here the models for your spider middleware
 4 | #
 5 | # See documentation in:
 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html
 7 | 
 8 | from scrapy import signals
 9 | 
10 | 
class ZimukuSpiderMiddleware(object):
    """Pass-through spider middleware from the Scrapy project template.

    Every hook forwards its input unchanged; the only side effect is a log
    line when a spider is opened.  Not all methods need to be defined: if a
    method is missing, Scrapy acts as if the middleware does not modify the
    passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the ``spider_opened`` signal."""
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    # The hooks below are instance methods, so each takes ``self`` first.
    # Without it, calling them on an instance raises TypeError.

    def process_spider_input(self, response, spider):
        """Called for each response entering the spider.

        Should return None or raise an exception; this one returns None.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Called with the results the spider produced for ``response``.

        Must return an iterable of Request, dict or Item objects; every
        element of ``result`` is yielded unchanged.
        """
        for i in result:
            yield i

    def process_spider_exception(self, response, exception, spider):
        """Called when a spider or ``process_spider_input`` raises.

        Should return None or an iterable of Response, dict or Item objects;
        this one returns None.
        """
        pass

    def process_start_requests(self, start_requests, spider):
        """Called with the spider's start requests.

        Must return only requests (not items); every request is yielded
        unchanged.
        """
        for r in start_requests:
            yield r

    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        spider.logger.info('Spider opened: %s' % spider.name)
57 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/zimuku/zimuku/pipelines.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Define your item pipelines here
 4 | #
 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
 7 | 
 8 | 
class ZimukuPipeline(object):
    """Item pipeline that prints every item it receives."""

    def process_item(self, item, spider):
        """Print ``item`` and return it unchanged; ``spider`` is not used."""

        # Keeping things simple here:
        # just print what was scraped.
        print(item)

        return item
17 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/zimuku/zimuku/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 


--------------------------------------------------------------------------------
/Scrapy 爬虫框架/zimuku/zimuku/spiders/demo.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import scrapy
 3 | 
 4 | # 将我们需要爬的项目引入进来
 5 | from zimuku.items import ZimukuItem
 6 | 
 7 | class DemoSpider(scrapy.Spider):
 8 |     
 9 |     #该爬虫的名字
10 |     name = "demo"
11 |     
12 |     #规定爬虫爬取网页的域名   
13 |     allowed_domains = ["zimuku.net"]
14 |     
15 |     #开始爬取的url链接
16 |     start_urls = ['http://zimuku.net/']
17 | 
18 |     def parse(self, response):
19 |         '''
20 |         parse()函数接收Response参数，就是网页爬取后返回的数据
21 |         用于处理响应，他负责解析爬取的内容
22 |         生成解析结果的字典，并返回新的需要爬取的请求
23 |         '''
24 | 
25 |         #由于是demo 我们不做完全的功能，
26 |         #只要求爬取出第一个字幕的名字
27 |         #xpath规则可以通过查看网页源文件得出
28 |         name = response.xpath('//b/text()').extract()[1]
29 | 
30 |         #建立一个items字典，用于保存我们爬到的结果，并返回给pipline处理
31 |         items = {}
32 |         items['第一个']= name
33 | 
34 |         return items
35 | 


--------------------------------------------------------------------------------
/YHShop/handler.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 一号店商品信息查询
 3 | 
 4 | '''
 5 | 
 6 | # 导入城市省份资源文件
 7 | from citydict import CITY_MAP
 8 | 
 9 | # 导入爬虫程序
10 | from spider import parse_goods_info
11 | import time
12 | 
13 | 
def main():
    """Interactively look up goods for a product keyword in a given city.

    Prompts for a product keyword and a city name, resolves the city to
    its province/city ids through ``CITY_MAP``, runs the search through
    ``parse_goods_info`` and prints one line per result.

    If the city is not present in ``CITY_MAP`` a message is printed and
    the function returns instead of crashing with a KeyError.
    """
    # Strip stray whitespace so an accidental trailing space or tab does
    # not break the city lookup or end up in the search URL.
    good = input('请输入需要查询的商品:\t').strip()
    city = input('请输入查询城市:\t').strip()

    # The city name is free-form user input, so a miss is an expected
    # situation rather than a programming error.
    try:
        city_info = CITY_MAP[city]
    except KeyError:
        print('未找到城市: {}'.format(city))
        return

    provinceId = city_info['provinceId']
    cityid = city_info['cityid']
    searc_url = 'http://search.yhd.com/c0-0/k' + good

    print('正在搜索相关商品')
    res = parse_goods_info(searc_url, provinceId, cityid)
    print('搜索完毕.....正在处理数据')

    for rec in res:
        print('型号: {}\t价格: {}\t库存: {}\t地址: {}'.format(
            rec['name'], rec['price'], rec['stock'], rec['url']))
        # Pause between records so the output scrolls at a readable pace.
        time.sleep(0.5)
29 | 
30 | 
# Run the interactive query only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()
33 | 


--------------------------------------------------------------------------------
/YHShop/spider.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 爬取一号店商品信息
 3 | '''
 4 | 
 5 | import requests
 6 | from bs4 import BeautifulSoup
 7 | 
 8 | 
 9 | def get_html_text(url):
10 |     '''
11 |     返回网页text
12 |     '''
13 |     try:
14 |         r = requests.get(url, timeout=30)
15 |         r.raise_for_status()
16 |         r.encoding = r.apparent_encoding
17 |         return r.text
18 |     except:
19 |         raise ValueError('errors')
20 | 
21 | 
def parse_good_detail(pmId, provinceId=5, cityid=37):
    """Look up the price and stock of one product.

    Queries the site's Ajax detail endpoint for the product ``pmId`` in
    the given province/city. The original author describes the defaults
    (5, 37) as Jiangsu province / Nanjing.

    Args:
        pmId: product id to query.
        provinceId: province id placed in the request URL.
        cityid: city id placed in the request URL.

    Returns:
        A ``(price, stock)`` tuple holding the raw text found after the
        ``currentPrice`` and ``currentStockNum`` keys in the response.

    Raises:
        ValueError: if the page cannot be downloaded.
        KeyError: if either key is missing from the response.
    """
    # Ajax endpoint used by the site for product details.
    url = 'http://gps.yhd.com/restful/detail?mcsite=1&provinceId={}&cityId={}&pmId={}&ruleType=2&businessTagId=16'.format(
        provinceId, cityid, pmId)
    text = get_html_text(url)
    # Keep what follows the first '{' and drop the last two characters,
    # leaving a flat run of comma-separated "key":value fragments.
    content = text[text.find('{') + 1:-2]

    data_dict = {}
    for rec in content.split(','):
        # Split on the first ':' only, so a value that itself contains a
        # colon is kept whole instead of being truncated.
        key, sep, value = rec.partition(':')
        if not sep:
            # A fragment without ':' (e.g. the tail of a value that
            # contained a comma) carries no key; skip it rather than
            # failing the whole lookup with an IndexError.
            continue
        data_dict[key.replace('"', '').strip()] = value

    # Pick out the two fields callers care about.
    price = data_dict['currentPrice']
    stock = data_dict['currentStockNum']

    return price, stock
45 | 
46 | 
def parse_goods_info(url,provinceId=5, cityid=37):
    """Collect name, price, stock and link for every product on a page.

    Downloads ``url``, finds every ``<a class="mainTitle">`` element and
    queries ``parse_good_detail`` for each product's price and stock in
    the given province/city.

    Args:
        url: address of the listing/search page to scrape.
        provinceId: province id forwarded to ``parse_good_detail``.
        cityid: city id forwarded to ``parse_good_detail``.

    Returns:
        A list of dicts with the keys 'name', 'price', 'stock' and
        'url', one per product found. When the detail lookup for a
        product fails, its price and stock are both '信息错误'.

    Raises:
        ValueError: if the listing page itself cannot be downloaded.
    """
    goods_infolist = []

    html = get_html_text(url)
    soup = BeautifulSoup(html, 'lxml')
    goods_list = soup.find_all('a', class_='mainTitle')

    for good in goods_list:
        # Drop the first two characters of the href (presumably the
        # leading '//' of a protocol-relative link -- confirm). A separate
        # name is used so the ``url`` parameter is not overwritten.
        good_url = good['href'][2:]
        # Tidy the title: keep its first three space-separated pieces.
        title = ''.join(good['title'].split(' ')[:3])
        pmId = good['pmid']
        try:
            price, stock = parse_good_detail(pmId, provinceId, cityid)
        except Exception:
            # Best effort: one failed lookup must not abort the whole
            # listing. ``Exception`` keeps Ctrl-C working, which the
            # previous bare ``except`` swallowed.
            price, stock = '信息错误', '信息错误'

        goods_infolist.append(
            {'name': title, 'price': price, 'stock': stock, 'url': good_url})

    return goods_infolist
78 | 
79 | 
80 | '''
81 | # 一号店自营所有小米手机的商品筛选列表
82 | xiaomi_url = 'http://list.yhd.com/c23586-0-81436/b969871-3923/?tc=3.0.10.3923.3&tp=52.23586.107.0.3.LsvLUR1-10-1FRQ7&ti=G78XlK'
83 | # 测试抓取小米手机的信息
84 | xiaomiPhone = parse_goods_info(xiaomi_url)
85 | # 格式化输出一下
86 | for rec in xiaomiPhone:
87 |     print('型号: {}\t价格: {}\t库存: {}\t地址: {}'.format(
88 |         rec['name'], rec['price'], rec['stock'], rec['url']))
89 | '''
90 | 


--------------------------------------------------------------------------------
/YHShop/tools.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 解析1号店的所有
 3 | 省份
 4 | 城市信息
 5 | '''
 6 | import os
 7 | from bs4 import BeautifulSoup
 8 | 
# Directory that contains this file.
path = os.path.dirname(os.path.abspath(__file__))


# Read the saved city-list page that sits next to this script.
# NOTE(review): no encoding is passed, so the platform default is used --
# confirm it matches the encoding of cityid.html.
with open(path + '/cityid.html') as f:
    html = f.read()
15 | 
16 | 
def get_cityid_map(html):
    """Build a mapping from city name to its province and city ids.

    Every ``<a>`` element in ``html`` is treated as one city: its text,
    with every '市' character removed, becomes the key, and its
    ``data-provinceid`` / ``data-cityid`` attributes become the value.

    Args:
        html: markup of the city-selection page.

    Returns:
        A dict of the form
        ``{name: {'provinceId': ..., 'cityid': ...}}``. If two anchors
        produce the same name, the later one wins.
    """
    anchors = BeautifulSoup(html, 'lxml').find_all('a')
    return {
        anchor.text.replace('市', ''): {
            'provinceId': anchor['data-provinceid'],
            'cityid': anchor['data-cityid'],
        }
        for anchor in anchors
    }
34 | 
35 | 
36 | print(get_cityid_map(html))
37 | 


--------------------------------------------------------------------------------
/doubanmovie/config.py:
--------------------------------------------------------------------------------
1 | 
# Database connection settings: host, user, password and database name.
# NOTE(review): credentials are hard-coded here; the password looks like
# a placeholder -- confirm before use.
EHCO_DB = {
    'host': '127.0.0.1',
    'user': 'root',
    'password': 'x',
    'db': 'EhcoTestDb'
}
8 | 


--------------------------------------------------------------------------------
/doubanmovie/data.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # 导入对数据库操作的封装和配置文件
 3 | from stroe import DbToMysql
 4 | import config
 5 | 
# Create the database helper from the connection settings in config.
store = DbToMysql(config.EHCO_DB)

# Query the data.
# NOTE(review): presumably the first 10 rows of 'DoubanTop250' ordered by
# 'ranking' ascending -- confirm against DbToMysql.find_by_sort.
res = store.find_by_sort('DoubanTop250', 'ranking', 10, 'ASC')

# Print the 'name' field of every returned row.
for data in res:
    print(data['name'])
14 | 


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/七宗罪 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2840039918&uk=1718950491", "des": "文件数: 1,分享时间: 2017-05-14T19:03:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2840039918&uk=1718950491", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2363395042&uk=2355940608", "des": "文件数: 1,分享时间: 2016-04-06T14:59:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2363395042&uk=2355940608", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2200895572&uk=2722415184", "des": "文件数: 1,分享时间: 2016-05-01T11:03:46, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2200895572&uk=2722415184", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2889449757&uk=4248409166", "des": "文件数: 1,分享时间: 2016-04-25T13:48:35, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2889449757&uk=4248409166", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=3190490267&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-12T11:02:44, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3190490267&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2477902678&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-17T09:03:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2477902678&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=1619783786&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-02T11:34:41, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1619783786&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=1027108874&uk=609021507", "des": "文件数: 1,分享时间: 
2017-09-03T09:44:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1027108874&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=2789979607&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-07T10:06:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2789979607&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "七宗罪", "link": "https://pan.baidu.com/share/link?shareid=710919621&uk=122944454", "des": "文件数: 1,分享时间: 2017-06-19T16:15:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=710919621&uk=122944454", "host": "pan.baidu.com", "more": null}], "count": 341, "q": "七宗罪 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "七宗罪 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/人工智能 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=3231602468&uk=3794903389", "des": "文件数: 1,分享时间: 2016-09-13T16:47:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3231602468&uk=3794903389", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=4015767790&uk=3006233851", "des": "文件数: 1,分享时间: 2016-07-25T15:56:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4015767790&uk=3006233851", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=1380811175&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-23T15:34:28, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1380811175&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=594844002&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-15T10:02:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=594844002&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=928363975&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-26T21:30:39, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=928363975&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=4214111557&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-31T15:15:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4214111557&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=1587970034&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-01T11:57:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1587970034&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=2801438492&uk=609021507", "des": "文件数: 1,分享时间: 
2017-09-04T13:20:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2801438492&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=3104916544&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-16T10:45:42, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3104916544&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "人工智能", "link": "https://pan.baidu.com/share/link?shareid=2342410367&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-13T12:15:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2342410367&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 461, "q": "人工智能 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "人工智能 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/加勒比海盗 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "加勒比海盗", "link": "https://pan.baidu.com/pcloud/album/info?uk=2101200196&album_id=8812019080850495109", "des": "专辑内文件数: 4,分享时间: 2017-05-05T11:10:23, 文件大小: 13786050.875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=2101200196&album_id=8812019080850495109", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=2410886285&uk=3464266305", "des": "文件数: 1,分享时间: 2017-05-27T12:40:51, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2410886285&uk=3464266305", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=3878401676&uk=354965675", "des": "文件数: 1,分享时间: 2017-06-03T12:08:35, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3878401676&uk=354965675", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=2255321821&uk=3332511772", "des": "文件数: 1,分享时间: 2017-01-20T01:57:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2255321821&uk=3332511772", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/pcloud/album/info?uk=1582624366&album_id=7511367003517123210", "des": "专辑内文件数: 3,分享时间: 2016-12-19T01:23:44, 文件大小: 26583250.891601562k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=1582624366&album_id=7511367003517123210", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=89008288&uk=3927300193", "des": "文件数: 1,分享时间: 2017-04-09T20:41:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=89008288&uk=3927300193", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/pcloud/album/info?uk=2101200196&album_id=8812019080850495109", "des": "专辑内文件数: 4,分享时间: 2017-05-05T11:10:23, 文件大小: 13786050.875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=2101200196&album_id=8812019080850495109", 
"host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=1893576492&uk=748543823", "des": "文件数: 1,分享时间: 2016-06-12T18:45:01, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1893576492&uk=748543823", "host": "pan.baidu.com", "more": null}, {"title": "加勒比海盗", "link": "https://pan.baidu.com/share/link?shareid=1781833766&uk=911963550", "des": "文件数: 1,分享时间: 2017-05-23T14:04:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1781833766&uk=911963550", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "加勒比海盗 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 9, "description": "加勒比海盗 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/勇士 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=650091567&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-23T15:52:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=650091567&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=3721420131&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-12T11:24:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3721420131&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=1524622787&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T10:03:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1524622787&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=2521405922&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-06T09:45:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2521405922&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=139518705&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-10T12:45:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=139518705&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=1291684143&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-12T14:54:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1291684143&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=1718468192&uk=609021507", "des": "文件数: 1,分享时间: 2017-05-31T23:34:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1718468192&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=2655359879&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-19T12:03:54, 文件大小: 1k", 
"blink": "https://pan.baidu.com/share/link?shareid=2655359879&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=27328534&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-23T10:42:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=27328534&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "勇士", "link": "https://pan.baidu.com/share/link?shareid=2165667159&uk=1929670292", "des": "文件数: 1,分享时间: 2017-05-06T10:17:50, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2165667159&uk=1929670292", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "勇士 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "勇士 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/变脸 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "~！变脸", "link": "https://pan.baidu.com/share/link?shareid=2714528440&uk=3697255184", "des": "文件数: 1,分享时间: 2015-07-02T18:58:32, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2714528440&uk=3697255184", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=2455604325&uk=3140914463", "des": "文件数: 1,分享时间: 2015-12-08T20:00:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2455604325&uk=3140914463", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=1088535568&uk=2701181026", "des": "文件数: 1,分享时间: 2016-06-07T19:16:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1088535568&uk=2701181026", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=3162551740&uk=2701181026", "des": "文件数: 1,分享时间: 2016-06-06T10:21:14, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3162551740&uk=2701181026", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=4087228776&uk=187713544", "des": "文件数: 1,分享时间: 2016-11-03T18:49:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4087228776&uk=187713544", "host": "pan.baidu.com", "more": null}, {"title": "~！变脸", "link": "https://pan.baidu.com/share/link?shareid=4019333128&uk=1214698539", "des": "文件数: 1,分享时间: 2015-07-14T09:00:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4019333128&uk=1214698539", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=676196502&uk=2808886271", "des": "文件数: 1,分享时间: 2017-03-23T14:19:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=676196502&uk=2808886271", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=3528799&uk=3993130536", "des": "文件数: 1,分享时间: 
2015-10-09T21:32:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3528799&uk=3993130536", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=3930472797&uk=2808886271", "des": "文件数: 1,分享时间: 2017-03-23T14:28:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3930472797&uk=2808886271", "host": "pan.baidu.com", "more": null}, {"title": "变脸", "link": "https://pan.baidu.com/share/link?shareid=130247043&uk=2435012951", "des": "文件数: 1,分享时间: 2013-10-07T16:18:56, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=130247043&uk=2435012951", "host": "pan.baidu.com", "more": null}], "count": 289, "q": "变脸 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "变脸 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/可可西里 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=3947424960&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-23T15:26:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3947424960&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=912308654&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-07T08:47:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=912308654&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=892214618&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-04T13:14:01, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=892214618&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=4150464509&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-14T10:28:30, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4150464509&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=2963532857&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-18T09:46:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2963532857&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=3407281000&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-17T10:48:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3407281000&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=1824952352&uk=487823360", "des": "文件数: 1,分享时间: 2017-02-02T17:24:43, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1824952352&uk=487823360", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=1860749948&uk=487823360", "des": "文件数: 1,分享时间: 
2017-05-30T02:38:22, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1860749948&uk=487823360", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=1286241839&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-15T09:54:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1286241839&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "可可西里", "link": "https://pan.baidu.com/share/link?shareid=2155337618&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-01T11:50:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2155337618&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 120, "q": "可可西里 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "可可西里 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/寿司之神 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "寿司之神", "link": "https://pan.baidu.com/share/link?shareid=239970418&uk=4051721367", "des": "文件数: 1,分享时间: 2016-02-18T15:46:04, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=239970418&uk=4051721367", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神", "link": "https://pan.baidu.com/share/link?shareid=2324921645&uk=3993130536", "des": "文件数: 1,分享时间: 2015-10-10T15:55:00, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2324921645&uk=3993130536", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神", "link": "https://pan.baidu.com/share/link?shareid=987426513&uk=1247192380", "des": "文件数: 1,分享时间: 2015-11-17T18:50:50, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=987426513&uk=1247192380", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神", "link": "https://pan.baidu.com/share/link?shareid=1594929984&uk=574725777", "des": "文件数: 1,分享时间: 2015-11-15T20:48:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1594929984&uk=574725777", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神720p26163723.torrent", "link": "https://pan.baidu.com/share/link?shareid=1360413848&uk=3003776623", "des": "文件数: 1,分享时间: 2013-06-13T00:04:24, 文件大小: 72.677734375k", "blink": "https://pan.baidu.com/share/link?shareid=1360413848&uk=3003776623", "host": "pan.baidu.com", "more": null}, {"title": "236 寿司之神", "link": "https://pan.baidu.com/share/link?shareid=4177675036&uk=1084372931", "des": "文件数: 1,分享时间: 2016-12-24T13:35:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4177675036&uk=1084372931", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神-中文字幕.rmvb", "link": "https://pan.baidu.com/share/link?shareid=3610643957&uk=1325694201", "des": "文件数: 1,分享时间: 2016-08-27T11:55:59, 文件大小: 925566.1962890625k", "blink": "https://pan.baidu.com/share/link?shareid=3610643957&uk=1325694201", "host": "pan.baidu.com", "more": null}, {"title": "04.寿司之神", "link": 
"https://pan.baidu.com/share/link?shareid=2752181390&uk=1918328979", "des": "文件数: 1,分享时间: 2015-03-28T20:30:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2752181390&uk=1918328979", "host": "pan.baidu.com", "more": null}, {"title": "寿司之神.Jiro.Dreams.Of.Sushi.2011.720p.BluRay.x264.DTS-HDChina.torrent", "link": "https://pan.baidu.com/share/link?shareid=3417882925&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-11T12:54:35, 文件大小: 18.3564453125k", "blink": "https://pan.baidu.com/share/link?shareid=3417882925&uk=1870504545", "host": "pan.baidu.com", "more": null}], "count": 1651, "q": "寿司之神 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 9, "description": "寿司之神 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/心迷宫 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=237923182&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T09:57:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=237923182&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=3174485502&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-23T10:36:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3174485502&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=463359231&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-19T11:58:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=463359231&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=415100132&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-10T23:07:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=415100132&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=3627640785&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-01T12:41:38, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3627640785&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=3109449875&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-23T15:47:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3109449875&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=2194039023&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-14T10:45:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2194039023&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=3090723172&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-18T10:03:15, 文件大小: 
1k", "blink": "https://pan.baidu.com/share/link?shareid=3090723172&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=4278278301&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-05T09:36:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4278278301&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "心迷宫", "link": "https://pan.baidu.com/share/link?shareid=3752180819&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-06T09:40:21, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3752180819&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "心迷宫 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "心迷宫 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/恐怖游轮 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=3463913065&uk=2373288888", "des": "文件数: 1,分享时间: 2017-06-04T16:36:08, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3463913065&uk=2373288888", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=1347716781&uk=122944454", "des": "文件数: 1,分享时间: 2017-06-19T16:17:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1347716781&uk=122944454", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=172941232&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-12T10:56:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=172941232&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=3709245108&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-06T09:23:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3709245108&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=1694647796&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-05T09:19:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1694647796&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=1793017178&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T09:39:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1793017178&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=805690592&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-15T11:04:14, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=805690592&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=4117993944&uk=609021507", "des": "文件数: 1,分享时间: 
2017-06-10T10:59:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4117993944&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=1686553054&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-16T22:59:51, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1686553054&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "恐怖游轮", "link": "https://pan.baidu.com/share/link?shareid=3731015201&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-01T12:30:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3731015201&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "恐怖游轮 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "恐怖游轮 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/情书 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=330204841&uk=726451123", "des": "文件数: 1,分享时间: 2016-11-29T09:08:30, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=330204841&uk=726451123", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=4145414954&uk=1963275218", "des": "文件数: 1,分享时间: 2014-01-25T12:10:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4145414954&uk=1963275218", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=4179840654&uk=3243746857", "des": "文件数: 1,分享时间: 2017-02-24T15:41:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4179840654&uk=3243746857", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=2422081593&uk=2005008334", "des": "文件数: 1,分享时间: 2016-10-02T20:23:56, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2422081593&uk=2005008334", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=3112867171&uk=1208239462", "des": "文件数: 1,分享时间: 2016-11-30T09:32:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3112867171&uk=1208239462", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=3997946062&uk=1785947437", "des": "文件数: 1,分享时间: 2017-03-13T22:43:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3997946062&uk=1785947437", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=3975833307&uk=2005008334", "des": "文件数: 1,分享时间: 2016-10-02T20:24:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3975833307&uk=2005008334", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=20405339&uk=3006233851", "des": "文件数: 1,分享时间: 2016-07-26T18:08:18, 
文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=20405339&uk=3006233851", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=1069563114&uk=3864380585", "des": "文件数: 1,分享时间: 2017-04-02T23:33:10, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1069563114&uk=3864380585", "host": "pan.baidu.com", "more": null}, {"title": "情书", "link": "https://pan.baidu.com/share/link?shareid=2822023406&uk=2208648241", "des": "文件数: 1,分享时间: 2016-09-18T08:31:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2822023406&uk=2208648241", "host": "pan.baidu.com", "more": null}], "count": 1462, "q": "情书 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "情书 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/教父 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=2072868916&uk=1666264965", "des": "文件数: 1,分享时间: 2015-07-31T13:09:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2072868916&uk=1666264965", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=1534281444&uk=3730622674", "des": "文件数: 1,分享时间: 2017-05-03T21:59:04, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1534281444&uk=3730622674", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=3656944995&uk=797153046", "des": "文件数: 1,分享时间: 2016-01-17T17:46:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3656944995&uk=797153046", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=2685440415&uk=4132777198", "des": "文件数: 1,分享时间: 2016-02-03T12:42:59, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2685440415&uk=4132777198", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=1494660546&uk=4053121570", "des": "文件数: 1,分享时间: 2016-06-29T19:18:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1494660546&uk=4053121570", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=1125479804&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-17T08:56:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1125479804&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=1474475813&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-13T12:07:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1474475813&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=2110798130&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-16T10:37:20, 
文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2110798130&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=3910379162&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-09T14:49:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3910379162&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "教父", "link": "https://pan.baidu.com/share/link?shareid=2779978727&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-15T11:02:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2779978727&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 904, "q": "教父 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "教父 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/断背山 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=2182085320&uk=1057283767", "des": "文件数: 1,分享时间: 2017-04-28T21:21:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2182085320&uk=1057283767", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=4217525942&uk=1057283767", "des": "文件数: 1,分享时间: 2016-12-27T01:13:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4217525942&uk=1057283767", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=1532938314&uk=2589464121", "des": "文件数: 1,分享时间: 2016-11-04T14:55:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1532938314&uk=2589464121", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=299369933&uk=1330472352", "des": "文件数: 1,分享时间: 2017-06-07T15:23:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=299369933&uk=1330472352", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=3551503145&uk=1330472352", "des": "文件数: 1,分享时间: 2017-05-31T10:17:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3551503145&uk=1330472352", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=3416026360&uk=277159352", "des": "文件数: 1,分享时间: 2016-03-09T21:48:32, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3416026360&uk=277159352", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=3572919005&uk=1931962571", "des": "文件数: 1,分享时间: 2015-04-25T18:53:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3572919005&uk=1931962571", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=1418069092&uk=960662928", "des": "文件数: 1,分享时间: 
2016-09-14T16:31:25, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1418069092&uk=960662928", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=3813826547&uk=3761711380", "des": "文件数: 1,分享时间: 2015-12-28T20:59:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3813826547&uk=3761711380", "host": "pan.baidu.com", "more": null}, {"title": "断背山", "link": "https://pan.baidu.com/share/link?shareid=3291408779&uk=507444115", "des": "文件数: 1,分享时间: 2016-11-10T22:53:46, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3291408779&uk=507444115", "host": "pan.baidu.com", "more": null}], "count": 124, "q": "断背山 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "断背山 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/新龙门客栈 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "【QQ群88944035】新龙门客栈.New.Dragon.Inn.Repack.1992.720p.HDTV.x264-HQC.mkv.torrent", "link": "https://pan.baidu.com/share/link?shareid=165540377&uk=4103207533", "des": "文件数: 1,分享时间: 2017-05-26T18:34:31, 文件大小: 11.4072265625k", "blink": "https://pan.baidu.com/share/link?shareid=165540377&uk=4103207533", "host": "pan.baidu.com", "more": null}, {"title": "〖-f-〗《新龙门客栈》New.Dragon.Gate.Inn.1992.DVDRip.x264.AC3.2Audios-CMCT《之善寻正》.torrent", "link": "https://pan.baidu.com/share/link?shareid=1075517915&uk=239727952", "des": "文件数: 1,分享时间: 2016-12-27T21:38:20, 文件大小: 81.982421875k", "blink": "https://pan.baidu.com/share/link?shareid=1075517915&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "马景涛版新龙门客栈电视剧全集【qq群88944035分享】.torrent", "link": "https://pan.baidu.com/share/link?shareid=4224334131&uk=4103207533", "des": "文件数: 1,分享时间: 2017-05-29T16:07:16, 文件大小: 118.5615234375k", "blink": "https://pan.baidu.com/share/link?shareid=4224334131&uk=4103207533", "host": "pan.baidu.com", "more": null}, {"title": "[新龙门客栈-新龍門客棧][1996][台视][50集全][马景涛][国语繁字][MP4][39.15G].torrent", "link": "https://pan.baidu.com/share/link?shareid=4025766048&uk=297705068", "des": "文件数: 1,分享时间: 2017-06-19T14:15:07, 文件大小: 118.5615234375k", "blink": "https://pan.baidu.com/share/link?shareid=4025766048&uk=297705068", "host": "pan.baidu.com", "more": null}, {"title": "[新龙门客栈-新龍門客棧][1996][台视][50集全][马景涛][国语繁字][MP4][39.15G].torrent", "link": "https://pan.baidu.com/share/link?shareid=3981465773&uk=297705068", "des": "文件数: 1,分享时间: 2017-06-19T14:15:06, 文件大小: 118.5615234375k", "blink": "https://pan.baidu.com/share/link?shareid=3981465773&uk=297705068", "host": "pan.baidu.com", "more": null}], "count": 5, "q": "新龙门客栈 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 5, "description": "新龙门客栈 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/无间道 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "无间道", "link": "https://pan.baidu.com/pcloud/album/info?uk=2557474713&album_id=8927497413943773151", "des": "专辑内文件数: 3,分享时间: 2015-08-11T16:41:19, 文件大小: 7578690.994140625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=2557474713&album_id=8927497413943773151", "host": "pan.baidu.com", "more": null}, {"title": "【电影】无间道", "link": "https://pan.baidu.com/share/link?shareid=3695933531&uk=2819336189", "des": "文件数: 1,分享时间: 2017-05-12T21:27:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3695933531&uk=2819336189", "host": "pan.baidu.com", "more": null}, {"title": "无间道2", "link": "https://pan.baidu.com/share/link?shareid=488937336&uk=122944454", "des": "文件数: 1,分享时间: 2017-06-19T14:50:30, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=488937336&uk=122944454", "host": "pan.baidu.com", "more": null}, {"title": "水浒无间道", "link": "https://pan.baidu.com/pcloud/album/info?uk=360058412&album_id=579701992494865629", "des": "专辑内文件数: 25,分享时间: 2017-04-02T08:31:11, 文件大小: 16731604.72265625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=360058412&album_id=579701992494865629", "host": "pan.baidu.com", "more": null}, {"title": "无间道.Infernal.Affairs.2002.BluRay.720P.x264.DTS-WiKi.torrent", "link": "https://pan.baidu.com/share/link?shareid=7039880&uk=3741861429", "des": "文件数: 1,分享时间: 2016-01-28T18:02:29, 文件大小: 34.189453125k", "blink": "https://pan.baidu.com/share/link?shareid=7039880&uk=3741861429", "host": "pan.baidu.com", "more": null}, {"title": "无间道1.torrent", "link": "https://pan.baidu.com/share/link?shareid=3206135234&uk=2003600126", "des": "文件数: 1,分享时间: 2017-04-05T00:04:39, 文件大小: 41.90625k", "blink": "https://pan.baidu.com/share/link?shareid=3206135234&uk=2003600126", "host": "pan.baidu.com", "more": null}, {"title": "水浒无间道", "link": "https://pan.baidu.com/pcloud/album/info?uk=360058412&album_id=579701992494865629", "des": "专辑内文件数: 25,分享时间: 2017-04-02T08:31:11, 文件大小: 16731604.72265625k", "blink": 
"https://pan.baidu.com/pcloud/album/info?uk=360058412&album_id=579701992494865629", "host": "pan.baidu.com", "more": null}], "count": 80, "q": "无间道 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 7, "description": "无间道 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/朗读者 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=842931339&uk=657919052", "des": "文件数: 1,分享时间: 2017-03-11T14:23:46, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=842931339&uk=657919052", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=2331131539&uk=419567710", "des": "文件数: 1,分享时间: 2017-03-28T10:09:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2331131539&uk=419567710", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=1504565040&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-27T08:15:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1504565040&uk=1870504545", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=72240367&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-17T08:12:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=72240367&uk=1870504545", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=2470421484&uk=1870504545", "des": "文件数: 1,分享时间: 2017-04-30T23:15:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2470421484&uk=1870504545", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=2982430298&uk=543647812", "des": "文件数: 1,分享时间: 2017-04-09T22:41:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2982430298&uk=543647812", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=910354290&uk=1266131168", "des": "文件数: 1,分享时间: 2017-03-09T18:46:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=910354290&uk=1266131168", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=353578939&uk=219081398", "des": "文件数: 1,分享时间: 2017-03-20T13:55:03, 
文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=353578939&uk=219081398", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=3815020798&uk=3079055689", "des": "文件数: 1,分享时间: 2017-05-11T01:59:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3815020798&uk=3079055689", "host": "pan.baidu.com", "more": null}, {"title": "朗读者", "link": "https://pan.baidu.com/share/link?shareid=147325984&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-20T17:25:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=147325984&uk=1870504545", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "朗读者 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "朗读者 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/杀人回忆 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=3128164472&uk=1118100635", "des": "文件数: 1,分享时间: 2017-06-13T22:06:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3128164472&uk=1118100635", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=130011583&uk=3406855576", "des": "文件数: 1,分享时间: 2016-04-03T21:08:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=130011583&uk=3406855576", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=2361826737&uk=354761511", "des": "文件数: 1,分享时间: 2016-08-05T12:40:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2361826737&uk=354761511", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=1831872542&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-12T11:07:20, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1831872542&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=472803758&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-22T11:21:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=472803758&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=395582596&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-09T14:59:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=395582596&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=2644871282&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T09:47:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2644871282&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=1741305321&uk=609021507", "des": "文件数: 1,分享时间: 
2017-09-05T09:27:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1741305321&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=3366680072&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-02T11:38:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3366680072&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "杀人回忆", "link": "https://pan.baidu.com/share/link?shareid=3184355789&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-25T12:24:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3184355789&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "杀人回忆 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "杀人回忆 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/死亡诗社 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=3727305178&uk=2403808060", "des": "文件数: 1,分享时间: 2017-03-31T12:31:30, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3727305178&uk=2403808060", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=3909670611&uk=1191341100", "des": "文件数: 1,分享时间: 2017-03-01T21:34:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3909670611&uk=1191341100", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=382129627&uk=3730493845", "des": "文件数: 1,分享时间: 2016-01-04T22:25:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=382129627&uk=3730493845", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=4007659466&uk=3512718891", "des": "文件数: 1,分享时间: 2016-10-11T15:35:00, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4007659466&uk=3512718891", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=3809478652&uk=3123498894", "des": "文件数: 1,分享时间: 2014-09-13T14:54:14, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3809478652&uk=3123498894", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=43246281&uk=956619845", "des": "文件数: 1,分享时间: 2017-06-15T19:26:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=43246281&uk=956619845", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=1052114311&uk=2703225223", "des": "文件数: 1,分享时间: 2017-06-16T11:16:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1052114311&uk=2703225223", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=3996829556&uk=5561471", "des": "文件数: 1,分享时间: 
2016-08-23T22:17:04, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3996829556&uk=5561471", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=623714176&uk=187713544", "des": "文件数: 1,分享时间: 2016-11-03T19:19:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=623714176&uk=187713544", "host": "pan.baidu.com", "more": null}, {"title": "死亡诗社", "link": "https://pan.baidu.com/share/link?shareid=2678621564&uk=740383300", "des": "文件数: 1,分享时间: 2015-12-02T21:27:32, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2678621564&uk=740383300", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "死亡诗社 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "死亡诗社 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/消失的爱人 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "消失的爱人", "link": "https://pan.baidu.com/share/link?shareid=2363510038&uk=1715898802", "des": "文件数: 1,分享时间: 2016-07-28T21:42:16, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2363510038&uk=1715898802", "host": "pan.baidu.com", "more": null}, {"title": "消失的爱人", "link": "https://pan.baidu.com/share/link?shareid=1659801409&uk=755312366", "des": "文件数: 1,分享时间: 2014-12-12T20:16:38, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1659801409&uk=755312366", "host": "pan.baidu.com", "more": null}, {"title": "消失爱人", "link": "https://pan.baidu.com/share/link?shareid=1858321086&uk=756635525", "des": "文件数: 1,分享时间: 2016-10-13T15:04:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1858321086&uk=756635525", "host": "pan.baidu.com", "more": null}, {"title": "消失的爱人", "link": "https://pan.baidu.com/share/link?shareid=1577451999&uk=2978720897", "des": "文件数: 1,分享时间: 2016-09-16T20:06:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1577451999&uk=2978720897", "host": "pan.baidu.com", "more": null}, {"title": "消失爱人", "link": "https://pan.baidu.com/share/link?shareid=1119638747&uk=327151364", "des": "文件数: 1,分享时间: 2016-08-13T20:15:59, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1119638747&uk=327151364", "host": "pan.baidu.com", "more": null}, {"title": "消失的爱人", "link": "https://pan.baidu.com/share/link?shareid=3490537272&uk=1048844575", "des": "文件数: 1,分享时间: 2016-11-10T21:37:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3490537272&uk=1048844575", "host": "pan.baidu.com", "more": null}, {"title": "★《消失的爱人》", "link": "https://pan.baidu.com/share/link?shareid=2727017479&uk=239727952", "des": "文件数: 1,分享时间: 2017-08-18T23:20:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2727017479&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "消失的爱人", "link": "https://pan.baidu.com/share/link?shareid=3817451827&uk=977174988", "des": "文件数: 
1,分享时间: 2015-02-16T17:05:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3817451827&uk=977174988", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "消失的爱人 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 8, "description": "消失的爱人 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/熔炉 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "☆《熔炉》", "link": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "des": "专辑内文件数: 1,分享时间: 2017-03-05T21:25:38, 文件大小: 1953371.857421875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "host": "pan.baidu.com", "more": null}, {"title": "☆《熔炉》", "link": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "des": "专辑内文件数: 1,分享时间: 2017-03-05T21:25:38, 文件大小: 1953371.857421875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "host": "pan.baidu.com", "more": null}, {"title": "熔炉", "link": "https://pan.baidu.com/share/link?shareid=1817447799&uk=2537531311", "des": "文件数: 1,分享时间: 2016-07-23T21:13:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1817447799&uk=2537531311", "host": "pan.baidu.com", "more": null}, {"title": "☆《熔炉》", "link": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "des": "专辑内文件数: 1,分享时间: 2017-03-05T21:25:38, 文件大小: 1953371.857421875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "host": "pan.baidu.com", "more": null}, {"title": "☆《熔炉》", "link": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "des": "专辑内文件数: 1,分享时间: 2017-03-05T21:25:38, 文件大小: 1953371.857421875k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=239727952&album_id=2283621070255866984", "host": "pan.baidu.com", "more": null}, {"title": "大熔炉（41集全）", "link": "https://pan.baidu.com/share/link?shareid=1564499095&uk=474715863", "des": "文件数: 1,分享时间: 2016-09-13T14:14:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1564499095&uk=474715863", "host": "pan.baidu.com", "more": null}], "count": 12, "q": "熔炉 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 6, "description": "熔炉 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/狩猎 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=4153909107&uk=1208518303", "des": "文件数: 1,分享时间: 2015-06-14T13:32:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4153909107&uk=1208518303", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=3192930804&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-24T00:51:51, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3192930804&uk=1870504545", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=2316863542&uk=310496391", "des": "文件数: 1,分享时间: 2016-02-16T09:53:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2316863542&uk=310496391", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=4251650440&uk=732877314", "des": "文件数: 1,分享时间: 2017-03-07T13:05:50, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4251650440&uk=732877314", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=3226211850&uk=1027596147", "des": "文件数: 1,分享时间: 2017-03-15T15:44:10, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3226211850&uk=1027596147", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=929722113&uk=187713544", "des": "文件数: 1,分享时间: 2016-11-06T16:04:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=929722113&uk=187713544", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=3576029197&uk=4034501757", "des": "文件数: 1,分享时间: 2017-03-16T17:39:16, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3576029197&uk=4034501757", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=925880628&uk=591494033", "des": "文件数: 1,分享时间: 2015-11-24T17:42:10, 
文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=925880628&uk=591494033", "host": "pan.baidu.com", "more": null}, {"title": "狩猎", "link": "https://pan.baidu.com/share/link?shareid=2362771963&uk=2258135478", "des": "文件数: 1,分享时间: 2017-01-21T11:43:22, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2362771963&uk=2258135478", "host": "pan.baidu.com", "more": null}, {"title": "《狩猎》", "link": "https://pan.baidu.com/share/link?shareid=294743764&uk=4281536321", "des": "文件数: 1,分享时间: 2015-06-13T14:53:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=294743764&uk=4281536321", "host": "pan.baidu.com", "more": null}], "count": 500, "q": "狩猎 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "狩猎 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/狮子王 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=39833076&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-11T09:44:56, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=39833076&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=4217001904&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-23T15:38:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4217001904&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=1027350028&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-21T09:09:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1027350028&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=2617480749&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-26T21:34:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2617480749&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=3342794784&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-07T10:11:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3342794784&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=1857047419&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-05T09:29:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1857047419&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=2013262849&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-03T09:50:16, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2013262849&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=3346553907&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-08T10:38:05, 
文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3346553907&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=1666829238&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-08T22:52:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1666829238&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "狮子王", "link": "https://pan.baidu.com/share/link?shareid=2209933862&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-05T23:51:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2209933862&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 257, "q": "狮子王 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "狮子王 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/猜火车 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=927156313&uk=1230070530", "des": "文件数: 1,分享时间: 2017-06-14T21:12:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=927156313&uk=1230070530", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=130281073&uk=2303787462", "des": "文件数: 1,分享时间: 2017-06-15T22:10:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=130281073&uk=2303787462", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=4077759104&uk=1785947437", "des": "文件数: 1,分享时间: 2017-04-18T23:59:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4077759104&uk=1785947437", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=2231825035&uk=3927300193", "des": "文件数: 1,分享时间: 2017-05-23T18:28:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2231825035&uk=3927300193", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=1343462445&uk=236406674", "des": "文件数: 1,分享时间: 2017-05-31T20:39:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1343462445&uk=236406674", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=238080704&uk=3527831162", "des": "文件数: 1,分享时间: 2016-06-17T22:42:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=238080704&uk=3527831162", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=4236323493&uk=3273970976", "des": "文件数: 1,分享时间: 2017-05-22T22:16:41, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4236323493&uk=3273970976", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=2818603821&uk=727778634", "des": "文件数: 1,分享时间: 
2017-05-25T14:55:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2818603821&uk=727778634", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=177290547&uk=1434767101", "des": "文件数: 1,分享时间: 2017-06-18T18:52:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=177290547&uk=1434767101", "host": "pan.baidu.com", "more": null}, {"title": "猜火车", "link": "https://pan.baidu.com/share/link?shareid=3202257989&uk=191691562", "des": "文件数: 1,分享时间: 2017-05-27T21:06:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3202257989&uk=191691562", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "猜火车 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "猜火车 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/甜蜜蜜 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=2154095814&uk=3295145412", "des": "文件数: 1,分享时间: 2015-04-27T02:44:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2154095814&uk=3295145412", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=2887733884&uk=639034247", "des": "文件数: 1,分享时间: 2015-06-03T11:33:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2887733884&uk=639034247", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=1933959369&uk=3123498894", "des": "文件数: 1,分享时间: 2015-02-16T23:47:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1933959369&uk=3123498894", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=2706576419&uk=503420368", "des": "文件数: 1,分享时间: 2017-04-17T09:28:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2706576419&uk=503420368", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=2516488978&uk=3993130536", "des": "文件数: 1,分享时间: 2015-10-08T16:43:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2516488978&uk=3993130536", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=1840956399&uk=4127742961", "des": "文件数: 1,分享时间: 2015-08-07T11:32:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1840956399&uk=4127742961", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=998782074&uk=732877314", "des": "文件数: 1,分享时间: 2017-03-07T13:16:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=998782074&uk=732877314", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=1375211056&uk=4132288377", "des": "文件数: 1,分享时间: 
2015-05-22T20:57:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1375211056&uk=4132288377", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=1007844290&uk=3244143639", "des": "文件数: 1,分享时间: 2015-05-01T22:51:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1007844290&uk=3244143639", "host": "pan.baidu.com", "more": null}, {"title": "甜蜜蜜", "link": "https://pan.baidu.com/share/link?shareid=3908555732&uk=2785527032", "des": "文件数: 1,分享时间: 2014-07-25T10:12:08, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3908555732&uk=2785527032", "host": "pan.baidu.com", "more": null}], "count": 248, "q": "甜蜜蜜 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "甜蜜蜜 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/神偷奶爸 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "神偷奶爸3.jpg", "link": "https://pan.baidu.com/share/link?shareid=2355218483&uk=4113435587", "des": "文件数: 1,分享时间: 2017-06-22T22:26:51, 文件大小: 115.724609375k", "blink": "https://pan.baidu.com/share/link?shareid=2355218483&uk=4113435587", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸3.2017www.crys520.com.TS720P.mp4", "link": "https://pan.baidu.com/share/link?shareid=3313367249&uk=274421171", "des": "文件数: 1,分享时间: 2017-07-07T19:23:43, 文件大小: 410536.732421875k", "blink": "https://pan.baidu.com/share/link?shareid=3313367249&uk=274421171", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸3.2017www.crys520.com.TS720P.mp4", "link": "https://pan.baidu.com/share/link?shareid=3427461371&uk=4135755322", "des": "文件数: 1,分享时间: 2017-07-04T19:17:54, 文件大小: 410536.732421875k", "blink": "https://pan.baidu.com/share/link?shareid=3427461371&uk=4135755322", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸3", "link": "https://pan.baidu.com/share/link?shareid=3983479355&uk=1950209840", "des": "文件数: 1,分享时间: 2017-06-23T23:38:29, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3983479355&uk=1950209840", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸（1-2）", "link": "https://pan.baidu.com/share/link?shareid=286894091&uk=2703225223", "des": "文件数: 1,分享时间: 2017-07-07T16:19:20, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=286894091&uk=2703225223", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸3", "link": "https://pan.baidu.com/share/link?shareid=680806276&uk=846430438", "des": "文件数: 1,分享时间: 2017-06-22T17:35:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=680806276&uk=846430438", "host": "pan.baidu.com", "more": null}, {"title": "神偷奶爸3", "link": "https://pan.baidu.com/share/link?shareid=3774972574&uk=1663767989", "des": "文件数: 1,分享时间: 2017-07-08T22:19:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3774972574&uk=1663767989", "host": "pan.baidu.com", "more": null}, 
{"title": "神偷奶爸（1-2）", "link": "https://pan.baidu.com/share/link?shareid=1792005916&uk=3777687642", "des": "文件数: 1,分享时间: 2017-07-07T22:14:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1792005916&uk=3777687642", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "神偷奶爸 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 8, "description": "神偷奶爸 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/禁闭岛 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=3221347862&uk=3798768640", "des": "文件数: 1,分享时间: 2016-01-29T14:54:01, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3221347862&uk=3798768640", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=1754170918&uk=3798768640", "des": "文件数: 1,分享时间: 2015-10-09T14:28:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1754170918&uk=3798768640", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=1314811464&uk=2373288888", "des": "文件数: 1,分享时间: 2017-06-02T07:44:14, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1314811464&uk=2373288888", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=1953135446&uk=1059862740", "des": "文件数: 1,分享时间: 2014-03-04T12:43:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1953135446&uk=1059862740", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=2585195763&uk=2588099636", "des": "文件数: 1,分享时间: 2015-05-21T12:31:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2585195763&uk=2588099636", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=238865317&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-12T10:55:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=238865317&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=435902664&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-16T08:49:56, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=435902664&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=3487113811&uk=609021507", "des": "文件数: 1,分享时间: 
2017-08-21T08:57:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3487113811&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=4126350759&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-26T21:21:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4126350759&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "禁闭岛", "link": "https://pan.baidu.com/share/link?shareid=2392946760&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-15T11:03:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2392946760&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "禁闭岛 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "禁闭岛 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/素媛 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=1554614055&uk=1733885966", "des": "文件数: 1,分享时间: 2015-07-26T00:29:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1554614055&uk=1733885966", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=1867188809&uk=3818945738", "des": "文件数: 1,分享时间: 2016-10-18T22:20:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1867188809&uk=3818945738", "host": "pan.baidu.com", "more": null}, {"title": "素  媛", "link": "https://pan.baidu.com/share/link?shareid=3892511196&uk=2701350792", "des": "文件数: 1,分享时间: 2016-05-04T10:21:08, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3892511196&uk=2701350792", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=412622509&uk=4199993737", "des": "文件数: 1,分享时间: 2017-03-01T22:51:38, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=412622509&uk=4199993737", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=3476917953&uk=1396576493", "des": "文件数: 1,分享时间: 2016-08-16T14:36:08, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3476917953&uk=1396576493", "host": "pan.baidu.com", "more": null}, {"title": "素  媛", "link": "https://pan.baidu.com/share/link?shareid=241545913&uk=410113070", "des": "文件数: 1,分享时间: 2017-01-16T08:23:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=241545913&uk=410113070", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=4233566459&uk=1094277376", "des": "文件数: 1,分享时间: 2016-07-18T10:20:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4233566459&uk=1094277376", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=3546925539&uk=1870504545", "des": "文件数: 1,分享时间: 
2017-03-24T00:48:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3546925539&uk=1870504545", "host": "pan.baidu.com", "more": null}, {"title": "素媛", "link": "https://pan.baidu.com/share/link?shareid=665191032&uk=3647220151", "des": "文件数: 1,分享时间: 2015-12-13T14:20:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=665191032&uk=3647220151", "host": "pan.baidu.com", "more": null}, {"title": "素~媛", "link": "https://pan.baidu.com/share/link?shareid=58279743&uk=1731503722", "des": "文件数: 1,分享时间: 2016-06-09T12:40:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=58279743&uk=1731503722", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "素媛 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "素媛 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/红辣椒 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=1043860283&uk=3277541605", "des": "文件数: 1,分享时间: 2017-05-22T08:25:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1043860283&uk=3277541605", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=455372541&uk=1296098201", "des": "文件数: 1,分享时间: 2016-12-18T17:25:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=455372541&uk=1296098201", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=3052892069&uk=591494033", "des": "文件数: 1,分享时间: 2015-11-26T11:24:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3052892069&uk=591494033", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=140210138&uk=624035590", "des": "文件数: 1,分享时间: 2016-09-16T18:24:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=140210138&uk=624035590", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=3437813556&uk=727778634", "des": "文件数: 1,分享时间: 2016-12-06T16:00:32, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3437813556&uk=727778634", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=2941670640&uk=2384719941", "des": "文件数: 1,分享时间: 2014-09-11T12:46:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2941670640&uk=2384719941", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=2255786821&uk=1057283767", "des": "文件数: 1,分享时间: 2017-01-05T18:44:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2255786821&uk=1057283767", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=1040234356&uk=1057283767", "des": "文件数: 1,分享时间: 
2017-01-05T11:33:41, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1040234356&uk=1057283767", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=3839065162&uk=2303787462", "des": "文件数: 1,分享时间: 2016-12-20T19:07:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3839065162&uk=2303787462", "host": "pan.baidu.com", "more": null}, {"title": "红辣椒", "link": "https://pan.baidu.com/share/link?shareid=845204644&uk=1619475041", "des": "文件数: 1,分享时间: 2017-05-24T08:17:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=845204644&uk=1619475041", "host": "pan.baidu.com", "more": null}], "count": 286, "q": "红辣椒 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "红辣椒 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/荒岛余生 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=1546312720&uk=176335829", "des": "文件数: 1,分享时间: 2016-10-08T17:06:06, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1546312720&uk=176335829", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=3899080969&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-26T21:17:00, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3899080969&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=142744856&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-04T13:09:28, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=142744856&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=1593612154&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-14T10:23:51, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1593612154&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=1000327473&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-11T18:59:13, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1000327473&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=2034049928&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-12T09:11:21, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2034049928&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=95799309&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-08T10:23:12, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=95799309&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=2691056461&uk=609021507", "des": "文件数: 1,分享时间: 
2017-09-18T09:41:22, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2691056461&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=1098881695&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-07T22:44:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1098881695&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "荒岛余生", "link": "https://pan.baidu.com/share/link?shareid=3364586732&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-13T18:51:49, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3364586732&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 810, "q": "荒岛余生 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "荒岛余生 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/蝴蝶 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=1849672524&uk=2200117301", "des": "文件数: 1,分享时间: 2014-12-26T18:51:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1849672524&uk=2200117301", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=3888540241&uk=3819391060", "des": "文件数: 1,分享时间: 2016-12-09T22:09:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3888540241&uk=3819391060", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=114199422&uk=4051721367", "des": "文件数: 1,分享时间: 2016-01-24T18:53:43, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=114199422&uk=4051721367", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=386756287&uk=3993130536", "des": "文件数: 1,分享时间: 2015-10-09T22:36:09, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=386756287&uk=3993130536", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=943712282&uk=675111558", "des": "文件数: 1,分享时间: 2016-08-29T23:04:38, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=943712282&uk=675111558", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=1093344399&uk=3542931456", "des": "文件数: 1,分享时间: 2015-02-12T22:00:42, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1093344399&uk=3542931456", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=1250591644&uk=1459987268", "des": "文件数: 1,分享时间: 2016-06-30T09:14:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1250591644&uk=1459987268", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=16763128&uk=3849557806", "des": "文件数: 1,分享时间: 2015-09-06T14:14:01, 
文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=16763128&uk=3849557806", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=2723737423&uk=3157223751", "des": "文件数: 1,分享时间: 2013-11-13T18:22:26, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2723737423&uk=3157223751", "host": "pan.baidu.com", "more": null}, {"title": "蝴蝶", "link": "https://pan.baidu.com/share/link?shareid=4217470321&uk=2938990077", "des": "文件数: 1,分享时间: 2017-03-28T02:09:30, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4217470321&uk=2938990077", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "蝴蝶 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "蝴蝶 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/血钻 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=2605659590&uk=740871509", "des": "文件数: 1,分享时间: 2016-10-05T16:25:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2605659590&uk=740871509", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=3394521028&uk=925544842", "des": "文件数: 1,分享时间: 2016-07-26T17:25:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3394521028&uk=925544842", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=2070888133&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-22T11:34:52, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2070888133&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=4225105640&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-09T11:58:24, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4225105640&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=3792085428&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-18T09:23:02, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3792085428&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=4126600303&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-31T15:29:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4126600303&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=525930478&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-14T10:47:43, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=525930478&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=4142596758&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-08T22:57:52, 文件大小: 1k", 
"blink": "https://pan.baidu.com/share/link?shareid=4142596758&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=687994999&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-01T12:43:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=687994999&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "血钻", "link": "https://pan.baidu.com/share/link?shareid=3086354255&uk=609021507", "des": "文件数: 1,分享时间: 2017-05-31T23:33:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3086354255&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "血钻 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "血钻 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/让子弹飞 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=1678312694&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-07T08:54:50, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1678312694&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=2215525919&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-25T00:00:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2215525919&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=4282197672&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-26T21:30:25, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4282197672&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=2996054638&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-06T09:29:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2996054638&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=192188661&uk=609021507", "des": "文件数: 1,分享时间: 2017-09-02T11:36:46, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=192188661&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=317481197&uk=609021507", "des": "文件数: 1,分享时间: 2017-05-28T12:27:46, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=317481197&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=608174175&uk=609021507", "des": "文件数: 1,分享时间: 2017-06-17T10:52:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=608174175&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=1106147397&uk=609021507", "des": "文件数: 1,分享时间: 
2017-06-07T22:50:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1106147397&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=609172849&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-15T10:02:11, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=609172849&uk=609021507", "host": "pan.baidu.com", "more": null}, {"title": "让子弹飞", "link": "https://pan.baidu.com/share/link?shareid=666935341&uk=609021507", "des": "文件数: 1,分享时间: 2017-08-17T09:06:21, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=666935341&uk=609021507", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "让子弹飞 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "让子弹飞 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/谍影重重2 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "谍影重重", "link": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "des": "文件数: 1,分享时间: 2017-02-14T03:26:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "des": "文件数: 1,分享时间: 2017-06-01T16:20:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重", "link": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "des": "专辑内文件数: 3,分享时间: 2016-03-18T10:07:00, 文件大小: 5925812.6728515625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "host": "pan.baidu.com", "more": null}, {"title": "★《谍影重重1-5》", "link": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "des": "文件数: 1,分享时间: 2017-08-18T16:32:39, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "des": "文件数: 1,分享时间: 2017-09-06T07:21:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "des": "文件数: 1,分享时间: 2017-05-26T00:40:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重5.torrent", "link": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "des": "文件数: 1,分享时间: 2017-02-15T15:23:50, 文件大小: 82.0595703125k", "blink": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "谍影重重2 ", "p": "1", 
"runtime": 0.5, "from": "baidu"}, "listcount": 7, "description": "谍影重重2 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/谍影重重3 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "谍影重重", "link": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "des": "文件数: 1,分享时间: 2017-02-14T03:26:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "des": "文件数: 1,分享时间: 2017-06-01T16:20:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重", "link": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "des": "专辑内文件数: 3,分享时间: 2016-03-18T10:07:00, 文件大小: 5925812.6728515625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "host": "pan.baidu.com", "more": null}, {"title": "★《谍影重重1-5》", "link": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "des": "文件数: 1,分享时间: 2017-08-18T16:32:39, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "des": "文件数: 1,分享时间: 2017-09-06T07:21:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "des": "文件数: 1,分享时间: 2017-05-26T00:40:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重5.torrent", "link": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "des": "文件数: 1,分享时间: 2017-02-15T15:23:50, 文件大小: 82.0595703125k", "blink": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "谍影重重3 ", "p": "1", 
"runtime": 0.5, "from": "baidu"}, "listcount": 7, "description": "谍影重重3 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/谍影重重 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "谍影重重", "link": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "des": "文件数: 1,分享时间: 2017-02-14T03:26:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=144453654&uk=3980756933", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "des": "文件数: 1,分享时间: 2017-06-01T16:20:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2799879487&uk=1796201939", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重", "link": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "des": "专辑内文件数: 3,分享时间: 2016-03-18T10:07:00, 文件大小: 5925812.6728515625k", "blink": "https://pan.baidu.com/pcloud/album/info?uk=442052191&album_id=8783206031530367527", "host": "pan.baidu.com", "more": null}, {"title": "★《谍影重重1-5》", "link": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "des": "文件数: 1,分享时间: 2017-08-18T16:32:39, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2874673942&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "des": "文件数: 1,分享时间: 2017-09-06T07:21:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2078328530&uk=1330472352", "host": "pan.baidu.com", "more": null}, {"title": "谍丨影重重", "link": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "des": "文件数: 1,分享时间: 2017-05-26T00:40:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2243744029&uk=2091055519", "host": "pan.baidu.com", "more": null}, {"title": "谍影重重5.torrent", "link": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "des": "文件数: 1,分享时间: 2017-02-15T15:23:50, 文件大小: 82.0595703125k", "blink": "https://pan.baidu.com/share/link?shareid=172947738&uk=219081398", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "谍影重重 ", "p": "1", 
"runtime": 0.5, "from": "baidu"}, "listcount": 7, "description": "谍影重重 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/辛德勒的名单 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "辛德勒的名单。", "link": "https://pan.baidu.com/share/link?shareid=601527744&uk=3106137638", "des": "文件数: 1,分享时间: 2015-12-08T14:51:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=601527744&uk=3106137638", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒名单", "link": "https://pan.baidu.com/share/link?shareid=2152393128&uk=1026789719", "des": "文件数: 1,分享时间: 2016-01-23T12:45:56, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2152393128&uk=1026789719", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒名单", "link": "https://pan.baidu.com/share/link?shareid=76126071&uk=441722153", "des": "文件数: 1,分享时间: 2016-10-26T22:06:57, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=76126071&uk=441722153", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒名单", "link": "https://pan.baidu.com/share/link?shareid=4147947656&uk=2768931742", "des": "文件数: 1,分享时间: 2017-04-22T21:09:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4147947656&uk=2768931742", "host": "pan.baidu.com", "more": null}, {"title": "【美国】辛德勒名单.rmvb", "link": "https://pan.baidu.com/share/link?shareid=257364283&uk=1430954013", "des": "文件数: 1,分享时间: 2015-07-14T10:28:52, 文件大小: 704213.416015625k", "blink": "https://pan.baidu.com/share/link?shareid=257364283&uk=1430954013", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒名单.mkv", "link": "https://pan.baidu.com/share/link?shareid=300218529&uk=587841182", "des": "文件数: 1,分享时间: 2017-02-17T21:49:25, 文件大小: 2793534.0146484375k", "blink": "https://pan.baidu.com/share/link?shareid=300218529&uk=587841182", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒名单.mkv", "link": "https://pan.baidu.com/share/link?shareid=253067002&uk=271449358", "des": "文件数: 1,分享时间: 2016-11-05T21:02:19, 文件大小: 2793534.0146484375k", "blink": "https://pan.baidu.com/share/link?shareid=253067002&uk=271449358", "host": "pan.baidu.com", "more": null}, {"title": "辛德勒的名单.Schindlers List.1993", "link": 
"https://pan.baidu.com/share/link?shareid=3741994528&uk=306474339", "des": "文件数: 1,分享时间: 2017-04-10T07:36:05, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3741994528&uk=306474339", "host": "pan.baidu.com", "more": null}, {"title": "IMDbTOP250.NO.8.辛德勒名单 .mkv", "link": "https://pan.baidu.com/share/link?shareid=1623461302&uk=1093518467", "des": "文件数: 1,分享时间: 2016-08-10T21:45:51, 文件大小: 2793534.0146484375k", "blink": "https://pan.baidu.com/share/link?shareid=1623461302&uk=1093518467", "host": "pan.baidu.com", "more": null}], "count": 1353, "q": "辛德勒的名单 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 9, "description": "辛德勒的名单 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/阳光灿烂的日子 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "79 阳光灿烂d日子", "link": "https://pan.baidu.com/share/link?shareid=2134897303&uk=1084372931", "des": "文件数: 1,分享时间: 2016-12-24T14:09:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2134897303&uk=1084372931", "host": "pan.baidu.com", "more": null}, {"title": "阳光灿烂.avi", "link": "https://pan.baidu.com/share/link?shareid=3063746573&uk=1631372742", "des": "文件数: 1,分享时间: 2016-11-28T14:22:01, 文件大小: 1179606.1484375k", "blink": "https://pan.baidu.com/share/link?shareid=3063746573&uk=1631372742", "host": "pan.baidu.com", "more": null}, {"title": "〖-f-〗《阳光灿烂的日子》.In.the.Heat.of.the.Sun2004.DVDrip.x264.AC3-CMCT《之善寻正》.torrent", "link": "https://pan.baidu.com/share/link?shareid=3377085152&uk=239727952", "des": "文件数: 1,分享时间: 2016-12-27T21:37:59, 文件大小: 49.185546875k", "blink": "https://pan.baidu.com/share/link?shareid=3377085152&uk=239727952", "host": "pan.baidu.com", "more": null}, {"title": "19940125_今夜阳光灿烂", "link": "https://pan.baidu.com/share/link?shareid=1947195049&uk=1346119633", "des": "文件数: 1,分享时间: 2017-01-19T13:42:20, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1947195049&uk=1346119633", "host": "pan.baidu.com", "more": null}, {"title": "费城永远阳光灿烂.Its.Always.Sunny.in.Philadelphia.S10E08.2015.HDTV.MiniSD-TLF.mkv等", "link": "https://pan.baidu.com/share/link?shareid=2854204889&uk=542310220", "des": "文件数: 6,分享时间: 2017-03-07T21:11:25, 文件大小: 1206457.3837890625k", "blink": "https://pan.baidu.com/share/link?shareid=2854204889&uk=542310220", "host": "pan.baidu.com", "more": null}, {"title": "让心灵阳光灿烂.pdf", "link": "https://pan.baidu.com/share/link?shareid=965101348&uk=941414814", "des": "文件数: 1,分享时间: 2014-01-05T06:07:05, 文件大小: 54.1328125k", "blink": "https://pan.baidu.com/share/link?shareid=965101348&uk=941414814", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "阳光灿烂的日子 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 6, "description": "阳光灿烂的日子 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/雨人 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=2812274482&uk=3849557806", "des": "文件数: 1,分享时间: 2015-08-30T22:58:20, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2812274482&uk=3849557806", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=793081276&uk=3998475506", "des": "文件数: 1,分享时间: 2016-02-12T18:08:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=793081276&uk=3998475506", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=2641151441&uk=3764406524", "des": "文件数: 1,分享时间: 2016-06-11T10:30:55, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2641151441&uk=3764406524", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=1919521195&uk=1733885966", "des": "文件数: 1,分享时间: 2015-07-25T13:34:07, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1919521195&uk=1733885966", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=4063364495&uk=453039291", "des": "文件数: 1,分享时间: 2013-09-02T20:10:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4063364495&uk=453039291", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=548944186&uk=3993130536", "des": "文件数: 1,分享时间: 2015-10-08T23:11:31, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=548944186&uk=3993130536", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=1833586273&uk=1441773", "des": "文件数: 1,分享时间: 2013-09-29T22:09:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1833586273&uk=1441773", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=1942606523&uk=1733885966", "des": "文件数: 1,分享时间: 2015-07-25T13:37:58, 文件大小: 
1k", "blink": "https://pan.baidu.com/share/link?shareid=1942606523&uk=1733885966", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=937189341&uk=1902651040", "des": "文件数: 1,分享时间: 2016-03-20T13:23:36, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=937189341&uk=1902651040", "host": "pan.baidu.com", "more": null}, {"title": "雨人", "link": "https://pan.baidu.com/share/link?shareid=4102932210&uk=4115559669", "des": "文件数: 1,分享时间: 2016-09-28T20:29:33, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4102932210&uk=4115559669", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "雨人 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "雨人 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/香水 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=3561031152&uk=207354018", "des": "文件数: 1,分享时间: 2016-07-27T21:49:42, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3561031152&uk=207354018", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=4177240851&uk=932687986", "des": "文件数: 1,分享时间: 2016-06-08T08:45:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4177240851&uk=932687986", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=4278611413&uk=2208648241", "des": "文件数: 1,分享时间: 2016-10-13T08:26:23, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=4278611413&uk=2208648241", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=710143097&uk=1344856499", "des": "文件数: 1,分享时间: 2016-10-30T09:28:45, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=710143097&uk=1344856499", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=2372098753&uk=2670459141", "des": "文件数: 1,分享时间: 2015-12-02T08:40:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2372098753&uk=2670459141", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=1090100341&uk=5561471", "des": "文件数: 1,分享时间: 2016-08-01T17:09:27, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1090100341&uk=5561471", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=3911858411&uk=932687986", "des": "文件数: 1,分享时间: 2016-05-16T08:25:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3911858411&uk=932687986", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=153743779&uk=932687986", "des": "文件数: 1,分享时间: 2016-05-16T08:19:03, 文件大小: 
1k", "blink": "https://pan.baidu.com/share/link?shareid=153743779&uk=932687986", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=270020814&uk=2621639807", "des": "文件数: 1,分享时间: 2017-05-24T23:10:25, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=270020814&uk=2621639807", "host": "pan.baidu.com", "more": null}, {"title": "香水", "link": "https://pan.baidu.com/share/link?shareid=3592229816&uk=1870504545", "des": "文件数: 1,分享时间: 2017-03-24T00:54:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3592229816&uk=1870504545", "host": "pan.baidu.com", "more": null}], "count": 430, "q": "香水 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "香水 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/黑天鹅 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=1741557677&uk=5561471", "des": "文件数: 1,分享时间: 2016-08-14T18:45:08, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1741557677&uk=5561471", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=592631430&uk=1213194440", "des": "文件数: 1,分享时间: 2016-08-14T02:02:18, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=592631430&uk=1213194440", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=3269511204&uk=1644462760", "des": "文件数: 1,分享时间: 2016-04-19T15:42:00, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3269511204&uk=1644462760", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=3806622255&uk=727778634", "des": "文件数: 1,分享时间: 2017-03-11T17:18:21, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3806622255&uk=727778634", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=3779500045&uk=3644101862", "des": "文件数: 1,分享时间: 2017-05-07T21:42:48, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3779500045&uk=3644101862", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=1408913115&uk=1093532973", "des": "文件数: 1,分享时间: 2016-05-03T16:14:32, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1408913115&uk=1093532973", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=1116077096&uk=2183062177", "des": "文件数: 1,分享时间: 2016-12-13T15:53:40, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1116077096&uk=2183062177", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=114770092&uk=408227742", "des": "文件数: 1,分享时间: 
2016-11-05T16:30:34, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=114770092&uk=408227742", "host": "pan.baidu.com", "more": null}, {"title": "黑天鹅", "link": "https://pan.baidu.com/share/link?shareid=2778801398&uk=1107820184", "des": "文件数: 1,分享时间: 2016-07-29T19:05:41, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2778801398&uk=1107820184", "host": "pan.baidu.com", "more": null}, {"title": "《黑天鹅》", "link": "https://pan.baidu.com/share/link?shareid=2819649913&uk=3255920820", "des": "文件数: 1,分享时间: 2016-12-12T09:02:19, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2819649913&uk=3255920820", "host": "pan.baidu.com", "more": null}], "count": 185, "q": "黑天鹅 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "黑天鹅 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/cached_pansou/龙猫 .json:
--------------------------------------------------------------------------------
1 | {"list": {"data": [{"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=1638453595&uk=139801957", "des": "文件数: 1,分享时间: 2016-01-08T01:29:37, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1638453595&uk=139801957", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=1771840313&uk=3073055985", "des": "文件数: 1,分享时间: 2016-02-27T18:46:17, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=1771840313&uk=3073055985", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=2274649232&uk=3326727685", "des": "文件数: 1,分享时间: 2016-11-08T15:26:35, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2274649232&uk=3326727685", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=3735275052&uk=878728055", "des": "文件数: 1,分享时间: 2016-03-12T10:20:53, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3735275052&uk=878728055", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=3855203431&uk=3762236667", "des": "文件数: 1,分享时间: 2017-03-03T22:16:03, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3855203431&uk=3762236667", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=2803117026&uk=756635525", "des": "文件数: 1,分享时间: 2016-10-18T09:05:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2803117026&uk=756635525", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=2480638763&uk=475630542", "des": "文件数: 1,分享时间: 2016-07-03T17:21:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=2480638763&uk=475630542", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=526505602&uk=3379484742", "des": "文件数: 1,分享时间: 2015-08-12T19:07:35, 
文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=526505602&uk=3379484742", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=3315656731&uk=2373288888", "des": "文件数: 1,分享时间: 2017-06-02T07:44:47, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=3315656731&uk=2373288888", "host": "pan.baidu.com", "more": null}, {"title": "龙猫", "link": "https://pan.baidu.com/share/link?shareid=185407033&uk=3966223021", "des": "文件数: 1,分享时间: 2016-10-01T11:44:15, 文件大小: 1k", "blink": "https://pan.baidu.com/share/link?shareid=185407033&uk=3966223021", "host": "pan.baidu.com", "more": null}], "count": 2000, "q": "龙猫 ", "p": "1", "runtime": 0.5, "from": "baidu"}, "listcount": 10, "description": "龙猫 相关信息"}


--------------------------------------------------------------------------------
/doubanmovie/data/豆瓣电影250.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/doubanmovie/data/豆瓣电影250.xls


--------------------------------------------------------------------------------
/doubanmovie/panspider.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 根据关键字搜索对应的百度云下载链接
 3 | 
 4 | 搜索地址：http://pansou.com/
 5 | '''
 6 | 
 7 | 
 8 | import os
 9 | import json
10 | 
11 | import requests
12 | from bs4 import BeautifulSoup
13 | 
14 | api_url = 'http://api.pansou.com/search_new.php'
15 | 
16 | 
def cached_json(keyword):
    '''Return the pansou search result for ``keyword``, caching it on disk.

    The first call for a keyword posts the query to ``api_url`` and stores
    the decoded JSON in ``cached_pansou/<keyword>.json`` (relative to the
    current working directory). Later calls read that file instead of
    sending a request.

    :param keyword: search term; also used verbatim as the cache file name.
    :return: the decoded JSON response.
    '''
    folder = 'cached_pansou'
    filename = keyword + '.json'
    # Join the folder and the file name into the (relative) cache path.
    path = os.path.join(folder, filename)

    # Cache hit: read the stored response from disk and return it.
    # The cache is written with ensure_ascii=False, so it holds raw
    # non-ASCII text; pin UTF-8 instead of relying on the platform default.
    if os.path.exists(path):
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    # Cache miss: make sure the cache folder exists (no check-then-create
    # race), then query the API and store the result.
    os.makedirs(folder, exist_ok=True)
    data = {
        'q': keyword,
        'p': 1,
    }
    # A timeout keeps an unresponsive server from hanging the caller forever.
    r = requests.post(api_url, data=data, timeout=30).json()
    # 'w' rather than 'a': a cache file must contain exactly one JSON document.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(r, f, ensure_ascii=False)
    return r
41 | 
42 | 
def parse_link(name):
    '''Return the download link of the first search result for ``name``.'''
    results = cached_json(name)['list']['data']
    first_hit = results[0]
    return first_hit['link']
48 | 
49 | 
50 | 
51 | 


--------------------------------------------------------------------------------
/douyu/douyu_test.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 利用第三方模块：danmu
 3 | 抓取斗鱼弹幕
 4 | '''
 5 | import time, sys
 6 | 
 7 | from danmu import DanMuClient
 8 | 
 9 | def pp(msg):
10 |     print(msg.encode(sys.stdin.encoding, 'ignore').
11 |         decode(sys.stdin.encoding))
12 | 
# Create a danmu client for the given Douyu room URL.
dmc = DanMuClient('https://www.douyu.com/208114')
# Only a warning is printed when the URL is rejected; the script carries on.
if not dmc.isValid(): print('Url not valid')

# Register danmu_fn through the client's danmu decorator; it prints the
# sender's nickname and the message content.
@dmc.danmu
def danmu_fn(msg):
    pp('[%s] %s' % (msg['NickName'], msg['Content']))
# The string literal below holds disabled gift / other handlers; as a bare
# string it is never executed.
'''
@dmc.gift
def gift_fn(msg):
    pp('[%s] sent a gift!' % msg['NickName'])

@dmc.other
def other_fn(msg):
    pp('Other message received')
'''
# Start the client. NOTE(review): blockThread=True presumably keeps the
# calling thread blocked while messages arrive; confirm against the danmu docs.
dmc.start(blockThread = True)


--------------------------------------------------------------------------------
/gamedownload/readme.md:
--------------------------------------------------------------------------------
1 | ys168网盘的文件地址是动态的
2 | 所以要下载文件的话需要清空cached文件夹下的缓存网页
3 | 这样才能获取到最新的动态地址


--------------------------------------------------------------------------------
/ithome/config.py:
--------------------------------------------------------------------------------
'''
Configuration for MongoDB
'''
# Database URL (host)
MONGO_URL = 'localhost'
# Database name
MONGO_DB = 'ithome'
# Database table (collection) name
MONGO_TABLE = 'hotcomment_it'


--------------------------------------------------------------------------------
/ithome/datahandleer.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | 热评数据处理
  3 | 
  4 | 数据： apple.json  苹果分类下的新闻热评 共3672条数据
  5 | 字段： <id, name, content, phone_com, phone_model, loc, time>
  6 | '''
  7 | 
  8 | import json
  9 | 
 10 | # 读取json数据
 11 | with open('apple.json', 'r') as f:
 12 |     data = json.load(f)
 13 | 
 14 | 
 15 | def city_count(data):
 16 |     '''
 17 |     统计城市出现次数
 18 |     return city
 19 |     '''
 20 | 
 21 |     city = {}
 22 |     for i in data:
 23 |         loc = i['loc']
 24 |         if loc in city.keys():
 25 |             city[loc] += 1
 26 |         else:
 27 |             city[loc] = 1
 28 |     return city
 29 | 
 30 | 
 31 | '''
 32 | # 获取所有城市出现的次数
 33 | city = city_count(data)
 34 | #  找到出现最多的前10名
 35 | top_city = sorted(city.items(), key=lambda d: d[1], reverse=True)[:10]
 36 | 
 37 | # 分离数据，方便生成图片
 38 | name = [k for k, v in top_city]
 39 | count = [v for k, v in top_city]
 40 | print(name)
 41 | print(count)
 42 | '''
 43 | 
 44 | 
 45 | def field_ount(data, field):
 46 |     '''
 47 |     统计数据中字段名出现的次数
 48 |     return dic
 49 |     '''
 50 | 
 51 |     dic = {}
 52 | 
 53 |     for i in data:
 54 |         f = i[field]
 55 |         if f in dic.keys():
 56 |             dic[f] += 1
 57 |         else:
 58 |             dic[f] = 1
 59 |     return dic
 60 | 
 61 | 
 62 | def find_top10(dic):
 63 |     '''
 64 |     找到传进字典的前10名
 65 |     并返回对应的 key value list
 66 |     '''
 67 |     top = sorted(dic.items(), key=lambda d: d[1], reverse=True)[:10]
 68 |     name = [k for k, v in top]
 69 |     count = [v for k, v in top]
 70 |     return name, count
 71 | 
 72 | 
 73 | '''
 74 | # 获取所有手机厂商
 75 | phone_com = field_ount(data,'phone_com')
 76 | name,count = find_top10(phone_com)
 77 | print(name,count)
 78 | '''
 79 | 
 80 | '''
 81 | # 获取所有手机型号
 82 | phone_model = field_ount(data,'phone_model')
 83 | name,count = find_top10(phone_model)
 84 | print(name,count)
 85 | '''
 86 | 
 87 | 
 88 | def field_ount_time(data, field):
 89 |     '''
 90 |     统计数据中字段名出现的次数
 91 |     return dic
 92 |     对于时间特殊处理
 93 |     '''
 94 | 
 95 |     dic = {}
 96 | 
 97 |     for i in data:
 98 |         f = i[field].split(':')[0] + '点'
 99 |         if f in dic.keys():
100 |             dic[f] += 1
101 |         else:
102 |             dic[f] = 1
103 |     return dic
104 | 
105 | 
106 | '''
107 | # 获取所有发帖时间
108 | time = field_ount_time(data,'time')
109 | name,count = find_top10(time)
110 | print(name,count)
111 | '''
112 | 
113 | '''
114 | # 获取热评大佬
115 | people = field_ount(data,'name')
116 | name,count = find_top10(people)
117 | print(name,count)
118 | '''
119 | 
# Check whether duplicated jokes also make it into the hot comments:
# count identical comment texts and keep the ten most frequent ones.
p = field_ount(data, 'content')
name, count = find_top10(p)

# Print each of those comment texts, followed by blank lines as a separator.
for duanzi in name:
    print(duanzi)
    print('\n')
127 | 


--------------------------------------------------------------------------------
/ithome/pipeline.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 处理数据
 3 | 保存到mogodb
 4 | '''
 5 | 
 6 | from pymongo import MongoClient
 7 | from config import *
 8 | 
# Module-level MongoDB client and database handle, built from the settings
# imported from config.
# NOTE(review): connect=True presumably connects right away; confirm against the PyMongo docs.
client = MongoClient(MONGO_URL, connect=True)
db = client[MONGO_DB]
11 | 
# Write a record to the database
def save_to_mongo(result):
    '''Store ``result`` in the ``MONGO_TABLE`` collection.

    :param result: one document (dict) or a list of documents.
    :return: True when PyMongo reports inserted id(s), otherwise False.
    '''
    collection = db[MONGO_TABLE]
    # Collection.insert() is deprecated since PyMongo 3.0 and removed in 4.0;
    # insert_one()/insert_many() replace it. The list branch keeps accepting
    # the list input that insert() accepted.
    if isinstance(result, list):
        stored = bool(collection.insert_many(result).inserted_ids)
    else:
        stored = collection.insert_one(result).inserted_id is not None
    if stored:
        print('存储成功', result)
        return True
    return False
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/mazhifu/config.py:
--------------------------------------------------------------------------------
# Login user name and password
# NOTE(review): the constant is spelled USERNMAE (sic). It is left unchanged
# because other modules may reference this exact name; confirm before renaming.
USERNMAE = ''
PASSWD = ''
# MySQL database settings
TEST_DB = {
    'host': '127.0.0.1',
    'user': 'root',
    'password': '',
    'db': ''
}
11 | 


--------------------------------------------------------------------------------
/mazhifu/readme.md:
--------------------------------------------------------------------------------
 1 | ### 第三方支付平台 码支付账单 csv爬取
 2 | 
 3 | 网站地址：https://codepay.fateqq.com/
 4 | 
 5 | 
 6 | 
 7 | ### 使用步骤
 8 | 
 9 | **安装前置依赖**
10 | 
11 | * `brew/yum/apt-get install python3` # 选择你喜欢的方式安装Python3
12 | * `pip install -r requirements.txt` # 安装第三方库
13 | 
14 | 
15 | **配置个人信息**
16 | 
17 | * 打开`config.py`并配置好自己的mysql数据库信息，码支付的账号和密码
18 | * 保证数据库里有一张名为`91pay`的表
19 | 
20 | 创建的sql语句如下：
21 | 
22 | ```sql
23 | SET NAMES utf8mb4;
24 | SET FOREIGN_KEY_CHECKS = 0;
25 | 
26 | -- ----------------------------
27 | -- Table structure for cmf_pay_orders
28 | -- ----------------------------
29 | DROP TABLE IF EXISTS `cmf_pay_orders`;
30 | CREATE TABLE `cmf_pay_orders` (
31 |   `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'ID',
32 |   `user_id` int(11) DEFAULT NULL COMMENT '用户ID',
33 |   `username` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '用户名',
34 |   `way` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '支付方式',
35 |   `status` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '订单状态',
36 |   `trade_no` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '订单号',
37 |   `raw_price` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '申请价格',
38 |   `pay_price` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL COMMENT '支付价格',
39 |   `date` datetime DEFAULT NULL COMMENT '订单日期',
40 |   `cash` int(2) NOT NULL DEFAULT '0' COMMENT '提现状态',
41 |   PRIMARY KEY (`id`) USING BTREE
42 | ) ENGINE=InnoDB AUTO_INCREMENT=115 DEFAULT CHARSET=utf8;
43 | 
44 | SET FOREIGN_KEY_CHECKS = 1;
45 | ```
46 | 
47 | 
48 | **运行程序**
49 | 
50 | `python3 spider.py`
51 | 
52 | ok，今天的账单信息已经入库了


--------------------------------------------------------------------------------
/mazhifu/requirements.txt:
--------------------------------------------------------------------------------
1 | lazyspider==0.0.2
2 | lxml==4.1.1
3 | PyMySQL==0.8.0
4 | requests==2.18.4
5 | selenium==3.8.0
6 | 


--------------------------------------------------------------------------------
/requestes基本使用/002.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | 
 3 | 
 4 | def getHtmlText(url):
 5 |     try:
 6 |         r = requests.get(url, timeout=30)
 7 |         # 如果状态码不是200 则应发HTTOError异常
 8 |         r.raise_for_status()
 9 |         # 设置正确的编码方式
10 |         r.encoding = r.apparent_encoding()
11 |         return r.text
12 |     except:
13 |         return "Something Wrong!"
14 | 


--------------------------------------------------------------------------------
/requestes基本使用/login.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import bs4
 3 | import os
 4 | from PIL import Image
 5 | 
 6 | 
 7 | def get_post_data(url):
 8 | 
 9 |     # 首先获取到登录界面的html
10 |     html = requests.get(url
11 |                         )
12 | 
13 |     soup = bs4.BeautifulSoup(html.text, 'lxml')
14 | 
15 |     # 找到form的验证参数
16 |     __VIEWSTATE = soup.find('input', attrs={'name': '__VIEWSTATE'})['value']
17 |     
18 | 
19 |     # 下载验证码图片
20 |     pic = requests.get(
21 |         'http://jw.***.edu.cn/(gxv2le55n4jswm45mkv14o2n)/CheckCode.aspx').content
22 |     with open('ver_pic.png', 'wb') as f:
23 |         f.write(pic)
24 | 
25 |     # 打开验证码图片
26 |     image = Image.open('{}/ver_pic.png'.format(os.getcwd()))
27 |     image.show()
28 | 
29 |     # 构造需要post的参数表
30 |     data = {'txtUserName': '',
31 |             'Textbox1': '',
32 |             'TextBox2': '',
33 |             'txtSecretCode': "",
34 |             '__VIEWSTATE': '',
35 |             # 这里我将radio栏--学生 encode成gbk编码，以符合数据的要求
36 |             'RadioButtonList1': '\xd1\xa7\xc9\xfa',
37 |             'Button1': '',
38 |             'lbLanguage': '',
39 |             'hidPdrs': '',
40 |             'hidsc': '', }
41 | 
42 |     # 构造登录的post参数
43 |     data['__VIEWSTATE'] = __VIEWSTATE
44 |     data['txtSecretCode'] = input('请输入图片中的验证码')
45 |     data['txtUserName'] = input("请输入学号")
46 |     data['TextBox2'] = input("请输入密码")
47 | 
48 |     return data
49 | 
50 | 
51 | # 登录教务系统
def login(url, data):
    """Post the login form and return the session that carries the cookies.

    Args:
        url: Address the login form is submitted to.
        data: Form fields to post.

    Returns:
        The ``requests`` session used for the post, so later requests reuse
        whatever cookies the server set.
    """
    session = requests.session()
    session.post(url, data=data)
    return session
57 | 
58 | 
59 | 
base_url = 'http://jw.****.edu.cn/(gxv2le55n4jswm45mkv14o2n)/default2.aspx'
data = get_post_data(base_url)
# NOTE(review): this echoes the entered password to the console.
print(data)
# Simulate logging in to the academic system.
brow = login(base_url, data)

test = brow.get(
    'http://jw.****.edu.cn/(gxv2le55n4jswm45mkv14o2n)/xs_main.aspx?xh=14200406101')

# Check whether the post-login page contains the user's name element.
soup = bs4.BeautifulSoup(test.text, 'lxml')
name_tag = soup.find('span', attrs={'id': 'xhxm'})
# ``find`` returns None when the element is absent; test for that explicitly
# instead of hiding every possible error behind a bare ``except``.
name = name_tag.text if name_tag is not None else '登录失败 '

print(name)
77 | 
78 | 


--------------------------------------------------------------------------------
/requestes基本使用/test.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import bs4
 3 | import re
 4 | 
 5 | def get_html(url):
 6 |     try:
 7 |         r = requests.get(url, timeout=30)
 8 |         r.raise_for_status
 9 |         print(r.apparent_encoding)
10 |         r.encoding = r.apparent_encoding
11 |         return r.text
12 |     except:
13 |         return "Someting Wrong！"
14 | 
15 | 
def get_txt_url(url):
    """Collect the chapter URLs of a novel from its index page.

    Also appends a title line to the novel's output text file.

    Args:
        url: Address of the novel's index page.

    Returns:
        tuple: ``(url_list, txt_name)`` - the chapter URLs built from every
        ``<dd>`` link on the page, and the text of the page's ``<h1>``.
    """
    page = bs4.BeautifulSoup(get_html(url), 'lxml')
    chapter_nodes = page.find_all('dd')
    txt_name = page.find('h1').text

    target = '/Users/ehco/Documents/codestuff/Python-crawler/小说/{}.txt'.format(txt_name)
    with open(target, "a+") as f:
        f.write('小说标题：{} \n'.format(txt_name))

    url_list = ['http://www.qu.la/' + node.a['href'] for node in chapter_nodes]
    return url_list, txt_name
33 | 
34 | 
35 | 
# Address of a novel page on qu.la; not referenced by the code visible in this file.
url = 'http://www.qu.la/book/28888/'
37 | 
def get_one_txt(url, txt_name):
    """Download one chapter and append it to the novel's text file.

    Args:
        url: Address of the chapter page.
        txt_name: Novel title; used as the output file name.

    Returns:
        None. Prints a progress line on success and ``someting wrong`` when
        the chapter cannot be parsed or written.
    """
    # Turn HTML line breaks into real newlines before extracting the text.
    html = get_html(url).replace('<br/>', '\n')
    soup = bs4.BeautifulSoup(html, 'lxml')
    try:
        txt = soup.find('div', id='content').text.replace('chaptererror();', '')
        title = soup.find('title').text

        with open('/Users/ehco/Documents/codestuff/Python-crawler/小说/{}.txt'.format(txt_name), "a") as f:
            f.write(title + '\n\n')
            f.write(txt)
            print('当前章节{} 已经下载完毕'.format(title))
    except Exception:
        # Still best-effort per chapter, but no longer a bare ``except``, so
        # Ctrl-C and SystemExit are not swallowed mid-download.
        print('someting wrong')
51 | 
52 | 
53 | 
a = [1, 2, 3, 4, 5]
# Print each element's position as a percentage of the list length.
for position, i in enumerate(a):
    print(position / len(a) * 100)


--------------------------------------------------------------------------------
/sougou/configs.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf8 -*-
 2 | 
# Local database server.
# NOTE(review): the credentials are hard-coded in source control; consider
# loading them from the environment instead.
TEST_DB = {
    'host': '127.0.0.1',
    'user': 'root',
    'password': '19960202',
    'db': 'EhcoTestDb', }
 9 | 
10 | 


--------------------------------------------------------------------------------
/sougou/store_new/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/sougou/store_new/__init__.py


--------------------------------------------------------------------------------
/sougou/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/sougou/utils/__init__.py


--------------------------------------------------------------------------------
/sougou/utils/tools.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import time
 3 | import logging
 4 | 
 5 | class UtilLogger(object):
 6 | 
 7 |     '''
 8 |     建立日志文件，并以特定格式输出日志
 9 |     Args:
10 |         name:logger名字
11 |         logfile_name 日志文件名
12 |         level:调试级别，日志中只打印高于此级别的日志，例如logging.DEBUG、logging.info，此级别可以在set_level函数里设置
13 |     '''
14 |     def __init__(self, name, logfile_name=None, level=logging.DEBUG):
15 |         self.logger = logging.getLogger(name)
16 |         self.logger.setLevel(level)
17 |         formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s - %(message)s")
18 |         ch = None
19 |         if logfile_name is None:
20 |             ch = logging.StreamHandler()
21 |         else:
22 |             logDir = os.path.dirname(logfile_name)
23 |             if logDir != "" and not os.path.exists(logDir):
24 |                 os.mkdir(logDir)
25 |                 pass
26 |             now = time.localtime()
27 |             suffix = '.%d%02d%02d' % (now.tm_year, now.tm_mon, now.tm_mday)
28 |             ch = logging.FileHandler(logfile_name+suffix)
29 |         ch.setLevel(logging.DEBUG)
30 |         ch.setFormatter(formatter)
31 |         self.logger.addHandler(ch)
32 | 
33 |     def set_level(self,level):
34 |         '''
35 |         设置调试等级
36 |         Args:
37 |             level，字符串，可选debug、info、warning、error
38 |         '''
39 |         if level.lower() == "debug":
40 |             self.logger.setLevel(logging.DEBUG)
41 |         elif level.lower() == "info":
42 |             self.logger.setLevel(logging.INFO)
43 |         elif level.lower() == "warning":
44 |             self.logger.setLevel(logging.WARNING)
45 |         elif level.lower() == "error":
46 |             self.logger.setLevel(logging.ERROR)
47 | 
48 |     def debug(self, message):
49 |         '''
50 |         打印函数，最低调试级别的打印，
51 |         Args:
52 |             message为要打印的信息
53 |         info/warn/error函数与此类似
54 |         '''
55 |         self.logger.debug(message)
56 | 
57 |     def info(self,message):
58 | 	    self.logger.info(message)
59 | 
60 |     def warn(self,message):
61 | 	    self.logger.warn(message)
62 | 
63 |     def error(self,message):
64 | 	    self.logger.error(message)
65 | 
66 | 
67 | # def test():
68 | #     log = UtilLogger('testname','test')
69 | #     log.set_level('info')
70 | #     log.debug('++++++++++++++')
71 | #     log.info('--------------')
72 | #     log.warn('==============')
73 | #     log.error('_____________')
74 | # if __name__ == '__main__':
75 | #     test()
76 | 


--------------------------------------------------------------------------------
/toapi-91baby/.gitignore:
--------------------------------------------------------------------------------
  1 | .idea/
  2 | # Byte-compiled / optimized / DLL files
  3 | __pycache__/
  4 | *.py[cod]
  5 | *$py.class
  6 | .html/
  7 | # C extensions
  8 | *.so
  9 | 
 10 | # Distribution / packaging
 11 | .Python
 12 | env/
 13 | env27/
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | .hypothesis/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | local_settings.py
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # dotenv
 85 | .env
 86 | 
 87 | # virtualenv
 88 | .venv
 89 | venv/
 90 | ENV/
 91 | 
 92 | # Spyder project settings
 93 | .spyderproject
 94 | .spyproject
 95 | 
 96 | # Rope project settings
 97 | .ropeproject
 98 | 
 99 | # mkdocs documentation
100 | /site
101 | 
102 | # mypy
103 | .mypy_cache/
104 | 


--------------------------------------------------------------------------------
/toapi-91baby/app.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from toapi import Api
 4 | import requests
 5 | 
 6 | from items.hotbook import HotBook
 7 | from items.book import Book
 8 | from items.search import Search
 9 | from settings import MySettings
10 | 
11 | 
12 | 
13 | 
14 | 
# Build the toapi application and register every item class it serves.
api = Api('', settings=MySettings)
for item_cls in (HotBook, Book, Search):
    api.register(item_cls)
19 | 
20 | 
@api.server.app.route('/search/<keyword>')
def search_page(keyword):
    """Search the 91baby new-book forum by title and return JSON.

    Posts ``keyword`` to the forum's search endpoint, parses the result page
    with the ``Search`` item and returns the parsed data as a JSON response.
    """
    form = dict(
        searchsel='forum',
        mod='forum',
        srchtype='title',
        srchtxt=keyword,
    )
    resp = requests.post(
        'http://91baby.mama.cn/search.php?searchsubmit=yes', form)
    resp.encoding = 'utf8'
    html = resp.text
    # Parse the page with toapi, keyed by item class name.
    parsed = {item.__name__: api.parse_item(html, item) for item in [Search]}
    return api.server.app.response_class(
        response=json.dumps(parsed, ensure_ascii=False),
        status=200,
        mimetype='application/json'
    )
49 | 
50 | 
# Start the API server only when this file is run as a script.
if __name__ == '__main__':
    api.serve()
53 | 


--------------------------------------------------------------------------------
/toapi-91baby/data.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/toapi-91baby/data.sqlite


--------------------------------------------------------------------------------
/toapi-91baby/items/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/toapi-91baby/items/__init__.py


--------------------------------------------------------------------------------
/toapi-91baby/items/book.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 解析91baby 小说内容页
 3 | '''
 4 | 
 5 | from toapi import Item, XPath
 6 | 
 7 | 
 8 | def strip(text):
 9 |     '''去除字符串里的空白字符'''
10 |     blank_str = ['\u3000\u3000', '\xa0', '\r']
11 |     for i in blank_str:
12 |         text = text.replace(i, '')
13 |     return text
14 | 
15 | 
def strip_list(l):
    """Keep only the strings longer than five characters.

    Strings containing the forum's "last edited by" marker are dropped too.
    """
    return [line for line in l if len(line) > 5 and '本帖最后由' not in line]
25 | 
26 | 
class Book(Item):
    """Parse a 91baby novel thread page: title, author, page count, chapters."""

    __base_url__ = 'http://91baby.mama.cn'

    # Title and author are both cut out of the same text node.
    title = XPath('//*[@id="wp"]/div[3]/text()[3]')
    author = XPath('//*[@id="wp"]/div[3]/text()[3]')
    total_page = XPath('//span[@class="pgt"]/div//a')
    contents = XPath('//td[@class="t_f"]')

    def clean_title(self, title):
        """Return the text between the first ``《`` and the following ``》``."""
        return title.split('《')[1].split('》')[0]

    def clean_author(self, author):
        """Return everything after the ``作者：`` marker."""
        index = author.find('作者：') + 3
        return author[index:]

    def clean_contents(self, contents):
        """Turn post nodes into ``{post index: [text lines]}``.

        Posts containing the "current favourite count" boilerplate are
        skipped, and very short lines are filtered out of each post.
        """
        book_contents = {}
        for index, item in enumerate(contents):
            content = strip(item.xpath('string(.)'))
            # Skip the boilerplate opening post.
            if '当前被收藏数' in content:
                continue
            # Drop lines that are too short to be story text.
            book_contents[index] = strip_list(content.split('\n'))
        return book_contents

    def clean_total_page(self, total_page):
        """Return the total page count read from the pager links.

        The count is the label of the link just before the "next page" link.
        Returns 1 when there is no such link or its label is not a number.
        """
        last_index = None
        try:
            for index, page in enumerate(total_page):
                if page.xpath('./text()')[0] == '下一页':
                    last_index = index - 1
                    break
            if last_index is None:
                # No "next page" link: single-page thread. The original got
                # here through a NameError swallowed by a bare ``except``.
                return 1
            label = total_page[last_index].xpath('./text()')[0]
            # The last-page link may be rendered as "... N".
            return int(label.replace('... ', ''))
        except (IndexError, ValueError, AttributeError, TypeError):
            return 1

    class Meta:
        source = None
        route = {'/book_id=:id?page=:page': '/thread-:id-:page-1.html'}
73 | 


--------------------------------------------------------------------------------
/toapi-91baby/items/hotbook.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 解析91baby 新书热书列表页
 3 | '''
 4 | 
 5 | from collections import OrderedDict
 6 | from toapi import Item, XPath
 7 | 
 8 | 
 9 | class MyItem(Item):
10 |     @classmethod
11 |     def parse(cls, html):
12 |         """Parse html to json"""
13 |         if cls.Meta.source is None:
14 |             return cls._parse_item(html)
15 |         else:
16 |             sections = cls.Meta.source.parse(html, is_source=True)
17 |             results = []
18 |             for section in sections:
19 |                 res = cls._parse_item(section)
20 |                 if res:
21 |                     results.append(res)
22 |             return results
23 | 
24 |     @classmethod
25 |     def _parse_item(cls, html):
26 |         item = OrderedDict()
27 |         for name, selector in cls.__selectors__.items():
28 |             try:
29 |                 item[name] = selector.parse(html)
30 |             except Exception:
31 |                 item[name] = ''
32 |             clean_method = getattr(cls, 'clean_%s' % name, None)
33 |             if clean_method is not None:
34 |                 res = clean_method(cls, item[name])
35 |                 if res == None:
36 |                     return None
37 |                 else:
38 |                     item[name] = res
39 |         return item
40 | 
41 | 
class HotBook(MyItem):
    """Parse the 91baby hot/new book list: title, author, url and book id."""

    __base_url__ = 'http://91baby.mama.cn'
    # Title and author come from the same link text; url and id from its href.
    title = XPath('//a[@class="xst"]/text()[1]')
    author = XPath('//a[@class="xst"]/text()[1]')
    url = XPath('//a[@class="xst"]/@href')
    book_id = XPath('//a[@class="xst"]/@href')

    def clean_title(self, title):
        """Return up to 10 characters of the text inside ``《...》``.

        Returns None when the text has no opening ``《``.
        """
        start = title.find('《')
        if start == -1:
            return None
        end = title.find('》', start + 1)
        # Without a closing bracket take the rest of the text; the original
        # sliced up to find() == -1 and silently lost the last character.
        name = title[start + 1:] if end == -1 else title[start + 1:end]
        return name[:10]

    def clean_author(self, author):
        """Return the text after the first colon, up to a following parenthesis.

        Handles the ASCII pair ``:`` / ``(`` first, then the full-width pair.
        Returns None when the text contains neither colon.
        """
        for colon, paren in ((':', '('), ('：', '（')):
            if colon in author:
                name = author[author.find(colon) + 1:]
                cut = name.find(paren)
                # No parenthesis: keep the whole remainder instead of
                # dropping its last character.
                return name if cut == -1 else name[:cut]
        return None

    def clean_book_id(self, book_id):
        """Return the second dash-separated field of the thread href."""
        return book_id.split('-')[1]

    class Meta:
        source = XPath('//tbody[@class="thread_tbody"]')
        route = {'/hotbook?page=:page': '/forum-171-:page.html'}
69 | 


--------------------------------------------------------------------------------
/toapi-91baby/items/search.py:
--------------------------------------------------------------------------------
 1 | from toapi import Item, XPath
 2 | 
 3 | 
 4 | class Search(Item):
 5 |     '''
 6 |     从搜索的界面解析出
 7 |     书名 id 链接 简介
 8 |     '''
 9 |     title = XPath('//h3/a/text()')
10 |     book_id = XPath('//h3/a/@href')
11 |     url = XPath('//h3/a/@href')
12 |     content = XPath('//p[2]/text()')
13 | 
14 |     def clean_title(self, title):
15 |         return ''.join(title)
16 | 
17 |     def clean_book_id(self, book_id):
18 |         return book_id.split('-')[1]
19 | 
20 |     def clean_url(self, url):
21 |         return url[:url.find('?')]
22 | 
23 |     class Meta:
24 |         source = XPath('//li[@class="pbw"]')
25 |         # 这里的route留空，防止重复注册路由
26 |         route = {}
27 | 


--------------------------------------------------------------------------------
/toapi-91baby/settings.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from toapi.cache import MemoryCache
 4 | from toapi.settings import Settings
 5 | 
 6 | 
class MySettings(Settings):
    """
    Create custom configuration
    http://www.toapi.org/topics/settings/
    """

    # Cache configuration: parsed results are kept in an in-process MemoryCache.
    cache = {
        'cache_class': MemoryCache,
        'cache_config': {},
        'serializer': None,
        # presumably the entry lifetime in seconds - confirm against toapi docs
        'ttl': 10,
    }
    # Storage configuration.
    storage = {
        # Resolved from the process working directory at import time.
        "PATH": os.getcwd(),
        # Use SQLite as the storage backend.
        "DB_URL": 'sqlite:///data.sqlite',
    }
    # Options for fetching the source site.
    web = {
        "with_ajax": False,
        "request_config": {},
        "headers": None
    }
29 | 


--------------------------------------------------------------------------------
/toapi-91baby/test.py:
--------------------------------------------------------------------------------
 1 | '''测试api的使用'''
 2 | 
 3 | import sys
 4 | import requests
 5 | from prettytable import PrettyTable
 6 | 
# URL templates for the locally running API: book list page and book content page.
list_url = 'http://127.0.0.1:5000/hotbook?page={}'
book_url = 'http://127.0.0.1:5000/book_id={}?page={}'
 9 | 
10 | 
def get_json_response(url):
    """GET ``url`` and return the response body decoded from JSON."""
    return requests.get(url).json()
14 | 
15 | 
def print_table(header, rows):
    """Print ``rows`` as a text table with ``header`` as the column titles."""
    table = PrettyTable(header)
    for cells in rows:
        table.add_row(cells)
    print(table)
21 | 
22 | 
def get_book_list(page):
    """Fetch the book list for ``page`` and print it as a table."""
    listing = get_json_response(list_url.format(page))
    rows = [
        [entry['book_id'], entry['title'], entry['url']]
        for entry in listing['HotBook']
    ]
    print_table(['书号', '书名', '链接'], rows)
33 | 
34 | 
def get_book_content(book_id, page):
    """Show one page of a book interactively, chapter by chapter.

    Prints a summary table, then - if the user confirms - prints every chapter
    of the page, pausing after each. Afterwards offers to continue with the
    next page. Exits the program when the user declines to start reading.

    Args:
        book_id: Identifier of the book to read.
        page: Page number to fetch.
    """
    # Fetch the book data.
    book_json = get_json_response(book_url.format(book_id, page))
    book = book_json['Book']
    # Print the book summary.
    header = ['书名', '作者', '总页数', '当前页']
    rows = [[book['title'], book['author'], book['total_page'], page]]
    print_table(header, rows)

    contents = book['contents']
    key = input('要开始看小说么？y键开始\n\n')
    if key != 'y':
        sys.exit('退出程序...')

    # ``Book.clean_contents`` builds a mapping of post index -> list of lines,
    # whose keys are presumably strings (and possibly non-contiguous) after
    # the JSON round trip. Indexing with ``range(len(...))`` and adding a str
    # to a list, as the original did, fails on that shape. A plain list is
    # still accepted.
    if isinstance(contents, dict):
        chapters = contents.items()
    else:
        chapters = enumerate(contents)
    for number, body in chapters:
        text = body if isinstance(body, str) else '\n'.join(body)
        print(book['title'] + '第{}章节 \n\n'.format(number))
        print(text + '\n\n')
        input('本章已经阅读完，任意键阅读下一章节！\n\n')

    key = input('本页小说已经全部阅读完毕，要看下一页么？y键确定\n\n')
    if key == 'y':
        get_book_content(book_id, page + 1)
58 | 
59 | 
def main():
    """Interactive loop: list books by page, or open one by its id.

    ``q`` quits; ``y`` asks for a book id and opens that book at page 1.
    The book list for the current page value is printed on every pass.
    """
    while True:
        answer = input(
            '想看第几页书的书？ 请在下方输入页码 按回车键确定！q键退出 \n\n')
        if answer == 'q':
            sys.exit()
        if answer == 'y':
            book_id = input('请输入书号阅读书籍: \n')
            answer = 1
            get_book_content(book_id, answer)
        get_book_list(answer)
        print('找到想看的书了？想进去瞧一眼么？输入 y 进入书号输入界面！\n\n')
72 | 
73 | 
# Run the interactive reader only when this file is executed as a script.
if __name__ == '__main__':
    main()
76 | 


--------------------------------------------------------------------------------
/toapi-91baby/wsgi.py:
--------------------------------------------------------------------------------
1 | from app import api
2 | 
# Module-level alias of the application object held by the toapi server.
app = api.server.app
4 | 


--------------------------------------------------------------------------------
/wenjuanxin/configs.py:
--------------------------------------------------------------------------------
# Identifier of the questionnaire to fill in.
QUESTION_ID = 11231

# Page that displays the questionnaire.
QUESTION_URL = "https://www.wjx.cn/jq/{}.aspx".format(QUESTION_ID)

# URL template for submitting the questionnaire answers.
# Placeholders, in order: curID, t, starttime, rn.
POST_URL_MAP = "https://www.wjx.cn/joinnew/processjq.ashx?submittype=1&curID={}&t={}&starttime={}&rn={}"

# Console template printed for every question.
# Placeholders, in order: question title, choices, randomly picked choice.
QUESTION_INFO = '''
题目：{}
选项：{}

随机选择结果：{}

~~~~~~~~~~~~~~~~~~~~~~
'''

# Number of times the questionnaire is answered.
ANSWER_TIMES = 3
19 | 


--------------------------------------------------------------------------------
/wenjuanxin/spider.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | from datetime import datetime
 3 | from random import randint
 4 | 
 5 | from requests_html import HTMLSession
 6 | 
 7 | from configs import (QUESTION_ID, QUESTION_URL, POST_URL_MAP,
 8 |                      QUESTION_INFO, ANSWER_TIMES)
 9 | 
10 | 
def parse_post_url(resp):
    """Build the URL the questionnaire answers are posted to.

    Args:
        resp: Response of the questionnaire page; its HTML must contain a
            ``rndnum="..."`` attribute.

    Returns:
        str: ``POST_URL_MAP`` filled with the questionnaire id, a millisecond
        timestamp, a simulated start time and the page's ``rndnum`` value.
    """
    # Integer part of the page's rndnum value.
    rn = int(resp.html.search('rndnum="{}"')[0].split('.')[0])
    # Submission time in milliseconds. The original removed the '.' from
    # str(round(time.time(), 3)), which yields too few digits whenever the
    # rounded value ends in zeros (e.g. 1518606558.1 -> 15186065581).
    t = int(round(time.time() * 1000))
    # Simulated start of answering: 1 to 180 seconds before submission.
    starttime = datetime.fromtimestamp(
        t // 1000 - randint(1, 60 * 3)).strftime("%Y/%m/%d %H:%M:%S")

    url = POST_URL_MAP.format(QUESTION_ID, t, starttime, rn)
    return url
26 | 
27 | 
def parse_post_data(resp):
    """Pick a random choice for every question and build the form data.

    Prints each question with its choices and the pick, pausing half a second
    between questions.

    Args:
        resp: Response of the questionnaire page.

    Returns:
        dict: ``{'submitdata': ...}`` where the value joins one
        ``<question number>$<choice number>`` entry per question with ``}``.
    """
    answers = []
    questions = resp.html.find('fieldset', first=True).find('.div_question')

    for number, question in enumerate(questions, start=1):
        title = question.find('.div_title_question_all', first=True).text
        choices = [label.text for label in question.find('label')]
        picked = randint(0, len(choices) - 1)
        answers.append('{}${}'.format(number, picked + 1))
        print(QUESTION_INFO.format(title, choices, choices[picked]))
        time.sleep(0.5)
    # Entries are separated by `}`; there is no trailing separator.
    return {'submitdata': '}'.join(answers)}
47 | 
48 | 
def post_answer(session, url, data):
    """Post the answers to ``url`` and print the HTTP status code."""
    response = session.post(url, data)
    print('提交状态：{}'.format(response.status_code))
55 | 
56 | 
def simulate_survey():
    """Answer the questionnaire once with random choices.

    Loads the questionnaire page in a fresh session, derives the submit URL
    and the form data from it, then posts the answers.
    """
    browser = HTMLSession()
    page = browser.get(QUESTION_URL)
    submit_url = parse_post_url(page)
    form = parse_post_data(page)
    post_answer(browser, submit_url, form)
66 | 
67 | 
def main():
    """Answer the questionnaire ``ANSWER_TIMES`` times.

    Sleeps a random 1 to 60 seconds after each submission.
    """
    print('开始模拟填写问卷,共模拟{}次'.format(ANSWER_TIMES))
    for round_no in range(1, ANSWER_TIMES + 1):
        simulate_survey()
        pause = randint(1, 60)
        print('第{}次问卷填写完毕，即将沉睡{}s'.format(round_no, pause))
        time.sleep(pause)
75 | 
76 | 
# Run the survey simulation only when this file is executed as a script.
if __name__ == '__main__':
    main()
79 | 


--------------------------------------------------------------------------------
/zhihu/zhihu_easy/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ehco1996/Python-crawler/e89ef774653965c6b045a2c4fd101846ee8c62ef/zhihu/zhihu_easy/__init__.py


--------------------------------------------------------------------------------
/zhihu/zhihu_easy/configs.py:
--------------------------------------------------------------------------------
# Zhihu account name, password and authorization value.
USERNAME = ''
PASSWD = ''
AUTH = ''

# API address where the crawl of the user's activity feed starts.
START_URL = 'https://www.zhihu.com/api/v4/members/excited-vczh/activities?limit=8&after_id=1518606558&desktop=True'

# Database configuration.
LOCAL_DB = {
    'host': '127.0.0.1',
            'user': 'root',
            'password': '',
            'db': ''
}

# Tag identifying the crawled user.
USER_SIG = 'vczh'
19 | 


--------------------------------------------------------------------------------
/zhihu/zhihu_easy/db_tools.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from lazyspider.lazystore import LazyMysql
 3 | 
 4 | from parse import parse_activities
 5 | from configs import LOCAL_DB, USER_SIG
 6 | 
 7 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 8 | 
 9 | 
def json_to_db():
    """Load every activity file under ``data/`` into MySQL.

    Each file is parsed with ``parse_activities``; every record is tagged with
    ``USER_SIG`` as ``username`` and stored in table ``zhihu_activities``.
    A record that fails to save is reported and skipped.
    """
    store = LazyMysql(LOCAL_DB)
    data_dir = os.path.join(BASE_DIR, 'data')
    for file in os.listdir(data_dir):
        file_abs_path = os.path.join(data_dir, file)
        # Parse the JSON file and keep only the fields we need.
        res = parse_activities(file_abs_path)
        for data in res:
            try:
                data.update({'username': USER_SIG})
                store.save_one_data(data, 'zhihu_activities')
            except Exception:
                # Best effort per record, but no longer a bare ``except``
                # that would also swallow Ctrl-C.
                print('error !!!!!!!!!')
    # Reported once, after the last file; the original printed this inside
    # the loop, i.e. after every single file.
    print('所有文件导入完毕')
26 |         
27 | 
# Run the import only when this file is executed as a script.
if __name__ == '__main__':
    json_to_db()
30 | 


--------------------------------------------------------------------------------
/zhihu/zhihu_easy/parse.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from datetime import datetime
 3 | 
 4 | 
 5 | def parse_activities(file_path):
 6 |     '''
 7 |     解析用户动态数据
 8 |     rtype:
 9 |         list
10 |     '''
11 |     with open(file_path) as f:
12 |         try:
13 |             data = json.load(f).get('data')
14 |         except:
15 |             print('{}文件载入失败'.format(file_path))
16 |             return []
17 |         res = []
18 |         for action in data:
19 |             verb = action['verb']
20 |             if verb == 'ANSWER_VOTE_UP' or verb == 'ANSWER_CREATE':  # 赞同/回答的行为
21 |                 question_id = action['target']['question']['id']
22 |                 question_api_url = action['target']['question']['url']
23 |                 question_name = action['target']['question']['title']
24 | 
25 |                 answer_id = action['target']['id']
26 |                 answer_api_url = action['target']['url']
27 |                 answer_content = action['target']['excerpt']
28 |                 answer_voteup_count = action['target']['voteup_count']
29 |                 create_time = datetime.fromtimestamp(
30 |                     action['target']['created_time'])
31 | 
32 |             elif verb == 'QUESTION_FOLLOW':  # 关注问题的行为
33 |                 question_id = action['target']['id']
34 |                 question_api_url = action['target']['url']
35 |                 question_name = action['target']['title']
36 | 
37 |                 answer_id = ''
38 |                 answer_api_url = ''
39 |                 answer_content = ''
40 |                 answer_voteup_count = 0
41 |                 create_time = datetime.fromtimestamp(
42 |                     action['target']['created'])
43 | 
44 |             else:
45 |                 continue
46 | 
47 |             res.append({
48 |                 'question_id': question_id,
49 |                 'question_name': question_name,
50 |                 'question_api_url': question_api_url,
51 |                 'answer_id': answer_id,
52 |                 'answer_api_url': answer_api_url,
53 |                 'answer_content': answer_content,
54 |                 'verb': verb,
55 |                 'answer_voteup_count': answer_voteup_count,
56 |                 'create_time': create_time, })
57 |         return res
58 | 


--------------------------------------------------------------------------------
/zhihu/zhihu_easy/spider.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import time
 3 | import os
 4 | 
 5 | from client import ZhihuClient
 6 | from configs import USERNAME, PASSWD, AUTH, START_URL
 7 | 
 8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 9 | 
10 | 
def download_activs_json(s, url, count=1):
    """Download the paged activity feed and save each page as a JSON file.

    Starting at ``url``, every page is written to ``data/<count>.json`` next
    to this file, and the ``paging.next`` link is followed until the API
    reports ``paging.is_end``.

    Args:
        s: Session object used for the GET requests.
        url: Address of the first page to download.
        count: Number used for the first output file; incremented per page.
    """
    data_dir = os.path.join(BASE_DIR, 'data')
    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)
    # Iterate rather than recurse: one stack frame per page would hit the
    # interpreter's recursion limit on a long feed.
    while True:
        res = s.get(url).json()
        with open(os.path.join(data_dir, '{}.json'.format(count)), 'w') as f:
            f.write(json.dumps(res, ensure_ascii=False))
        print('正在下载第{}份动态'.format(count))
        count += 1
        # Pause between requests.
        time.sleep(1)
        if res['paging']['is_end']:
            print('所有动态下载完毕')
            return
        url = res['paging']['next']
27 | 
28 | 
def download_activs():
    """Log in to Zhihu and download the configured activity feed."""
    client = ZhihuClient(USERNAME, PASSWD)
    session = client.get_session()
    # Add the authorization header to every request of the session.
    session.headers.update({'authorization': AUTH})
    download_activs_json(session, START_URL)
35 | 
36 | 
# Start the download only when this file is executed as a script.
if __name__ == "__main__":
    download_activs()
39 | 


--------------------------------------------------------------------------------
/zhihu/zhihu_easy/tools.py:
--------------------------------------------------------------------------------
 1 | import shutil
 2 | 
 3 | import requests
 4 | 
 5 | 
 6 | def get_image(url, path):
 7 |     res = requests.get(url, stream=True)
 8 |     with open(path, 'wb') as f:
 9 |         shutil.copyfileobj(res.raw, f)
10 | 
11 | 
def save_html(text, name):
    """Write ``text`` to the file ``name``, replacing any existing content."""
    with open(name, 'w') as out_file:
        out_file.write(text)
15 | 


--------------------------------------------------------------------------------
/浏览器模拟爬虫/001.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 测试selenium模拟浏览器
 3 | 和phantomjs无页面浏览器
 4 | 
 5 | 导出PhantomJs浏览器帮助文档
 6 | '''
 7 | 
 8 | from selenium import webdriver
 9 | import sys
10 | 
# Dump the interactive help text of the PhantomJS driver to browserHelp.txt.
browser = webdriver.PhantomJS()
out = sys.stdout

try:
    with open('browserHelp.txt', 'w') as help_file:
        # help() writes to sys.stdout, so point it at the file temporarily.
        sys.stdout = help_file
        help(browser)
finally:
    # Always restore stdout and shut the browser down, even if help() fails.
    sys.stdout = out
    browser.quit()
exit()


--------------------------------------------------------------------------------
/浏览器模拟爬虫/baidu.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 使用Selenium模拟浏览器
 3 | 抓取百度查询结果
 4 | '''
 5 | 
 6 | # 导入selenium模块中的web引擎
 7 | from selenium import webdriver
 8 | 
 9 | 
# Create the browser object, backed by PhantomJS.
browser = webdriver.PhantomJS()

try:
    # Address to visit.
    url = 'https://www.baidu.com'

    # Open the page.
    browser.get(url)

    # Let element lookups wait up to 3 seconds for scripts to finish loading.
    browser.implicitly_wait(3)

    # Locate the search box.
    text = browser.find_element_by_id('kw')

    # Clear whatever text it contains.
    text.clear()

    # Type the search term.
    text.send_keys('python')

    # Locate the submit button.
    button = browser.find_element_by_id('su')

    # Submit the search request.
    button.submit()

    # Show the current page title.
    print(browser.title)

    # Save a screenshot to inspect the rendered page.
    browser.save_screenshot('text.png')

    # Collect the result elements into a list.
    results = browser.find_elements_by_class_name('t')

    # Print the title and link of every result.
    for result in results:
        print('标题：{} 超链接：{}'.format(result.text,
                                    result.find_element_by_tag_name('a').get_attribute('href')))
finally:
    # Shut the driver down on success and on failure alike; the original
    # never quit the browser.
    browser.quit()
51 | 


--------------------------------------------------------------------------------
/浏览器模拟爬虫/kuaiproxy.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | selenium模拟浏览器爬虫
 3 | 
 4 | 爬取快代理：http://www.kuaidaili.com/
 5 | '''
 6 | 
 7 | from selenium import webdriver
 8 | 
 9 | 
class Item(object):
    '''
    One scraped proxy entry, modelled on a Scrapy item.

    Every field is a class-level attribute that starts out as None and is
    overwritten per instance by the crawler.
    '''

    # ip        - IP address
    # port      - port
    # anonymous - whether the proxy is anonymous
    # type      - http or https
    # local     - physical location
    # speed     - speed
    ip = port = anonymous = type = local = speed = None
23 | 
class GetProxy(object):
    '''
    Scrape proxies from kuaidaili.com with a headless browser and save them
    to a text file.

    Instantiating the class runs the whole crawl: build the page URLs, scrape
    them, then write the result to ``proxy.txt``.
    '''

    def __init__(self):
        '''
        Run the crawl end to end, keeping each intermediate result on self.
        '''
        self.starturl = 'http://www.kuaidaili.com/free/inha/'
        self.urls = self.get_urls()
        self.proxylist = self.get_proxy_list(self.urls)
        self.filename = 'proxy.txt'
        self.saveFile(self.filename, self.proxylist)

    def get_urls(self):
        '''
        Return the list of listing-page URLs to crawl.

        Only page 1 is generated (``range(1, 2)``).
        '''
        return [self.starturl + str(page) for page in range(1, 2)]

    def get_proxy_list(self, urls):
        '''
        Visit every URL in ``urls`` and return the scraped rows as a list of
        Item objects. This is the core of the crawler.
        '''
        browser = webdriver.PhantomJS()
        proxy_list = []

        try:
            # The implicit wait is a session-wide setting, so set it once
            # instead of on every loop pass.
            browser.implicitly_wait(3)

            for url in urls:
                browser.get(url)
                # Each table row is one proxy entry
                for element in browser.find_elements_by_xpath('//tbody/tr'):
                    item = Item()
                    item.ip = element.find_element_by_xpath('./td[1]').text
                    item.port = element.find_element_by_xpath('./td[2]').text
                    item.anonymous = element.find_element_by_xpath('./td[3]').text
                    # NOTE(review): Item.type is never populated here; verify
                    # the column order against the live table.
                    item.local = element.find_element_by_xpath('./td[4]').text
                    item.speed = element.find_element_by_xpath('./td[5]').text
                    print(item.ip)
                    proxy_list.append(item)
        finally:
            # Release the PhantomJS process even when scraping fails midway.
            browser.quit()

        return proxy_list

    def saveFile(self, filename, proxy_list):
        '''
        Write the scraped proxies to ``filename``: one tab-separated record
        per entry, each followed by a blank line.
        '''
        # The file is written as UTF-8 so that non-ASCII field text does not
        # depend on the platform's default encoding.
        with open(filename, 'w', encoding='utf-8') as f:
            for item in proxy_list:
                fields = (item.ip, item.port, item.anonymous,
                          item.local, item.speed)
                f.write('\t'.join(fields) + '\n\n')
89 | 
90 | 
# Start the crawl only when this file is executed as a script; constructing
# GetProxy runs the whole scrape-and-save pipeline.
if __name__ =='__main__':
    Get = GetProxy()


--------------------------------------------------------------------------------
/美食杰/spider.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 美食杰 各地小吃爬虫
 3 | 主页url:  http://www.meishij.net/
 4 | 排行榜url： http://top.meishi.cc/lanmu.php?cid=78
 5 | '''
 6 | 
 7 | # 导入相关库
 8 | import requests
 9 | from bs4 import BeautifulSoup
10 | 
11 | 
# Entry URL for the ranking lists
Top_food_url = 'http://top.meishi.cc/lanmu.php?cid=3'

# Entry URL for home-style recipes
Home_food_url = 'http://top.meishi.cc/lanmu.php?cid=13'

# Entry URL for Chinese regional cuisines
China_food_url = 'http://top.meishi.cc/lanmu.php?cid=2'

# Entry URL for foreign cuisines
Foreign_food_url = 'http://top.meishi.cc/lanmu.php?cid=10'
23 | 
24 | 
def get_html_text(url):
    '''Fetch ``url`` and return the decoded HTML text.

    Returns the sentinel string 'error' when the request fails or the server
    answers with a 4xx/5xx status; callers compare against that sentinel.
    '''
    try:
        r = requests.get(url, timeout=3)
        # This must be *called*: the bare attribute access was a no-op, so
        # HTTP error pages were returned as if they were valid content.
        r.raise_for_status()
        # Use the encoding detected from the body rather than the header guess
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Covers connection errors, timeouts and the HTTPError raised above
        return 'error'
34 | 
35 | 
def parse_city_id(url):
    '''Collect the per-city ranking links found on a category page.

    Returns a list of ``{'name': link text, 'url': link href}`` dicts. The
    list is empty (never None) when the page cannot be fetched or the
    expected block is missing, so callers can always iterate the result.
    '''
    html = get_html_text(url)
    # get_html_text signals failure with the sentinel string 'error'
    if html == 'error':
        print('error !!!!')
        return []

    soup = BeautifulSoup(html, 'lxml')
    # The <div> that holds the regional-snack ranking categories
    cityids = soup.find('div', class_='rank_content_top')
    if cityids is None:
        # Layout changed or an unexpected page came back
        print('error !!!!')
        return []

    # href=True skips anchors without a link, which would raise KeyError
    return [{'name': city.text, 'url': city['href']}
            for city in cityids.find_all('a', href=True)]
51 | 
52 | 
def parse_food_info(url):
    '''Yield the dishes listed on one ranking page.

    Generator producing ``(name, detail_url, img_url)`` tuples: the dish
    name, the link to its recipe and the link to its picture. Yields nothing
    when the page cannot be fetched or the ranking block is missing.
    '''
    html = get_html_text(url)
    # get_html_text signals failure with the sentinel string 'error'
    if html == 'error':
        print('error !!!!')
        return

    soup = BeautifulSoup(html, 'lxml')
    # The block that holds the ranking itself
    foods = soup.find('div', class_='rank_content_top10_wraper')
    if foods is None:
        print('error !!!!')
        return

    for food in foods.find_all('li'):
        # The image anchor carries the dish name, recipe link and picture
        content = food.find('a', class_='img')
        if content is None or content.img is None:
            # Not a dish entry (no image anchor): skip it instead of crashing
            continue
        name = content['title']
        detail_url = content['href']
        img_url = content.img['src']
        print('正在解析美食：{}'.format(name))
        yield name, detail_url, img_url
74 | 
75 | 
def main():
    '''Program entry point: crawl every category and print each dish found.'''
    # All starting category pages
    url_list = [Top_food_url, Home_food_url, China_food_url, Foreign_food_url]
    for url in url_list:
        # parse_city_id may produce nothing usable on a failed fetch; fall
        # back to an empty list so one bad category does not abort the run.
        for page in parse_city_id(url) or []:
            # Name of the cuisine / ranking this page belongs to
            name = page['name']
            # parse_food_info is a generator yielding one dish at a time
            for food_name, detail_url, img_url in parse_food_info(page['url']):
                #  save data
                print('菜系：{}\t名字：{}\n做法：{}\n图片：{}'.format(
                    name, food_name, detail_url, img_url))


# Run the crawl only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
95 | 


--------------------------------------------------------------------------------
/腾讯漫画/downloder.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | 
 4 | import requests
 5 | 
 6 | 
 7 | def download_img(name, url):
 8 |     with open(name, 'wb') as f:
 9 |         f.write(requests.get(url).content)
10 | 
11 | 
def download_comic(comic_name, comic_id):
    """Download every chapter described in ``<comic_id>.json``.

    The JSON file maps a chapter key to an object with a ``title`` and a
    ``pics`` list whose entries each carry a ``url``. Pictures are stored as
    ``<comic_name>/<key>-<title>/<index>.png``.

    Args:
        comic_name: Directory that receives the chapters.
        comic_id: Stem of the JSON metadata file to read.
    """
    # Load the comic metadata
    json_file_name = "{}.json".format(comic_id)
    with open(json_file_name, 'r') as f:
        data = json.load(f)

    # Create the comic directory; exist_ok avoids the check-then-create race
    os.makedirs(comic_name, exist_ok=True)

    for chapter_key, chapter in data.items():
        title = chapter_key + '-' + chapter['title']

        # Create the chapter directory
        path = os.path.join(comic_name, title)
        os.makedirs(path, exist_ok=True)

        # A distinct loop variable: the original reused the chapter's name
        # for each picture, shadowing it inside the loop.
        for index, pic in enumerate(chapter['pics']):
            name = os.path.join(path, "{}.png".format(index))
            download_img(name, pic['url'])
        print(title, '下载完毕')
34 | 
35 | 
def main(comic_name="女巫", comic_id=632784):
    """Download one comic; the defaults reproduce the original fixed choice.

    Args:
        comic_name: Directory name for the downloaded comic.
        comic_id: Stem of the ``<comic_id>.json`` metadata file to read.
    """
    print('开始下载漫画：', comic_name)
    download_comic(comic_name, comic_id)
41 | 
42 | 
# Start the download only when this file is executed as a script.
if __name__ == '__main__':
    main()
45 | 


--------------------------------------------------------------------------------
/豆瓣影评/锤神3/config.py:
--------------------------------------------------------------------------------
1 | 
# Database connection parameters; play_data.py passes this dict to DbToMysql.
EHCO_DB = dict(
    host='127.0.0.1',
    user='root',
    password='xxx',
    db='EhcoTestDb',
)
8 | 


--------------------------------------------------------------------------------
/豆瓣影评/锤神3/play_data.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 对抓取的影评数据
 3 | 进行基本的分析统计
 4 | 最后并生成词云
 5 | '''
 6 | 
 7 | '''
 8 | 01 最早时间 - - 因为国内外上映时间不一
 9 |     统计哪天的评论数量最多
10 | 02 推荐程度 - - 前10000 频率统计
11 | 03 评论内容 - - 前100 评论内容词频分析，做成词云
12 | 
13 | '''
14 | # 导入配置文件和数据库支持
15 | import config
16 | from stroe import DbToMysql
17 | from datetime import datetime
18 | import jieba
19 | 
# Initialise the database connection used by the analysis below
store = DbToMysql(config.EHCO_DB)
22 | 
23 | 
def format_to_week(day):
    '''
    Map a date string in ``YYYY-MM-DD`` form to its Chinese weekday name,
    from 周一 (Monday) through 周日 (Sunday).
    '''
    # Indexed by datetime.weekday(): 0 is Monday, 6 is Sunday
    weekday_names = ('周一', '周二', '周三', '周四', '周五', '周六', '周日')
    parsed = datetime.strptime(day, "%Y-%m-%d")
    return weekday_names[parsed.weekday()]
39 | 
40 | 
41 | 
# Count how many comments were posted on each weekday
# NOTE(review): 19000 is presumably a row limit; confirm against DbToMysql.find_all
date_list = store.find_all('GodOfHammer_1', 19000)

# Tally: weekday name -> number of comments
dateSet = {}
for data in date_list:
    week = format_to_week(data['time'])
    dateSet[week] = dateSet.get(week, 0) + 1
print(dateSet)
54 | '''
55 | 结果：
56 | {'周三': 192, '周四': 234, '周五': 4518, '周二': 109, '周六': 6219, '周日': 5441, '周一': 2287}
57 | '''
58 | 
59 | '''
60 | # 查询点赞数量排名前10000的留言的 推荐程度
61 | recommend_level_list = store.find_by_sort('GodOfHammer_1', 'vote', 10000)
62 | 
63 | # 建立统计dict
64 | recommendSet = {}
65 | # 开始统计不同推荐程度出现的次数
66 | for data in recommend_level_list:
67 |     if data['star'] not in recommendSet.keys():
68 |         recommendSet[data['star']] = 1
69 |     else:
70 |         recommendSet[data['star']] += 1
71 | print(recommendSet)
72 | '''
73 | 
74 | '''
75 | 
76 | # 截取前100条热门评论并进行分词统计
77 | comment_data = store.find_by_sort('GodOfHammer_1', 'vote', '100')
78 | comment_detail_list = []
79 | for data in comment_data:
80 |     comment_detail_list.append(data['content'])
81 | # 利用结巴分词工具分词
82 | seg_list = jieba.cut(' '.join(comment_detail_list))
83 | for word in seg_list:
84 |     print (word)
85 | '''
86 | 
87 | 
88 | 


--------------------------------------------------------------------------------