├── 1024 └── down.py ├── ethCrawler ├── log.txt ├── readme.txt ├── EthTradeList.py ├── TradeList.py ├── etherscantradelist.sql ├── eth.sql ├── crawler.py └── etherEumCrawler.py ├── appdata ├── __init__.py ├── common │ ├── __init__.py │ └── User.py ├── userinfo (2).sql └── crawlinfo.py ├── dingding ├── __init__.py ├── common │ ├── __init__.py │ ├── token.txt │ ├── config.py │ └── functions.py ├── mappers │ ├── __init__.py │ ├── CarCostHistory.py │ ├── ComplainRecord.py │ ├── DailyWorkReport.py │ ├── ServerCheck.py │ ├── Returnvisit.py │ ├── FaultHistory.py │ ├── ImportantEvent.py │ ├── InspectionRecord.py │ └── CostApplication.py ├── error.log ├── test.json └── main.py ├── jingdong ├── __init__.py ├── jingdong │ ├── __init__.py │ ├── spiders │ │ ├── __init__.py │ │ └── price.py │ ├── pipelines.py │ ├── items.py │ ├── middlewares.py │ └── settings.py └── scrapy.cfg ├── miaosha ├── __init__.py ├── checkcode.jpg ├── miaosha │ ├── __init__.py │ ├── spiders │ │ └── __init__.py │ ├── pipelines.py │ ├── items.py │ ├── middlewares.py │ └── settings.py └── scrapy.cfg ├── crawl_fund ├── Spiders │ ├── __init__.py │ └── __pycache__ │ │ ├── Fund.cpython-36.pyc │ │ └── __init__.cpython-36.pyc ├── common │ ├── __init__.py │ ├── __pycache__ │ │ ├── config.cpython-36.pyc │ │ ├── __init__.cpython-36.pyc │ │ └── function.cpython-36.pyc │ ├── function.py │ └── config.py ├── mappers │ ├── __init__.py │ ├── __pycache__ │ │ ├── Fund.cpython-36.pyc │ │ └── __init__.cpython-36.pyc │ ├── Detail.py │ └── Fund.py ├── csvfiles │ └── fund.csv ├── main.py ├── sql │ └── funddetail.sql └── htmls │ └── details │ ├── 580005 │ ├── 101.txt │ ├── 100.txt │ ├── 16.txt │ ├── 19.txt │ ├── 25.txt │ ├── 30.txt │ ├── 32.txt │ ├── 4.txt │ ├── 58.txt │ ├── 88.txt │ ├── 92.txt │ ├── 1.txt │ ├── 10.txt │ ├── 11.txt │ ├── 13.txt │ ├── 14.txt │ ├── 17.txt │ ├── 18.txt │ ├── 2.txt │ ├── 21.txt │ ├── 22.txt │ ├── 23.txt │ └── 24.txt │ └── 001112 │ ├── 30.txt │ ├── 25.txt │ ├── 27.txt │ ├── 8.txt │ ├── 9.txt │ ├── 10.txt │ ├── 11.txt │ ├── 12.txt │ ├── 13.txt │ ├── 15.txt │ ├── 23.txt │ ├── 24.txt │ ├── 28.txt │ └── 29.txt ├── .gitignore ├── alishiyong ├── report.csv ├── 冬虫夏草试用报告.xlsx └── itemLinks.txt ├── doubandingtie └── code.jpg ├── crawlDajiawen ├── dajiawen.csv ├── 大家问冬虫夏草Top50问答数据.xlsx ├── goodid.txt └── spider.py ├── easy_distributed_crawler ├── 执行流程.png ├── 爬虫结构.png ├── 爬虫队列.png ├── SlaveNode │ ├── HtmlDownloader.py │ ├── HtmlParser.py │ └── SlaveWork.py ├── readme.md └── MasterNode │ ├── DataOuput.py │ └── URlManager.py ├── souhuVideoUpload └── upload.py ├── README.md └── pdfdownload ├── pdfdown.py └── pdfdown_mutiprocess.py /ethCrawler/log.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /appdata/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dingding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jingdong/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /miaosha/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /miaosha/checkcode.jpg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /appdata/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dingding/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dingding/mappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /miaosha/miaosha/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawl_fund/Spiders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawl_fund/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawl_fund/mappers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /jingdong/jingdong/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dingding/common/token.txt: -------------------------------------------------------------------------------- 1 | ae2cb26810763b2c946b767dea54e932 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | ghostdriver.log 3 | __pycache__/ 4 | test.py -------------------------------------------------------------------------------- /dingding/error.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/dingding/error.log -------------------------------------------------------------------------------- /alishiyong/report.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/alishiyong/report.csv -------------------------------------------------------------------------------- /doubandingtie/code.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/doubandingtie/code.jpg -------------------------------------------------------------------------------- /alishiyong/冬虫夏草试用报告.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/alishiyong/冬虫夏草试用报告.xlsx -------------------------------------------------------------------------------- /ethCrawler/readme.txt: -------------------------------------------------------------------------------- 1 | - 新建一个数据库,命名叫eth 2 | - 在数据库下执行目录中的你文件eth.sql 3 | 
- 修改crawler中的dburl修改的mysql密码以及服务器地址 -------------------------------------------------------------------------------- /crawlDajiawen/dajiawen.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/crawlDajiawen/dajiawen.csv -------------------------------------------------------------------------------- /easy_distributed_crawler/执行流程.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/easy_distributed_crawler/执行流程.png -------------------------------------------------------------------------------- /easy_distributed_crawler/爬虫结构.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/easy_distributed_crawler/爬虫结构.png -------------------------------------------------------------------------------- /easy_distributed_crawler/爬虫队列.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/easy_distributed_crawler/爬虫队列.png -------------------------------------------------------------------------------- /crawlDajiawen/大家问冬虫夏草Top50问答数据.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/crawlDajiawen/大家问冬虫夏草Top50问答数据.xlsx -------------------------------------------------------------------------------- /crawl_fund/Spiders/__pycache__/Fund.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/crawl_fund/Spiders/__pycache__/Fund.cpython-36.pyc -------------------------------------------------------------------------------- /crawl_fund/common/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/crawl_fund/common/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /crawl_fund/mappers/__pycache__/Fund.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/crawl_fund/mappers/__pycache__/Fund.cpython-36.pyc -------------------------------------------------------------------------------- /crawl_fund/Spiders/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/crawl_fund/Spiders/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /crawl_fund/common/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/crawl_fund/common/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /crawl_fund/common/__pycache__/function.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/crawl_fund/common/__pycache__/function.cpython-36.pyc 
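The ethCrawler/readme.txt above asks you to create an eth database, load eth.sql, and then edit the dburl in crawler.py with your own MySQL password and server address. A minimal, hypothetical sketch of that line, following the connection-string format used elsewhere in this repo (crawl_fund/common/config.py); the exact variable placement inside crawler.py is an assumption:

# hypothetical example only: substitute your own MySQL user, password and host
dburl = "mysql+pymysql://root:<your-password>@<your-server-address>/eth?charset=utf8"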
-------------------------------------------------------------------------------- /crawl_fund/mappers/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shisiying/crawer_python/HEAD/crawl_fund/mappers/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /miaosha/miaosha/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /crawl_fund/common/function.py: -------------------------------------------------------------------------------- 1 | def getText(element): 2 | if element!=None: 3 | txt=element.get_text() 4 | if str(txt).strip()=="---": 5 | txt="0" 6 | return txt 7 | return "" -------------------------------------------------------------------------------- /jingdong/jingdong/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /crawl_fund/common/config.py: -------------------------------------------------------------------------------- 1 | dbconfig={"host":'localhost',"user":'root',"password":'hello2016',"db":'jijin',"charset":'utf8'} 2 | dburl="mysql+pymysql://root:hello2016@localhost/jijin?charset=utf8" 3 | detailurl="http://fund.eastmoney.com/f10/jjjz_580005.html" 4 | 5 | -------------------------------------------------------------------------------- /miaosha/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = miaosha.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = miaosha 12 | -------------------------------------------------------------------------------- /jingdong/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = jingdong.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = jingdong 12 | -------------------------------------------------------------------------------- /dingding/common/config.py: -------------------------------------------------------------------------------- 1 | # sqlacodegen --tables returnvisit --outfile Returnvisit.py mysql+pymysql://root:mysql123456@1.85.18.26/jtdz?charset=utf8 2 | dburl="mysql+pymysql://root:mysql123456@1.185.118.26/jtdz?charset=utf8" 3 | corpid = 'ding95d0e17f21c9bFDFD99a' ##假的id 4 | corpsecret = 'sn8LZ2Vg-ryUtk9YcyyGoIcRBfJ7NoevxwUlh4eXXaySDkwBpKkDFDSDSa3P2QMjitc1fElk' ##假的 5 | -------------------------------------------------------------------------------- /miaosha/miaosha/pipelines.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class MiaoshaPipeline(object): 10 | def process_item(self, item, spider): 11 | return item 12 | -------------------------------------------------------------------------------- /jingdong/jingdong/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class JingdongPipeline(object): 10 | def process_item(self, item, spider): 11 | return item 12 | -------------------------------------------------------------------------------- /miaosha/miaosha/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class MiaoshaItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | pass 15 | -------------------------------------------------------------------------------- /jingdong/jingdong/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class JingdongItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | pass 15 | -------------------------------------------------------------------------------- /easy_distributed_crawler/SlaveNode/HtmlDownloader.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import requests 3 | 4 | class HtmlDownloader(object): 5 | 6 | def download(self,url): 7 | if url is None: 8 | return None 9 | user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)' 10 | headers = {'User-Agent':user_agent} 11 | 12 | r = requests.get(url,headers=headers) 13 | if r.status_code ==200: 14 | r.encoding='utf-8' 15 | return r.text 16 | return None -------------------------------------------------------------------------------- /easy_distributed_crawler/readme.md: -------------------------------------------------------------------------------- 1 | ## 项目介绍 2 | 简单分布式爬虫项目,该项目,分布式采用简单的主从模式,采用分布式进程和进程间的通信,同时,涵盖了普通爬虫应有的几个模块,URL管理模块,Html解析模块,Html下载模块,数据存储模块,爬虫调度模块 3 | 4 | ### 项目目录介绍 5 | MasterNode--主节点 6 | SlaveNode--从节点 7 | 8 | ### 爬虫结构 9 | 10 | ![](https://github.com/shisiying/crawer_python/blob/master/easy_distributed_crawler/爬虫结构.png) 11 | 12 | ### 爬虫执行流程 13 | 14 | ![](https://github.com/shisiying/crawer_python/blob/master/easy_distributed_crawler/执行流程.png) 15 | 16 | ### 爬虫分布式进程通信的队列 17 | 18 | ![](https://github.com/shisiying/crawer_python/blob/master/easy_distributed_crawler/爬虫队列.png) 19 | 20 | 21 | -------------------------------------------------------------------------------- /crawl_fund/mappers/Detail.py: -------------------------------------------------------------------------------- 1 | # coding: 
utf-8 2 | from sqlalchemy import Column, DateTime, Integer, Numeric, String 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Funddetail(Base): 11 | __tablename__ = 'funddetail' 12 | 13 | id = Column(Integer, primary_key=True) 14 | fcode = Column(String(10), nullable=False) 15 | fdate = Column(DateTime) 16 | NAV = Column(Numeric(10, 4)) 17 | ACCNAV = Column(Numeric(10, 4)) 18 | DGR = Column(String(20)) 19 | pstate = Column(String(20)) 20 | rstate = Column(String(20)) 21 | -------------------------------------------------------------------------------- /appdata/common/User.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, Integer, String 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Userinfo(Base): 11 | __tablename__ = 'userinfo' 12 | 13 | id = Column(Integer, primary_key=True) 14 | phone = Column(String(20), nullable=False) 15 | datetime = Column(String(20), nullable=False) 16 | amount = Column(Integer, nullable=False) 17 | num = Column(String(50), nullable=False) 18 | userid = Column(Integer, nullable=False) 19 | name = Column(String(10), nullable=False) 20 | -------------------------------------------------------------------------------- /crawl_fund/mappers/Fund.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, DateTime, Numeric, String 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Myfund(Base): 11 | __tablename__ = 'myfund' 12 | 13 | fcode = Column(String(20), primary_key=True, nullable=False) 14 | fname = Column(String(20)) 15 | NAV = Column(Numeric(10, 4)) 16 | ACCNAV = Column(Numeric(10, 4)) 17 | updatetime = Column(DateTime) 18 | fdate = Column(DateTime, primary_key=True, nullable=False) 19 | DGR = Column(String(20)) 20 | DGV = Column(String(20)) 21 | fee = Column(String(20)) 22 | -------------------------------------------------------------------------------- /ethCrawler/EthTradeList.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, Integer, String 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Etherscantradelist(Base): 11 | __tablename__ = 'etherscantradelist' 12 | 13 | id = Column(Integer, primary_key=True) 14 | txHash = Column(String(70, 'utf8_unicode_ci')) 15 | age = Column(String(30, 'utf8_unicode_ci')) 16 | fromadress = Column(String(42, 'utf8_unicode_ci')) 17 | to = Column(String(42, 'utf8_unicode_ci')) 18 | value = Column(String(20, 'utf8_unicode_ci')) 19 | token = Column(String(42, 'utf8_unicode_ci')) 20 | name = Column(String(50, 'utf8_unicode_ci')) 21 | -------------------------------------------------------------------------------- /ethCrawler/TradeList.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, DateTime, Integer, String 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class tradelist(Base): 11 | __tablename__ = 'tradelist' 12 | 13 | 
id = Column(Integer, primary_key=True) 14 | txHash = Column(String(70, 'utf8_unicode_ci')) 15 | blockHeight = Column(String(10, 'utf8_unicode_ci')) 16 | amount = Column(String(30, 'utf8_unicode_ci')) 17 | originatorAdress = Column(String(50, 'utf8_unicode_ci')) 18 | recevierAdress = Column(String(50, 'utf8_unicode_ci')) 19 | confirmTime = Column(DateTime) 20 | brokerage = Column(String(15, 'utf8_unicode_ci')) 21 | -------------------------------------------------------------------------------- /crawlDajiawen/goodid.txt: -------------------------------------------------------------------------------- 1 | 560734559975 2 | 549724367159 3 | 551078442907 4 | 548380875678 5 | 547316356177 6 | 563146870502 7 | 557266664952 8 | 546603177326 9 | 556584196136 10 | 546202817226 11 | 529058554339 12 | 541786219450 13 | 20201622423 14 | 558884189926 15 | 43448003366 16 | 528989787954 17 | 526945890628 18 | 556196525667 19 | 561737055846 20 | 560884655029 21 | 548519225616 22 | 531125275296 23 | 556588556661 24 | 558366035242 25 | 528754947994 26 | 555944007766 27 | 36150091939 28 | 560801413654 29 | 36589584338 30 | 564333118607 31 | 560508968944 32 | 547416061912 33 | 525751414595 34 | 562277053264 35 | 560282892865 36 | 525596083676 37 | 549823127904 38 | 533060108962 39 | 530738234731 40 | 560283404399 41 | 14365652310 42 | 536454208173 43 | 554671065906 44 | 560601761588 45 | -------------------------------------------------------------------------------- /souhuVideoUpload/upload.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | import requests 3 | 4 | def login(user_name,passwd): 5 | headers = { 6 | 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36', 7 | 'cookie': 'beans_dmp_done = 1;IPLOC = CN;SUV = 1710021626598866;reqtype = pc;gidinf = x099980109ee0ce6660204c290009a05135098259632;beans_freq = 1;lastpassport = 15626832124;jv = 4de511653f75dab9336e058a95ad09ef - qgxCfp3p1510458408529' 8 | } 9 | form_data = { 10 | 'userid': user_name, 11 | 'password': passwd, 12 | 'persistentCookie': 1, 13 | 'appid': 107405, 14 | 'callback': 'passport401_cb1510458090735' 15 | } 16 | 17 | 18 | s = requests.Session() 19 | user_name = '15626832124' 20 | passwd = 'hello2016' 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /jingdong/jingdong/spiders/price.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | import chardet 4 | import json 5 | import re 6 | from urllib.parse import urlencode 7 | from bs4 import BeautifulSoup 8 | 9 | 10 | class PriceSpider(scrapy.Spider): 11 | name = 'price' 12 | allowed_domains = ['jd.com'] 13 | 14 | def start_requests(self): 15 | commodity_url = 'https://item.jd.com/15890328841.html' 16 | commodity_id = re.findall('(\d+)', commodity_url)[0] 17 | url_end = '&area=1_2901_4135_0&cat=737,794,878&extraParam={"originid":"1"}' 18 | price_url = 'https://c0.3.cn/stock?skuId={}{}'.format( 19 | commodity_id, url_end) 20 | yield scrapy.Request(price_url) 21 | 22 | def parse(self, response): 23 | print(response.body.decode('cp1251').encode('utf8')) 24 | data = json.loads(response.text) 25 | -------------------------------------------------------------------------------- /crawl_fund/csvfiles/fund.csv: -------------------------------------------------------------------------------- 1 | 
fcode,fname,NAV,ACCNAV,updatetime,fdate,DGR,DGV,fee 2 | 000001,华夏成长,1.1900,3.5210,2017-10-04 23:12:27,2017-09-28 00:00:00,0.25%,0.0030,0.15% 3 | 000003,中海可转债A,0.8120,1.0220,2017-10-04 23:12:20,2017-09-28 00:00:00,-0.12%,-0.0010,0.08% 4 | 000004,中海可转债C,0.8150,1.0250,2017-10-04 23:12:20,2017-09-28 00:00:00,-0.12%,-0.0010,0.00% 5 | 000005,嘉实增强信用定期债券,1.0040,1.2050,2017-10-04 23:12:17,2017-09-28 00:00:00,0.00%,0.0000,0.08% 6 | 000007,鹏华国企债债券,1.1381,1.1469,2017-10-04 23:12:18,2017-09-28 00:00:00,-0.05%,-0.0006,0.08% 7 | 000008,嘉实中证500ETF联接,1.8007,1.8007,2017-10-04 23:12:21,2017-09-28 00:00:00,-0.21%,-0.0038,0.12% 8 | 000011,华夏大盘精选,12.2000,16.2800,2017-10-04 23:12:27,2017-09-28 00:00:00,0.24%,0.0290,0.15% 9 | 000014,华夏聚利债券,1.1670,1.1670,2017-10-04 23:12:20,2017-09-28 00:00:00,-0.09%,-0.0010,0.06% 10 | 000015,华夏纯债债券A,1.1700,1.2000,2017-10-04 23:12:30,2017-09-28 00:00:00,0.09%,0.0010,0.08% 11 | 000016,华夏纯债债券C,1.1480,1.1780,2017-10-04 23:12:15,2017-09-28 00:00:00,0.00%,0.0000,0.00% 12 | -------------------------------------------------------------------------------- /crawl_fund/main.py: -------------------------------------------------------------------------------- 1 | from crawl_fund.Spiders.Fund import SaveDb,getFundhtml 2 | from sqlalchemy import create_engine 3 | from crawl_fund.common.config import dburl 4 | from sqlalchemy.orm import sessionmaker 5 | from crawl_fund.mappers.Fund import Myfund 6 | import csv 7 | import pandas 8 | if __name__=='__main__': 9 | #将抓取的数据文件入库 10 | # SaveDb() 11 | #写入csv文件当中 12 | # engine = create_engine(dburl, echo=True) 13 | # mysession=sessionmaker(bind=engine)() 14 | # result=mysession.query(Myfund).limit(10).all() 15 | # with open('./csvfiles/fund.csv','w',encoding='UTF-8') as file: 16 | # writer=csv.writer(file) 17 | # writer.writerow(['fcode','fname','NAV','ACCNAV','updatetime','fdate','DGR','DGV','fee']) 18 | # for re in result: 19 | # writer.writerow([re.fcode, re.fname, re.NAV, re.ACCNAV, re.updatetime, re.fdate, re.DGR, re.DGV, re.fee]) 20 | # file.close() 21 | pd=pandas.read_csv('./csvfiles/fund.csv',dtype={'fcode':pandas.np.str}) 22 | result=pd.sort_values(by='NAV',ascending=False) 23 | print(result) -------------------------------------------------------------------------------- /crawl_fund/sql/funddetail.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Navicat MySQL Data Transfer 3 | 4 | Source Server : 5kcrm 5 | Source Server Version : 50505 6 | Source Host : localhost:3306 7 | Source Database : jijin 8 | 9 | Target Server Type : MYSQL 10 | Target Server Version : 50505 11 | File Encoding : 65001 12 | 13 | Date: 2017-10-07 22:00:34 14 | */ 15 | 16 | SET FOREIGN_KEY_CHECKS=0; 17 | 18 | -- ---------------------------- 19 | -- Table structure for `funddetail` 20 | -- ---------------------------- 21 | DROP TABLE IF EXISTS `funddetail`; 22 | CREATE TABLE `funddetail` ( 23 | `id` int(11) NOT NULL COMMENT '自增字段', 24 | `fcode` varchar(10) NOT NULL COMMENT '基金编码', 25 | `fdate` datetime DEFAULT NULL COMMENT '基金日期', 26 | `NAV` decimal(10,4) DEFAULT NULL COMMENT '单位净值', 27 | `ACCNAV` decimal(10,4) DEFAULT NULL COMMENT '累计净值', 28 | `DGR` varchar(20) DEFAULT NULL COMMENT '日增长率', 29 | `pstate` varchar(20) DEFAULT NULL COMMENT '申购状态', 30 | `rstate` varchar(20) DEFAULT NULL COMMENT '赎回状态', 31 | PRIMARY KEY (`id`) 32 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 33 | 34 | -- ---------------------------- 35 | -- Records of funddetail 36 | -- ---------------------------- 37 | 
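Not part of the repo: a minimal sketch of how the funddetail table defined above could be read back through the Funddetail mapper (crawl_fund/mappers/Detail.py), reusing the dburl and sessionmaker pattern from crawl_fund/main.py. The Detail import path and the example fund code 580005 (taken from detailurl in crawl_fund/common/config.py) are assumptions.

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from crawl_fund.common.config import dburl          # same config used by main.py
from crawl_fund.mappers.Detail import Funddetail    # mapper shown above; import path assumed

engine = create_engine(dburl, echo=False)
session = sessionmaker(bind=engine)()
# latest ten rows for one fund, newest first
rows = (session.query(Funddetail)
        .filter(Funddetail.fcode == '580005')
        .order_by(Funddetail.fdate.desc())
        .limit(10)
        .all())
for r in rows:
    print(r.fdate, r.NAV, r.ACCNAV, r.DGR, r.pstate, r.rstate)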
-------------------------------------------------------------------------------- /dingding/mappers/CarCostHistory.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, DateTime, String, Text 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Carcosthistory(Base): 11 | __tablename__ = 'carcosthistory' 12 | 13 | approvalNumber = Column(String(255), primary_key=True) 14 | headlin = Column(String(255)) 15 | approvalStatus = Column(String(255)) 16 | approvalResult = Column(String(255)) 17 | approvalTime = Column(DateTime) 18 | approvalFinshTime = Column(DateTime) 19 | initiatorsNumber = Column(String(255)) 20 | initiatorsUserID = Column(String(255)) 21 | initiatorsName = Column(String(255)) 22 | initiatorsDepartment = Column(String(255)) 23 | historicalApproverName = Column(String(255)) 24 | approvalHistory = Column(String(255)) 25 | currentProcessingName = Column(String(255)) 26 | reviewsTake = Column(String(255)) 27 | carNumber = Column(String(255)) 28 | highwaySection = Column(String(255)) 29 | mileage = Column(String(255)) 30 | oilPrice = Column(String(255)) 31 | cost = Column(String(255)) 32 | instrumenBoardPhoto = Column(Text) 33 | receiptPhoto = Column(Text) 34 | -------------------------------------------------------------------------------- /dingding/mappers/ComplainRecord.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, DateTime, String, Text 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Complainrecord(Base): 11 | __tablename__ = 'complainrecord' 12 | 13 | approvalNumber = Column(String(255), primary_key=True, nullable=False) 14 | headline = Column(String(255)) 15 | approvalStatus = Column(String(255)) 16 | approvalResult = Column(String(255)) 17 | approvalTime = Column(DateTime) 18 | approvalFinishTime = Column(DateTime) 19 | initiatorsNumber = Column(String(255)) 20 | initiatorsUserID = Column(String(255)) 21 | initiatorsName = Column(String(255)) 22 | initiatorsDepartment = Column(String(255)) 23 | historicalApproverName = Column(String(255)) 24 | approverHistory = Column(Text) 25 | currentProcessingName = Column(String(255)) 26 | reviewTake = Column(String(255)) 27 | customerName = Column(String(255)) 28 | highwaySection = Column(String(255)) 29 | list = Column(String(255), primary_key=True, nullable=False) 30 | complain = Column(String(255)) 31 | photo = Column(String(255)) 32 | accessory = Column(Text) 33 | -------------------------------------------------------------------------------- /ethCrawler/etherscantradelist.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Navicat MySQL Data Transfer 3 | 4 | Source Server : 5kcrm 5 | Source Server Version : 50505 6 | Source Host : localhost:3306 7 | Source Database : eth 8 | 9 | Target Server Type : MYSQL 10 | Target Server Version : 50505 11 | File Encoding : 65001 12 | 13 | Date: 2018-06-03 20:57:19 14 | */ 15 | 16 | SET FOREIGN_KEY_CHECKS=0; 17 | 18 | -- ---------------------------- 19 | -- Table structure for `etherscantradelist` 20 | -- ---------------------------- 21 | DROP TABLE IF EXISTS `etherscantradelist`; 22 | CREATE TABLE `etherscantradelist` ( 23 | `id` int(11) NOT NULL AUTO_INCREMENT, 24 | `txHash` varchar(70) 
COLLATE utf8_unicode_ci DEFAULT NULL, 25 | `age` varchar(30) COLLATE utf8_unicode_ci DEFAULT NULL, 26 | `fromadress` varchar(42) COLLATE utf8_unicode_ci DEFAULT NULL, 27 | `to` varchar(42) COLLATE utf8_unicode_ci DEFAULT NULL, 28 | `value` varchar(20) COLLATE utf8_unicode_ci DEFAULT NULL, 29 | `token` varchar(42) COLLATE utf8_unicode_ci DEFAULT NULL, 30 | `name` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL, 31 | PRIMARY KEY (`id`) 32 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; 33 | 34 | -- ---------------------------- 35 | -- Records of etherscantradelist 36 | -- ---------------------------- 37 | -------------------------------------------------------------------------------- /dingding/test.json: -------------------------------------------------------------------------------- 1 | # 'approvalNumber': data_list['process_instance_id'], 2 | # 'headline': data_list['title'], 3 | # 'approvalStatus': 'COMPLETED', 4 | # 'approvalResult': data_list['process_instance_result'], 5 | # 'approvalTime': data_list['create_time'], 6 | # 'approvalFinshTime': data_list['finish_time'], 7 | # 'initiatorsNumber': None, 8 | # 'initiatorsUserID': data_list['originator_userid'], 9 | # 'initiatorsName': getName(data_list['title']), 10 | # 'initiatorsDepartment': data_list['originator_dept_id'], 11 | # 'historicalApproverName': data_list['approver_userid_list']['string'], 12 | # 'approvalHistory' = data_list['approver_userid_list']['string'], 13 | # 'currentProcessingName' = 14 | # data_list['approver_userid_list']['string'][-1], 15 | # 'reviewTake' = duration(data_list['create_time'], data_list['finish_time']), ##day 16 | # -------------------------------------------------------------------------------- /ethCrawler/eth.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Navicat MySQL Data Transfer 3 | 4 | Source Server : 5kcrm 5 | Source Server Version : 50505 6 | Source Host : localhost:3306 7 | Source Database : eth 8 | 9 | Target Server Type : MYSQL 10 | Target Server Version : 50505 11 | File Encoding : 65001 12 | 13 | Date: 2018-06-02 13:45:15 14 | */ 15 | 16 | SET FOREIGN_KEY_CHECKS=0; 17 | 18 | -- ---------------------------- 19 | -- Table structure for `tradelist` 20 | -- ---------------------------- 21 | DROP TABLE IF EXISTS `tradelist`; 22 | CREATE TABLE `tradelist` ( 23 | `id` int(8) NOT NULL AUTO_INCREMENT COMMENT 'id', 24 | `txHash` varchar(70) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT '交易哈希', 25 | `blockHeight` varchar(10) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT '高度', 26 | `amount` varchar(30) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT '金额变化数量', 27 | `originatorAdress` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT '发送方地址', 28 | `recevierAdress` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT '接受者地址', 29 | `confirmTime` datetime DEFAULT NULL COMMENT '确认时间', 30 | `brokerage` varchar(15) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT '矿工费', 31 | PRIMARY KEY (`id`) 32 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; 33 | 34 | -- ---------------------------- 35 | -- Records of tradelist 36 | -- ---------------------------- 37 | -------------------------------------------------------------------------------- /dingding/mappers/DailyWorkReport.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, Date, DateTime, String, Text 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = 
declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Dailyworkreport(Base): 11 | __tablename__ = 'dailyworkreport' 12 | 13 | approvalNumber = Column(String(255), primary_key=True) 14 | headline = Column(String(255)) 15 | approvalStatus = Column(String(255)) 16 | approvalResult = Column(String(255)) 17 | approvalTime = Column(DateTime) 18 | approvalFinishTime = Column(DateTime) 19 | initiatorsNumber = Column(String(255)) 20 | initiatorsUserID = Column(String(255)) 21 | initiatorsName = Column(String(255)) 22 | initiatorsDepartment = Column(String(255)) 23 | historicalApproverName = Column(String(255)) 24 | approverHistory = Column(Text) 25 | currentProcessingName = Column(String(255)) 26 | reviewTake = Column(String(255)) 27 | highwaySection = Column(String(255)) 28 | date = Column(Date) 29 | weather = Column(String(255)) 30 | temperature = Column(String(255)) 31 | rate = Column(String(255)) 32 | ratePhoto = Column(Text) 33 | workGoing = Column(String(255)) 34 | unfinshedWork = Column(String(255)) 35 | importantEvent = Column(String(255)) 36 | photo = Column(Text) 37 | accessory = Column(Text) 38 | -------------------------------------------------------------------------------- /easy_distributed_crawler/SlaveNode/HtmlParser.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | 3 | import re 4 | from urllib.parse import urljoin 5 | 6 | from bs4 import BeautifulSoup 7 | 8 | class HtmlParse(object): 9 | 10 | ##解析网页内容抽取url和数据 11 | def parser(self,page_url,html_cont): 12 | if page_url is None or html_cont is None: 13 | return 14 | soup = BeautifulSoup(html_cont,'html.parser',from_encoding='utf-8') 15 | new_urls = self.get_new_urls(page_url,soup) 16 | new_data = self.get_new_data(page_url,soup) 17 | 18 | return new_urls,new_data 19 | ##抽取新的url集合 20 | def get_new_urls(self,page_url,soup): 21 | 22 | new_urls =set() 23 | 24 | links = soup.find_all('a',href=re.compile(r'/item/.*')) 25 | for link in links: 26 | ##提取href属性 27 | new_url = link['href'] 28 | #拼接成完整网址 29 | new_full_url = urljoin(page_url,new_url) 30 | new_urls.add(new_full_url) 31 | return new_urls 32 | 33 | ##抽取有效数据 34 | def get_new_data(self,page_url,soup): 35 | data = {} 36 | data['url'] = page_url 37 | title = soup.find('dd', class_='lemmaWgt-lemmaTitle-title').find('h1') 38 | data['title'] = title.get_text() 39 | summary = soup.find('div', class_='lemma-summary') 40 | # 获取到tag中包含的所有文版内容包括子孙tag中的内容,并将结果作为Unicode字符串返回 41 | data['summary'] = summary.get_text() 42 | return data 43 | -------------------------------------------------------------------------------- /dingding/mappers/ServerCheck.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, DateTime, String, Text 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Servercheck(Base): 11 | __tablename__ = 'servercheck' 12 | 13 | approvalNumber = Column(String(255), primary_key=True) 14 | headline = Column(String(255)) 15 | approvalStatus = Column(String(255)) 16 | approvalResult = Column(String(255)) 17 | approvalTime = Column(DateTime) 18 | approvalFinshTime = Column(DateTime) 19 | initiatorsNumber = Column(String(255)) 20 | initiatorsUserID = Column(String(255)) 21 | initiatorsName = Column(String(255)) 22 | initiatorsDepartment = Column(String(255)) 23 | historicalApproverName = Column(String(255)) 24 | approvalHistory = Column(Text) 25 | 
currentProcessingName = Column(String(255)) 26 | reviewTake = Column(String(255)) 27 | highwaySection = Column(String(255)) 28 | serverName = Column(String(255)) 29 | CPU = Column(String(255)) 30 | RAM = Column(String(255)) 31 | virusDB = Column(String(255)) 32 | virusDBphoto = Column(Text) 33 | CPUphoto = Column(Text) 34 | presentTime = Column(String(255)) 35 | presentSite = Column(String(255)) 36 | serverBrand = Column(String(255)) 37 | serverStatus = Column(String(255)) 38 | statusSign = Column(String(255)) 39 | hddSign = Column(String(255)) 40 | -------------------------------------------------------------------------------- /easy_distributed_crawler/MasterNode/DataOuput.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import codecs 4 | import time 5 | class DataOutput(object): 6 | 7 | def __init__(self): 8 | self.filepath = 'baike_%s.html' % (time.strftime("%Y_%m_%d_%H_%M_%S",time.localtime())) 9 | self.output_head(self.filepath) 10 | self.datas = [] 11 | 12 | def store_data(self,data): 13 | if data is None: 14 | return 15 | self.datas.append(data) 16 | if len(self.datas)>10: 17 | self.output_html(self.filepath) 18 | 19 | ##写入html头 20 | def output_head(self,path): 21 | fout = codecs.open(path,'w',encoding='utf-8') 22 | fout.write("") 23 | fout.write("") 24 | fout.write("") 25 | fout.close() 26 | 27 | 28 | ##将数据写入html文件中 29 | def output_html(self,path): 30 | fout = codecs.open(path,'a',encoding='utf-8') 31 | for data in self.datas: 32 | fout.write("") 33 | fout.write(""%data['url']) 34 | fout.write(""%data['title']) 35 | fout.write(""%data['summary']) 36 | fout.write("") 37 | self.datas.remove(data) 38 | fout.close() 39 | 40 | ###输出html结束 41 | def output_end(self,path): 42 | fout = codecs.open(path,'a',encoding='utf-8') 43 | fout.write("
</table>
") 44 | fout.write("") 45 | fout.write("") 46 | -------------------------------------------------------------------------------- /dingding/mappers/Returnvisit.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, DateTime, String, Text 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Returnvisit(Base): 11 | __tablename__ = 'returnvisit' 12 | 13 | approvalNumber = Column(String(255), primary_key=True) 14 | headline = Column(String(255)) 15 | approvalStatus = Column(String(255)) 16 | approvalResult = Column(String(255)) 17 | approvalTime = Column(DateTime) 18 | approvalFinishTime = Column(DateTime) 19 | initiatorsNumber = Column(String(255)) 20 | initiatorsUserID = Column(String(255)) 21 | initiatorsName = Column(String(255)) 22 | initiatorsDepartment = Column(String(255)) 23 | historicalApproverName = Column(String(255)) 24 | approverHistory = Column(Text) 25 | currentProcessingName = Column(String(255)) 26 | reviewTake = Column(String(255)) 27 | 28 | highwaySection = Column(String(255)) 29 | teamName = Column(String(255)) 30 | chargePersonName = Column(String(255)) 31 | customerName = Column(String(255)) 32 | complain = Column(String(255)) 33 | feedBack = Column(String(255)) 34 | faultComplain = Column(String(255)) 35 | dress = Column(String(255)) 36 | speed = Column(String(255)) 37 | ability = Column(String(255)) 38 | attitude = Column(String(255)) 39 | communication = Column(String(255)) 40 | accessory = Column(Text) 41 | -------------------------------------------------------------------------------- /dingding/mappers/FaultHistory.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, DateTime, String, Text 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Faulthistory(Base): 11 | __tablename__ = 'faulthistory' 12 | 13 | approvalNumber = Column(String(255), primary_key=True) 14 | headline = Column(String(255)) 15 | approvalStatus = Column(String(255)) 16 | approvalResult = Column(String(255)) 17 | approvalTime = Column(DateTime) 18 | approvalFinshTime = Column(DateTime) 19 | initiatorsNumber = Column(String(255)) 20 | initiatorsUserID = Column(String(255)) 21 | initiatorsName = Column(String(255)) 22 | initiatorsDepartment = Column(String(255)) 23 | historicalApproverName = Column(Text) 24 | approvalHistory = Column(Text) 25 | currentProcessingName = Column(String(255)) 26 | reviewTake = Column(String(255)) 27 | highwaySection = Column(String(255)) 28 | controalStation = Column(String(255)) 29 | Station = Column(String(255)) 30 | lane = Column(String(255)) 31 | faultType = Column(String(255)) 32 | faultPhenomenon = Column(String(255)) 33 | otherPhenomenon = Column(String(255)) 34 | result = Column(String(255)) 35 | presentTime = Column(String(255)) 36 | presentSite = Column(String(255)) 37 | photo = Column(Text) 38 | photo2 = Column(Text) 39 | photo3 = Column(Text) 40 | photo4 = Column(Text) 41 | -------------------------------------------------------------------------------- /dingding/mappers/ImportantEvent.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, DateTime, String, Text 3 | from sqlalchemy.ext.declarative import declarative_base 4 
| 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Importantevent(Base): 11 | __tablename__ = 'importantevent' 12 | 13 | approvalNumber = Column(String(255), primary_key=True) 14 | headline = Column(String(255)) 15 | approvalStatus = Column(String(255)) 16 | approvalResult = Column(String(255)) 17 | approvalTime = Column(DateTime) 18 | approvalFinishTime = Column(DateTime) 19 | initiatorsNumber = Column(String(255)) 20 | initiatorsUserID = Column(String(255)) 21 | initiatorsName = Column(String(255)) 22 | InitiatorsDepartment = Column(String(255), nullable=False) 23 | historicalApproverName = Column(Text) 24 | approvalHistory = Column(Text) 25 | currentProcessingName = Column(String(255)) 26 | reviewTake = Column(String(255)) 27 | department = Column(String(255)) 28 | highwaySection = Column(String(255)) 29 | eventTime = Column(String(255)) 30 | FinshTime = Column(String(255)) 31 | influenceTime = Column(String(255)) 32 | eventSite = Column(String(255)) 33 | eventType = Column(String(255)) 34 | eventDescription = Column(String(255)) 35 | influence = Column(String(255)) 36 | method = Column(String(255)) 37 | loss = Column(String(255)) 38 | lossCapital = Column(String(255)) 39 | photo = Column(Text) 40 | accessory = Column(String(255)) 41 | -------------------------------------------------------------------------------- /dingding/mappers/InspectionRecord.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, DateTime, String 3 | from sqlalchemy.ext.declarative import declarative_base 4 | 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Inspectionrecord(Base): 11 | __tablename__ = 'inspectionrecord' 12 | 13 | type = Column(String(255)) 14 | approvalNumber = Column(String(255), primary_key=True) 15 | headline = Column(String(255)) 16 | approvalStatus = Column(String(255)) 17 | approvalResult = Column(String(255)) 18 | approvalTime = Column(DateTime) 19 | approvalFinshTime = Column(DateTime) 20 | initiatorsNumber = Column(String(255)) 21 | initiatorsUserID = Column(String(255)) 22 | initiatorsName = Column(String(255)) 23 | initiatorsDepartment = Column(String(255)) 24 | historicalApproverName = Column(String(255)) 25 | approvalHistory = Column(String(255)) 26 | currentProcessingName = Column(String(255)) 27 | reviewTake = Column(String(255)) 28 | highwaySection = Column(String(255)) 29 | recordType = Column(String(255)) 30 | site = Column(String(255)) 31 | otherSite = Column(String(255)) 32 | temperature = Column(String(255)) 33 | humidness = Column(String(255)) 34 | jobContent = Column(String(255)) 35 | foundFault = Column(String(255)) 36 | presentTime = Column(String(255)) 37 | presentSite = Column(String(255)) 38 | photo = Column(String(255)) 39 | photo2 = Column(String(255)) 40 | photo3 = Column(String(255)) 41 | photo4 = Column(String(255)) 42 | -------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/30.txt: -------------------------------------------------------------------------------- 1 |
净值日期 | 单位净值 | 累计净值 | 日增长率 | 申购状态 | 赎回状态 | 分红送配
2015-05-22 | 1.0770 | 1.0770 | | 封闭期 | 封闭期 |
2015-05-15 | 1.0210 | 1.0210 | | 封闭期 | 封闭期 |
2015-05-08 | 1.0220 | 1.0220 | | 封闭期 | 封闭期 |
2015-04-30 | 1.0210 | 1.0210 | | 封闭期 | 封闭期 |
2015-04-24 | 1.0110 | 1.0110 | | 封闭期 | 封闭期 |
2015-04-17 | 1.0150 | 1.0150 | | 封闭期 | 封闭期 |
2015-04-10 | 1.0090 | 1.0090 | | 封闭期 | 封闭期 |
2015-04-07 | 1.0000 | 1.0000 | | 封闭期 | 封闭期 |
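Not part of the repo: a short sketch of how one of these saved detail pages could be parsed into rows using BeautifulSoup and the getText helper from crawl_fund/common/function.py, assuming the saved file keeps a plain tr/td table layout like the data above and that the script is run the same way as crawl_fund/main.py (relative paths from the crawl_fund directory, package importable).

from bs4 import BeautifulSoup
from crawl_fund.common.function import getText   # helper shown earlier in this repo

with open('./htmls/details/001112/30.txt', encoding='utf-8') as f:
    soup = BeautifulSoup(f.read(), 'html.parser')

for tr in soup.find_all('tr'):
    cells = tr.find_all('td')
    if len(cells) >= 6:   # data rows only; the header row uses th cells
        fdate, nav, accnav, dgr, pstate, rstate = [getText(c) for c in cells[:6]]
        print(fdate, nav, accnav, dgr, pstate, rstate)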
-------------------------------------------------------------------------------- /appdata/userinfo (2).sql: -------------------------------------------------------------------------------- 1 | -- phpMyAdmin SQL Dump 2 | -- version 4.7.4 3 | -- https://www.phpmyadmin.net/ 4 | -- 5 | -- Host: 127.0.0.1 6 | -- Generation Time: 2017-11-18 04:46:36 7 | -- 服务器版本: 10.1.26-MariaDB 8 | -- PHP Version: 7.0.23 9 | 10 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO"; 11 | SET AUTOCOMMIT = 0; 12 | START TRANSACTION; 13 | SET time_zone = "+00:00"; 14 | 15 | 16 | /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; 17 | /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; 18 | /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; 19 | /*!40101 SET NAMES utf8mb4 */; 20 | 21 | -- 22 | -- Database: `appdata` 23 | -- 24 | 25 | -- -------------------------------------------------------- 26 | 27 | -- 28 | -- 表的结构 `userinfo` 29 | -- 30 | 31 | CREATE TABLE `userinfo` ( 32 | `id` int(11) NOT NULL, 33 | `phone` varchar(20) NOT NULL, 34 | `datetime` varchar(20) NOT NULL, 35 | `amount` int(11) NOT NULL, 36 | `num` varchar(50) NOT NULL, 37 | `userid` int(11) NOT NULL, 38 | `name` varchar(10) NOT NULL 39 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 40 | 41 | -- 42 | -- Indexes for dumped tables 43 | -- 44 | 45 | -- 46 | -- Indexes for table `userinfo` 47 | -- 48 | ALTER TABLE `userinfo` 49 | ADD PRIMARY KEY (`id`); 50 | 51 | -- 52 | -- 在导出的表使用AUTO_INCREMENT 53 | -- 54 | 55 | -- 56 | -- 使用表AUTO_INCREMENT `userinfo` 57 | -- 58 | ALTER TABLE `userinfo` 59 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT; 60 | COMMIT; 61 | 62 | /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; 63 | /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; 64 | /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## [简单分布式多进程爬虫](https://github.com/shisiying/crawer_python/tree/master/easy_distributed_crawler) 3 | 4 | 简单分布式爬虫项目,该项目,分布式采用简单的主从模式,采用分布式进程和进程间的通信,同时,涵盖了普通爬虫应有的几个模块,URL管理模块,Html解析模块,Html下载模块,数据存储模块,爬虫调度模块 5 | 6 | ## [基金爬虫](https://github.com/shisiying/crawer_python/blob/master/crawl_fund/Spiders/FundDetail.py) 7 | 8 | This is a demo for crawling the website 'http://fund.eastmoney.com/fund.html' 9 | at this demo you can learn how to use the selenium,beautifulsoup,sqlacheme,process,and manager modules 10 | 11 | ## [豆瓣模拟登陆人工打码自动顶贴](https://github.com/shisiying/crawer_python/blob/master/doubandingtie/login_douban.py) 12 | 13 | the robot for the douban comment 14 | 15 | ## [多线程整站下载pdf和dwg文件](https://github.com/shisiying/crawer_python/blob/master/pdfdownload/pdfdown_mutiprocess.py) 16 | 17 | the crawler for the website http://www.jameshardie.co.nz/specifiers/cad-library 18 | 19 | ## [appapi数据获取批量入库](https://github.com/shisiying/crawer_python/blob/master/appdata/crawlinfo.py) 20 | the crawler for the app api 21 | 22 | ## [钉钉数据同步多线程更新入库](https://github.com/shisiying/crawer_python/blob/master/dingding/main.py) 23 | the auto crawler for dingding data 24 | 25 | ## [使用selnium+chrome+asyncio+aiohttp多进程异步抓取今日头条整站数据](https://github.com/shisiying/crawer_python/blob/master/aiohttptoutiao/toutiao.py) 26 | 今日头条整站数据 27 | 28 | ## [使用selnium+chrome抓取淘宝大家问的评论数据](https://github.com/shisiying/crawer_python/tree/master/crawlDajiawen) 29 | 淘宝商品大家问的评论数据 30 | 31 | ## 
[使用selnium+chrome抓取商品阿里试用报告的数据](https://github.com/shisiying/crawer_python/tree/master/alishiyong) 32 | 阿里试用报告的用户评分及其他数据 33 | 34 | ## [post提交json参数分页抓取区块链交易记录](https://github.com/shisiying/crawer_python/blob/master/ethCrawler/crawler.py) 35 | 稍微改造可以抓取整站需要抓取的交易记录 36 | -------------------------------------------------------------------------------- /dingding/mappers/CostApplication.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from sqlalchemy import Column, DateTime, String, Text,Integer 3 | from sqlalchemy.ext.declarative import declarative_base 4 | '''费用申请表''' 5 | 6 | Base = declarative_base() 7 | metadata = Base.metadata 8 | 9 | 10 | class Costapplication(Base): 11 | __tablename__ = 'costapplication' 12 | 13 | costType = Column(String(255)) 14 | approvalNumber = Column(String(255), primary_key=True, nullable=False) 15 | headlin = Column(String(255)) 16 | approvalStatus = Column(String(255)) 17 | approvalResult = Column(String(255)) 18 | approvalTime = Column(DateTime) 19 | approvalFinshTime = Column(DateTime) 20 | initiatorsNumber = Column(String(255)) 21 | initiatorsUserID = Column(String(255)) 22 | initiatorsName = Column(String(255)) 23 | InitiatorsDepartment = Column(String(255)) 24 | historicalApproverName = Column(Text) 25 | approvalHistory = Column(Text) 26 | currentProcessingName = Column(String(255)) 27 | reviewsTake = Column(String(255)) 28 | companyName = Column(String(255)) 29 | highwaySection = Column(String(255)) 30 | type = Column(String(255)) 31 | expensesStatement = Column(Integer, primary_key=True, nullable=False) 32 | projectName = Column(String(255)) 33 | tradeMark = Column(String(255)) 34 | specificationModels = Column(String(255)) 35 | units = Column(String(255)) 36 | amount = Column(String(255)) 37 | unitPrice = Column(String(255)) 38 | totalPrice = Column(String(255)) 39 | stationName = Column(String(255)) 40 | laneNumber = Column(String(255)) 41 | useLocation = Column(String(255)) 42 | remark = Column(String(255)) 43 | photo = Column(Text) 44 | otherAccessory = Column(String(Text)) 45 | applicaionReason = Column(String(255)) 46 | -------------------------------------------------------------------------------- /easy_distributed_crawler/MasterNode/URlManager.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import pickle 3 | import hashlib 4 | 5 | class UrlManager(object): 6 | def __init__(self): 7 | ## 未爬取URL集合 8 | self.new_urls = self.load_progress('new_urls.txt') 9 | ## 已经爬取的URl集合 10 | self.old_urls = self.load_progress('old_urls.txt') 11 | 12 | ##判断是否有未爬取的Url 13 | def has_new_url(self): 14 | return self.new_url_size()!=0 15 | 16 | ##获取未爬取URl集合的大小 17 | def new_url_size(self): 18 | return len(self.new_urls) 19 | 20 | ## 获取未爬取的URl 21 | def get_new_url(self): 22 | new_url = self.new_urls.pop() 23 | m = hashlib.md5() 24 | m.update(new_url) 25 | self.old_urls.add(m.hexdigest()[8:-8]) 26 | return new_url 27 | 28 | ## 新的url添加到未爬取的url集合中 29 | def add_new_url(self,url): 30 | if url is None: 31 | return 32 | m = hashlib.md5() 33 | m.update(url) 34 | url_md5 = m.hexdigest()[8:-8] 35 | if url not in self.new_urls and url_md5 not in self.old_urls: 36 | self.new_urls.add(url) 37 | 38 | ##将新的url添加到未爬取的url集合 39 | def add_new_urls(self,urls): 40 | 41 | if urls is None or len(urls)==0: 42 | return 43 | 44 | for url in urls: 45 | self.add_new_url(url) 46 | 47 | ##获取已爬取url集合大小 48 | def old_url_size(self): 49 | return len(self.old_urls) 50 | 51 | ##保存进度 52 | def 
save_progress(self,path,data): 53 | with open(path,'wb') as f: 54 | pickle.dump(data,f) 55 | 56 | ###从本地文件加载进度 57 | def load_progress(self,path): 58 | print('[+]从文件中加载进度:%s'% path) 59 | try: 60 | with open(path,'rb') as f: 61 | tmp = pickle.load(f) 62 | return tmp 63 | except: 64 | print("【!】无进度文件,创建: %s" % path) 65 | return set() -------------------------------------------------------------------------------- /easy_distributed_crawler/SlaveNode/SlaveWork.py: -------------------------------------------------------------------------------- 1 | from multiprocessing.managers import BaseManager 2 | 3 | from easy_distributed_crawler.SlaveNode.HtmlDownloader import HtmlDownloader 4 | from easy_distributed_crawler.SlaveNode.HtmlParser import HtmlParse 5 | 6 | class SlaveWork(object): 7 | 8 | def __init__(self): 9 | 10 | #初始化分布式进程中的工作节点的链接工作 11 | #实现第一步,使用basemanager注册获取queue的方法名称 12 | BaseManager.register('get_task_queue') 13 | BaseManager.register('get_result_queue') 14 | 15 | ##实现第二步,连接到服务器 16 | server_addr = '127.0.0.1' 17 | # 端口和验证口令注意保持与服务进程设置的完全一致: 18 | self.m = BaseManager(address=(server_addr, 8081), authkey='seven') 19 | # 从网络连接: 20 | self.m.connect() 21 | 22 | ##实现第三步 23 | self.task = self.m.get_task_queue() 24 | self.result = self.m.get_result_queue() 25 | 26 | ##初始化网页下载器和解析器 27 | self.downloader = HtmlDownloader() 28 | self.parser = HtmlParse() 29 | 30 | def crawl(self): 31 | while(True): 32 | try: 33 | if not self.task.empty(): 34 | url = self.task.get() 35 | if url =='end': 36 | print("控制节点通知爬虫节点停止工作") 37 | self.result.put({'new_urls':'end','data':'end'}) 38 | return 39 | print('爬虫节点正在解析:%s' % url.encode('utf-8')) 40 | content = self.downloader.download(url) 41 | new_urls, data = self.parser.parser(url, content) 42 | self.result.put({"new_urls": new_urls, "data": data}) 43 | except EOFError: 44 | print("连接工作节点失败") 45 | return 46 | except Exception: 47 | print('Crawl fali ') 48 | 49 | if __name__=="__main__": 50 | spider = SlaveWork() 51 | spider.crawl() -------------------------------------------------------------------------------- /jingdong/jingdong/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class JingdongSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(self, response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(self, response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 
35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(self, response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(self, start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | -------------------------------------------------------------------------------- /miaosha/miaosha/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class MiaoshaSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(self, response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(self, response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(self, response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(self, start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 
52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | -------------------------------------------------------------------------------- /1024/down.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | from bs4 import BeautifulSoup 4 | from multiprocessing import Process 5 | import sys 6 | import time 7 | sys.setrecursionlimit(1000000) #例如这里设置为一百万 8 | 9 | url = [ 10 | 'https://ns.postcc.us/htm_data/8/1711/2813398.html' 11 | ] 12 | 13 | def getImageUrl(url): 14 | header = { 15 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36', 16 | } 17 | time.sleep(2) 18 | html = requests.get(url,headers=header) 19 | html.encoding = 'gbk' 20 | Soup = BeautifulSoup(html.text,'lxml') 21 | title = Soup.title.get_text().split('-')[0].split(' ')[0] 22 | imgsrc = Soup.select('input[type="image"]') 23 | return {'title':title,'imgsrcs':imgsrc} 24 | 25 | def downloadImg(imageLists,title,range_list): 26 | 27 | header = { 28 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36', 29 | } 30 | if not os.path.exists('img/'+title): 31 | os.makedirs('img/'+title) 32 | print("创建文件夹--{}成功".format(title)) 33 | for imglist in range_list: 34 | try: 35 | img = imageLists[imglist] 36 | except: 37 | pass 38 | print("正在下载图片"+str(imglist)) 39 | img_response = requests.get(img.get('src'), stream=True, headers=header) 40 | with open('img/'+title + '/' + str(imglist) + '.jpg', 'wb') as img_file: 41 | img_file.write(img_response.content) 42 | print("下载图片" + str(imglist)+'成功') 43 | 44 | def run(imageLists): 45 | blocks = range(1, len(imageLists['imgsrcs']) + 1) 46 | step = 10 47 | ##将数据分段,实行多线程下载 48 | range_lists = [blocks[x:x + step] for x in range(0, len(blocks), step)] 49 | processlist = [] 50 | for range_list in range_lists: 51 | p = Process(target=downloadImg, args=(imageLists['imgsrcs'],imageLists['title'], range_list)) 52 | processlist.append(p) 53 | for ps in processlist: 54 | ps.start() 55 | 56 | if __name__ == '__main__': 57 | for ll in url: 58 | imglist = getImageUrl(ll) 59 | run(imglist) 60 | -------------------------------------------------------------------------------- /dingding/main.py: -------------------------------------------------------------------------------- 1 | from dingapi import writeAccessToken,getAccessToken,insertIntoCostapplication,insertCarCostHistory,insertComplainRocord,insertDailyWorkReport,insertFaultHistory,insertImportantEvent,insertServerCheck,inspectionRecord,insertReturnvisit 2 | from common.config import corpid,corpsecret 3 | from dingapi import sendMessage 4 | 5 | import time 6 | import threading 7 | import datetime 8 | 9 | if __name__ == '__main__': 10 | 11 | 12 | end_time = datetime.datetime.now() 13 | d1 = end_time 14 | end_time = time.mktime(end_time.timetuple())*1000 15 | start_time = d1 - datetime.timedelta(days=60) 16 | start_time = time.mktime(start_time.timetuple())*1000 17 | 18 | start_time = str(start_time).split('.')[0] 19 | end_time = str(end_time).split('.')[0] 20 | # sendMessage('开始爬虫','爬取爬取两个月以来的数据') 21 | 22 | AccessToken = getAccessToken() 23 | 24 | #多綫程執行不同的入庫 25 | threads = [] 26 | t1 = threading.Thread(target=insertIntoCostapplication, args=(start_time, end_time, AccessToken)) 27 | threads.append(t1) 28 | # t2 = threading.Thread(target=insertCarCostHistory, 
args=(start_time, end_time, AccessToken)) 29 | # threads.append(t2) 30 | # t3 = threading.Thread(target=insertComplainRocord, args=(start_time, end_time, AccessToken)) 31 | # threads.append(t3) 32 | # t4 = threading.Thread(target=insertDailyWorkReport, args=(start_time, end_time, AccessToken)) 33 | # threads.append(t4) 34 | # t5 = threading.Thread(target=insertFaultHistory, args=(start_time, end_time, AccessToken)) 35 | # threads.append(t5) 36 | # t6 = threading.Thread(target=insertImportantEvent, args=(start_time, end_time, AccessToken)) 37 | # threads.append(t6) 38 | # t7 = threading.Thread(target=insertServerCheck, args=(start_time, end_time, AccessToken)) 39 | # threads.append(t7) 40 | # t8 = threading.Thread(target=inspectionRecord, args=(start_time, end_time, AccessToken)) 41 | # threads.append(t8) 42 | # t9 = threading.Thread(target=insertReturnvisit, args=(start_time, end_time, AccessToken)) 43 | # threads.append(t9) 44 | for t in threads: 45 | t.start() 46 | for t in threads: 47 | t.join() 48 | 49 | # sendMessage('爬虫结束', '两个月的数据已经更新完毕') 50 | 51 | print('Done!') -------------------------------------------------------------------------------- /dingding/common/functions.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import re 3 | 4 | def duration(start_time,end_time): 5 | a = datetime.strptime(str(start_time), "%Y-%m-%d %H:%M:%S") 6 | b = datetime.strptime(str(end_time), "%Y-%m-%d %H:%M:%S") 7 | return (b-a).days 8 | 9 | def getName(title): 10 | try: 11 | name = re.search('(\w+)的', title).group(1) 12 | except: 13 | name =None 14 | return name 15 | 16 | def getAccsory(data): 17 | if 'value' in data: 18 | res = data['value'] 19 | else: 20 | res = None 21 | return res 22 | 23 | def getProjectName(data): 24 | if data['label'] == '项目名称': 25 | return data['value'] 26 | elif data['label']=='设备名称': 27 | return data['value'] 28 | else: 29 | return None 30 | 31 | def getTradeMark(data): 32 | if data['label'] == '品牌': 33 | return data['value'] 34 | else: 35 | return None 36 | 37 | def getSpecificationModels(data): 38 | if data['label'] == '规格/型号': 39 | return data['value'] 40 | else: 41 | return None 42 | 43 | def getUnits(data): 44 | if data['label'] == '单位': 45 | return data['value'] 46 | else: 47 | return None 48 | 49 | def getAmount(data): 50 | if data['label'] == '数量': 51 | return data['value'] 52 | else: 53 | return None 54 | 55 | def getUnitPrice(data): 56 | if data['label'] == '单价(元)': 57 | return data['value'] 58 | else: 59 | return None 60 | 61 | def getTotalPrice(data): 62 | if data['label'] == '合计金额': 63 | return data['value'] 64 | else: 65 | return None 66 | 67 | def getStationName(data): 68 | if data['label'] == '站名': 69 | return data['value'] 70 | else: 71 | return None 72 | 73 | def getLaneNumber(data): 74 | if data['label'] == '车道号': 75 | return data['value'] 76 | else: 77 | return None 78 | 79 | def getUseLocation(data): 80 | if data['label'] == '使用位置': 81 | return data['value'] 82 | else: 83 | return None 84 | 85 | def getRemark(data): 86 | if data['label'] == '备注': 87 | return data['value'] 88 | else: 89 | return None 90 | 91 | def getPhoto(data): 92 | if data['label'] == '报送照片': 93 | return data['value'] 94 | else: 95 | return None 96 | -------------------------------------------------------------------------------- /appdata/crawlinfo.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | __author__ = 'seven' 3 | __date__ = '2017/11/17 
20:51' 4 | 5 | import json 6 | import time 7 | import requests 8 | from sqlalchemy import create_engine 9 | from sqlalchemy.orm import sessionmaker 10 | from appdata.common.User import Userinfo 11 | 12 | 13 | ##target url 14 | userinfourl = 'http://api.renrengyw.com/Api/Userv9/recomLog' 15 | 16 | ##your head 17 | heads = { 18 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36', 19 | } 20 | ## database connect 21 | dburl="mysql+pymysql://root:[密码]@localhost/[数据库名字]?charset=utf8" 22 | engine = create_engine(dburl,echo=True) 23 | mysession = sessionmaker(bind=engine)() 24 | 25 | ##获取数据 26 | def getdatafromuser(heads,userid,page): 27 | 28 | taget_url = userinfourl+"?p={}&logintype=1&userid={}".format(page,userid) 29 | header = { 30 | 'User-Agent':heads['User-Agent'], 31 | } 32 | response = requests.get(url=taget_url,headers=header) 33 | return response.json() 34 | 35 | ##批量插入数据库 36 | def insertdata(user_data,userid): 37 | datalist = [] 38 | for user_data in user_data: 39 | user = Userinfo(phone=user_data['phone'],datetime=user_data['datetime'],amount=user_data['amount'],num=user_data['num'],userid=userid,name=user_data['name']) 40 | ##构造数据库实体化对象列表,方便批量插入 41 | datalist.append(user) 42 | # 批量插入 43 | mysession.add_all(datalist) # 批量新增 44 | mysession.commit() 45 | mysession.close() 46 | 47 | if __name__ == '__main__': 48 | ##genrate userid 49 | for userid in range(1,200000): 50 | print('userid') 51 | print(userid) 52 | ##set True 53 | flag = True 54 | ##set page =1 55 | page = 1 56 | ##genrate 57 | while flag: 58 | print('page:') 59 | print(page) 60 | return_data = getdatafromuser(heads=heads,userid=userid,page=page) 61 | ##data is empty,set page flag False 62 | if len(return_data['result']['list'])==0: 63 | flag = False 64 | else: 65 | ##page+1 66 | page =page +1 67 | ##数据批量入库 68 | insertdata(return_data['result']['list'],userid) 69 | ##延时3秒 70 | time.sleep(3) 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /ethCrawler/crawler.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from sqlalchemy import create_engine 4 | from sqlalchemy.orm import sessionmaker 5 | from TradeList import tradelist 6 | 7 | import re 8 | 9 | dburl="mysql+pymysql://root:123@localhost/eth?charset=utf8" 10 | 11 | 12 | ##初始化数据库 13 | engine = create_engine(dburl, echo=True) 14 | 15 | def sendPost(page): 16 | 17 | payload ={'address':'0xc38e2669cc249748eab2c86e9e371481a1919293','currency':'ETH','page':page,'pageSize':20} 18 | headers = { 19 | 'Host': 'scan-api.spancer.cn', 20 | 'Accept': 'application/json, text/plain, */*', 21 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36C', 22 | 'Content-Type': 'application/json;charset=UTF-8', 23 | 'Accept-Language':'zh-CN,zh;q=0.9', 24 | 'Accept-Encoding':'gzip, deflate', 25 | 'Origin':'http://www.qukuai.com', 26 | } 27 | r = requests.post('http://scan-api.spancer.cn//v1/address/getInfo',data=json.dumps(payload),headers=headers) 28 | result = json.loads(r.text) 29 | 30 | if result['code']==200 and len(result['data']['tradeList']): 31 | return result['data']['tradeList'] 32 | else: 33 | return None 34 | def saveData(data): 35 | ##批量插入 36 | ##初始化数据库连接 37 | mysession = sessionmaker(bind=engine)() 38 | dataList = [] 39 | tradeRow={} 40 | for item in data: 41 | 42 | ###判断交易哈希是否存在 43 | res = 
mysession.query(tradelist).filter_by( 44 | txHash=item['txHash']).all() 45 | if len(res) != 0: 46 | continue 47 | if int(item['confirmCount'])>5 and float(re.split('[+-]',item['amount'])[1])>0: 48 | tradeRow['txHash'] = item['txHash'] 49 | tradeRow['blockHeight'] = item['blockHeight'] 50 | tradeRow['amount'] = item['amount'] 51 | tradeRow['confirmTime'] = item['confirmTime'] 52 | tradeRow['originatorAdress'] = item['inList'][0]['address'] 53 | tradeRow['recevierAdress'] = item['outList'][0]['address'] 54 | tradeRow['brokerage'] = item['brokerage'] 55 | treadList = tradelist(**tradeRow) 56 | dataList.append(treadList) 57 | 58 | mysession.add_all(dataList) # 批量新增 59 | mysession.commit() 60 | mysession.close() 61 | 62 | 63 | def main(): 64 | for page in range(1,9999): 65 | data = sendPost(page) 66 | if data!=None: 67 | saveData(data) 68 | else: 69 | exit() 70 | 71 | main() -------------------------------------------------------------------------------- /pdfdownload/pdfdown.py: -------------------------------------------------------------------------------- 1 | __author__ = 'seven' 2 | import requests 3 | import codecs 4 | import json 5 | import os 6 | import re 7 | ''' 8 | 单进程下载 9 | ''' 10 | header = { 11 | 'Referer': 'http://www.jameshardie.co.nz/specifiers/cad-library', 12 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36' 13 | } 14 | 15 | ##将要获取的数据源写入文件当中 16 | def writedatatojson(): 17 | data_response = requests.get('http://cdnmaster.smartb.im/staging/td/jh/scripts/databoom.js',headers = header) 18 | datatable = re.search('my.dt=(.)*;my',data_response.text).group(0).split(';my')[0] 19 | datatable = json.loads(datatable[7:]) 20 | with codecs.open('data.json', 'w') as file: 21 | file.write(json.dumps(datatable)) 22 | file.close() 23 | ##从文件当中获取json数据 24 | def getdatafromjson(): 25 | with open('data.json') as json_file: 26 | data = json.load(json_file) 27 | return data 28 | ##下载文件 29 | def download(category,file_name,pdf_url,dwg_url,gif_url): 30 | #新建文件夹 31 | if not os.path.exists(category): 32 | os.makedirs(category) 33 | print("创建文件夹成功") 34 | #下载pdf 35 | print("正在下载pdf") 36 | pdf_response = requests.get(pdf_url,stream=True,headers = header) 37 | with open(category+'/'+file_name+'.pdf','wb') as pdf_file: 38 | pdf_file.write(pdf_response.content) 39 | print("pdf下载完成") 40 | 41 | print("正在下载dwg") 42 | dwg_response = requests.get(dwg_url,stream=True,headers = header) 43 | with open(category+'/'+file_name + '.dwg', 'wb') as dwg_file: 44 | dwg_file.write(dwg_response.content) 45 | print("dwg下载完成") 46 | 47 | print("正在下载gif") 48 | gif_response = requests.get(gif_url,stream=True,headers = header) 49 | with open(category+'/'+file_name + '.gif', 'wb') as gif_file: 50 | gif_file.write(gif_response.content) 51 | print("gif下载完成") 52 | 53 | if __name__ == '__main__': 54 | baseurl = 'http://cdnmaster.smartb.im/staging/td/jh/cadbim/' 55 | current_dir = os.getcwd() 56 | writedatatojson() 57 | datas = getdatafromjson() 58 | for data in datas[1:]: 59 | os.chdir(os.path.join(current_dir)) 60 | category = data[-4] 61 | file_name = str(data[-1]).replace(category+'/','') 62 | down_url =str(data[-1]) 63 | #pdf 下载链接 64 | pdf_url = '%s%s.pdf'%(baseurl,'pdf/'+down_url) 65 | #dwg下载链接 66 | dwg_url = '%s%s.dwg'%(baseurl,'dwg/'+down_url) 67 | #gif下载链接 68 | gif_url = '%s%s.gif'%(baseurl,'thumbs/'+down_url) 69 | download(category,file_name,pdf_url,dwg_url,gif_url) 70 | 71 | 72 | 
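pdfdown.py above requests each file with stream=True but then reads response.content, which still loads the entire body into memory before writing it, so the streaming flag has no effect. When the PDF/DWG files are large, writing in chunks keeps memory flat; a minimal sketch of that pattern follows (the helper name, chunk size and timeout are illustrative, not part of this repo):

```python
import requests


def save_stream(url, path, headers=None, chunk_size=64 * 1024):
    """Stream a remote file to disk without holding the whole body in memory."""
    with requests.get(url, stream=True, headers=headers, timeout=30) as resp:
        resp.raise_for_status()      # fail loudly on 4xx/5xx instead of saving an error page
        with open(path, 'wb') as fh:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                if chunk:            # skip keep-alive chunks
                    fh.write(chunk)
```

The same helper could replace the three near-identical pdf/dwg/gif download blocks in pdfdown.py and in pdfdown_mutiprocess.py further down.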
-------------------------------------------------------------------------------- /ethCrawler/etherEumCrawler.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import re 4 | import logging 5 | import datetime 6 | 7 | from sqlalchemy import create_engine 8 | from sqlalchemy.orm import sessionmaker 9 | from EthTradeList import Etherscantradelist 10 | 11 | dburl="mysql+pymysql://root:hello2016@localhost/eth?charset=utf8" 12 | 13 | 14 | ##初始化数据库 15 | engine = create_engine(dburl, echo=True) 16 | mysession = sessionmaker(bind=engine)() 17 | 18 | logger = logging.getLogger(__name__) 19 | logger.setLevel(level = logging.INFO) 20 | handler = logging.FileHandler("log.txt") 21 | handler.setLevel(logging.INFO) 22 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 23 | handler.setFormatter(formatter) 24 | logger.addHandler(handler) 25 | 26 | token ='0x0d0707963952f2fba59dd06f2b425ace40b492fe' 27 | page =1 28 | 29 | def sendRequest(token,page): 30 | targetUrl = 'https://etherscan.io/tokentxns?a={token}&ps=100&p={page}'.format(token=token,page=page) 31 | headers ={ 32 | 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36', 33 | 'Accept':'*/*' 34 | } 35 | r= requests.get(targetUrl,headers=headers) 36 | if r.status_code==200: 37 | soup = BeautifulSoup(r.content,'lxml') 38 | return soup.find(name='table',class_='table table-hover') 39 | else: 40 | logger.info(targetUrl+' reposne is not 200') 41 | return False 42 | 43 | def parseHtml(htmlData): 44 | 45 | 46 | if htmlData!=None: 47 | tradeRow = {} 48 | dataList = [] 49 | 50 | for datatr in htmlData.find_all(name='tr')[1:]: 51 | tdRow = datatr.find_all(name='td') 52 | 53 | ###判断交易哈希是否存在 54 | res = mysession.query(Etherscantradelist).filter_by( 55 | txHash=tdRow[0].get_text().strip()).all() 56 | if len(res) != 0: 57 | continue 58 | 59 | tradeRow['txHash'] = tdRow[0].get_text().strip() 60 | tradeRow['age'] = datetime.datetime.strptime(tdRow[1].span['title'].strip(),'%b-%d-%Y %I:%M:%S %p') 61 | tradeRow['fromadress'] = tdRow[2].get_text().strip() 62 | tradeRow['to'] = tdRow[4].get_text().strip() 63 | tradeRow['value'] = tdRow[5].get_text().replace(',','').strip() 64 | tradeRow['token'] = re.match('/token/(.+)\?',tdRow[6].a['href']).group(1).strip() 65 | tradeRow['name'] = str(tdRow[6].get_text()).lower().strip() 66 | treadList = Etherscantradelist(**tradeRow) 67 | dataList.append(treadList) 68 | return dataList 69 | 70 | 71 | def saveToDataBase(dataModel): 72 | mysession.add_all(dataModel) # 批量新增 73 | mysession.commit() 74 | mysession.close() 75 | 76 | def main(): 77 | pages = 1000 78 | for page in range(1,pages+1): 79 | saveToDataBase(parseHtml(sendRequest(token,page))) 80 | 81 | if __name__ == '__main__': 82 | main() 83 | -------------------------------------------------------------------------------- /pdfdownload/pdfdown_mutiprocess.py: -------------------------------------------------------------------------------- 1 | __author__ = 'seven' 2 | import requests 3 | import codecs 4 | import json 5 | import os 6 | import re 7 | from multiprocessing import Process 8 | 9 | ''' 10 | 分割任务多进程程下载 11 | ''' 12 | header = { 13 | 'Referer': 'http://www.jameshardie.co.nz/specifiers/cad-library', 14 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36' 15 | } 16 | baseurl = 
'http://cdnmaster.smartb.im/staging/td/jh/cadbim/' 17 | 18 | ##将要获取的数据源写入文件当中 19 | def writedatatojson(): 20 | data_response = requests.get('http://cdnmaster.smartb.im/staging/td/jh/scripts/databoom.js',headers = header) 21 | datatable = re.search('my.dt=(.)*;my',data_response.text).group(0).split(';my')[0] 22 | datatable = json.loads(datatable[7:]) 23 | with codecs.open('data.json', 'w') as file: 24 | file.write(json.dumps(datatable)) 25 | file.close() 26 | ##从文件当中获取json数据 27 | def getdatafromjson(): 28 | with open('data.json') as json_file: 29 | data = json.load(json_file) 30 | return data 31 | ##下载文件 32 | def download(datas,myrange): 33 | for order in myrange: 34 | data = datas[order] 35 | category = data[-4] 36 | file_name = str(data[-1]).replace(category + '/', '') 37 | down_url = str(data[-1]) 38 | # pdf 下载链接 39 | pdf_url = '%s%s.pdf' % (baseurl, 'pdf/' + down_url) 40 | # dwg下载链接 41 | dwg_url = '%s%s.dwg' % (baseurl, 'dwg/' + down_url) 42 | # gif下载链接 43 | gif_url = '%s%s.gif' % (baseurl, 'thumbs/' + down_url) 44 | #新建文件夹 45 | if not os.path.exists(category): 46 | os.makedirs(category) 47 | print("创建文件夹成功") 48 | #下载pdf 49 | print("正在下载pdf") 50 | pdf_response = requests.get(pdf_url,stream=True,headers = header) 51 | with open(category+'/'+file_name+'.pdf','wb') as pdf_file: 52 | pdf_file.write(pdf_response.content) 53 | print("pdf下载完成") 54 | 55 | print("正在下载dwg") 56 | dwg_response = requests.get(dwg_url,stream=True,headers = header) 57 | with open(category+'/'+file_name + '.dwg', 'wb') as dwg_file: 58 | dwg_file.write(dwg_response.content) 59 | print("dwg下载完成") 60 | 61 | print("正在下载gif") 62 | gif_response = requests.get(gif_url,stream=True,headers = header) 63 | with open(category+'/'+file_name + '.gif', 'wb') as gif_file: 64 | gif_file.write(gif_response.content) 65 | print("gif下载完成") 66 | 67 | if __name__ == '__main__': 68 | writedatatojson() 69 | step = 100 70 | datas = getdatafromjson() 71 | blocks = range(1,len(datas)+1) 72 | ##将数据分段,实行多线程下载 73 | range_lists = [blocks[x:x + step] for x in range(0, len(blocks), step)] 74 | processlist = [] 75 | 76 | for range_list in range_lists: 77 | p = Process(target=download,args=(datas,range_list)) 78 | processlist.append(p) 79 | for p in processlist: 80 | p.start() 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /miaosha/miaosha/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for miaosha project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. 
You can find more settings consulting the documentation: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'miaosha' 13 | 14 | SPIDER_MODULES = ['miaosha.spiders'] 15 | NEWSPIDER_MODULE = 'miaosha.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'miaosha (+http://www.yourdomain.com)' 20 | 21 | # Obey robots.txt rules 22 | ROBOTSTXT_OBEY = True 23 | 24 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 25 | #CONCURRENT_REQUESTS = 32 26 | 27 | # Configure a delay for requests for the same website (default: 0) 28 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay 29 | # See also autothrottle settings and docs 30 | #DOWNLOAD_DELAY = 3 31 | # The download delay setting will honor only one of: 32 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16 33 | #CONCURRENT_REQUESTS_PER_IP = 16 34 | 35 | # Disable cookies (enabled by default) 36 | #COOKIES_ENABLED = False 37 | 38 | # Disable Telnet Console (enabled by default) 39 | #TELNETCONSOLE_ENABLED = False 40 | 41 | # Override the default request headers: 42 | #DEFAULT_REQUEST_HEADERS = { 43 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 44 | # 'Accept-Language': 'en', 45 | #} 46 | 47 | # Enable or disable spider middlewares 48 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 49 | #SPIDER_MIDDLEWARES = { 50 | # 'miaosha.middlewares.MiaoshaSpiderMiddleware': 543, 51 | #} 52 | 53 | # Enable or disable downloader middlewares 54 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 55 | #DOWNLOADER_MIDDLEWARES = { 56 | # 'miaosha.middlewares.MyCustomDownloaderMiddleware': 543, 57 | #} 58 | 59 | # Enable or disable extensions 60 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html 61 | #EXTENSIONS = { 62 | # 'scrapy.extensions.telnet.TelnetConsole': None, 63 | #} 64 | 65 | # Configure item pipelines 66 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html 67 | #ITEM_PIPELINES = { 68 | # 'miaosha.pipelines.MiaoshaPipeline': 300, 69 | #} 70 | 71 | # Enable and configure the AutoThrottle extension (disabled by default) 72 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html 73 | #AUTOTHROTTLE_ENABLED = True 74 | # The initial download delay 75 | #AUTOTHROTTLE_START_DELAY = 5 76 | # The maximum download delay to be set in case of high latencies 77 | #AUTOTHROTTLE_MAX_DELAY = 60 78 | # The average number of requests Scrapy should be sending in parallel to 79 | # each remote server 80 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG = False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED = True 87 | #HTTPCACHE_EXPIRATION_SECS = 0 88 | #HTTPCACHE_DIR = 'httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 90 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | -------------------------------------------------------------------------------- /jingdong/jingdong/settings.py: -------------------------------------------------------------------------------- 1 
| # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for jingdong project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. You can find more settings consulting the documentation: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'jingdong' 13 | 14 | SPIDER_MODULES = ['jingdong.spiders'] 15 | NEWSPIDER_MODULE = 'jingdong.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'jingdong (+http://www.yourdomain.com)' 20 | 21 | # Obey robots.txt rules 22 | ROBOTSTXT_OBEY = True 23 | 24 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 25 | #CONCURRENT_REQUESTS = 32 26 | 27 | # Configure a delay for requests for the same website (default: 0) 28 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay 29 | # See also autothrottle settings and docs 30 | #DOWNLOAD_DELAY = 3 31 | # The download delay setting will honor only one of: 32 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16 33 | #CONCURRENT_REQUESTS_PER_IP = 16 34 | 35 | # Disable cookies (enabled by default) 36 | #COOKIES_ENABLED = False 37 | 38 | # Disable Telnet Console (enabled by default) 39 | #TELNETCONSOLE_ENABLED = False 40 | 41 | # Override the default request headers: 42 | #DEFAULT_REQUEST_HEADERS = { 43 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 44 | # 'Accept-Language': 'en', 45 | #} 46 | 47 | # Enable or disable spider middlewares 48 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 49 | #SPIDER_MIDDLEWARES = { 50 | # 'jingdong.middlewares.JingdongSpiderMiddleware': 543, 51 | #} 52 | 53 | # Enable or disable downloader middlewares 54 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 55 | #DOWNLOADER_MIDDLEWARES = { 56 | # 'jingdong.middlewares.MyCustomDownloaderMiddleware': 543, 57 | #} 58 | 59 | # Enable or disable extensions 60 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html 61 | #EXTENSIONS = { 62 | # 'scrapy.extensions.telnet.TelnetConsole': None, 63 | #} 64 | 65 | # Configure item pipelines 66 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html 67 | #ITEM_PIPELINES = { 68 | # 'jingdong.pipelines.JingdongPipeline': 300, 69 | #} 70 | 71 | # Enable and configure the AutoThrottle extension (disabled by default) 72 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html 73 | #AUTOTHROTTLE_ENABLED = True 74 | # The initial download delay 75 | #AUTOTHROTTLE_START_DELAY = 5 76 | # The maximum download delay to be set in case of high latencies 77 | #AUTOTHROTTLE_MAX_DELAY = 60 78 | # The average number of requests Scrapy should be sending in parallel to 79 | # each remote server 80 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG = False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED = True 87 | #HTTPCACHE_EXPIRATION_SECS = 0 88 | #HTTPCACHE_DIR = 'httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 90 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | 
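Both Scrapy projects in this repo (miaosha and jingdong) ship with the generated settings shown above: the item pipeline and the download delay are left commented out, so crawled items are never persisted by a pipeline and requests go out at the default rate. Enabling them only means uncommenting the template lines; a minimal example for the jingdong project (the delay value is illustrative):

```python
# jingdong/jingdong/settings.py -- pipeline and throttling enabled
ITEM_PIPELINES = {
    'jingdong.pipelines.JingdongPipeline': 300,   # class stub already generated in pipelines.py
}
DOWNLOAD_DELAY = 3        # seconds between requests to the same site
ROBOTSTXT_OBEY = True     # unchanged default from the generated settings
```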
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/101.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2009-07-221.07951.07950.77%开放申购封闭期
2009-07-211.07131.0713-0.89%开放申购封闭期
2009-07-201.08091.08091.02%开放申购封闭期
2009-07-171.07001.07000.40%开放申购封闭期
2009-07-161.06571.06570.13%开放申购封闭期
2009-07-151.06431.06430.16%开放申购封闭期
2009-07-141.06261.06260.31%封闭期封闭期
2009-07-101.05931.05930.16%封闭期封闭期
2009-07-031.05761.05764.26%封闭期封闭期
2009-06-301.01441.01440.90%封闭期封闭期
2009-06-261.00541.00540.54%封闭期封闭期
2009-06-191.00001.00001.72%封闭期封闭期
2009-06-120.98310.9831-1.00%封闭期封闭期
2009-06-050.99300.99300.00%封闭期封闭期
2009-05-270.99300.9930-0.18%封闭期封闭期
2009-05-220.99480.9948-0.50%封闭期封闭期
2009-05-150.99980.9998-0.02%封闭期封闭期
2009-05-081.00001.00000.00%封闭期封闭期
2009-05-061.00001.0000封闭期封闭期
-------------------------------------------------------------------------------- /crawlDajiawen/spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from selenium import webdriver 3 | from selenium.webdriver.chrome.options import Options 4 | from bs4 import BeautifulSoup 5 | import time 6 | import csv 7 | import sys 8 | import io 9 | sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030') #改变标准输出的默认编码 10 | 11 | 12 | def getTopGoodsId(url): 13 | goodIdList = [] 14 | driver = configure_driver() 15 | driver.get(url) 16 | goodInfo = BeautifulSoup(driver.page_source, 'lxml').find_all('a',class_='pic-link J_ClickStat J_ItemPicA') 17 | for goodRow in goodInfo: 18 | with open('goodid.txt', 'a') as infile: 19 | infile.write(goodRow['data-nid']+'\n') 20 | infile.close() 21 | 22 | ## 翻页 23 | nextpage = driver.find_element_by_css_selector('a[trace="srp_bottom_page2"]') 24 | nextpage.click() 25 | time.sleep(2) 26 | 27 | goodInfoPage = BeautifulSoup(driver.page_source, 'lxml').find_all('a',class_='pic-link J_ClickStat J_ItemPicA') 28 | for goodRowpage in goodInfoPage[:6]: 29 | with open('goodid.txt', 'a') as infile: 30 | infile.write(goodRowpage['data-nid']+'\n') 31 | infile.close() 32 | 33 | def makeDjiawenUrl(goodId): 34 | dajiwenUrl = 'https://h5.m.taobao.com/wendajia/question2017.html?refId={}'.format(goodId) 35 | return dajiwenUrl 36 | 37 | def configure_driver(): 38 | opts = Options() 39 | opts.add_argument('--headless') 40 | prefs = {"profile.managed_default_content_settings.images": 2} 41 | opts.add_experimental_option("prefs", prefs) 42 | opts.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36') 43 | driver = webdriver.Chrome(chrome_options=opts, executable_path=r'D:\soft\chromedriver\chromedriver.exe') 44 | return driver 45 | 46 | def get_page_source(url): 47 | 48 | driver = configure_driver() 49 | driver.get(url) 50 | driver.execute_script("document.getElementById('wdj').scrollTop=100000") 51 | time.sleep(2) 52 | parse(driver.page_source) 53 | 54 | def parse(response): 55 | questionDiv = BeautifulSoup(response, 'lxml').find_all('div',class_="question mgb16") 56 | answerDiv = BeautifulSoup(response, 'lxml').find_all('div',class_="answer mgb22") 57 | goodsName = BeautifulSoup(response, 'lxml').find('div',class_="it-name").get_text() 58 | dataList=[] 59 | 60 | for row in zip(questionDiv,answerDiv): 61 | if len(row)!=0: 62 | dataList.append([goodsName,row[0].find_next('div',class_="title text").get_text(),row[1].find_next('p',class_="title text").get_text()]) 63 | insertIntoCsv(dataList) 64 | 65 | def insertIntoCsv(data): 66 | with open("dajiawen.csv", "a+",encoding='gb18030') as csvfile: 67 | writer = csv.writer(csvfile) 68 | writer.writerows(data) 69 | csvfile.close() 70 | 71 | def writeHeader(): 72 | with open("dajiawen.csv", "a+", encoding='gb18030') as csvfile: 73 | writer = csv.writer(csvfile) 74 | # 先写入columns_name 75 | writer.writerow(["商品名字", "问题", "答案"]) 76 | csvfile.close() 77 | 78 | def run(): 79 | writeHeader() 80 | with open('goodid.txt', 'r') as infile: 81 | for id in infile.readlines(): 82 | dajiawenUrl = makeDjiawenUrl(id.strip()) 83 | get_page_source(dajiawenUrl) 84 | infile.close() 85 | 86 | 87 | ##淘宝搜索销量优先的查询链接 88 | top50url = 
'https://s.taobao.com/search?q=%E5%86%AC%E8%99%AB%E5%A4%8F%E8%8D%89&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306&sort=sale-desc' 89 | 90 | run() -------------------------------------------------------------------------------- /alishiyong/itemLinks.txt: -------------------------------------------------------------------------------- 1 | https://try.taobao.com/item.htm?id=32554209#tab-report 2 | https://try.taobao.com/item.htm?id=32216005#tab-report 3 | https://try.taobao.com/item.htm?id=31696246#tab-report 4 | https://try.taobao.com/item.htm?id=31606332#tab-report 5 | https://try.taobao.com/item.htm?id=31398149#tab-report 6 | https://try.taobao.com/item.htm?id=30674254#tab-report 7 | https://try.taobao.com/item.htm?id=30246169#tab-report 8 | https://try.taobao.com/item.htm?id=29972091#tab-report 9 | https://try.taobao.com/item.htm?id=29352092#tab-report 10 | https://try.taobao.com/item.htm?id=28458152#tab-report 11 | https://try.taobao.com/item.htm?id=28410077#tab-report 12 | https://try.taobao.com/item.htm?id=28076155#tab-report 13 | https://try.taobao.com/item.htm?id=27656070#tab-report 14 | https://try.taobao.com/item.htm?id=24984068#tab-report 15 | https://try.taobao.com/item.htm?id=23584624#tab-report 16 | https://try.taobao.com/item.htm?id=23462544#tab-report 17 | https://try.taobao.com/item.htm?id=23032085#tab-report 18 | https://try.taobao.com/item.htm?id=22488896#tab-report 19 | https://try.taobao.com/item.htm?id=21706193#tab-report 20 | https://try.taobao.com/item.htm?id=21606463#tab-report 21 | https://try.taobao.com/item.htm?id=21220476#tab-report 22 | https://try.taobao.com/item.htm?id=20944194#tab-report 23 | https://try.taobao.com/item.htm?id=20676023#tab-report 24 | https://try.taobao.com/item.htm?id=20504295#tab-report 25 | https://try.taobao.com/item.htm?id=20312223#tab-report 26 | https://try.taobao.com/item.htm?id=19932583#tab-report 27 | https://try.taobao.com/item.htm?id=19882508#tab-report 28 | https://try.taobao.com/item.htm?id=19030422#tab-report 29 | https://try.taobao.com/item.htm?id=18724006#tab-report 30 | https://try.taobao.com/item.htm?id=17566967#tab-report 31 | https://try.taobao.com/item.htm?id=17560397#tab-report 32 | https://try.taobao.com/item.htm?id=17554283#tab-report 33 | https://try.taobao.com/item.htm?id=17480258#tab-report 34 | https://try.taobao.com/item.htm?id=16782277#tab-report 35 | https://try.taobao.com/item.htm?id=16660700#tab-report 36 | https://try.taobao.com/item.htm?id=16386485#tab-report 37 | https://try.taobao.com/item.htm?id=16084244#tab-report 38 | https://try.taobao.com/item.htm?id=16072306#tab-report 39 | https://try.taobao.com/item.htm?id=15346237#tab-report 40 | https://try.taobao.com/item.htm?id=15724056#tab-report 41 | https://try.taobao.com/item.htm?id=14788178#tab-report 42 | https://try.taobao.com/item.htm?id=14408069#tab-report 43 | https://try.taobao.com/item.htm?id=14288393#tab-report 44 | https://try.taobao.com/item.htm?id=13264128#tab-report 45 | https://try.taobao.com/item.htm?id=13180864#tab-report 46 | https://try.taobao.com/item.htm?id=13196401#tab-report 47 | https://try.taobao.com/item.htm?id=12388252#tab-report 48 | https://try.taobao.com/item.htm?id=12274907#tab-report 49 | https://try.taobao.com/item.htm?id=11954078#tab-report 50 | https://try.taobao.com/item.htm?id=10718243#tab-report 51 | https://try.taobao.com/item.htm?id=9426001 52 | https://try.taobao.com/item.htm?id=9166010#tab-report 53 | 
https://try.taobao.com/item.htm?id=8578149#tab-report 54 | https://try.taobao.com/item.htm?id=8620060#tab-report 55 | https://try.taobao.com/item.htm?id=7928034 56 | https://try.taobao.com/item.htm?id=8252098#tab-report 57 | https://try.taobao.com/item.htm?id=7882535#tab-report 58 | https://try.taobao.com/item.htm?id=7834391#tab-report 59 | https://try.taobao.com/item.htm?id=6223769#tab-report 60 | https://try.taobao.com/item.htm?id=5847744#tab-report 61 | https://try.taobao.com/item.htm?id=5738666#tab-report 62 | https://try.taobao.com/item.htm?id=5562814#tab-report 63 | https://try.taobao.com/item.htm?id=4817780#tab-report 64 | https://try.taobao.com/item.htm?id=3532761#tab-report 65 | -------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/25.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2015-10-210.94100.9410-3.09%开放申购开放赎回
2015-10-200.97100.97100.21%开放申购开放赎回
2015-10-190.96900.96900.10%开放申购开放赎回
2015-10-160.96800.96800.62%开放申购开放赎回
2015-10-150.96200.96201.69%开放申购开放赎回
2015-10-140.94600.9460-1.25%开放申购开放赎回
2015-10-130.95800.95800.21%开放申购开放赎回
2015-10-120.95600.95602.47%开放申购开放赎回
2015-10-090.93300.93300.54%开放申购开放赎回
2015-10-080.92800.92802.32%开放申购开放赎回
2015-09-300.90700.90701.00%开放申购开放赎回
2015-09-290.89800.8980-0.88%开放申购开放赎回
2015-09-280.90600.90600.78%开放申购开放赎回
2015-09-250.89900.8990-1.53%开放申购开放赎回
2015-09-240.91300.91300.33%开放申购开放赎回
2015-09-230.91000.9100-1.19%开放申购开放赎回
2015-09-220.92100.92100.22%开放申购开放赎回
2015-09-210.91900.91901.66%开放申购开放赎回
2015-09-180.90400.90400.67%开放申购开放赎回
2015-09-170.89800.8980-1.43%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/27.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2015-08-171.00501.00500.40%开放申购开放赎回
2015-08-141.00101.00100.10%开放申购开放赎回
2015-08-131.00001.00000.60%开放申购开放赎回
2015-08-120.99400.9940-1.58%开放申购开放赎回
2015-08-111.01001.0100-0.39%开放申购开放赎回
2015-08-101.01401.01402.53%开放申购开放赎回
2015-08-070.98900.98901.44%开放申购开放赎回
2015-08-060.97500.97500.00%开放申购开放赎回
2015-08-050.97500.9750-1.42%开放申购开放赎回
2015-08-040.98900.98902.49%开放申购开放赎回
2015-08-030.96500.96500.63%开放申购开放赎回
2015-07-310.95900.95900.21%开放申购开放赎回
2015-07-300.95700.9570-2.35%开放申购开放赎回
2015-07-290.98000.98001.87%开放申购开放赎回
2015-07-280.96200.96200.00%开放申购开放赎回
2015-07-270.96200.9620-5.13%开放申购开放赎回
2015-07-241.01401.0140-0.98%开放申购开放赎回
2015-07-231.02401.02401.49%开放申购开放赎回
2015-07-221.00901.00900.70%开放申购开放赎回
2015-07-211.00201.00200.70%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/8.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2017-03-101.11701.11700.72%开放申购开放赎回
2017-03-091.10901.1090-0.54%开放申购开放赎回
2017-03-081.11501.1150-0.18%开放申购开放赎回
2017-03-071.11701.11700.36%开放申购开放赎回
2017-03-061.11301.11300.91%开放申购开放赎回
2017-03-031.10301.10300.64%开放申购开放赎回
2017-03-021.09601.0960-0.18%开放申购开放赎回
2017-03-011.09801.09800.00%开放申购开放赎回
2017-02-281.09801.09800.00%开放申购开放赎回
2017-02-271.09801.0980-0.72%开放申购开放赎回
2017-02-241.10601.1060-0.36%开放申购开放赎回
2017-02-231.11001.11000.27%开放申购开放赎回
2017-02-221.10701.10700.36%开放申购开放赎回
2017-02-211.10301.10300.18%开放申购开放赎回
2017-02-201.10101.10101.66%开放申购开放赎回
2017-02-171.08301.08300.00%开放申购开放赎回
2017-02-161.08301.08300.74%开放申购开放赎回
2017-02-151.07501.0750-0.56%开放申购开放赎回
2017-02-141.08101.0810-0.37%开放申购开放赎回
2017-02-131.08501.08501.31%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/9.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2017-02-101.07101.07100.09%开放申购开放赎回
2017-02-091.07001.0700-0.19%开放申购开放赎回
2017-02-081.07201.07200.47%开放申购开放赎回
2017-02-071.06701.0670-0.28%开放申购开放赎回
2017-02-061.07001.07000.00%开放申购开放赎回
2017-02-031.07001.07000.38%开放申购开放赎回
2017-01-261.06601.06600.28%开放申购开放赎回
2017-01-251.06301.06300.47%开放申购开放赎回
2017-01-241.05801.05800.28%开放申购开放赎回
2017-01-231.05501.05500.29%开放申购开放赎回
2017-01-201.05201.05200.86%开放申购开放赎回
2017-01-191.04301.0430-0.38%开放申购开放赎回
2017-01-181.04701.04700.48%开放申购开放赎回
2017-01-171.04201.04200.97%开放申购开放赎回
2017-01-161.03201.0320-0.19%开放申购开放赎回
2017-01-131.03401.0340-0.10%开放申购开放赎回
2017-01-121.03501.0350-0.58%开放申购开放赎回
2017-01-111.04101.0410-0.57%开放申购开放赎回
2017-01-101.04701.04700.10%开放申购开放赎回
2017-01-091.04601.04600.19%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/100.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2009-08-190.97090.9709-1.04%开放申购开放赎回
2009-08-180.98110.9811-0.41%开放申购开放赎回
2009-08-170.98510.9851-2.97%开放申购开放赎回
2009-08-141.01531.0153-1.04%开放申购开放赎回
2009-08-131.02601.02601.43%开放申购开放赎回
2009-08-121.01151.0115-0.89%开放申购开放赎回
2009-08-111.02061.02060.60%开放申购开放赎回
2009-08-101.01451.0145-0.80%开放申购开放赎回
2009-08-071.02271.0227-2.79%开放申购开放赎回
2009-08-061.05211.0521-1.02%开放申购开放赎回
2009-08-051.06291.0629-0.80%开放申购开放赎回
2009-08-041.07151.07150.36%开放申购开放赎回
2009-08-031.06771.06770.30%开放申购开放赎回
2009-07-311.06451.06452.26%开放申购封闭期
2009-07-301.04101.04100.17%开放申购封闭期
2009-07-291.03921.0392-4.26%开放申购封闭期
2009-07-281.08541.0854-0.86%开放申购封闭期
2009-07-271.09481.09480.89%开放申购封闭期
2009-07-241.08511.0851-0.28%开放申购封闭期
2009-07-231.08821.08820.81%开放申购封闭期
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/16.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2016-07-131.55211.62210.72%开放申购开放赎回
2016-07-121.54101.61100.10%开放申购开放赎回
2016-07-111.53951.6095-0.74%开放申购开放赎回
2016-07-081.55091.6209-0.39%开放申购开放赎回
2016-07-071.55691.62690.30%开放申购开放赎回
2016-07-061.55231.62230.60%开放申购开放赎回
2016-07-051.54301.61300.32%开放申购开放赎回
2016-07-041.53811.60811.98%开放申购开放赎回
2016-07-011.50831.5783-0.15%开放申购开放赎回
2016-06-301.51061.58060.11%开放申购开放赎回
2016-06-291.50891.57890.53%开放申购开放赎回
2016-06-281.50101.57100.28%开放申购开放赎回
2016-06-271.49681.56681.97%开放申购开放赎回
2016-06-241.46791.53790.14%开放申购开放赎回
2016-06-231.46581.5358-1.02%开放申购开放赎回
2016-06-221.48091.55091.84%开放申购开放赎回
2016-06-211.45421.5242-1.18%开放申购开放赎回
2016-06-201.47151.54151.69%开放申购开放赎回
2016-06-171.44711.51710.05%开放申购开放赎回
2016-06-161.44641.51640.39%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/19.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2016-04-151.42941.49940.06%开放申购开放赎回
2016-04-141.42851.49850.95%开放申购开放赎回
2016-04-131.41511.48511.85%开放申购开放赎回
2016-04-121.38941.4594-1.21%开放申购开放赎回
2016-04-111.40641.47641.49%开放申购开放赎回
2016-04-081.38571.4557-0.76%开放申购开放赎回
2016-04-071.39631.46630.48%开放申购开放赎回
2016-04-061.38961.45960.67%开放申购开放赎回
2016-04-051.38041.45042.57%开放申购开放赎回
2016-04-011.34581.4158-0.76%开放申购开放赎回
2016-03-311.35611.42611.57%开放申购开放赎回
2016-03-301.33511.40513.85%开放申购开放赎回
2016-03-291.28561.3556-1.80%开放申购开放赎回
2016-03-281.30921.3792-1.04%开放申购开放赎回
2016-03-251.32291.39290.76%开放申购开放赎回
2016-03-241.31291.3829-1.11%开放申购开放赎回
2016-03-231.32761.39761.14%开放申购开放赎回
2016-03-221.31271.38270.74%开放申购开放赎回
2016-03-211.30301.37303.04%开放申购开放赎回
2016-03-181.26461.33464.47%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/25.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2015-10-211.24521.3152-6.69%开放申购开放赎回
2015-10-201.33451.40451.39%开放申购开放赎回
2015-10-191.31621.38620.40%开放申购开放赎回
2015-10-161.31101.38101.42%开放申购开放赎回
2015-10-151.29261.36263.67%开放申购开放赎回
2015-10-141.24681.3168-2.11%开放申购开放赎回
2015-10-131.27371.34372.05%开放申购开放赎回
2015-10-121.24811.31814.01%开放申购开放赎回
2015-10-091.20001.27001.60%开放申购开放赎回
2015-10-081.18111.25114.47%开放申购开放赎回
2015-09-301.13061.20060.15%开放申购开放赎回
2015-09-291.12891.1989-1.21%开放申购开放赎回
2015-09-281.14271.21273.85%开放申购开放赎回
2015-09-251.10031.1703-2.89%开放申购开放赎回
2015-09-241.13301.20301.02%开放申购开放赎回
2015-09-231.12161.19160.57%开放申购开放赎回
2015-09-221.11521.18520.31%开放申购开放赎回
2015-09-211.11181.18185.22%开放申购开放赎回
2015-09-181.05661.12662.12%开放申购开放赎回
2015-09-171.03471.10470.43%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/30.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2015-05-222.08932.1593-0.88%开放申购开放赎回
2015-05-212.10782.17783.18%开放申购开放赎回
2015-05-202.04282.11282.32%开放申购开放赎回
2015-05-191.99652.06650.17%开放申购开放赎回
2015-05-181.99322.06324.15%开放申购开放赎回
2015-05-151.91381.98380.35%开放申购开放赎回
2015-05-141.90711.9771-1.50%开放申购开放赎回
2015-05-131.93612.00611.14%开放申购开放赎回
2015-05-121.91431.98434.33%开放申购开放赎回
2015-05-111.83491.90495.98%开放申购开放赎回
2015-05-081.73141.80144.98%开放申购开放赎回
2015-05-071.64921.71920.22%开放申购开放赎回
2015-05-061.64561.71561.03%开放申购开放赎回
2015-05-051.62881.6988-2.60%开放申购开放赎回
2015-05-041.67221.74220.28%开放申购开放赎回
2015-04-301.66761.73761.11%开放申购开放赎回
2015-04-291.64931.71931.94%开放申购开放赎回
2015-04-281.61791.6879-3.60%开放申购开放赎回
2015-04-271.67831.7483-0.27%开放申购开放赎回
2015-04-241.68281.7528-0.07%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/32.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2015-03-251.56791.63791.54%开放申购开放赎回
2015-03-241.54411.61410.29%开放申购开放赎回
2015-03-231.53971.60972.05%开放申购开放赎回
2015-03-201.50881.57880.20%开放申购开放赎回
2015-03-191.50581.57580.01%开放申购开放赎回
2015-03-181.50561.57561.41%开放申购开放赎回
2015-03-171.48461.55460.76%开放申购开放赎回
2015-03-161.47341.54343.16%开放申购开放赎回
2015-03-131.42821.49820.80%开放申购开放赎回
2015-03-121.41691.4869-0.87%开放申购开放赎回
2015-03-111.42941.4994-0.09%开放申购开放赎回
2015-03-101.43071.50070.94%开放申购开放赎回
2015-03-091.41741.48740.96%开放申购开放赎回
2015-03-061.40391.4739-2.57%开放申购开放赎回
2015-03-051.44091.51090.89%开放申购开放赎回
2015-03-041.42821.49820.98%开放申购开放赎回
2015-03-031.41431.48430.12%开放申购开放赎回
2015-03-021.41261.48262.96%开放申购开放赎回
2015-02-271.37201.44201.22%开放申购开放赎回
2015-02-261.35541.4254-0.25%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/4.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2017-07-070.99911.51910.31%开放申购开放赎回
2017-07-060.99601.51601.08%开放申购开放赎回
2017-07-050.98541.50541.09%开放申购开放赎回
2017-07-040.97481.4948-0.60%开放申购开放赎回
2017-07-030.98071.50071.07%开放申购开放赎回
2017-06-300.97031.49030.80%开放申购开放赎回
2017-06-290.96261.48260.50%开放申购开放赎回
2017-06-280.95781.4778-0.90%开放申购开放赎回
2017-06-270.96651.48650.23%开放申购开放赎回
2017-06-260.96431.48431.71%开放申购开放赎回
2017-06-230.94811.46810.81%开放申购开放赎回
2017-06-220.94051.4605-1.58%开放申购开放赎回
2017-06-210.95561.4756-0.29%开放申购开放赎回
2017-06-200.95841.47840.58%开放申购开放赎回
2017-06-190.95291.47290.42%开放申购开放赎回
2017-06-160.94891.46890.81%开放申购开放赎回
2017-06-150.94131.46131.42%开放申购开放赎回
2017-06-140.92811.4481-0.27%开放申购开放赎回
2017-06-130.93061.45061.63%开放申购开放赎回
2017-06-120.91571.4357-0.65%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/58.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2013-01-280.92430.99432.60%开放申购开放赎回
2013-01-250.90090.97090.69%开放申购开放赎回
2013-01-240.89470.9647-0.52%开放申购开放赎回
2013-01-230.89940.96941.08%开放申购开放赎回
2013-01-220.88980.9598-1.14%开放申购开放赎回
2013-01-210.90010.97010.66%开放申购开放赎回
2013-01-180.89420.96421.71%开放申购开放赎回
2013-01-170.87920.9492-0.64%开放申购开放赎回
2013-01-160.88490.95490.41%开放申购开放赎回
2013-01-150.88130.95130.86%开放申购开放赎回
2013-01-140.87380.94383.68%开放申购开放赎回
2013-01-110.84280.9128-0.70%开放申购开放赎回
2013-01-100.84870.91870.77%开放申购开放赎回
2013-01-090.84220.91220.37%开放申购开放赎回
2013-01-080.83910.90910.82%开放申购开放赎回
2013-01-070.83230.90230.75%开放申购开放赎回
2013-01-040.82610.8961-1.09%开放申购开放赎回
2012-12-310.83520.90521.11%开放申购开放赎回
2012-12-280.82600.89601.08%开放申购开放赎回
2012-12-270.81720.8872-0.49%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/88.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2010-08-131.16331.16330.89%开放申购开放赎回
2010-08-121.15301.1530-0.07%开放申购开放赎回
2010-08-111.15381.15380.46%开放申购开放赎回
2010-08-101.14851.1485-1.99%开放申购开放赎回
2010-08-091.17181.17180.77%开放申购开放赎回
2010-08-061.16291.16290.86%开放申购开放赎回
2010-08-051.15301.15301.10%开放申购开放赎回
2010-08-041.14041.14041.13%开放申购开放赎回
2010-08-031.12771.1277-1.11%开放申购开放赎回
2010-08-021.14031.14031.28%开放申购开放赎回
2010-07-301.12591.1259-0.26%开放申购开放赎回
2010-07-291.12881.12880.29%开放申购开放赎回
2010-07-281.12551.12551.91%开放申购开放赎回
2010-07-271.10441.10440.98%开放申购开放赎回
2010-07-261.09371.09371.47%开放申购开放赎回
2010-07-231.07791.07790.14%开放申购开放赎回
2010-07-221.07641.07641.35%开放申购开放赎回
2010-07-211.06211.0621-0.11%开放申购开放赎回
2010-07-201.06331.06331.41%开放申购开放赎回
2010-07-191.04851.04850.98%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/92.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2010-04-191.09731.0973-1.93%开放申购开放赎回
2010-04-161.11891.11890.02%开放申购开放赎回
2010-04-151.11871.1187-0.70%开放申购开放赎回
2010-04-141.12661.12660.98%开放申购开放赎回
2010-04-131.11571.1157-1.80%开放申购开放赎回
2010-04-121.13611.13611.76%开放申购开放赎回
2010-04-091.11651.11651.98%开放申购开放赎回
2010-04-081.09481.09480.40%开放申购开放赎回
2010-04-071.09041.09040.38%开放申购开放赎回
2010-04-061.08631.0863-0.51%开放申购开放赎回
2010-04-021.09191.09190.23%开放申购开放赎回
2010-04-011.08941.08942.46%开放申购开放赎回
2010-03-311.06321.06320.48%开放申购开放赎回
2010-03-301.05811.05810.61%开放申购开放赎回
2010-03-291.05171.05170.59%开放申购开放赎回
2010-03-261.04551.04551.34%开放申购开放赎回
2010-03-251.03171.0317-0.60%开放申购开放赎回
2010-03-241.03791.03790.13%开放申购开放赎回
2010-03-231.03661.03660.34%开放申购开放赎回
2010-03-221.03311.03310.44%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/10.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2017-01-061.04401.0440-0.48%开放申购开放赎回
2017-01-051.04901.0490-0.38%开放申购开放赎回
2017-01-041.05301.05301.15%开放申购开放赎回
2017-01-031.04101.04100.19%开放申购开放赎回
2016-12-311.03901.03900.00%开放申购开放赎回
2016-12-301.03901.03901.17%开放申购开放赎回
2016-12-291.02701.02700.10%开放申购开放赎回
2016-12-281.02601.0260-0.19%开放申购开放赎回
2016-12-271.02801.02800.19%开放申购开放赎回
2016-12-261.02601.02600.49%开放申购开放赎回
2016-12-231.02101.0210-0.68%开放申购开放赎回
2016-12-221.02801.02800.10%开放申购开放赎回
2016-12-211.02701.02700.79%开放申购开放赎回
2016-12-201.01901.0190-0.49%开放申购开放赎回
2016-12-191.02401.0240-0.29%开放申购开放赎回
2016-12-161.02701.02700.49%开放申购开放赎回
2016-12-151.02201.0220-0.58%开放申购开放赎回
2016-12-141.02801.0280-0.68%开放申购开放赎回
2016-12-131.03501.03500.39%开放申购开放赎回
2016-12-121.03101.0310-2.09%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/11.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2016-12-091.05301.05300.10%开放申购开放赎回
2016-12-081.05201.0520-0.10%开放申购开放赎回
2016-12-071.05301.05300.57%开放申购开放赎回
2016-12-061.04701.04700.87%开放申购开放赎回
2016-12-051.03801.0380-1.24%开放申购开放赎回
2016-12-021.05101.0510-1.41%开放申购开放赎回
2016-12-011.06601.06600.85%开放申购开放赎回
2016-11-301.05701.0570-0.94%开放申购开放赎回
2016-11-291.06701.06701.43%开放申购开放赎回
2016-11-281.05201.05200.10%开放申购开放赎回
2016-11-251.05101.05100.29%开放申购开放赎回
2016-11-241.04801.04800.96%开放申购开放赎回
2016-11-231.03801.03800.29%开放申购开放赎回
2016-11-221.03501.03500.19%开放申购开放赎回
2016-11-211.03301.0330-0.29%开放申购开放赎回
2016-11-181.03601.0360-0.29%开放申购开放赎回
2016-11-171.03901.03900.19%开放申购开放赎回
2016-11-161.03701.03700.10%开放申购开放赎回
2016-11-151.03601.03600.39%开放申购开放赎回
2016-11-141.03201.0320-0.10%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/12.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2016-11-111.03301.0330-0.39%开放申购开放赎回
2016-11-101.03701.03700.78%开放申购开放赎回
2016-11-091.02901.0290-1.06%开放申购开放赎回
2016-11-081.04001.04000.39%开放申购开放赎回
2016-11-071.03601.0360-0.29%开放申购开放赎回
2016-11-041.03901.0390-0.38%开放申购开放赎回
2016-11-031.04301.04300.10%开放申购开放赎回
2016-11-021.04201.0420-0.57%开放申购开放赎回
2016-11-011.04801.04800.58%开放申购开放赎回
2016-10-311.04201.04200.00%开放申购开放赎回
2016-10-281.04201.0420-0.48%开放申购开放赎回
2016-10-271.04701.0470-0.29%开放申购开放赎回
2016-10-261.05001.05000.38%开放申购开放赎回
2016-10-251.04601.0460-0.10%开放申购开放赎回
2016-10-241.04701.04700.48%开放申购开放赎回
2016-10-211.04201.0420-0.29%开放申购开放赎回
2016-10-201.04501.04500.29%开放申购开放赎回
2016-10-191.04201.0420-0.29%开放申购开放赎回
2016-10-181.04501.04501.06%开放申购开放赎回
2016-10-171.03401.0340-0.86%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/13.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2016-10-141.04301.04300.10%开放申购开放赎回
2016-10-131.04201.04200.58%开放申购开放赎回
2016-10-121.03601.03600.19%开放申购开放赎回
2016-10-111.03401.03400.29%开放申购开放赎回
2016-10-101.03101.03101.48%开放申购开放赎回
2016-09-301.01601.01600.49%开放申购开放赎回
2016-09-291.01101.01100.40%开放申购开放赎回
2016-09-281.00701.0070-0.10%开放申购开放赎回
2016-09-271.00801.00800.50%开放申购开放赎回
2016-09-261.00301.0030-1.47%开放申购开放赎回
2016-09-231.01801.01800.00%开放申购开放赎回
2016-09-221.01801.01800.49%开放申购开放赎回
2016-09-211.01301.0130-0.20%开放申购开放赎回
2016-09-201.01501.0150-0.49%开放申购开放赎回
2016-09-191.02001.02000.69%开放申购开放赎回
2016-09-141.01301.01300.00%开放申购开放赎回
2016-09-131.01301.01300.20%开放申购开放赎回
2016-09-121.01101.0110-1.56%开放申购开放赎回
2016-09-091.02701.0270-0.29%开放申购开放赎回
2016-09-081.03001.0300-0.29%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/15.txt: -------------------------------------------------------------------------------- 1 |
净值日期单位净值累计净值日增长率申购状态赎回状态分红送配
2016-08-101.02501.0250-0.77%开放申购开放赎回
2016-08-091.03301.03300.88%开放申购开放赎回
2016-08-081.02401.02400.79%开放申购开放赎回
2016-08-051.01601.0160-0.59%开放申购开放赎回
2016-08-041.02201.02200.29%开放申购开放赎回
2016-08-031.01901.01900.39%开放申购开放赎回
2016-08-021.01501.01500.30%开放申购开放赎回
2016-08-011.01201.0120-1.36%开放申购开放赎回
2016-07-291.02601.02600.00%开放申购开放赎回
2016-07-281.02601.02601.38%开放申购开放赎回
2016-07-271.01201.0120-1.46%开放申购开放赎回
2016-07-261.02701.02701.28%开放申购开放赎回
2016-07-251.01401.01400.50%开放申购开放赎回
2016-07-221.00901.0090-0.88%开放申购开放赎回
2016-07-211.01801.01800.89%开放申购开放赎回
2016-07-201.00901.00900.10%开放申购开放赎回
2016-07-191.00801.0080-0.79%开放申购开放赎回
2016-07-181.01601.0160-0.78%开放申购开放赎回
2016-07-151.02401.02400.49%开放申购开放赎回
2016-07-141.01901.01900.00%开放申购开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/23.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2015-12-16  1.0240  1.0240  0.10%  开放申购  开放赎回
2015-12-15  1.0230  1.0230  0.59%  开放申购  开放赎回
2015-12-14  1.0170  1.0170  1.40%  开放申购  开放赎回
2015-12-11  1.0030  1.0030  -0.40%  开放申购  开放赎回
2015-12-10  1.0070  1.0070  0.00%  开放申购  开放赎回
2015-12-09  1.0070  1.0070  0.30%  开放申购  开放赎回
2015-12-08  1.0040  1.0040  -1.67%  开放申购  开放赎回
2015-12-07  1.0210  1.0210  0.10%  开放申购  开放赎回
2015-12-04  1.0200  1.0200  -0.97%  开放申购  开放赎回
2015-12-03  1.0300  1.0300  0.68%  开放申购  开放赎回
2015-12-02  1.0230  1.0230  2.20%  开放申购  开放赎回
2015-12-01  1.0010  1.0010  1.01%  开放申购  开放赎回
2015-11-30  0.9910  0.9910  1.02%  开放申购  开放赎回
2015-11-27  0.9810  0.9810  -3.92%  开放申购  开放赎回
2015-11-26  1.0210  1.0210  -0.58%  开放申购  开放赎回
2015-11-25  1.0270  1.0270  0.88%  开放申购  开放赎回
2015-11-24  1.0180  1.0180  0.30%  开放申购  开放赎回
2015-11-23  1.0150  1.0150  -0.20%  开放申购  开放赎回
2015-11-20  1.0170  1.0170  -0.20%  开放申购  开放赎回
2015-11-19  1.0190  1.0190  1.29%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/24.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2015-11-18  1.0060  1.0060  -0.40%  开放申购  开放赎回
2015-11-17  1.0100  1.0100  -0.20%  开放申购  开放赎回
2015-11-16  1.0120  1.0120  0.90%  开放申购  开放赎回
2015-11-13  1.0030  1.0030  -1.28%  开放申购  开放赎回
2015-11-12  1.0160  1.0160  -0.29%  开放申购  开放赎回
2015-11-11  1.0190  1.0190  -0.10%  开放申购  开放赎回
2015-11-10  1.0200  1.0200  -0.58%  开放申购  开放赎回
2015-11-09  1.0260  1.0260  0.89%  开放申购  开放赎回
2015-11-06  1.0170  1.0170  1.40%  开放申购  开放赎回
2015-11-05  1.0030  1.0030  0.40%  开放申购  开放赎回
2015-11-04  0.9990  0.9990  3.31%  开放申购  开放赎回
2015-11-03  0.9670  0.9670  -0.10%  开放申购  开放赎回
2015-11-02  0.9680  0.9680  -1.12%  开放申购  开放赎回
2015-10-30  0.9790  0.9790  0.51%  开放申购  开放赎回
2015-10-29  0.9740  0.9740  0.52%  开放申购  开放赎回
2015-10-28  0.9690  0.9690  -0.92%  开放申购  开放赎回
2015-10-27  0.9780  0.9780  0.00%  开放申购  开放赎回
2015-10-26  0.9780  0.9780  0.72%  开放申购  开放赎回
2015-10-23  0.9710  0.9710  1.68%  开放申购  开放赎回
2015-10-22  0.9550  0.9550  1.49%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/28.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2015-07-20  0.9950  0.9950  0.71%  开放申购  开放赎回
2015-07-17  0.9880  0.9880  2.49%  开放申购  开放赎回
2015-07-16  0.9640  0.9640  1.05%  开放申购  开放赎回
2015-07-15  0.9540  0.9540  -1.95%  开放申购  开放赎回
2015-07-14  0.9730  0.9730  -0.51%  开放申购  开放赎回
2015-07-13  0.9780  0.9780  1.88%  开放申购  开放赎回
2015-07-10  0.9600  0.9600  2.67%  开放申购  开放赎回
2015-07-09  0.9350  0.9350  4.59%  开放申购  开放赎回
2015-07-08  0.8940  0.8940  -6.19%  开放申购  开放赎回
2015-07-07  0.9530  0.9530  -1.55%  开放申购  开放赎回
2015-07-06  0.9680  0.9680  1.89%  暂停申购  开放赎回
2015-07-03  0.9500  0.9500  -3.36%  暂停申购  开放赎回
2015-07-02  0.9830  0.9830  -2.38%  暂停申购  开放赎回
2015-07-01  1.0070  1.0070  -2.99%  暂停申购  开放赎回
2015-06-30  1.0380  1.0380  5.70%  暂停申购  开放赎回
2015-06-29  0.9820  0.9820  -1.31%  暂停申购  开放赎回
2015-06-26  0.9950  0.9950  -5.60%  暂停申购  开放赎回
2015-06-25  1.0540  1.0540  -2.77%  暂停申购  开放赎回
2015-06-24  1.0840  1.0840  1.59%  暂停申购  开放赎回
2015-06-23  1.0670  1.0670  3.59%  暂停申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/001112/29.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2015-06-19  1.0300  1.0300  -4.10%  暂停申购  开放赎回
2015-06-18  1.0740  1.0740  -2.27%  暂停申购  开放赎回
2015-06-17  1.0990  1.0990  0.92%  暂停申购  开放赎回
2015-06-16  1.0890  1.0890  -1.80%  暂停申购  开放赎回
2015-06-15  1.1090  1.1090  -1.95%  暂停申购  开放赎回
2015-06-12  1.1310  1.1310  0.53%  暂停申购  开放赎回
2015-06-11  1.1250  1.1250  -0.27%  暂停申购  开放赎回
2015-06-10  1.1280  1.1280  0.53%  暂停申购  开放赎回
2015-06-09  1.1220  1.1220  -0.36%  暂停申购  开放赎回
2015-06-08  1.1260  1.1260  1.90%  暂停申购  开放赎回
2015-06-05  1.1050  1.1050  0.18%  暂停申购  开放赎回
2015-06-04  1.1030  1.1030  0.00%  暂停申购  开放赎回
2015-06-03  1.1030  1.1030  0.27%  暂停申购  开放赎回
2015-06-02  1.1000  1.1000  0.46%  暂停申购  开放赎回
2015-06-01  1.0950  1.0950  2.82%  暂停申购  开放赎回
2015-05-29  1.0650  1.0650  0.00%  开放申购  开放赎回
2015-05-28  1.0650  1.0650  -3.79%  开放申购  开放赎回
2015-05-27  1.1070  1.1070  -0.81%  开放申购  开放赎回
2015-05-26  1.1160  1.1160  1.55%  开放申购  开放赎回
2015-05-25  1.0990  1.0990  2.04%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/1.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2017-09-29  1.0542  1.5742  2.24%  开放申购  开放赎回
2017-09-28  1.0311  1.5511  -0.55%  开放申购  开放赎回
2017-09-27  1.0368  1.5568  0.55%  开放申购  开放赎回
2017-09-26  1.0311  1.5511  -0.44%  开放申购  开放赎回
2017-09-25  1.0357  1.5557  -1.10%  开放申购  开放赎回
2017-09-22  1.0472  1.5672  0.24%  开放申购  开放赎回
2017-09-21  1.0447  1.5647  -1.21%  开放申购  开放赎回
2017-09-20  1.0575  1.5775  1.12%  开放申购  开放赎回
2017-09-19  1.0458  1.5658  -0.66%  开放申购  开放赎回
2017-09-18  1.0528  1.5728  1.00%  开放申购  开放赎回
2017-09-15  1.0424  1.5624  -0.78%  开放申购  开放赎回
2017-09-14  1.0506  1.5706  0.14%  开放申购  开放赎回
2017-09-13  1.0491  1.5691  1.64%  开放申购  开放赎回
2017-09-12  1.0322  1.5522  -0.16%  开放申购  开放赎回
2017-09-11  1.0339  1.5539  1.23%  开放申购  开放赎回
2017-09-08  1.0213  1.5413  0.03%  开放申购  开放赎回
2017-09-07  1.0210  1.5410  -0.64%  开放申购  开放赎回
2017-09-06  1.0276  1.5476  0.38%  开放申购  开放赎回
2017-09-05  1.0237  1.5437  -0.44%  开放申购  开放赎回
2017-09-04  1.0282  1.5482  0.08%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/10.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2017-01-06  1.4431  1.5131  -0.70%  开放申购  开放赎回
2017-01-05  1.4533  1.5233  -0.76%  开放申购  开放赎回
2017-01-04  1.4645  1.5345  0.43%  开放申购  开放赎回
2017-01-03  1.4582  1.5282  0.19%  开放申购  开放赎回
2016-12-31  1.4554  1.5254  -0.01%  开放申购  开放赎回
2016-12-30  1.4555  1.5255  -0.08%  开放申购  开放赎回
2016-12-29  1.4566  1.5266  -0.84%  开放申购  开放赎回
2016-12-28  1.4690  1.5390  -0.18%  开放申购  开放赎回
2016-12-27  1.4716  1.5416  1.79%  开放申购  开放赎回
2016-12-26  1.4457  1.5157  0.59%  开放申购  开放赎回
2016-12-23  1.4372  1.5072  -1.36%  开放申购  开放赎回
2016-12-22  1.4570  1.5270  0.17%  开放申购  开放赎回
2016-12-21  1.4545  1.5245  1.01%  开放申购  开放赎回
2016-12-20  1.4399  1.5099  0.15%  开放申购  开放赎回
2016-12-19  1.4377  1.5077  0.17%  开放申购  开放赎回
2016-12-16  1.4353  1.5053  0.57%  开放申购  开放赎回
2016-12-15  1.4272  1.4972  0.63%  开放申购  开放赎回
2016-12-14  1.4183  1.4883  -0.83%  开放申购  开放赎回
2016-12-13  1.4301  1.5001  -0.42%  开放申购  开放赎回
2016-12-12  1.4361  1.5061  -3.60%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/11.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2016-12-09  1.4898  1.5598  -0.60%  开放申购  开放赎回
2016-12-08  1.4988  1.5688  -0.33%  开放申购  开放赎回
2016-12-07  1.5038  1.5738  1.88%  开放申购  开放赎回
2016-12-06  1.4761  1.5461  0.16%  开放申购  开放赎回
2016-12-05  1.4737  1.5437  -1.31%  开放申购  开放赎回
2016-12-02  1.4932  1.5632  -1.36%  开放申购  开放赎回
2016-12-01  1.5138  1.5838  0.73%  开放申购  开放赎回
2016-11-30  1.5028  1.5728  -1.01%  开放申购  开放赎回
2016-11-29  1.5181  1.5881  -0.59%  开放申购  开放赎回
2016-11-28  1.5271  1.5971  0.48%  开放申购  开放赎回
2016-11-25  1.5198  1.5898  0.50%  开放申购  开放赎回
2016-11-24  1.5122  1.5822  -0.01%  开放申购  开放赎回
2016-11-23  1.5123  1.5823  -0.42%  开放申购  开放赎回
2016-11-22  1.5187  1.5887  0.21%  开放申购  开放赎回
2016-11-21  1.5155  1.5855  1.35%  开放申购  开放赎回
2016-11-18  1.4953  1.5653  -1.18%  开放申购  开放赎回
2016-11-17  1.5132  1.5832  0.33%  开放申购  开放赎回
2016-11-16  1.5082  1.5782  0.76%  开放申购  开放赎回
2016-11-15  1.4968  1.5668  -0.17%  开放申购  开放赎回
2016-11-14  1.4994  1.5694  0.29%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/13.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2016-10-14  1.5061  1.5761  -0.44%  开放申购  开放赎回
2016-10-13  1.5127  1.5827  0.61%  开放申购  开放赎回
2016-10-12  1.5036  1.5736  0.47%  开放申购  开放赎回
2016-10-11  1.4965  1.5665  0.65%  开放申购  开放赎回
2016-10-10  1.4868  1.5568  1.93%  开放申购  开放赎回
2016-09-30  1.4586  1.5286  0.22%  开放申购  开放赎回
2016-09-29  1.4554  1.5254  0.27%  开放申购  开放赎回
2016-09-28  1.4515  1.5215  -0.69%  开放申购  开放赎回
2016-09-27  1.4616  1.5316  -0.14%  开放申购  开放赎回
2016-09-26  1.4637  1.5337  -1.91%  开放申购  开放赎回
2016-09-23  1.4922  1.5622  -0.20%  开放申购  开放赎回
2016-09-22  1.4952  1.5652  0.61%  开放申购  开放赎回
2016-09-21  1.4862  1.5562  -0.31%  开放申购  开放赎回
2016-09-20  1.4908  1.5608  0.65%  开放申购  开放赎回
2016-09-19  1.4812  1.5512  1.14%  开放申购  开放赎回
2016-09-14  1.4645  1.5345  -1.06%  开放申购  开放赎回
2016-09-13  1.4802  1.5502  0.35%  开放申购  开放赎回
2016-09-12  1.4750  1.5450  -1.89%  开放申购  开放赎回
2016-09-09  1.5034  1.5734  -0.70%  开放申购  开放赎回
2016-09-08  1.5140  1.5840  0.29%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/14.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2016-09-07  1.5096  1.5796  -0.28%  开放申购  开放赎回
2016-09-06  1.5138  1.5838  1.69%  开放申购  开放赎回
2016-09-05  1.4887  1.5587  1.26%  开放申购  开放赎回
2016-09-02  1.4702  1.5402  -0.55%  开放申购  开放赎回
2016-09-01  1.4783  1.5483  -0.81%  开放申购  开放赎回
2016-08-31  1.4904  1.5604  0.48%  开放申购  开放赎回
2016-08-30  1.4833  1.5533  -0.15%  开放申购  开放赎回
2016-08-29  1.4855  1.5555  0.32%  开放申购  开放赎回
2016-08-26  1.4808  1.5508  0.86%  开放申购  开放赎回
2016-08-25  1.4682  1.5382  -0.08%  开放申购  开放赎回
2016-08-24  1.4694  1.5394  -0.06%  开放申购  开放赎回
2016-08-23  1.4703  1.5403  -0.24%  开放申购  开放赎回
2016-08-22  1.4739  1.5439  -0.35%  开放申购  开放赎回
2016-08-19  1.4791  1.5491  0.07%  开放申购  开放赎回
2016-08-18  1.4781  1.5481  -0.07%  开放申购  开放赎回
2016-08-17  1.4791  1.5491  0.33%  开放申购  开放赎回
2016-08-16  1.4742  1.5442  0.53%  开放申购  开放赎回
2016-08-15  1.4664  1.5364  1.88%  开放申购  开放赎回
2016-08-12  1.4393  1.5093  0.54%  开放申购  开放赎回
2016-08-11  1.4316  1.5016  -1.51%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/17.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2016-06-15  1.4408  1.5108  3.69%  开放申购  开放赎回
2016-06-14  1.3895  1.4595  1.23%  开放申购  开放赎回
2016-06-13  1.3726  1.4426  -5.44%  开放申购  开放赎回
2016-06-08  1.4516  1.5216  -0.18%  开放申购  开放赎回
2016-06-07  1.4542  1.5242  0.77%  开放申购  开放赎回
2016-06-06  1.4431  1.5131  1.19%  开放申购  开放赎回
2016-06-03  1.4261  1.4961  -0.29%  开放申购  开放赎回
2016-06-02  1.4302  1.5002  1.64%  开放申购  开放赎回
2016-06-01  1.4071  1.4771  1.35%  开放申购  开放赎回
2016-05-31  1.3884  1.4584  3.49%  开放申购  开放赎回
2016-05-30  1.3416  1.4116  -0.22%  开放申购  开放赎回
2016-05-27  1.3445  1.4145  0.22%  开放申购  开放赎回
2016-05-26  1.3415  1.4115  0.37%  开放申购  开放赎回
2016-05-25  1.3365  1.4065  -0.51%  开放申购  开放赎回
2016-05-24  1.3433  1.4133  -1.12%  开放申购  开放赎回
2016-05-23  1.3585  1.4285  1.92%  开放申购  开放赎回
2016-05-20  1.3329  1.4029  1.18%  开放申购  开放赎回
2016-05-19  1.3173  1.3873  0.15%  开放申购  开放赎回
2016-05-18  1.3153  1.3853  -2.82%  开放申购  开放赎回
2016-05-17  1.3535  1.4235  -0.43%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/18.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2016-05-16  1.3594  1.4294  1.80%  开放申购  开放赎回
2016-05-13  1.3354  1.4054  0.16%  开放申购  开放赎回
2016-05-12  1.3333  1.4033  0.59%  开放申购  开放赎回
2016-05-11  1.3255  1.3955  -1.85%  开放申购  开放赎回
2016-05-10  1.3505  1.4205  -1.30%  开放申购  开放赎回
2016-05-09  1.3683  1.4383  -2.65%  开放申购  开放赎回
2016-05-06  1.4056  1.4756  -2.87%  开放申购  开放赎回
2016-05-05  1.4471  1.5171  0.65%  开放申购  开放赎回
2016-05-04  1.4377  1.5077  0.76%  开放申购  开放赎回
2016-05-03  1.4268  1.4968  2.22%  开放申购  开放赎回
2016-04-29  1.3958  1.4658  0.27%  开放申购  开放赎回
2016-04-28  1.3920  1.4620  -0.06%  开放申购  开放赎回
2016-04-27  1.3928  1.4628  1.21%  开放申购  开放赎回
2016-04-26  1.3762  1.4462  1.20%  开放申购  开放赎回
2016-04-25  1.3599  1.4299  -0.21%  开放申购  开放赎回
2016-04-22  1.3627  1.4327  1.17%  开放申购  开放赎回
2016-04-21  1.3470  1.4170  -0.85%  开放申购  开放赎回
2016-04-20  1.3586  1.4286  -4.18%  开放申购  开放赎回
2016-04-19  1.4179  1.4879  -0.11%  开放申购  开放赎回
2016-04-18  1.4195  1.4895  -0.69%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/2.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2017-09-01  1.0274  1.5474  0.71%  开放申购  开放赎回
2017-08-31  1.0202  1.5402  0.40%  开放申购  开放赎回
2017-08-30  1.0161  1.5361  0.33%  开放申购  开放赎回
2017-08-29  1.0128  1.5328  -0.27%  开放申购  开放赎回
2017-08-28  1.0155  1.5355  1.85%  开放申购  开放赎回
2017-08-25  0.9971  1.5171  0.96%  开放申购  开放赎回
2017-08-24  0.9876  1.5076  -0.39%  开放申购  开放赎回
2017-08-23  0.9915  1.5115  -0.41%  开放申购  开放赎回
2017-08-22  0.9956  1.5156  -0.96%  开放申购  开放赎回
2017-08-21  1.0053  1.5253  0.69%  开放申购  开放赎回
2017-08-18  0.9984  1.5184  -0.79%  开放申购  开放赎回
2017-08-17  1.0063  1.5263  0.48%  开放申购  开放赎回
2017-08-16  1.0015  1.5215  0.71%  开放申购  开放赎回
2017-08-15  0.9944  1.5144  -0.13%  开放申购  开放赎回
2017-08-14  0.9957  1.5157  2.33%  开放申购  开放赎回
2017-08-11  0.9730  1.4930  -0.93%  开放申购  开放赎回
2017-08-10  0.9821  1.5021  -0.40%  开放申购  开放赎回
2017-08-09  0.9860  1.5060  0.27%  开放申购  开放赎回
2017-08-08  0.9833  1.5033  0.11%  开放申购  开放赎回
2017-08-07  0.9822  1.5022  0.47%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/21.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2016-02-18  1.3519  1.4219  0.13%  开放申购  开放赎回
2016-02-17  1.3502  1.4202  1.66%  开放申购  开放赎回
2016-02-16  1.3282  1.3982  4.43%  开放申购  开放赎回
2016-02-15  1.2718  1.3418  0.54%  开放申购  开放赎回
2016-02-05  1.2650  1.3350  -0.99%  开放申购  开放赎回
2016-02-04  1.2777  1.3477  2.13%  开放申购  开放赎回
2016-02-03  1.2510  1.3210  1.77%  开放申购  开放赎回
2016-02-02  1.2293  1.2993  4.08%  开放申购  开放赎回
2016-02-01  1.1811  1.2511  -1.53%  开放申购  开放赎回
2016-01-29  1.1995  1.2695  3.47%  开放申购  开放赎回
2016-01-28  1.1593  1.2293  -4.32%  开放申购  开放赎回
2016-01-27  1.2117  1.2817  -2.91%  开放申购  开放赎回
2016-01-26  1.2480  1.3180  -6.28%  开放申购  开放赎回
2016-01-25  1.3316  1.4016  0.86%  开放申购  开放赎回
2016-01-22  1.3202  1.3902  2.13%  开放申购  开放赎回
2016-01-21  1.2927  1.3627  -4.83%  开放申购  开放赎回
2016-01-20  1.3583  1.4283  -0.56%  开放申购  开放赎回
2016-01-19  1.3660  1.4360  4.05%  开放申购  开放赎回
2016-01-18  1.3128  1.3828  2.00%  开放申购  开放赎回
2016-01-15  1.2870  1.3570  -3.22%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/22.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2016-01-14  1.3298  1.3998  3.68%  开放申购  开放赎回
2016-01-13  1.2826  1.3526  -3.32%  开放申购  开放赎回
2016-01-12  1.3267  1.3967  2.88%  开放申购  开放赎回
2016-01-11  1.2896  1.3596  -5.15%  开放申购  开放赎回
2016-01-08  1.3596  1.4296  -0.41%  开放申购  开放赎回
2016-01-07  1.3652  1.4352  -5.40%  开放申购  开放赎回
2016-01-06  1.4432  1.5132  1.63%  开放申购  开放赎回
2016-01-05  1.4201  1.4901  -1.80%  开放申购  开放赎回
2016-01-04  1.4461  1.5161  -6.93%  开放申购  开放赎回
2015-12-31  1.5538  1.6238  -1.82%  开放申购  开放赎回
2015-12-30  1.5826  1.6526  0.97%  开放申购  开放赎回
2015-12-29  1.5674  1.6374  1.36%  开放申购  开放赎回
2015-12-28  1.5464  1.6164  -1.35%  开放申购  开放赎回
2015-12-25  1.5676  1.6376  1.10%  开放申购  开放赎回
2015-12-24  1.5505  1.6205  -0.88%  开放申购  开放赎回
2015-12-23  1.5643  1.6343  -2.21%  开放申购  开放赎回
2015-12-22  1.5996  1.6696  0.38%  开放申购  开放赎回
2015-12-21  1.5935  1.6635  0.23%  开放申购  开放赎回
2015-12-18  1.5899  1.6599  -0.74%  开放申购  开放赎回
2015-12-17  1.6017  1.6717  3.00%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/23.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2015-12-16  1.5550  1.6250  0.27%  开放申购  开放赎回
2015-12-15  1.5508  1.6208  1.48%  开放申购  开放赎回
2015-12-14  1.5282  1.5982  1.17%  开放申购  开放赎回
2015-12-11  1.5105  1.5805  -0.20%  开放申购  开放赎回
2015-12-10  1.5135  1.5835  -0.57%  开放申购  开放赎回
2015-12-09  1.5221  1.5921  0.77%  开放申购  开放赎回
2015-12-08  1.5105  1.5805  -1.33%  开放申购  开放赎回
2015-12-07  1.5308  1.6008  1.86%  开放申购  开放赎回
2015-12-04  1.5028  1.5728  -0.23%  开放申购  开放赎回
2015-12-03  1.5062  1.5762  2.01%  开放申购  开放赎回
2015-12-02  1.4765  1.5465  -1.70%  开放申购  开放赎回
2015-12-01  1.5020  1.5720  -1.86%  开放申购  开放赎回
2015-11-30  1.5305  1.6005  0.82%  开放申购  开放赎回
2015-11-27  1.5180  1.5880  -5.07%  开放申购  开放赎回
2015-11-26  1.5990  1.6690  -0.16%  开放申购  开放赎回
2015-11-25  1.6016  1.6716  2.57%  开放申购  开放赎回
2015-11-24  1.5615  1.6315  1.51%  开放申购  开放赎回
2015-11-23  1.5383  1.6083  -1.32%  开放申购  开放赎回
2015-11-20  1.5589  1.6289  1.66%  开放申购  开放赎回
2015-11-19  1.5335  1.6035  3.80%  开放申购  开放赎回
-------------------------------------------------------------------------------- /crawl_fund/htmls/details/580005/24.txt: -------------------------------------------------------------------------------- 1 |
净值日期  单位净值  累计净值  日增长率  申购状态  赎回状态  分红送配
2015-11-18  1.4774  1.5474  -1.85%  开放申购  开放赎回
2015-11-17  1.5053  1.5753  -1.14%  开放申购  开放赎回
2015-11-16  1.5226  1.5926  2.19%  开放申购  开放赎回
2015-11-13  1.4900  1.5600  -1.41%  开放申购  开放赎回
2015-11-12  1.5113  1.5813  -0.20%  开放申购  开放赎回
2015-11-11  1.5143  1.5843  1.33%  开放申购  开放赎回
2015-11-10  1.4944  1.5644  0.40%  开放申购  开放赎回
2015-11-09  1.4884  1.5584  2.25%  开放申购  开放赎回
2015-11-06  1.4557  1.5257  3.65%  开放申购  开放赎回
2015-11-05  1.4045  1.4745  -0.99%  开放申购  开放赎回
2015-11-04  1.4186  1.4886  4.55%  开放申购  开放赎回
2015-11-03  1.3569  1.4269  0.13%  开放申购  开放赎回
2015-11-02  1.3551  1.4251  -2.07%  开放申购  开放赎回
2015-10-30  1.3837  1.4537  1.82%  开放申购  开放赎回
2015-10-29  1.3590  1.4290  2.40%  开放申购  开放赎回
2015-10-28  1.3271  1.3971  -2.37%  开放申购  开放赎回
2015-10-27  1.3593  1.4293  0.72%  开放申购  开放赎回
2015-10-26  1.3496  1.4196  1.70%  开放申购  开放赎回
2015-10-23  1.3270  1.3970  2.84%  开放申购  开放赎回
2015-10-22  1.2903  1.3603  3.62%  开放申购  开放赎回
--------------------------------------------------------------------------------