├── setup.cfg
├── query
    ├── __init__.py
    └── movie.py
├── test
    ├── __init__.py
    ├── test_bookExport.py
    └── test_movieExport.py
├── utils
    ├── __init__.py
    └── logutil.py
├── MANIFEST.in
├── screenshot
    └── screenshot-output-result.png
├── requirements.txt
├── export_csv.py
├── setup.py
├── README.md
├── exporter
    ├── __init__.py
    ├── notes.py
    ├── status.py
    ├── book.py
    ├── music.py
    ├── movie.py
    └── __main__.py
├── .gitignore
├── douban-movie-export.user.js
└── douban-book-export.user.js


/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | 


--------------------------------------------------------------------------------
/query/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 


--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include screenshots/*


--------------------------------------------------------------------------------
/screenshot/screenshot-output-result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/einverne/douban-export/HEAD/screenshot/screenshot-output-result.png


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | beautifulsoup4==4.7.1
 2 | certifi==2019.6.16
 3 | chardet==3.0.4
 4 | idna==2.8
 5 | lxml==4.3.4
 6 | PySocks==1.7.0
 7 | requests==2.22.0
 8 | soupsieve==1.9.1
 9 | urllib3==1.25.3
10 | Click


--------------------------------------------------------------------------------
/export_csv.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import csv
 5 | 
 6 | from exporter.movie import MovieExport
 7 | 
 8 | # This is a simple demo
 9 | if __name__ == '__main__':
10 |     m = MovieExport("einverne")
11 |     with open('movie_export.csv', mode='w') as movie_file:
12 |         writer = csv.writer(movie_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
13 |         for wish in m.get_wish():
14 |             writer.writerow([wish.title, wish.url, wish.intro, wish.tags, wish.comment, wish.rating, wish.rating_date])


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import os
 5 | 
 6 | from setuptools import setup, find_packages
 7 | 
 8 | 
 9 | def read(fname):
10 |     return open(os.path.join(os.path.dirname(__file__), fname)).read()
11 | 
12 | 
13 | requirements = [
14 |     'Click',
15 |     'requests',
16 |     'bs4',
17 |     'lxml'
18 | ]
19 | 
20 | setup(
21 |     name="douban-export",
22 |     version="0.0.1",
23 |     author="Ein Verne",
24 |     author_email="git@einverne.info",
25 |     description="A tool to help export douban data ",
26 |     license="MIT",
27 |     keywords="douban, export, command, tools",
28 |     url="https://github.com/einverne/douban-export",
29 |     packages=find_packages(exclude=["test"]),
30 |     long_description=read('README.md'),
31 |     include_package_data=True,
32 |     install_requires=requirements,
33 |     entry_points={'console_scripts': ['douban=exporter.__main__:cli']},
34 | )
35 | 


--------------------------------------------------------------------------------
/utils/logutil.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | import logging
 4 | import logging.handlers
 5 | from threading import Lock
 6 | 
 7 | cache = {}
 8 | lock = Lock()
 9 | 
10 | 
11 | def get_logger(logger_name):
12 |     # logging.getLogger 获取单例，多次调用会加多个handler，重复写的问题
13 |     global cache
14 |     with lock:
15 |         if not cache.get(logger_name):
16 |             cache[logger_name] = _get_logger(logger_name)
17 |     return cache.get(logger_name)
18 | 
19 | 
20 | def _get_logger(logger_name):
21 |     logger = logging.getLogger(logger_name)
22 |     level = "DEBUG"
23 |     logger.setLevel(level)
24 |     logger.propagate = False  # disable dup celery log
25 |     ch = logging.StreamHandler()
26 |     formatter = logging.Formatter('[%(asctime)s][%(name)s][%(levelname)6s] [%(pathname)s:%(lineno)s - %(funcName)s] '
27 |                                   '%(message)s')
28 |     ch.setFormatter(formatter)
29 |     logger.addHandler(ch)
30 | 
31 |     return logger
32 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 豆瓣导出工具
 2 | 
 3 | 工具包含 Python 版本和 JavaScript 版本。
 4 | 
 5 | ## Python
 6 | Python 版本基于 Python 3.6.x ，其他版本暂未测试。
 7 | 
 8 | 主要实现:
 9 | 
10 | - 电影导出
11 | - 书籍导出
12 | - 音乐导出
13 | - 日记导出
14 | 
15 | ![result](screenshot/screenshot-output-result.png)
16 | 
17 | 关于豆瓣相册导出可以参考我 [这个](https://github.com/einverne/douban-dl) 项目。
18 | 
19 | ### 命令使用
20 | 
21 | 设置
22 | 
23 |     douban-export setup
24 |     
25 | 输入 uesr id，会将用户ID保存到 HOME 目录的 `~/.douban-export` 文件中，如果预先设置，一下的命令可以省略 `-u` 选项。
26 | 
27 | 导出电影
28 | 
29 |     douban-export movie -u einverne -t wish -o wish_movie.csv
30 | 
31 | 说明：
32 | 
33 | - `-t` 参数可以选择 `collect` 看过，`wish` 想看, `doing` 在看
34 | 
35 | 导出书籍
36 | 
37 |     douban-export book -u einverne -t wish -o wish_book.csv
38 |     
39 | 同理
40 | 
41 |     douban-export music -u einverne -t wish -o wish_music.csv
42 |     
43 | 
44 | ## JS
45 | 
46 | userscript 主要来自于
47 | 
48 | - douban-book-export.user.js
49 | - douban-movie-export.user.js
50 | 
51 | 分别来自于：
52 | 
53 | OpenUserJS
54 | 
55 | - <https://openuserjs.org/scripts/KiseXu/%E8%B1%86%E7%93%A3%E7%94%B5%E5%BD%B1%E5%AF%BC%E5%87%BA%E5%B7%A5%E5%85%B7/source>
56 | 
57 | DannyVim
58 | 
59 | - https://raw.githubusercontent.com/DannyVim/ToolsCollection/master/douban_book.js
60 | 
61 | 
62 | 
63 | ## reference
64 | 
65 | - <https://github.com/chishui/douban-movie>
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/exporter/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | import abc
 4 | import re
 5 | 
 6 | import requests
 7 | 
 8 | 
 9 | def r0(pattern, text):
10 |     m = re.search(pattern, text)
11 |     if m:
12 |         return m.group(0)
13 |     return ''
14 | 
15 | 
16 | class BaseReview:
17 |     def __init__(self):
18 |         self.title = ''
19 |         self.url = ''
20 |         self.id = ''
21 |         self.content = ''
22 |         self.publish_time = ''
23 |         self.useful_count = 0
24 |         self.useless_count = 0
25 |         self.total_count = 0
26 | 
27 |     @abc.abstractmethod
28 |     def parse(self, item):
29 |         raise NotImplementedError
30 | 
31 |     def update(self, raw_content):
32 |         self.content = raw_content['html']
33 |         if 'votes' in raw_content:
34 |             raw_votes = raw_content['votes']
35 |             if 'useful_count' in raw_votes:
36 |                 self.useful_count = raw_votes['useful_count']
37 |             if 'useless_count' in raw_votes:
38 |                 self.useless_count = raw_votes['useless_count']
39 |             if 'totalcount' in raw_votes:
40 |                 self.total_count = raw_votes['totalcount']
41 | 
42 |     def __str__(self):
43 |         s = []
44 |         for k in self.__dict__:
45 |             s.append("{key}={value}".format(key=k, value=self.__dict__.get(k)))
46 |         return ', '.join(s)
47 | 
48 |     def __repr__(self):
49 |         return self.__str__()
50 | 
51 | 
52 | COLLECT = 'collect'
53 | WISH = 'wish'
54 | DOING = 'do'
55 | 
56 | 
57 | class BaseExporter:
58 | 
59 |     def get_review_content(self, id):
60 |         url = 'https://www.douban.com/j/review/{}/full'.format(id)
61 |         r = requests.get(url, headers={
62 |             'Host': 'movie.douban.com',
63 |             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
64 |         })
65 |         return r.json()
66 | 


--------------------------------------------------------------------------------
/test/test_bookExport.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | import unittest
 4 | from unittest import TestCase
 5 | 
 6 | from exporter import *
 7 | from exporter.book import BookExport
 8 | from utils.logutil import get_logger
 9 | 
10 | log = get_logger(__name__)
11 | 
12 | 
13 | class TestBookExport(TestCase):
14 | 
15 |     def setUp(self):
16 |         self.exporter = BookExport("einverne")
17 | 
18 |     def test_get_books(self):
19 |         books = self.exporter.get_books(COLLECT)
20 |         for book in books:
21 |             log.debug(book)
22 |             self.assertIsNotNone(book, "book object fetch failed")
23 |             self.assertNotEqual(book.title, '', 'book title fetch failed')
24 |             break
25 | 
26 |     def test_get_read(self):
27 |         readed = self.exporter.get_read()
28 |         for b in readed:
29 |             log.debug(b)
30 |             self.assertIsNotNone(b, 'book object fetch failed')
31 |             self.assertNotEqual(b.title, '', 'book title fetch failed')
32 |             break
33 | 
34 |     def test_get_wish(self):
35 |         wish = self.exporter.get_wish()
36 |         for b in wish:
37 |             log.debug(b)
38 |             self.assertIsNotNone(b, 'book object fetch failed')
39 |             self.assertNotEqual(b.title, '', 'book title fetch failed')
40 |             break
41 | 
42 |     def test_get_reading(self):
43 |         reading = self.exporter.get_reading()
44 |         for b in reading:
45 |             log.debug(b)
46 |             self.assertIsNotNone(b, 'book object fetch failed')
47 |             self.assertNotEqual(b.title, '', 'book title fetch failed')
48 |             break
49 | 
50 |     # def test_get_reviews(self):
51 |     # readed = self.exporter.get_reviews()
52 |     # for b in readed:
53 |     #     log.debug(b)
54 |     #     self.assertIsNotNone(b, 'book object fetch failed')
55 |     #     self.assertNotEqual(b.title, '', 'book title fetch failed')
56 |     #     break
57 | 
58 | 
59 | if __name__ == '__main__':
60 |     unittest.main()
61 | 


--------------------------------------------------------------------------------
/test/test_movieExport.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | import unittest
 4 | from unittest import TestCase
 5 | 
 6 | from exporter import *
 7 | from exporter.movie import MovieExport
 8 | from utils.logutil import get_logger
 9 | 
10 | log = get_logger(__name__)
11 | 
12 | 
13 | class TestMovieExport(TestCase):
14 | 
15 |     def setUp(self):
16 |         self.exporter = MovieExport("einverne")
17 | 
18 |     def test_get_movies(self):
19 |         movies = self.exporter.get_movies(COLLECT)
20 |         for m in movies:
21 |             log.debug(m)
22 |             self.assertIsNotNone(m, 'content should not be none')
23 |             self.assertNotEqual(m.title, '', 'movie title fetch failed')
24 |             break
25 | 
26 |     def test_get_watched(self):
27 |         movies = self.exporter.get_watched()
28 |         for m in movies:
29 |             log.debug(m)
30 |             self.assertIsNotNone(m, 'content should not be none')
31 |             self.assertNotEqual(m.title, '', 'movie title fetch failed')
32 |             break
33 | 
34 |     def test_get_wish(self):
35 |         movies = self.exporter.get_wish()
36 |         for m in movies:
37 |             log.debug(m)
38 |             self.assertIsNotNone(m, 'content should not be none')
39 |             self.assertNotEqual(m.title, '', 'movie title fetch failed')
40 |             break
41 | 
42 |     def test_get_doing(self):
43 |         movies = self.exporter.get_doing()
44 |         for m in movies:
45 |             log.debug(m)
46 |             self.assertIsNotNone(m, 'content should not be none')
47 |             self.assertNotEqual(m.title, '', 'movie title fetch failed')
48 |             break
49 | 
50 |     def test_get_reviews(self):
51 |         movies = self.exporter.get_reviews()
52 |         for m in movies:
53 |             log.debug(m)
54 |             self.assertIsNotNone(m, 'content should not be none')
55 |             self.assertNotEqual(m.title, '', 'movie title fetch failed')
56 |             break
57 | 
58 |     # def test_get_doulist(self):
59 |     #     movies = self.exporter.get_doulist()
60 |     #     for m in movies:
61 |     #         log.debug(m)
62 |     #         self.assertIsNotNone(m, 'content should not be none')
63 |     #         self.assertNotEqual(m.title, '', 'movie title fetch failed')
64 |     #         break
65 | 
66 |     def test_get_review_content(self):
67 |         c = self.exporter.get_review_content('10124597')
68 |         log.debug(c)
69 |         self.assertIsNotNone(c)
70 | 
71 | 
72 | if __name__ == '__main__':
73 |     unittest.main()
74 | 


--------------------------------------------------------------------------------
/query/movie.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | import json
 4 | 
 5 | import urllib.parse
 6 | 
 7 | import requests
 8 | from bs4 import BeautifulSoup, NavigableString
 9 | 
10 | from utils.logutil import  log
11 | 
12 | SEARCH_URL = 'https://movie.douban.com/j/subject_suggest?q='
13 | PAGE_URL = 'https://movie.douban.com/subject/%s/'
14 | 
15 | 
16 | 
17 | 
18 | class Movie:
19 |     def __init__(self):
20 |         self.id = ''
21 |         self.title = ''
22 |         self.score = 0
23 |         self.director = ''
24 |         self.actor = ''
25 |         self.year = ''
26 |         self.sub_title = ''
27 | 
28 |     def __str__(self):
29 |         text = '===============   Douban Movie   ===============\n' + \
30 |                'Title: ' + self.title + '\n' + \
31 |                'Score: ' + str(self.score) + '\n' + \
32 |                'Year: ' + self.year + '\n' + \
33 |                'Director: ' + self.director + '\n' + \
34 |                'Actors: ' + self.actor + '\n' + \
35 |                '================================================'
36 |         return text.encode('utf-8')
37 | 
38 | 
39 | def search(query_word):
40 |     query_word = urllib.parse.quote(query_word)
41 |     url = SEARCH_URL + query_word
42 |     r = requests.get(url)
43 |     if r.status_code != 200:
44 |         return
45 | 
46 |     data = r.text.encode('utf-8')
47 |     items = json.loads(data)
48 |     if len(items) == 0:
49 |         return
50 |     movies = []
51 |     for item in items:
52 |         if item['type'] != 'movie':
53 |             continue
54 |         movie = Movie()
55 |         movie.id = item['id']
56 |         movie.title = item['title']
57 |         movie.year = item['year']
58 |         movie.sub_title = item['sub_title']
59 |         movies.append(movie)
60 |     return movies
61 | 
62 | 
63 | def parse(movie):
64 |     url = PAGE_URL % movie.id
65 |     log.debug(url)
66 |     r = requests.get(url)
67 |     soup = BeautifulSoup(r.text.encode('utf-8'), 'lxml')
68 |     movie.score = soup.find('strong', 'rating_num').text
69 |     info = soup.find('div', {'id': 'info'})
70 |     for linebreak in info.find_all('br'):
71 |         linebreak.extract()
72 |     for span in info.contents:
73 |         if isinstance(span, NavigableString):
74 |             continue
75 |         if span.contents[0]:
76 |             if span.contents[0].string == u'导演':
77 |                 if isinstance(span.contents[1], NavigableString):
78 |                     movie.director = span.contents[2].text
79 |             elif span.contents[0].string == u'主演':
80 |                 if isinstance(span.contents[1], NavigableString):
81 |                     movie.actor = span.contents[2].text
82 |     print(movie)
83 | 
84 | 
85 | def get_movie(text):
86 |     movies = search(text)
87 |     if movies and len(movies):
88 |         parse(movies[0])
89 |     else:
90 |         print('cound not find movie: ' + text)
91 | 
92 | 
93 | if __name__ == '__main__':
94 |     get_movie("zootopia")


--------------------------------------------------------------------------------
/exporter/notes.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | import requests
  4 | from bs4 import BeautifulSoup
  5 | 
  6 | from exporter import r0
  7 | 
  8 | 
  9 | class NoteInfo:
 10 |     def __init__(self):
 11 |         self.title = ''
 12 |         self.url = ''
 13 |         self.id = ''
 14 |         self.content = ''
 15 |         self.publish_time = ''
 16 | 
 17 |     @classmethod
 18 |     def parse(cls, item):
 19 |         instance = cls()
 20 |         instance.title = item.select('h3')[0].text.strip()
 21 |         instance.url = item['data-url']
 22 |         instance.id = r0(r'\d+', instance.url)
 23 |         instance.publish_time = item.select('.pub-date')[0].text.strip()
 24 |         return instance
 25 | 
 26 |     def __str__(self):
 27 |         s = []
 28 |         for k in self.__dict__:
 29 |             s.append("{key}={value}".format(key=k, value=self.__dict__.get(k)))
 30 |         return ', '.join(s)
 31 | 
 32 |     def __repr__(self):
 33 |         return self.__str__()
 34 | 
 35 | 
 36 | class NoteExport:
 37 |     """
 38 |     遍历网页的问题可能被豆瓣反爬虫机制伤及，如果能够直接从接口 dump 数据就比较快
 39 |     """
 40 |     BASE_URL = 'https://www.douban.com/people/{}'
 41 | 
 42 |     def __init__(self, nickname):
 43 |         self.user_url = NoteExport.BASE_URL.format(nickname)
 44 | 
 45 |     def get_notes(self):
 46 |         """
 47 |         Get one's all notes
 48 | 
 49 |         https://music.douban.com/people/einverne/reviews?start=0
 50 |         https://music.douban.com/j/review/10000057/fullinfo?show_works=False
 51 |         """
 52 |         start = 0
 53 |         while True:
 54 |             note_list = self.__get_notes_list(start)
 55 |             step = len(note_list)
 56 |             if step == 0:
 57 |                 break
 58 |             for note in note_list:
 59 |                 note_info = NoteInfo.parse(note)
 60 |                 content = self.get_note_content(note_info.id)
 61 |                 bs = BeautifulSoup(content, 'html.parser')
 62 |                 note_info.content = bs.text
 63 |                 yield note_info
 64 |             if step < 10:
 65 |                 break
 66 |             start += step
 67 | 
 68 |     def __get_notes_list(self, start=0):
 69 |         url = self.user_url + '/notes'
 70 |         r = requests.get(url, params={
 71 |             'type': 'note',
 72 |             'start': start
 73 |         }, headers={
 74 |             'Host': 'www.douban.com',
 75 |             'Referer': self.user_url + '/notes?start=0&type=note',
 76 |             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
 77 |         })
 78 |         soup = BeautifulSoup(r.text, 'html.parser')
 79 |         return soup.select('.note-container')
 80 | 
 81 |     def get_note_content(self, id='721551646'):
 82 |         """
 83 |         Get all note content by pass ID, return the content str
 84 |             https://www.douban.com/j/note/{id}/full
 85 |         :param id:
 86 |         :return:
 87 |         """
 88 |         url = "https://www.douban.com/j/note/{}/full".format(id)
 89 |         r = requests.get(url, headers={
 90 |             'Accept': 'application/json, text/javascript, */*; q=0.01',
 91 |             'Accept-Encoding': 'gzip',
 92 |             'Host': 'www.douban.com',
 93 |             'Referer': self.user_url,
 94 |             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
 95 |         })
 96 |         return r.json()['html']
 97 | 
 98 | 
 99 | if __name__ == '__main__':
100 |     m = NoteExport('einverne')
101 |     for note in m.get_notes():
102 |         print(note)
103 | 


--------------------------------------------------------------------------------
/exporter/status.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | import re
  4 | 
  5 | import requests
  6 | from bs4 import BeautifulSoup
  7 | 
  8 | 
  9 | class MusicInfo:
 10 |     def __init__(self):
 11 |         self.title = ''
 12 |         self.url = ''
 13 |         self.intro = ''
 14 |         self.tags = ''
 15 |         self.comment = ''
 16 |         self.rating_date = ''
 17 |         self.rating = ''
 18 | 
 19 |     @classmethod
 20 |     def parse(cls, item):
 21 |         instance = cls()
 22 |         instance.title = item.select('.title a')[0].text.strip()
 23 |         instance.url = item.select('.title a')[0]['href']
 24 |         instance.intro = item.select('.intro')[0].text.strip()
 25 |         instance.rating_date = item.select('.date')[0].text.strip()
 26 |         if len(item.select('.date span')) > 0:
 27 |             instance.rating = item.select('.date span')[0]['class'][0][6]
 28 |         if len(item.select('.tags')) > 0:
 29 |             instance.tags = item.select('.tags')[0].text
 30 |         if len(item.select('.comment')) > 0:
 31 |             instance.comment = item.select('.comment')[0].text.strip()
 32 |         return instance
 33 | 
 34 |     def __str__(self):
 35 |         s = []
 36 |         for k in self.__dict__:
 37 |             s.append("{key}={value}".format(key=k, value=self.__dict__.get(k)))
 38 |         return ', '.join(s)
 39 | 
 40 |     def __repr__(self):
 41 |         return self.__str__()
 42 | 
 43 | 
 44 | class StatusExport:
 45 |     """
 46 |     遍历网页的问题可能被豆瓣反爬虫机制伤及，如果能够直接从接口 dump 数据就比较快
 47 |     """
 48 |     BASE_URL = 'https://www.douban.com/people/{}/statuses'
 49 | 
 50 |     def __init__(self, nickname):
 51 |         self.user_url = StatusExport.BASE_URL.format(nickname)
 52 | 
 53 |     def get_status(self):
 54 |         """
 55 |         https://music.douban.com/people/einverne/collect
 56 |         第 1 页 https://music.douban.com/people/einverne/collect?start=0&sort=time&rating=all&filter=all&mode=grid
 57 |         第 2 页 https://music.douban.com/people/einverne/collect?start=15&sort=time&rating=all&filter=all&mode=grid
 58 |         第 3 页 https://music.douban.com/people/einverne/collect?start=30&sort=time&rating=all&filter=all&mode=grid
 59 |         ...
 60 |         https://music.douban.com/people/einverne/collect?start=60&sort=time&rating=all&filter=all&mode=grid
 61 |         """
 62 |         start = 1
 63 |         while True:
 64 |             item_list = self.__get_status_by_page(start)
 65 |             step = len(item_list)
 66 |             if step == 0:
 67 |                 break
 68 |             for item in item_list:
 69 |                 yield MusicInfo.parse(item)
 70 |             if step < 30:
 71 |                 break
 72 |             start += step
 73 | 
 74 |     def __get_status_by_page(self, page_num=1):
 75 |         url = self.user_url
 76 |         r = requests.get(url, params={
 77 |             'p': page_num,
 78 |         }, headers={
 79 |             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
 80 |             'Accept-Encoding': 'gzip, deflate, br',
 81 |             'Referer': url,
 82 |             'Host': 'www.douban.com'
 83 |         })
 84 |         # res = brotli.decompress(r.content)
 85 |         soup = BeautifulSoup(r.text, 'html.parser')
 86 |         item_list = soup.select('.new-status status-wrapper')
 87 |         return item_list
 88 | 
 89 |     def get_watched(self):
 90 |         return self.get_status()
 91 | 
 92 |     def get_wish(self):
 93 |         """https://music.douban.com/people/einverne/wish"""
 94 |         return self.get_status(self.WISH)
 95 | 
 96 |     def get_doing(self):
 97 |         """https://music.douban.com/people/einverne/do"""
 98 |         return self.get_status(self.DOING)
 99 | 
100 | 
101 | 
102 | if __name__ == '__main__':
103 |     m = StatusExport('einverne')
104 |     l = m.get_status()
105 |     for item in l:
106 |         print(item)
107 |     wishes = m.get_wish()
108 |     for wish in wishes:
109 |         print(wish)
110 |     reviews = m.get_reviews()
111 |     for r in reviews:
112 |         print(r)
113 | 


--------------------------------------------------------------------------------
/exporter/book.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from bs4 import BeautifulSoup
  5 | 
  6 | from exporter import *
  7 | 
  8 | 
  9 | class BookInfo(object):
 10 |     def __init__(self):
 11 |         self.title = ''
 12 |         self.url = ''
 13 |         self.intro = ''
 14 |         self.tags = ''
 15 |         self.comment = ''
 16 |         self.rating = ''
 17 |         self.rating_date = ''
 18 | 
 19 |     @classmethod
 20 |     def parse(cls, item):
 21 |         instance = cls()
 22 |         instance.title = item.select('.title a')[0].text.strip()
 23 |         instance.url = item.select('.title a')[0]['href']
 24 |         instance.intro = item.select('.intro')[0].text.strip()
 25 |         instance.rating_date = item.select('.date')[0].text.strip()
 26 |         if len(item.select('.date span')) > 0:
 27 |             instance.rating = item.select('.date span')[0]['class'][0][6]
 28 |         if len(item.select('.tags')) > 0:
 29 |             instance.tags = item.select('.tags')[0].text
 30 |         if len(item.select('.comment')) > 0:
 31 |             instance.comment = item.select('.comment')[0].text.strip()
 32 |         return instance
 33 | 
 34 |     def __str__(self):
 35 |         s = []
 36 |         for k in self.__dict__:
 37 |             s.append("{key}={value}".format(key=k, value=self.__dict__.get(k)))
 38 |         return ', '.join(s)
 39 | 
 40 |     def __repr__(self):
 41 |         return self.__str__()
 42 | 
 43 | 
 44 | class BookReview(BaseReview):
 45 | 
 46 |     def parse(self, item):
 47 |         self.title = item.select('h3')[0].text.strip()
 48 |         self.url = item.select('h3 > a')[0]['href']
 49 |         self.id = r0(r'\d+', self.url)
 50 |         return self
 51 | 
 52 | 
 53 | class BookExport(BaseExporter):
 54 |     BASE_URL = 'https://book.douban.com/people/{}'
 55 | 
 56 |     def __init__(self, nickname):
 57 |         self.user_url = BookExport.BASE_URL.format(nickname)
 58 | 
 59 |     def get_books(self, path=COLLECT):
 60 |         start = 0
 61 |         while True:
 62 |             item_list = self.__get_book_list(path, start)
 63 |             step = len(item_list)
 64 |             if step == 0:
 65 |                 break
 66 |             for item in item_list:
 67 |                 yield BookInfo.parse(item)
 68 |             if step < 30:
 69 |                 break
 70 |             start += step
 71 | 
 72 |     def __get_book_list(self, path=COLLECT, start=0):
 73 |         url = self.user_url + '/' + path
 74 |         r = requests.get(url, params={
 75 |             'start': start,
 76 |             'sort': 'time',
 77 |             'rating': 'all',
 78 |             'filter': 'all',
 79 |             'mode': 'list'
 80 |         }, headers={
 81 |             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
 82 |             'Referer': url + '?start=0&sort=time&rating=all&filter=all&mode=list',
 83 |             'Host': 'book.douban.com'
 84 |         })
 85 |         soup = BeautifulSoup(r.text, 'html.parser')
 86 |         return soup.select('.item')
 87 | 
 88 |     def get_read(self):
 89 |         return self.get_books(COLLECT)
 90 | 
 91 |     def get_wish(self):
 92 |         return self.get_books(WISH)
 93 | 
 94 |     def get_reading(self):
 95 |         return self.get_books(DOING)
 96 | 
 97 |     def get_reviews(self):
 98 |         start = 0
 99 |         while True:
100 |             reviews_list = self.__get_reviews_list(start)
101 |             step = len(reviews_list)
102 |             if step == 0:
103 |                 break
104 |             for review in reviews_list:
105 |                 r = BookReview()
106 |                 r.parse(review)
107 |                 content = self.get_review_content(r.id)
108 |                 r.update(content)
109 |                 yield r
110 |             start += step
111 | 
112 |     def __get_reviews_list(self, start=0):
113 |         url = self.user_url + '/reviews'
114 |         r = requests.get(url, params={
115 |             'start': start
116 |         }, headers={
117 |             'Host': 'book.douban.com',
118 |             'Referer': url,
119 |             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36'
120 |         })
121 |         soup = BeautifulSoup(r.text, 'html.parser')
122 |         return soup.select('.tlst')
123 | 
124 | 
125 | if __name__ == '__main__':
126 |     b = BookExport('einverne')
127 |     for review in b.get_reviews():
128 |         print(review)
129 |     # for book in b.get_books():
130 |     #     print(book.title)
131 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | *.csv
  2 | *.log
  3 | 
  4 | ### Intellij+all ###
  5 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
  6 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
  7 | 
  8 | # User-specific stuff
  9 | .idea/**/workspace.xml
 10 | .idea/**/tasks.xml
 11 | .idea/**/usage.statistics.xml
 12 | .idea/**/dictionaries
 13 | .idea/**/shelf
 14 | 
 15 | # Generated files
 16 | .idea/**/contentModel.xml
 17 | 
 18 | # Sensitive or high-churn files
 19 | .idea/**/dataSources/
 20 | .idea/**/dataSources.ids
 21 | .idea/**/dataSources.local.xml
 22 | .idea/**/sqlDataSources.xml
 23 | .idea/**/dynamic.xml
 24 | .idea/**/uiDesigner.xml
 25 | .idea/**/dbnavigator.xml
 26 | 
 27 | # Gradle
 28 | .idea/**/gradle.xml
 29 | .idea/**/libraries
 30 | 
 31 | # Gradle and Maven with auto-import
 32 | # When using Gradle or Maven with auto-import, you should exclude module files,
 33 | # since they will be recreated, and may cause churn.  Uncomment if using
 34 | # auto-import.
 35 | # .idea/modules.xml
 36 | # .idea/*.iml
 37 | # .idea/modules
 38 | 
 39 | # CMake
 40 | cmake-build-*/
 41 | 
 42 | # Mongo Explorer plugin
 43 | .idea/**/mongoSettings.xml
 44 | 
 45 | # File-based project format
 46 | *.iws
 47 | 
 48 | # IntelliJ
 49 | out/
 50 | 
 51 | # mpeltonen/sbt-idea plugin
 52 | .idea_modules/
 53 | 
 54 | # JIRA plugin
 55 | atlassian-ide-plugin.xml
 56 | 
 57 | # Cursive Clojure plugin
 58 | .idea/replstate.xml
 59 | 
 60 | # Crashlytics plugin (for Android Studio and IntelliJ)
 61 | com_crashlytics_export_strings.xml
 62 | crashlytics.properties
 63 | crashlytics-build.properties
 64 | fabric.properties
 65 | 
 66 | # Editor-based Rest Client
 67 | .idea/httpRequests
 68 | 
 69 | # Android studio 3.1+ serialized cache file
 70 | .idea/caches/build_file_checksums.ser
 71 | 
 72 | # JetBrains templates
 73 | **___jb_tmp___
 74 | 
 75 | ### Intellij+all Patch ###
 76 | # Ignores the whole .idea folder and all .iml files
 77 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360
 78 | 
 79 | .idea/
 80 | 
 81 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
 82 | 
 83 | *.iml
 84 | modules.xml
 85 | .idea/misc.xml
 86 | *.ipr
 87 | 
 88 | # Sonarlint plugin
 89 | .idea/sonarlint
 90 | 
 91 | ### Python ###
 92 | # Byte-compiled / optimized / DLL files
 93 | __pycache__/
 94 | *.py[cod]
 95 | *$py.class
 96 | 
 97 | # C extensions
 98 | *.so
 99 | 
100 | # Distribution / packaging
101 | .Python
102 | build/
103 | develop-eggs/
104 | dist/
105 | downloads/
106 | eggs/
107 | .eggs/
108 | lib/
109 | lib64/
110 | parts/
111 | sdist/
112 | var/
113 | wheels/
114 | pip-wheel-metadata/
115 | share/python-wheels/
116 | *.egg-info/
117 | .installed.cfg
118 | *.egg
119 | MANIFEST
120 | 
121 | # PyInstaller
122 | #  Usually these files are written by a python script from a template
123 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
124 | *.manifest
125 | *.spec
126 | 
127 | # Installer logs
128 | pip-log.txt
129 | pip-delete-this-directory.txt
130 | 
131 | # Unit test / coverage reports
132 | htmlcov/
133 | .tox/
134 | .nox/
135 | .coverage
136 | .coverage.*
137 | .cache
138 | nosetests.xml
139 | coverage.xml
140 | *.cover
141 | .hypothesis/
142 | .pytest_cache/
143 | 
144 | # Translations
145 | *.mo
146 | *.pot
147 | 
148 | # Django stuff:
149 | *.log
150 | local_settings.py
151 | db.sqlite3
152 | 
153 | # Flask stuff:
154 | instance/
155 | .webassets-cache
156 | 
157 | # Scrapy stuff:
158 | .scrapy
159 | 
160 | # Sphinx documentation
161 | docs/_build/
162 | 
163 | # PyBuilder
164 | target/
165 | 
166 | # Jupyter Notebook
167 | .ipynb_checkpoints
168 | 
169 | # IPython
170 | profile_default/
171 | ipython_config.py
172 | 
173 | # pyenv
174 | .python-version
175 | 
176 | # pipenv
177 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
178 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
179 | #   having no cross-platform support, pipenv may install dependencies that don’t work, or not
180 | #   install all needed dependencies.
181 | #Pipfile.lock
182 | 
183 | # celery beat schedule file
184 | celerybeat-schedule
185 | 
186 | # SageMath parsed files
187 | *.sage.py
188 | 
189 | # Environments
190 | .env
191 | .venv
192 | env/
193 | venv/
194 | ENV/
195 | env.bak/
196 | venv.bak/
197 | 
198 | # Spyder project settings
199 | .spyderproject
200 | .spyproject
201 | 
202 | # Rope project settings
203 | .ropeproject
204 | 
205 | # mkdocs documentation
206 | /site
207 | 
208 | # mypy
209 | .mypy_cache/
210 | .dmypy.json
211 | dmypy.json
212 | 
213 | # Pyre type checker
214 | .pyre/
215 | 


--------------------------------------------------------------------------------
/exporter/music.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from bs4 import BeautifulSoup
  5 | 
  6 | from exporter import *
  7 | 
  8 | 
  9 | class MusicInfo:
 10 |     def __init__(self):
 11 |         self.title = ''
 12 |         self.url = ''
 13 |         self.intro = ''
 14 |         self.tags = ''
 15 |         self.comment = ''
 16 |         self.rating_date = ''
 17 |         self.rating = ''
 18 | 
 19 |     @classmethod
 20 |     def parse(cls, item):
 21 |         instance = cls()
 22 |         instance.title = item.select('.title a')[0].text.strip()
 23 |         instance.url = item.select('.title a')[0]['href']
 24 |         instance.intro = item.select('.intro')[0].text.strip()
 25 |         instance.rating_date = item.select('.date')[0].text.strip()
 26 |         if len(item.select('.date span')) > 0:
 27 |             instance.rating = item.select('.date span')[0]['class'][0][6]
 28 |         if len(item.select('.tags')) > 0:
 29 |             instance.tags = item.select('.tags')[0].text
 30 |         if len(item.select('.comment')) > 0:
 31 |             instance.comment = item.select('.comment')[0].text.strip()
 32 |         return instance
 33 | 
 34 |     def __str__(self):
 35 |         s = []
 36 |         for k in self.__dict__:
 37 |             s.append("{key}={value}".format(key=k, value=self.__dict__.get(k)))
 38 |         return ', '.join(s)
 39 | 
 40 |     def __repr__(self):
 41 |         return self.__str__()
 42 | 
 43 | 
 44 | class MusicReview(BaseReview):
 45 | 
 46 |     def parse(self, item):
 47 |         self.title = item.select('h3')[0].text.strip()
 48 |         self.url = item.select('h3 > a')[0]['href']
 49 |         self.id = r0(r'\d+', self.url)
 50 |         self.publish_time = item.select('.review-create-time')[0].text.strip()
 51 |         return self
 52 | 
 53 | 
 54 | class MusicExport(BaseExporter):
 55 |     """
 56 |     遍历网页的问题可能被豆瓣反爬虫机制伤及，如果能够直接从接口 dump 数据就比较快
 57 |     """
 58 |     BASE_URL = 'https://music.douban.com/people/{}'
 59 | 
 60 |     def __init__(self, nickname):
 61 |         self.user_url = MusicExport.BASE_URL.format(nickname)
 62 | 
 63 |     def get_musics(self, path=COLLECT):
 64 |         """
 65 |         https://music.douban.com/people/einverne/collect
 66 |         第 1 页 https://music.douban.com/people/einverne/collect?start=0&sort=time&rating=all&filter=all&mode=grid
 67 |         第 2 页 https://music.douban.com/people/einverne/collect?start=15&sort=time&rating=all&filter=all&mode=grid
 68 |         第 3 页 https://music.douban.com/people/einverne/collect?start=30&sort=time&rating=all&filter=all&mode=grid
 69 |         ...
 70 |         https://music.douban.com/people/einverne/collect?start=60&sort=time&rating=all&filter=all&mode=grid
 71 |         """
 72 |         start = 0
 73 |         while True:
 74 |             item_list = self.__get_music_list(path, start)
 75 |             step = len(item_list)
 76 |             if step == 0:
 77 |                 break
 78 |             for item in item_list:
 79 |                 yield MusicInfo.parse(item)
 80 |             if step < 30:
 81 |                 break
 82 |             start += step
 83 | 
 84 |     def __get_music_list(self, path='collect', start=0):
 85 |         url = self.user_url + '/' + path
 86 |         r = requests.get(url, params={
 87 |             'start': start,
 88 |             'sort': 'time',
 89 |             'rating': 'all',
 90 |             'filter': 'all',
 91 |             'mode': 'list'
 92 |         }, headers={
 93 |             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
 94 |             'Accept-Encoding': 'gzip, deflate, br',
 95 |             'Referer': url + '?start=0&sort=time&rating=all&filter=all&mode=grid',
 96 |             'Host': 'music.douban.com'
 97 |         })
 98 |         # res = brotli.decompress(r.content)
 99 |         soup = BeautifulSoup(r.text, 'html.parser')
100 |         item_list = soup.select('.item')
101 |         return item_list
102 | 
103 |     def get_listened(self):
104 |         return self.get_musics()
105 | 
106 |     def get_wish(self):
107 |         """https://music.douban.com/people/einverne/wish"""
108 |         return self.get_musics(WISH)
109 | 
110 |     def get_doing(self):
111 |         """https://music.douban.com/people/einverne/do"""
112 |         return self.get_musics(DOING)
113 | 
114 |     def get_reviews(self):
115 |         """
116 |         Get one's all music reviews
117 | 
118 |         https://music.douban.com/people/einverne/reviews?start=0
119 |         https://music.douban.com/j/review/10000057/fullinfo?show_works=False
120 |         """
121 |         start = 0
122 |         while True:
123 |             reviews_list = self.__get_reviews_list(start)
124 |             step = len(reviews_list)
125 |             if step == 0:
126 |                 break
127 |             for review in reviews_list:
128 |                 r = MusicReview()
129 |                 r.parse(review)
130 |                 content = self.get_review_content(r.id)
131 |                 r.update(content)
132 |                 yield r
133 |             start += step
134 | 
135 |     def __get_reviews_list(self, start=0):
136 |         url = self.user_url + '/reviews'
137 |         r = requests.get(url, params={
138 |             'start': start
139 |         }, headers={
140 |             'Host': 'music.douban.com',
141 |             'Referer': self.user_url + '/reviews?start=10',
142 |             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
143 |         })
144 |         soup = BeautifulSoup(r.text, 'html.parser')
145 |         return soup.select('.comment-item')
146 | 
147 |     def get_doulist(self):
148 |         """
149 |         创建的豆列 https://www.douban.com/people/einverne/doulists/all?start=20&tag=
150 |         关注的豆列 https://www.douban.com/people/einverne/doulists/collect?start=20
151 |         """
152 |         pass
153 | 
154 | 
155 | if __name__ == '__main__':
156 |     m = MusicExport('einverne')
157 |     # l = m.get_musics()
158 |     # for item in l:
159 |     #     print(item)
160 |     # wishes = m.get_wish()
161 |     # for wish in wishes:
162 |     #     print(wish)
163 |     reviews = m.get_reviews()
164 |     for r in reviews:
165 |         print(r)
166 | 


--------------------------------------------------------------------------------
/exporter/movie.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | from bs4 import BeautifulSoup
  5 | 
  6 | from exporter import *
  7 | 
  8 | """
  9 | <li class="item" id="list27617348">
 10 | <div class="item-show">
 11 | <div class="title">
 12 | <a href="https://movie.douban.com/subject/27617348/">
 13 |                 门锁 / 도어락
 14 |             </a>
 15 | </div>
 16 | <div class="date">
 17 | <span class="rating4-t"></span>  
 18 |         2019-03-16
 19 |         </div>
 20 | </div>
 21 | <div class="hide" id="grid27617348">
 22 | <div class="grid-date">
 23 | <span class="intro">2018-12-05(韩国) / 孔晓振 / 金叡园 / 金圣武 / 赵福来 / 李家燮 / 李天熙 / 金在华 / 金光奎 / 韩智恩 / 车烨 / 裴明真 / 郑钟宇 / 李钟求 / 尹钟硕 / 李相熹 / 韩国 / 李权 / 102分钟 / 门锁 / 悬疑 / 惊悚 / 朴正熙 Jeong-hee Park / 李权 Kwon Lee / 阿尔贝托·马里尼 Alberto Marini / 韩语</span><br/>
 24 | <span class="tags">标签: 犯罪 惊悚悬疑 剧情 悬疑 韩国 恐怖 2018 惊悚 惊悚片</span>
 25 | </div>
 26 | </div>
 27 | </li>
 28 | """
 29 | 
 30 | 
 31 | class MovieInfo:
 32 |     def __init__(self):
 33 |         self.title = ''
 34 |         self.url = ''
 35 |         self.intro = ''
 36 |         self.tags = ''
 37 |         self.comment = ''
 38 |         self.rating_date = ''
 39 |         self.rating = ''
 40 | 
 41 |     @classmethod
 42 |     def parse(cls, item):
 43 |         instance = cls()
 44 |         instance.title = item.select('.title a')[0].text.strip()
 45 |         instance.url = item.select('.title a')[0]['href']
 46 |         instance.intro = item.select('.intro')[0].text.strip()
 47 |         instance.rating_date = item.select('.date')[0].text.strip()
 48 |         if len(item.select('.date span')) > 0:
 49 |             instance.rating = item.select('.date span')[0]['class'][0][6]
 50 |         if len(item.select('.tags')) > 0:
 51 |             instance.tags = item.select('.tags')[0].text
 52 |         if len(item.select('.comment')) > 0:
 53 |             instance.comment = item.select('.comment')[0].text.strip()
 54 |         return instance
 55 | 
 56 |     def __str__(self):
 57 |         s = []
 58 |         for k in self.__dict__:
 59 |             s.append("{key}={value}".format(key=k, value=self.__dict__.get(k)))
 60 |         return ', '.join(s)
 61 | 
 62 |     def __repr__(self):
 63 |         return self.__str__()
 64 | 
 65 | 
 66 | class MovieReview(BaseReview):
 67 | 
 68 |     def parse(self, item):
 69 |         self.title = item.select('h3')[0].text.strip()
 70 |         self.url = item.select('h3 > a')[0]['href']
 71 |         self.id = r0(r'\d+', self.url)
 72 |         return self
 73 | 
 74 | 
 75 | class MovieExport(BaseExporter):
 76 |     """
 77 |     遍历网页的问题可能被豆瓣反爬虫机制伤及，如果能够直接从接口 dump 数据就比较快
 78 |     """
 79 |     BASE_URL = 'https://movie.douban.com/people/{}'
 80 | 
 81 |     def __init__(self, nickname):
 82 |         self.user_url = MovieExport.BASE_URL.format(nickname)
 83 | 
 84 |     def get_movies(self, path=COLLECT):
 85 |         """
 86 |         https://movie.douban.com/people/einverne/collect
 87 |         第 1 页 https://movie.douban.com/people/einverne/collect?start=0&sort=time&rating=all&filter=all&mode=grid
 88 |         第 2 页 https://movie.douban.com/people/einverne/collect?start=15&sort=time&rating=all&filter=all&mode=grid
 89 |         第 3 页 https://movie.douban.com/people/einverne/collect?start=30&sort=time&rating=all&filter=all&mode=grid
 90 |         ...
 91 |         https://movie.douban.com/people/einverne/collect?start=60&sort=time&rating=all&filter=all&mode=grid
 92 |         """
 93 |         start = 0
 94 |         while True:
 95 |             item_list = self.__get_movie_list(path, start)
 96 |             step = len(item_list)
 97 |             if step == 0:
 98 |                 break
 99 |             for item in item_list:
100 |                 yield MovieInfo.parse(item)
101 |             if step < 30:
102 |                 break
103 |             start += step
104 | 
105 |     def __get_movie_list(self, path='collect', start=0):
106 |         url = self.user_url + '/' + path
107 |         r = requests.get(url, params={
108 |             'start': start,
109 |             'sort': 'time',
110 |             'rating': 'all',
111 |             'filter': 'all',
112 |             'mode': 'list'
113 |         }, headers={
114 |             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
115 |             'Accept-Encoding': 'gzip, deflate, br',
116 |             'Referer': url + '?start=0&sort=time&rating=all&filter=all&mode=grid',
117 |             'Host': 'movie.douban.com'
118 |         })
119 |         # res = brotli.decompress(r.content)
120 |         soup = BeautifulSoup(r.text, 'html.parser')
121 |         item_list = soup.select('.item')
122 |         return item_list
123 | 
124 |     def get_watched(self):
125 |         return self.get_movies()
126 | 
127 |     def get_wish(self):
128 |         """https://movie.douban.com/people/einverne/wish"""
129 |         return self.get_movies(WISH)
130 | 
131 |     def get_doing(self):
132 |         """https://movie.douban.com/people/einverne/do"""
133 |         return self.get_movies(DOING)
134 | 
135 |     def get_reviews(self):
136 |         """
137 |         Get one's all movie reviews
138 | 
139 |         https://movie.douban.com/people/einverne/reviews?start=0
140 |         https://movie.douban.com/j/review/10000057/fullinfo?show_works=False
141 |         """
142 |         start = 0
143 |         while True:
144 |             reviews_list = self.__get_reviews_list(start)
145 |             step = len(reviews_list)
146 |             if step == 0:
147 |                 break
148 |             for review in reviews_list:
149 |                 r = MovieReview()
150 |                 r.parse(review)
151 |                 raw_content = self.get_review_content(r.id)
152 |                 r.update(raw_content)
153 |                 yield r
154 |             start += step
155 | 
156 |     def __get_reviews_list(self, start=0):
157 |         url = self.user_url + '/reviews'
158 |         r = requests.get(url, params={
159 |             'start': start
160 |         }, headers={
161 |             'Host': 'movie.douban.com',
162 |             'Referer': self.user_url + '/reviews?start=10',
163 |             'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
164 |         })
165 |         soup = BeautifulSoup(r.text, 'html.parser')
166 |         return soup.select('.tlst')
167 | 
168 |     def get_doulist(self):
169 |         """
170 |         创建的豆列 https://www.douban.com/people/einverne/doulists/all?start=20&tag=
171 |         关注的豆列 https://www.douban.com/people/einverne/doulists/collect?start=20
172 |         """
173 |         pass
174 | 
175 | 
176 | if __name__ == '__main__':
177 |     m = MovieExport('einverne')
178 |     # l = m.get_movies()
179 |     # for item in l:
180 |     #     print(item)
181 |     # wishes = m.get_wish()
182 |     # for wish in wishes:
183 |     #     print(wish)
184 |     reviews = m.get_reviews()
185 |     for r in reviews:
186 |         print(r)
187 | 


--------------------------------------------------------------------------------
/exporter/__main__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | import codecs
  5 | import csv
  6 | import os
  7 | from configparser import ConfigParser
  8 | 
  9 | import click
 10 | 
 11 | from exporter.book import BookExport
 12 | from exporter.movie import MovieExport
 13 | from exporter.music import MusicExport
 14 | from exporter.notes import NoteExport
 15 | 
 16 | CONFIG_PATH = os.path.join(os.environ.get('HOME'), '.douban-export')
 17 | 
 18 | 
 19 | def read_config():
 20 |     config = ConfigParser()
 21 |     if os.path.exists(CONFIG_PATH):
 22 |         config.read(CONFIG_PATH)
 23 |     return config
 24 | 
 25 | 
 26 | @click.group(context_settings=dict(help_option_names=['-h', '--help']))
 27 | def cli():
 28 |     pass
 29 | 
 30 | 
 31 | def save_movie(l, writer):
 32 |     for m in l:
 33 |         click.echo(m.title)
 34 |         writer.writerow([
 35 |             m.title,
 36 |             m.url,
 37 |             m.intro,
 38 |             m.tags,
 39 |             m.comment,
 40 |             m.rating_date,
 41 |             m.rating
 42 |         ])
 43 | 
 44 | 
 45 | def save_book(l, writer):
 46 |     for b in l:
 47 |         click.echo(b.title)
 48 |         writer.writerow([
 49 |             b.title,
 50 |             b.url,
 51 |             b.intro,
 52 |             b.tags,
 53 |             b.comment,
 54 |             b.rating_date,
 55 |             b.rating
 56 |         ])
 57 | 
 58 | 
 59 | def save_music(l, writer):
 60 |     for music in l:
 61 |         click.echo(music.title)
 62 |         writer.writerow([
 63 |             music.title,
 64 |             music.url,
 65 |             music.intro,
 66 |             music.tags,
 67 |             music.comment,
 68 |             music.rating_date,
 69 |             music.rating
 70 |         ])
 71 | 
 72 | 
 73 | @cli.command()
 74 | @click.option('-u', '--userid', required=False, help='user id')
 75 | @click.option('-t', '--type', required=False,
 76 |               type=click.Choice(['collect', 'wish', 'doing']),
 77 |               default='collect',
 78 |               help='type of list, collect, wish, doing')
 79 | @click.option('-o', '--outfile', help='output filename')
 80 | def movie(userid, type, outfile):
 81 |     if not userid:
 82 |         config = read_config()
 83 |         if 'auth' in config and 'username' in config['auth']:
 84 |             userid = config['auth']['username']
 85 |         else:
 86 |             click.echo("run setup first or pass -u parameter")
 87 |             return
 88 |     movie_exporter = MovieExport(userid)
 89 |     fout = codecs.open(outfile, mode='w', encoding='utf-8')
 90 |     writer = csv.writer(fout, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
 91 |     if type == 'collect':
 92 |         save_movie(movie_exporter.get_watched(), writer)
 93 |     elif type == 'wish':
 94 |         save_movie(movie_exporter.get_wish(), writer)
 95 |     elif type == 'doing':
 96 |         save_movie(movie_exporter.get_doing(), writer)
 97 |     fout.close()
 98 | 
 99 | 
100 | @cli.command()
101 | @click.option('-u', '--userid', required=False, help='user id')
102 | @click.option('-t', '--type', required=False,
103 |               type=click.Choice(['collect', 'wish', 'doing']),
104 |               default='collect',
105 |               help='type of list, collect, wish, doing')
106 | @click.option('-o', '--outfile', help='output filename')
107 | def book(userid, type, outfile):
108 |     if not userid:
109 |         config = read_config()
110 |         if 'auth' in config and 'username' in config['auth']:
111 |             userid = config['auth']['username']
112 |         else:
113 |             click.echo('run setup first or pass -u parameter')
114 |             return
115 |     exporter = BookExport(userid)
116 |     fout = codecs.open(outfile, mode='w', encoding='utf-8')
117 |     writer = csv.writer(fout, delimiter=',', quotechar='"',
118 |                         quoting=csv.QUOTE_MINIMAL)
119 |     if type == 'collect':
120 |         save_book(exporter.get_read(), writer)
121 |     elif type == 'wish':
122 |         save_book(exporter.get_wish(), writer)
123 |     elif type == 'doing':
124 |         save_book(exporter.get_reading(), writer)
125 |     fout.close()
126 | 
127 | 
128 | @cli.command()
129 | @click.option('-u', '--userid', required=False, help='user id')
130 | @click.option('-t', '--type', required=False,
131 |               type=click.Choice(['collect', 'wish', 'doing']),
132 |               default='collect',
133 |               help='type of list, collect, wish, doing')
134 | @click.option('-o', '--outfile', help='output filename')
135 | def music(userid, type, outfile):
136 |     if not userid:
137 |         config = read_config()
138 |         if 'auth' in config and 'username' in config['auth']:
139 |             userid = config['auth']['username']
140 |         else:
141 |             click.echo('run setup first or pass -u parameter')
142 |             return
143 |     exporter = MusicExport(userid)
144 |     fout = codecs.open(outfile, mode='w', encoding='utf-8')
145 |     writer = csv.writer(fout, delimiter=',', quotechar='"',
146 |                         quoting=csv.QUOTE_MINIMAL)
147 |     if type == 'collect':
148 |         save_music(exporter.get_listened(), writer)
149 |     elif type == 'wish':
150 |         save_music(exporter.get_wish(), writer)
151 |     elif type == 'doing':
152 |         save_music(exporter.get_doing(), writer)
153 |     fout.close()
154 | 
155 | 
156 | def save_note(notes, writer):
157 |     for note in notes:
158 |         writer.writerow([
159 |             note.title,
160 |             note.url,
161 |             note.id,
162 |             note.content,
163 |             note.publish_time
164 |         ])
165 | 
166 | 
167 | @cli.command()
168 | @click.option('-u', '--userid', required=False, help='user id')
169 | @click.option('-o', '--outfile', help='output filename')
170 | def note(userid, outfile):
171 |     if not userid:
172 |         config = read_config()
173 |         if 'auth' in config and 'username' in config['auth']:
174 |             userid = config['auth']['username']
175 |         else:
176 |             click.echo('run setup first or pass -u parameter')
177 |             return
178 |     exporter = NoteExport(userid)
179 |     fout = codecs.open(outfile, mode='w', encoding='utf-8')
180 |     writer = csv.writer(fout, delimiter=',', quotechar='"',
181 |                         quoting=csv.QUOTE_MINIMAL)
182 |     save_note(exporter.get_notes(), writer)
183 |     fout.close()
184 | 
185 | 
186 | @cli.command()
187 | def setup():
188 |     """set up username"""
189 |     config = read_config()
190 |     if 'auth' in config and 'username' in config['auth']:
191 |         click.echo("username already setup: " + config['auth']['username'])
192 |         return
193 |     username = input("UserId: ").strip()
194 |     config['auth'] = {'username': username}
195 |     with codecs.open(CONFIG_PATH, mode='w', encoding='utf-8') as fconfig:
196 |         config.write(fconfig)
197 | 
198 | 
199 | if __name__ == '__main__':
200 |     cli()
201 | 


--------------------------------------------------------------------------------
/douban-movie-export.user.js:
--------------------------------------------------------------------------------
  1 | // ==UserScript==
  2 | // @name         豆瓣电影导出工具
  3 | // @namespace    https://kisexu.com/
  4 | // @version      0.1
  5 | // @description  将豆瓣已看电影导出为csv文件。启用本脚本，进入豆瓣个人页面后，在『我看』部分会有一链接『导出看过电影』，点击即可。无需登录，支持导出任意用户已看电影。
  6 | // @author       KiseXu
  7 | // @copyright 2018, KiseXu (https://kisexu.com)
  8 | // @license MIT
  9 | // @match        https://movie.douban.com/people/*/collect*
 10 | // @match        https://www.douban.com/people/*
 11 | // @require      https://unpkg.com/dexie@latest/dist/dexie.js
 12 | // @grant        none
 13 | // ==/UserScript==
 14 | 
 15 | // ==OpenUserJs==
 16 | // @author KiseXu
 17 | // ==/OpenUserJs==
 18 | 
 19 | (function() {
 20 |     'use strict';
 21 | 
 22 |     // 页面触发部分
 23 |     if (location.href.indexOf('//www.douban.com/') > -1) {
 24 |         // 加入导出按钮
 25 |         var people = location.href.slice(location.href.indexOf('/people') + 8, -1);
 26 |         var export_link = 'https://movie.douban.com/people/' + people + '/collect?start=0&sort=time&rating=all&filter=all&mode=list&export=1';
 27 |         $('#movie .pl a:last').after('&nbsp;·&nbsp;<a href="'+export_link+'">导出看过电影</a>')
 28 |     }
 29 | 
 30 |     if (location.href.indexOf('//movie.douban.com/') > -1 && location.href.indexOf('export=1') > -1) {
 31 |         // 开始导出
 32 |         getPage();
 33 |     }
 34 | 
 35 | 
 36 |     // 获取当前页数据
 37 |     function getCurrentPageList() {
 38 |         var items = [];
 39 | 
 40 |         $('li.item').each(function(index) {
 41 |             items[index] = {
 42 |                 title: $(this).find('a').text().replace(/修改删除/, '').trim(),
 43 |                 rating: ($(this).find('.date span').attr('class')) ? $(this).find('.date span').attr('class').slice(6, 7) : '',
 44 |                 date: $(this).find('.date').text().trim(),
 45 |                 link: $(this).find('.title a').attr('href').trim(),
 46 |             };
 47 |         });
 48 | 
 49 |         return items;
 50 |     }
 51 | 
 52 |     // 采集当前页数据，保存到indexedDB
 53 |     function getPage() {
 54 |         const db = new Dexie('db_export');
 55 |         db.version(1).stores({
 56 |             items: `++id, title, rating, date, link`
 57 |         });
 58 | 
 59 |         var items = getCurrentPageList();
 60 |         db.items.bulkAdd(items).then (function(){
 61 |             console.log('保存成功');
 62 |             // 获取下一页链接
 63 |             var next_link = $('span.next a').attr('href');
 64 |             if (next_link) {
 65 |                 next_link = next_link + '&export=1';
 66 |                 window.location.href = next_link;
 67 |             } else {
 68 |                 exportAll()
 69 |             }
 70 |         }).catch(function(error) {
 71 |             console.log("Ooops: " + error);
 72 |         });
 73 | 
 74 |     }
 75 | 
 76 |     // 导出所有数据到CSV
 77 |     function exportAll() {
 78 |         const db = new Dexie('db_export');
 79 |         db.version(1).stores({
 80 |             items: `++id, title, rating, date, link`
 81 |         });
 82 |         db.items.orderBy('date').toArray().then(function(all){
 83 |             all = all.map(function(item,index,array){
 84 |                 delete item.id;
 85 |                 return item;
 86 |             })
 87 | 
 88 |             JSonToCSV.setDataConver({
 89 |                 data: all,
 90 |                 fileName: 'movie',
 91 |                 columns: {
 92 |                     title: ['片名', '个人评分', '打分日期', '影片链接'],
 93 |                     key: ['title', 'rating', 'date', 'link']
 94 |                 }
 95 |             });
 96 |             db.delete();
 97 |         });
 98 |     }
 99 | 
100 |     // 导出CSV函数
101 |     // https://github.com/liqingzheng/pc/blob/master/JsonExportToCSV.js
102 |     var JSonToCSV = {
103 |     /*
104 |      * obj是一个对象，其中包含有：
105 |      * ## data 是导出的具体数据
106 |      * ## fileName 是导出时保存的文件名称 是string格式
107 |      * ## showLabel 表示是否显示表头 默认显示 是布尔格式
108 |      * ## columns 是表头对象，且title和key必须一一对应，包含有
109 |           title:[], // 表头展示的文字
110 |           key:[], // 获取数据的Key
111 |           formatter: function() // 自定义设置当前数据的 传入(key, value)
112 |      */
113 |     setDataConver: function(obj) {
114 |       var bw = this.browser();
115 |       if(bw['ie'] < 9) return; // IE9以下的
116 |       var data = obj['data'],
117 |           ShowLabel = typeof obj['showLabel'] === 'undefined' ? true : obj['showLabel'],
118 |           fileName = (obj['fileName'] || 'UserExport') + '.csv',
119 |           columns = obj['columns'] || {
120 |               title: [],
121 |               key: [],
122 |               formatter: undefined
123 |           };
124 |       ShowLabel = typeof ShowLabel === 'undefined' ? true : ShowLabel;
125 |       var row = "", CSV = '', key;
126 |       // 如果要现实表头文字
127 |       if (ShowLabel) {
128 |           // 如果有传入自定义的表头文字
129 |           if (columns.title.length) {
130 |               columns.title.map(function(n) {
131 |                   row += n + ',';
132 |               });
133 |           } else {
134 |               // 如果没有，就直接取数据第一条的对象的属性
135 |               for (key in data[0]) row += key + ',';
136 |           }
137 |           row = row.slice(0, -1); // 删除最后一个,号，即a,b, => a,b
138 |           CSV += row + '\r\n'; // 添加换行符号
139 |       }
140 |       // 具体的数据处理
141 |       data.map(function(n) {
142 |           row = '';
143 |           // 如果存在自定义key值
144 |           if (columns.key.length) {
145 |               columns.key.map(function(m) {
146 |                   row += '"' + (typeof columns.formatter === 'function' ? columns.formatter(m, n[m]) || n[m] : n[m]) + '",';
147 |               });
148 |           } else {
149 |               for (key in n) {
150 |                   row += '"' + (typeof columns.formatter === 'function' ? columns.formatter(key, n[key]) || n[key] : n[key]) + '",';
151 |               }
152 |           }
153 |           row.slice(0, row.length - 1); // 删除最后一个,
154 |           CSV += row + '\r\n'; // 添加换行符号
155 |       });
156 |       if(!CSV) return;
157 |       this.SaveAs(fileName, CSV);
158 |     },
159 |     SaveAs: function(fileName, csvData) {
160 |       var bw = this.browser();
161 |       if(!bw['edge'] || !bw['ie']) {
162 |         var alink = document.createElement("a");
163 |         alink.id = "linkDwnldLink";
164 |         alink.href = this.getDownloadUrl(csvData);
165 |         document.body.appendChild(alink);
166 |         var linkDom = document.getElementById('linkDwnldLink');
167 |         linkDom.setAttribute('download', fileName);
168 |         linkDom.click();
169 |         document.body.removeChild(linkDom);
170 |       }
171 |       else if(bw['ie'] >= 10 || bw['edge'] == 'edge') {
172 |         var _utf = "\uFEFF";
173 |         var _csvData = new Blob([_utf + csvData], {
174 |             type: 'text/csv'
175 |         });
176 |         navigator.msSaveBlob(_csvData, fileName);
177 |       }
178 |       else {
179 |         var oWin = window.top.open("about:blank", "_blank");
180 |         oWin.document.write('sep=,\r\n' + csvData);
181 |         oWin.document.close();
182 |         oWin.document.execCommand('SaveAs', true, fileName);
183 |         oWin.close();
184 |       }
185 |     },
186 |     getDownloadUrl: function(csvData) {
187 |       var _utf = "\uFEFF"; // 为了使Excel以utf-8的编码模式，同时也是解决中文乱码的问题
188 |       if (window.Blob && window.URL && window.URL.createObjectURL) {
189 |           csvData = new Blob([_utf + csvData], {
190 |               type: 'text/csv'
191 |           });
192 |           return URL.createObjectURL(csvData);
193 |       }
194 |       // return 'data:attachment/csv;charset=utf-8,' + _utf + encodeURIComponent(csvData);
195 |     },
196 |     browser: function() {
197 |       var Sys = {};
198 |       var ua = navigator.userAgent.toLowerCase();
199 |       var s;
200 |       (s = ua.indexOf('edge') !== - 1 ? Sys.edge = 'edge' : ua.match(/rv:([\d.]+)\) like gecko/)) ? Sys.ie = s[1]:
201 |           (s = ua.match(/msie ([\d.]+)/)) ? Sys.ie = s[1] :
202 |           (s = ua.match(/firefox\/([\d.]+)/)) ? Sys.firefox = s[1] :
203 |           (s = ua.match(/chrome\/([\d.]+)/)) ? Sys.chrome = s[1] :
204 |           (s = ua.match(/opera.([\d.]+)/)) ? Sys.opera = s[1] :
205 |           (s = ua.match(/version\/([\d.]+).*safari/)) ? Sys.safari = s[1] : 0;
206 |       return Sys;
207 |     }
208 |   };
209 | 
210 | })();


--------------------------------------------------------------------------------
/douban-book-export.user.js:
--------------------------------------------------------------------------------
  1 | // ==UserScript==
  2 | // @name         豆瓣读书&电影导出工具
  3 | // @namespace    https://kisexu.com/
  4 | // @version      0.1
  5 | // @description  原描述为：“将豆瓣已看电影导出为csv文件。启用本脚本，进入豆瓣个人页面后，在『我看』部分会有一链接『导出看过电影』，点击即可。无需登录，支持导出任意用户已看电影。”本代码仅仅是将其添加了豆瓣读书的导出。
  6 | // @author       KiseXu
  7 | // @copyright 2018, KiseXu (https://kisexu.com)
  8 | // @license MIT
  9 | // @match        https://book.douban.com/people/*/collect*
 10 | // @match        https://movie.douban.com/people/*/collect*
 11 | // @match        https://www.douban.com/people/*
 12 | // @require      https://unpkg.com/dexie@latest/dist/dexie.js
 13 | // @grant        none
 14 | // ==/UserScript==
 15 | 
 16 | // ==OpenUserJs==
 17 | // @author KiseXu
 18 | // ==/OpenUserJs==
 19 | 
 20 | (function() {
 21 |     'use strict';
 22 | 
 23 |     // 页面触发部分
 24 |     if (location.href.indexOf('//www.douban.com/') > -1) {
 25 |         // 加入导出按钮
 26 |         var people = location.href.slice(location.href.indexOf('/people') + 8, -1);
 27 |         var export_book_link = 'https://book.douban.com/people/' + people + '/collect?start=0&sort=time&rating=all&filter=all&mode=list&export=1';
 28 |         $('#book .pl a:last').after('&nbsp;·&nbsp;<a href="'+export_book_link+'">导出读过的图书</a>')
 29 |         var export_movie_link = 'https://movie.douban.com/people/' + people + '/collect?start=0&sort=time&rating=all&filter=all&mode=list&export=1';
 30 |         $('#movie .pl a:last').after('&nbsp;·&nbsp;<a href="'+export_movie_link+'">导出看过的电影</a>')
 31 |     }
 32 | 
 33 |     if (location.href.indexOf('//book.douban.com/') > -1 && location.href.indexOf('export=1') > -1) {
 34 |         // 开始导出
 35 |         getPage();
 36 |     }
 37 | 
 38 |     if (location.href.indexOf('//movie.douban.com/') > -1 && location.href.indexOf('export=1') > -1) {
 39 |         // 开始导出
 40 |         getPage();
 41 |     }
 42 | 
 43 | 
 44 |     // 获取当前页数据
 45 |     function getCurrentPageList() {
 46 |         var items = [];
 47 | 
 48 |         $('li.item').each(function(index) {
 49 |             items[index] = {
 50 |                 title: $(this).find('a').text().replace(/修改删除/, '').replace(/> 加入购书单/,'').trim(),
 51 |                 rating: ($(this).find('.date span').attr('class')) ? $(this).find('.date span').attr('class').slice(6, 7) : '',
 52 |                 date: $(this).find('.date').text().trim(),
 53 |                 link: $(this).find('.title a').attr('href').trim(),
 54 |                 comment:$(this).find('.comment').text().trim(),
 55 |             };
 56 |         });
 57 | 
 58 |         return items;
 59 |     }
 60 | 
 61 |     // 采集当前页数据，保存到indexedDB
 62 |     function getPage() {
 63 |         const db = new Dexie('db_export');
 64 |         db.version(1).stores({
 65 |             items: `++id, title, rating, date, link,comment`
 66 |         });
 67 | 
 68 |         var items = getCurrentPageList();
 69 |         db.items.bulkAdd(items).then (function(){
 70 |             console.log('保存成功');
 71 |             // 获取下一页链接
 72 |             var next_link = $('span.next a').attr('href');
 73 |             if (next_link) {
 74 |                 next_link = next_link + '&export=1';
 75 |                 window.location.href = next_link;
 76 |             } else {
 77 |                 exportAll()
 78 |             }
 79 |         }).catch(function(error) {
 80 |             console.log("Ooops: " + error);
 81 |         });
 82 | 
 83 |     }
 84 | 
 85 |     // 导出所有数据到CSV
 86 |     function exportAll() {
 87 |         const db = new Dexie('db_export');
 88 |         db.version(1).stores({
 89 |             items: `++id, title, rating, date, link,comment`
 90 |         });
 91 |         db.items.orderBy('date').toArray().then(function(all){
 92 |             all = all.map(function(item,index,array){
 93 |                 delete item.id;
 94 |                 return item;
 95 |             })
 96 | 
 97 |             JSonToCSV.setDataConver({
 98 |                 data: all,
 99 |                 fileName: 'Book_Movie',
100 |                 columns: {
101 |                     title: ['标题', '个人评分', '打分日期', '条目链接','评论'],
102 |                     key: ['title', 'rating', 'date', 'link','comment']
103 |                 }
104 |             });
105 |             db.delete();
106 |         });
107 |     }
108 | 
109 |     // 导出CSV函数
110 |     // https://github.com/liqingzheng/pc/blob/master/JsonExportToCSV.js
111 |     var JSonToCSV = {
112 |     /*
113 |      * obj是一个对象，其中包含有：
114 |      * ## data 是导出的具体数据
115 |      * ## fileName 是导出时保存的文件名称 是string格式
116 |      * ## showLabel 表示是否显示表头 默认显示 是布尔格式
117 |      * ## columns 是表头对象，且title和key必须一一对应，包含有
118 |           title:[], // 表头展示的文字
119 |           key:[], // 获取数据的Key
120 |           formatter: function() // 自定义设置当前数据的 传入(key, value)
121 |      */
122 |     setDataConver: function(obj) {
123 |       var bw = this.browser();
124 |       if(bw['ie'] < 9) return; // IE9以下的
125 |       var data = obj['data'],
126 |           ShowLabel = typeof obj['showLabel'] === 'undefined' ? true : obj['showLabel'],
127 |           fileName = (obj['fileName'] || 'UserExport') + '.csv',
128 |           columns = obj['columns'] || {
129 |               title: [],
130 |               key: [],
131 |               formatter: undefined
132 |           };
133 |       ShowLabel = typeof ShowLabel === 'undefined' ? true : ShowLabel;
134 |       var row = "", CSV = '', key;
135 |       // 如果要现实表头文字
136 |       if (ShowLabel) {
137 |           // 如果有传入自定义的表头文字
138 |           if (columns.title.length) {
139 |               columns.title.map(function(n) {
140 |                   row += n + ',';
141 |               });
142 |           } else {
143 |               // 如果没有，就直接取数据第一条的对象的属性
144 |               for (key in data[0]) row += key + ',';
145 |           }
146 |           row = row.slice(0, -1); // 删除最后一个,号，即a,b, => a,b
147 |           CSV += row + '\r\n'; // 添加换行符号
148 |       }
149 |       // 具体的数据处理
150 |       data.map(function(n) {
151 |           row = '';
152 |           // 如果存在自定义key值
153 |           if (columns.key.length) {
154 |               columns.key.map(function(m) {
155 |                   row += '"' + (typeof columns.formatter === 'function' ? columns.formatter(m, n[m]) || n[m] : n[m]) + '",';
156 |               });
157 |           } else {
158 |               for (key in n) {
159 |                   row += '"' + (typeof columns.formatter === 'function' ? columns.formatter(key, n[key]) || n[key] : n[key]) + '",';
160 |               }
161 |           }
162 |           row.slice(0, row.length - 1); // 删除最后一个,
163 |           CSV += row + '\r\n'; // 添加换行符号
164 |       });
165 |       if(!CSV) return;
166 |       this.SaveAs(fileName, CSV);
167 |     },
168 |     SaveAs: function(fileName, csvData) {
169 |       var bw = this.browser();
170 |       if(!bw['edge'] || !bw['ie']) {
171 |         var alink = document.createElement("a");
172 |         alink.id = "linkDwnldLink";
173 |         alink.href = this.getDownloadUrl(csvData);
174 |         document.body.appendChild(alink);
175 |         var linkDom = document.getElementById('linkDwnldLink');
176 |         linkDom.setAttribute('download', fileName);
177 |         linkDom.click();
178 |         document.body.removeChild(linkDom);
179 |       }
180 |       else if(bw['ie'] >= 10 || bw['edge'] == 'edge') {
181 |         var _utf = "\uFEFF";
182 |         var _csvData = new Blob([_utf + csvData], {
183 |             type: 'text/csv'
184 |         });
185 |         navigator.msSaveBlob(_csvData, fileName);
186 |       }
187 |       else {
188 |         var oWin = window.top.open("about:blank", "_blank");
189 |         oWin.document.write('sep=,\r\n' + csvData);
190 |         oWin.document.close();
191 |         oWin.document.execCommand('SaveAs', true, fileName);
192 |         oWin.close();
193 |       }
194 |     },
195 |     getDownloadUrl: function(csvData) {
196 |       var _utf = "\uFEFF"; // 为了使Excel以utf-8的编码模式，同时也是解决中文乱码的问题
197 |       if (window.Blob && window.URL && window.URL.createObjectURL) {
198 |           csvData = new Blob([_utf + csvData], {
199 |               type: 'text/csv'
200 |           });
201 |           return URL.createObjectURL(csvData);
202 |       }
203 |       // return 'data:attachment/csv;charset=utf-8,' + _utf + encodeURIComponent(csvData);
204 |     },
205 |     browser: function() {
206 |       var Sys = {};
207 |       var ua = navigator.userAgent.toLowerCase();
208 |       var s;
209 |       (s = ua.indexOf('edge') !== - 1 ? Sys.edge = 'edge' : ua.match(/rv:([\d.]+)\) like gecko/)) ? Sys.ie = s[1]:
210 |           (s = ua.match(/msie ([\d.]+)/)) ? Sys.ie = s[1] :
211 |           (s = ua.match(/firefox\/([\d.]+)/)) ? Sys.firefox = s[1] :
212 |           (s = ua.match(/chrome\/([\d.]+)/)) ? Sys.chrome = s[1] :
213 |           (s = ua.match(/opera.([\d.]+)/)) ? Sys.opera = s[1] :
214 |           (s = ua.match(/version\/([\d.]+).*safari/)) ? Sys.safari = s[1] : 0;
215 |       return Sys;
216 |     }
217 |   };
218 | 
219 | })();
220 | 


--------------------------------------------------------------------------------