├── lib
    ├── __init__.py
    ├── web
    │   ├── contrib
    │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── python23.py
    │   ├── test.py
    │   ├── wsgi.py
    │   └── wsgiserver
    │   │   └── ssl_builtin.py
    ├── bs4
    │   └── tests
    │   │   ├── __init__.py
    │   │   ├── test_htmlparser.py
    │   │   ├── test_docs.py
    │   │   └── test_lxml.py
    ├── dateutil
    │   ├── parser.py
    │   ├── zoneinfo
    │   │   ├── zoneinfo-2010g.tar.gz
    │   │   └── __init__.py
    │   ├── __init__.py
    │   └── easter.py
    ├── calibre
    │   ├── ebooks
    │   │   ├── oeb
    │   │   │   ├── __init__.py
    │   │   │   └── transforms
    │   │   │   │   └── __init__.py
    │   │   ├── unihandecode
    │   │   │   ├── pykakasi
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── jisyo.py
    │   │   │   │   ├── k2a.py
    │   │   │   │   ├── j2h.py
    │   │   │   │   └── kakasi.py
    │   │   │   ├── vndecoder.py
    │   │   │   ├── krdecoder.py
    │   │   │   ├── jadecoder.py
    │   │   │   └── __init__.py
    │   │   ├── compression
    │   │   │   ├── __init__.py
    │   │   │   └── palmdoc.py
    │   │   ├── mobi
    │   │   │   ├── reader
    │   │   │   │   └── __init__.py
    │   │   │   ├── writer8
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── cleanup.py
    │   │   │   │   └── header.py
    │   │   │   ├── __init__.py
    │   │   │   ├── writer2
    │   │   │   │   └── __init__.py
    │   │   │   └── tweak.py
    │   │   ├── conversion
    │   │   │   ├── plugins
    │   │   │   │   └── __init__.py
    │   │   │   ├── __init__.py
    │   │   │   ├── oeboutput.py
    │   │   │   └── config.py
    │   │   └── tweak.py
    │   ├── library
    │   │   └── __init__.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── config_base.py
    │   │   ├── resources.py
    │   │   ├── mreplace.py
    │   │   ├── cleantext.py
    │   │   ├── img.py
    │   │   └── titlecase.py
    │   ├── constants.py
    │   └── startup.py
    ├── cssutils
    │   ├── scripts
    │   │   ├── __init__.py
    │   │   ├── cssparse.py
    │   │   ├── csscapture.py
    │   │   └── csscombine.py
    │   ├── stylesheets
    │   │   ├── __init__.py
    │   │   └── stylesheetlist.py
    │   ├── codec.py
    │   ├── settings.py
    │   ├── _fetch.py
    │   ├── css
    │   │   ├── __init__.py
    │   │   ├── cssrulelist.py
    │   │   └── csscomment.py
    │   └── _fetchgae.py
    ├── readability
    │   ├── __init__.py
    │   ├── debug.py
    │   ├── cleaners.py
    │   └── encoding.py
    ├── cssselect
    │   └── __init__.py
    ├── memcachestore.py
    ├── chardet
    │   ├── compat.py
    │   ├── __init__.py
    │   ├── constants.py
    │   ├── euctwprober.py
    │   ├── euckrprober.py
    │   ├── gb2312prober.py
    │   ├── big5prober.py
    │   ├── cp949prober.py
    │   ├── charsetprober.py
    │   ├── mbcsgroupprober.py
    │   ├── codingstatemachine.py
    │   ├── chardetect.py
    │   ├── utf8prober.py
    │   ├── escprober.py
    │   └── sbcsgroupprober.py
    ├── weixin.py
    └── opml.py
├── static
    ├── cn.gif
    ├── tr.gif
    ├── us.gif
    ├── book128.png
    ├── favicon.ico
    ├── favicon2.ico
    ├── fulltext.gif
    ├── separate.gif
    ├── periodical.gif
    ├── apple-touch-icon.png
    └── apple-touch-icon57.png
├── upload.sh
├── images
    ├── cv_wsj.jpg
    ├── mh_wsj.gif
    ├── cv_anbang.jpg
    ├── cv_bound.jpg
    ├── cv_folha.jpg
    ├── cv_nfzm.jpg
    ├── cv_xueqiu.jpg
    ├── mh_anbang.gif
    ├── mh_folha.gif
    ├── mh_nfzm.gif
    ├── mh_xueqiu.gif
    ├── cv_dapenti.jpg
    ├── cv_default.jpg
    ├── cv_gongshi.jpg
    ├── cv_lifeweek.jpg
    ├── mh_dapenti.gif
    ├── mh_default.gif
    ├── mh_gongshi.gif
    ├── mh_lifeweek.gif
    ├── cv_aisixiang.jpg
    ├── cv_chuansongme.jpg
    ├── cv_economist.jpg
    ├── cv_ftchinese.jpg
    ├── cv_qiushibaike.jpg
    ├── cv_zhihudaily.jpg
    ├── mh_aisixiang.gif
    ├── mh_chuansongme.gif
    ├── mh_economist.gif
    ├── mh_ftchinese.gif
    ├── mh_qiushibaike.gif
    └── mh_zhihudaily.gif
├── books
    ├── Economist.py
    ├── Xiaodaonews.py
    ├── ZhihuDailyRss.py
    ├── FTChinese.py
    ├── __init__.py
    ├── Dapenti.py
    ├── Readme.txt
    ├── Lifeweek.py
    ├── nfzm.py
    ├── Gongshi.py
    ├── Xueqiu.py
    ├── FolhaDeSaopaulo.py
    ├── wsj.py
    └── Qiushibaike.py
├── .gitattributes
├── i18n
    ├── en
    │   └── LC_MESSAGES
    │   │   ├── lang.mo
    │   │   └── lang.po
    ├── tr-tr
    │   └── LC_MESSAGES
    │   │   └── lang.mo
    └── zh-cn
    │   └── LC_MESSAGES
    │   │   └── lang.mo
├── .gitignore
├── mime.types
├── cron.yaml
├── queue.yaml
├── dispatch.yaml
├── templates
    ├── tipsandback.html
    ├── tipsback.html
    ├── autoback.html
    ├── delaccount.html
    ├── advimport.html
    ├── adminmgrpwd.html
    ├── advwhitelist.html
    ├── advurlfilter.html
    ├── booklogininfo.html
    ├── login.html
    ├── home.html
    ├── advbase.html
    └── admin.html
├── apps
    ├── View
    │   ├── Home.py
    │   ├── DbViewer.py
    │   ├── __init__.py
    │   ├── Logs.py
    │   └── Setting.py
    ├── __init__.py
    ├── Work
    │   └── __init__.py
    ├── module_backend.py
    └── module_front.py
├── index.yaml
├── module-worker.yaml
├── app.yaml
├── readme_EN.md
└── changelog.md
/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/web/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/bs4/tests/__init__.py: -------------------------------------------------------------------------------- 1 | "The beautifulsoup tests." 2 | -------------------------------------------------------------------------------- /static/cn.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/cn.gif -------------------------------------------------------------------------------- /static/tr.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/tr.gif -------------------------------------------------------------------------------- /static/us.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/us.gif -------------------------------------------------------------------------------- /upload.sh: -------------------------------------------------------------------------------- 1 | appcfg.py update app.yaml module-worker.yaml 2 | appcfg.py update . 
3 | -------------------------------------------------------------------------------- /images/cv_wsj.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_wsj.jpg -------------------------------------------------------------------------------- /images/mh_wsj.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_wsj.gif -------------------------------------------------------------------------------- /books/Economist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/books/Economist.py -------------------------------------------------------------------------------- /images/cv_anbang.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_anbang.jpg -------------------------------------------------------------------------------- /images/cv_bound.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_bound.jpg -------------------------------------------------------------------------------- /images/cv_folha.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_folha.jpg -------------------------------------------------------------------------------- /images/cv_nfzm.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_nfzm.jpg -------------------------------------------------------------------------------- /images/cv_xueqiu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_xueqiu.jpg -------------------------------------------------------------------------------- /images/mh_anbang.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_anbang.gif -------------------------------------------------------------------------------- /images/mh_folha.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_folha.gif -------------------------------------------------------------------------------- /images/mh_nfzm.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_nfzm.gif -------------------------------------------------------------------------------- /images/mh_xueqiu.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_xueqiu.gif -------------------------------------------------------------------------------- /static/book128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/book128.png -------------------------------------------------------------------------------- /static/favicon.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/favicon.ico -------------------------------------------------------------------------------- /static/favicon2.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/favicon2.ico -------------------------------------------------------------------------------- /static/fulltext.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/fulltext.gif -------------------------------------------------------------------------------- /static/separate.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/separate.gif -------------------------------------------------------------------------------- /images/cv_dapenti.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_dapenti.jpg -------------------------------------------------------------------------------- /images/cv_default.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_default.jpg -------------------------------------------------------------------------------- /images/cv_gongshi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_gongshi.jpg -------------------------------------------------------------------------------- /images/cv_lifeweek.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_lifeweek.jpg -------------------------------------------------------------------------------- /images/mh_dapenti.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_dapenti.gif -------------------------------------------------------------------------------- /images/mh_default.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_default.gif -------------------------------------------------------------------------------- /images/mh_gongshi.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_gongshi.gif -------------------------------------------------------------------------------- /images/mh_lifeweek.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_lifeweek.gif -------------------------------------------------------------------------------- /lib/dateutil/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/lib/dateutil/parser.py -------------------------------------------------------------------------------- /static/periodical.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/speed/KindleEar/master/static/periodical.gif -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Disable LF normalization for all files 2 | * text=auto 3 | *.pyc -text 4 | *.py text -------------------------------------------------------------------------------- /images/cv_aisixiang.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_aisixiang.jpg -------------------------------------------------------------------------------- /images/cv_chuansongme.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_chuansongme.jpg -------------------------------------------------------------------------------- /images/cv_economist.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_economist.jpg -------------------------------------------------------------------------------- /images/cv_ftchinese.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_ftchinese.jpg -------------------------------------------------------------------------------- /images/cv_qiushibaike.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_qiushibaike.jpg -------------------------------------------------------------------------------- /images/cv_zhihudaily.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_zhihudaily.jpg -------------------------------------------------------------------------------- /images/mh_aisixiang.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_aisixiang.gif -------------------------------------------------------------------------------- /images/mh_chuansongme.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_chuansongme.gif -------------------------------------------------------------------------------- /images/mh_economist.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_economist.gif -------------------------------------------------------------------------------- /images/mh_ftchinese.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_ftchinese.gif -------------------------------------------------------------------------------- /images/mh_qiushibaike.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_qiushibaike.gif -------------------------------------------------------------------------------- /images/mh_zhihudaily.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_zhihudaily.gif -------------------------------------------------------------------------------- /i18n/en/LC_MESSAGES/lang.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/i18n/en/LC_MESSAGES/lang.mo -------------------------------------------------------------------------------- /static/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/apple-touch-icon.png -------------------------------------------------------------------------------- /i18n/tr-tr/LC_MESSAGES/lang.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/i18n/tr-tr/LC_MESSAGES/lang.mo -------------------------------------------------------------------------------- /i18n/zh-cn/LC_MESSAGES/lang.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/i18n/zh-cn/LC_MESSAGES/lang.mo -------------------------------------------------------------------------------- /static/apple-touch-icon57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/apple-touch-icon57.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | Correio.py 4 | oGlobo.py 5 | mh_correio.gif 6 | mh_globo.gif 7 | cv_correio.jpg 8 | cv_globo.jpg 9 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/oeb/__init__.py: -------------------------------------------------------------------------------- 1 | __license__ = 'GPL v3' 2 | __copyright__ = '2008, Marshall T. 
Vandegrift ' 3 | -------------------------------------------------------------------------------- /lib/cssutils/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | from csscombine import csscombine 2 | __all__ = ["csscapture", "csscombine", "cssparse"] 3 | 4 | 5 | -------------------------------------------------------------------------------- /lib/dateutil/zoneinfo/zoneinfo-2010g.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/lib/dateutil/zoneinfo/zoneinfo-2010g.tar.gz -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/pykakasi/__init__.py: -------------------------------------------------------------------------------- 1 | from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi 2 | kakasi 3 | 4 | __all__ = ["pykakasi"] 5 | 6 | -------------------------------------------------------------------------------- /lib/calibre/library/__init__.py: -------------------------------------------------------------------------------- 1 | __license__ = 'GPL v3' 2 | __copyright__ = '2008, Kovid Goyal ' 3 | ''' Code to manage ebook library''' 4 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/compression/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __license__ = 'GPL 3' 4 | __copyright__ = '2009, John Schember ' 5 | __docformat__ = 'restructuredtext en' 6 | -------------------------------------------------------------------------------- /mime.types: -------------------------------------------------------------------------------- 1 | application/epub+zip epub 2 | application/xhtml+xml xhtml 3 | text/css css 4 | application/x-dtbncx+xml ncx 5 | application/oebps-package+xml opf 6 | application/vnd.ms-opentype otf 7 | image/svg+xml svg -------------------------------------------------------------------------------- /cron.yaml: -------------------------------------------------------------------------------- 1 | cron: 2 | - description: add deliver task to queue 3 | url: /deliver 4 | schedule: every 1 hours 5 | 6 | - description: remove old logs 7 | url: /removelogs 8 | schedule: every day 03:00 9 | timezone: Asia/Shanghai 10 | -------------------------------------------------------------------------------- /lib/calibre/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | __license__ = 'GPL v3' 3 | __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' 4 | __docformat__ = 'restructuredtext en' 5 | 6 | ''' 7 | Miscellaneous utilities. 
8 | ''' 9 | 10 | -------------------------------------------------------------------------------- /queue.yaml: -------------------------------------------------------------------------------- 1 | queue: 2 | 3 | - name: deliverqueue1 4 | rate: 2/m 5 | bucket_size: 2 6 | retry_parameters: 7 | task_retry_limit: 2 8 | task_age_limit: 5m 9 | min_backoff_seconds: 60 10 | max_backoff_seconds: 600 11 | max_doublings: 4 12 | # target: worker -------------------------------------------------------------------------------- /lib/dateutil/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2010 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | __version__ = "1.5" 10 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/oeb/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import with_statement 4 | 5 | __license__ = 'GPL v3' 6 | __copyright__ = '2009, Kovid Goyal ' 7 | __docformat__ = 'restructuredtext en' 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /dispatch.yaml: -------------------------------------------------------------------------------- 1 | dispatch: 2 | # Default module serves the typical web resources and all static resources. 3 | - url: "*/favicon.ico" 4 | module: default 5 | 6 | - url: "*/worker*" 7 | module: worker 8 | 9 | - url: "*/url2book*" 10 | module: worker 11 | 12 | # - url: "*/_ah/mail/*" 13 | # module: worker -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/reader/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/writer8/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /templates/tipsandback.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menubar -%} 3 | {% endblock -%} 4 | {% block content -%} 5 | {% if tips -%} 6 |

{{tips}}

7 | {% endif -%} 8 |

{{_("Auto back to previous page after 5 seconds")}}...

9 |

{{_("Click to back")}}


10 | {% endblock -%} 11 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/conversion/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /templates/tipsback.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menubar -%} 3 | {% endblock -%} 4 | {% block content -%} 5 | {% if tips -%} 6 |

{{tips}}

7 | {% endif -%} 8 |

9 | 10 | {{_("Click to back")}} 11 | 12 |


13 | {% endblock -%} 14 | -------------------------------------------------------------------------------- /lib/cssutils/stylesheets/__init__.py: -------------------------------------------------------------------------------- 1 | """Implements Document Object Model Level 2 Style Sheets 2 | http://www.w3.org/TR/2000/PR-DOM-Level-2-Style-20000927/stylesheets.html 3 | """ 4 | __all__ = ['MediaList', 'MediaQuery', 'StyleSheet', 'StyleSheetList'] 5 | __docformat__ = 'restructuredtext' 6 | __version__ = '$Id$' 7 | 8 | from medialist import * 9 | from mediaquery import * 10 | from stylesheet import * 11 | from stylesheetlist import * 12 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | 4 | __license__ = 'GPL v3' 5 | __copyright__ = '2008, Kovid Goyal ' 6 | 7 | class MobiError(Exception): 8 | pass 9 | 10 | # That might be a bit small on the PW, but Amazon/KG 2.5 still uses these values, even when delivered to a PW 11 | MAX_THUMB_SIZE = 16 * 1024 12 | MAX_THUMB_DIMEN = (180, 240) 13 | 14 | 15 | -------------------------------------------------------------------------------- /lib/cssutils/codec.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Python codec for CSS.""" 3 | __docformat__ = 'restructuredtext' 4 | __author__ = 'Walter Doerwald' 5 | __version__ = '$Id: util.py 1114 2008-03-05 13:22:59Z cthedot $' 6 | 7 | import sys 8 | 9 | if sys.version_info < (3,): 10 | from _codec2 import * 11 | # for tests 12 | from _codec2 import _fixencoding 13 | else: 14 | from _codec3 import * 15 | # for tests 16 | from _codec3 import _fixencoding 17 | -------------------------------------------------------------------------------- /lib/readability/__init__.py: -------------------------------------------------------------------------------- 1 | # version : 0.3.0.5 2 | # https://pypi.python.org/pypi/readability-lxml 3 | from .readability import Document 4 | 5 | #Usage: 6 | 7 | #from readability.readability import Document 8 | #import urllib 9 | #html = urllib.urlopen(url).read() 10 | #readable_article = Document(html).summary() 11 | #readable_title = Document(html).short_title() 12 | #Command-line usage: 13 | 14 | #python -m readability.readability -u http://pypi.python.org/pypi/readability-lxml 15 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/writer2/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2011, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | 11 | UNCOMPRESSED = 1 12 | PALMDOC = 2 13 | HUFFDIC = 17480 14 | PALM_MAX_IMAGE_SIZE = 63 * 1024 15 | 16 | -------------------------------------------------------------------------------- /apps/View/Home.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 
4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Contributors: 6 | # rexdf 7 | 8 | from apps.BaseHandler import BaseHandler 9 | from apps.utils import etagged 10 | 11 | class Home(BaseHandler): 12 | __url__ = r"/" 13 | @etagged() 14 | def GET(self): 15 | return self.render('home.html',"Home") -------------------------------------------------------------------------------- /lib/calibre/utils/config_base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | 4 | __license__ = 'GPL v3' 5 | __copyright__ = '2011, Kovid Goyal ' 6 | __docformat__ = 'restructuredtext en' 7 | 8 | plugin_dir = "." 9 | 10 | tweaks = {"gui_timestamp_display_format":"dd MMM yyyy", 11 | "gui_pubdate_display_format":"MMM yyyy", 12 | "gui_last_modified_display_format":"dd MMM yyyy", 13 | "server_listen_on":"0.0.0.0",} #read_tweaks() 14 | 15 | 16 | -------------------------------------------------------------------------------- /i18n/en/LC_MESSAGES/lang.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) YEAR ORGANIZATION 3 | # Arroz , 2013. 4 | # 5 | msgid "" 6 | msgstr "" 7 | "Project-Id-Version: 1.5\n" 8 | "POT-Creation-Date: 2013-08-06 14:16+China Standard Time\n" 9 | "PO-Revision-Date: 2013-08-07 16:36+ZONE\n" 10 | "Last-Translator: Arroz \n" 11 | "Language-Team: Arroz \n" 12 | "MIME-Version: 1.0\n" 13 | "Content-Type: text/plain; charset=utf-8\n" 14 | "Content-Transfer-Encoding: utf-8\n" 15 | "Generated-By: pygettext.py 1.5\n" 16 | -------------------------------------------------------------------------------- /apps/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #中文讨论贴:http://www.hi-pda.com/forum/viewthread.php?tid=1213082 6 | #Author: 7 | # cdhigh 8 | #Contributors: 9 | # rexdf 10 | 11 | import __builtin__, sys 12 | from google.appengine.ext import vendor 13 | 14 | __Version__ = '1.23.2' 15 | 16 | __builtin__.__dict__['__Version__'] = __Version__ 17 | 18 | sys.path.insert(0, 'lib') 19 | vendor.add('lib') 20 | -------------------------------------------------------------------------------- /books/Xiaodaonews.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from weixinbase import WeixinBook 4 | 5 | def getBook(): 6 | return Xiaodaonews 7 | 8 | class Xiaodaonews(WeixinBook): 9 | title = u'微信公众号:小道消息' 10 | description = u'只有小道消息才能拯救中国互联网' 11 | language = 'zh-cn' 12 | feed_encoding = "utf-8" 13 | page_encoding = "utf-8" 14 | oldest_article = 7 15 | deliver_days = ['Friday'] 16 | feeds = [ 17 | (u'小道消息', 'http://weixin.sogou.com/gzh?openid=oIWsFt86NKeSGd_BQKp1GcDkYpv0'), 18 | ] 19 | -------------------------------------------------------------------------------- /lib/cssutils/settings.py: -------------------------------------------------------------------------------- 1 | """Experimental settings for special stuff.""" 2 | 3 | def set(key, value): 4 | """Call to enable special settings: 5 | 6 | ('DXImageTransform.Microsoft', True) 7 | enable support for parsing special MS only filter values 8 | 9 | Clears the tokenizer cache which holds the compiled productions! 
10 | """ 11 | if key == 'DXImageTransform.Microsoft' and value == True: 12 | import cssproductions 13 | import tokenize2 14 | tokenize2._TOKENIZER_CACHE.clear() 15 | cssproductions.PRODUCTIONS.insert(1, cssproductions._DXImageTransform) 16 | -------------------------------------------------------------------------------- /books/ZhihuDailyRss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from base import BaseFeedBook 4 | 5 | def getBook(): 6 | return ZhihuDaily 7 | 8 | class ZhihuDaily(BaseFeedBook): 9 | title = u'知乎日報' 10 | description = u'知乎日报全文RSS,不需要转发,排版图片正常。' 11 | language = 'zh-cn' 12 | feed_encoding = "utf-8" 13 | page_encoding = "utf-8" 14 | mastheadfile = "mh_zhihudaily.gif" 15 | coverfile = "cv_zhihudaily.jpg" 16 | oldest_article = 1 17 | feeds = [ 18 | (u'知乎日报', 'http://zhihudaily.dev.malash.net/', True) 19 | ] 20 | -------------------------------------------------------------------------------- /templates/autoback.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block js -%} 3 | 14 | {% endblock -%} 15 | {% block menubar -%} 16 | {% endblock -%} 17 | {% block content -%} 18 | {% if tips -%} 19 |

{{tips}}

20 | {% endif -%} 21 |

{{_("Auto back to previous page after 5 seconds")}}...

22 |

{{_("Click to back")}}


23 | {% endblock -%} 24 | -------------------------------------------------------------------------------- /lib/cssselect/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf8 2 | """ 3 | CSS Selectors based on XPath 4 | ============================ 5 | 6 | This module supports selecting XML/HTML elements based on CSS selectors. 7 | See the `CSSSelector` class for details. 8 | 9 | 10 | :copyright: (c) 2007-2012 Ian Bicking and contributors. 11 | See AUTHORS for more details. 12 | :license: BSD, see LICENSE for more details. 13 | 14 | """ 15 | 16 | from cssselect.parser import (parse, Selector, SelectorError, 17 | SelectorSyntaxError) 18 | from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError 19 | 20 | 21 | VERSION = '0.8' 22 | __version__ = VERSION 23 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/vndecoder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __license__ = 'GPL 3' 4 | __copyright__ = '2010, Hiroshi Miura ' 5 | __docformat__ = 'restructuredtext en' 6 | 7 | ''' 8 | Decode unicode text to an ASCII representation of the text in Vietnamese. 9 | 10 | ''' 11 | 12 | from calibre.ebooks.unihandecode.unidecoder import Unidecoder 13 | from calibre.ebooks.unihandecode.vncodepoints import CODEPOINTS as HANCODES 14 | from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS 15 | 16 | class Vndecoder(Unidecoder): 17 | 18 | codepoints = {} 19 | 20 | def __init__(self): 21 | self.codepoints = CODEPOINTS 22 | self.codepoints.update(HANCODES) 23 | 24 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/krdecoder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __license__ = 'GPL 3' 4 | __copyright__ = '2010, Hiroshi Miura ' 5 | __docformat__ = 'restructuredtext en' 6 | 7 | ''' 8 | Decode unicode text to an ASCII representation of the text in Korean. 9 | Based on unidecoder. 10 | 11 | ''' 12 | 13 | from calibre.ebooks.unihandecode.unidecoder import Unidecoder 14 | from calibre.ebooks.unihandecode.krcodepoints import CODEPOINTS as HANCODES 15 | from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS 16 | 17 | class Krdecoder(Unidecoder): 18 | 19 | codepoints = {} 20 | 21 | def __init__(self): 22 | self.codepoints = CODEPOINTS 23 | self.codepoints.update(HANCODES) 24 | 25 | -------------------------------------------------------------------------------- /templates/delaccount.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menubar -%} 3 | {% endblock -%} 4 | {% block content -%} 5 | {% if tips -%} 6 |

{{tips}}

7 | {% endif -%} 8 |
9 |
10 | 11 | 13 |
14 |
15 | 16 |
17 |
18 | {% endblock -%} -------------------------------------------------------------------------------- /index.yaml: -------------------------------------------------------------------------------- 1 | indexes: 2 | 3 | # AUTOGENERATED 4 | 5 | # This index.yaml is automatically updated whenever the dev_appserver 6 | # detects that a new type of query is run. If you want to manage the 7 | # index.yaml file manually, remove the above marker line (the line 8 | # saying "# AUTOGENERATED"). If you want to manage some indexes 9 | # manually, move them above the marker line. The index.yaml file is 10 | # automatically uploaded to the admin console when you next deploy 11 | # your application using appcfg.py. 12 | 13 | - kind: Book 14 | properties: 15 | - name: builtin 16 | - name: users 17 | 18 | - kind: DeliverLog 19 | properties: 20 | - name: username 21 | - name: time 22 | direction: desc 23 | 24 | - kind: Feed 25 | properties: 26 | - name: book 27 | - name: time 28 | -------------------------------------------------------------------------------- /module-worker.yaml: -------------------------------------------------------------------------------- 1 | module: worker 2 | runtime: python27 3 | api_version: 1 4 | threadsafe: true 5 | instance_class: B4 6 | basic_scaling: 7 | max_instances: 3 8 | idle_timeout: 30m 9 | 10 | libraries: 11 | - name: lxml 12 | version: "latest" 13 | 14 | - name: PIL 15 | version: "latest" 16 | 17 | - name: jinja2 18 | version: "latest" 19 | 20 | - name: pycrypto 21 | version: "latest" 22 | 23 | inbound_services: 24 | - mail 25 | 26 | handlers: 27 | - url: /favicon.ico 28 | static_files: static/favicon.ico 29 | upload: static/favicon.ico 30 | mime_type: image/x-icon 31 | 32 | - url: /static 33 | static_dir: static 34 | application_readable: true 35 | 36 | - url: /_ah/mail/.+ 37 | script: apps.handlemail.appmail 38 | 39 | - url: /.* 40 | script: apps.module_backend.app 41 | 42 | -------------------------------------------------------------------------------- /templates/advimport.html: -------------------------------------------------------------------------------- 1 | {% extends "advbase.html" %} 2 | {% block advcontent -%} 3 |
4 |
5 | {{_("Import Feeds")}} 6 |

7 | {{_("Import custom rss from a OPML file.")}} 8 |

9 |
10 | 11 |
12 | 13 | {{_("Download")}} 14 | {% if tips -%} 15 |
16 |

{{tips}}

17 | {% endif -%} 18 |
19 |
20 | {% endblock -%} -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/tweak.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | __license__ = 'GPL v3' 4 | __copyright__ = '2012, Kovid Goyal ' 5 | __docformat__ = 'restructuredtext en' 6 | 7 | class BadFormat(ValueError): 8 | pass 9 | 10 | def do_explode(path, dest): 11 | pass 12 | 13 | def explode(path, dest, question=lambda x:True): 14 | pass 15 | 16 | def set_cover(oeb): 17 | if 'cover' not in oeb.guide or oeb.metadata['cover']: 18 | return 19 | cover = oeb.guide['cover'] 20 | if cover.href in oeb.manifest.hrefs: 21 | item = oeb.manifest.hrefs[cover.href] 22 | oeb.metadata.clear('cover') 23 | oeb.metadata.add('cover', item.id) 24 | 25 | def do_rebuild(opf, dest_path): 26 | pass 27 | 28 | def rebuild(src_dir, dest_path): 29 | pass 30 | -------------------------------------------------------------------------------- /books/FTChinese.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from base import BaseFeedBook 4 | 5 | def getBook(): 6 | return FTChinese 7 | 8 | class FTChinese(BaseFeedBook): 9 | title = u'FT中文网' 10 | description = u'英国《金融时报》集团旗下唯一的中文商业财经网站。' 11 | language = 'zh-cn' 12 | feed_encoding = "utf-8" 13 | page_encoding = "utf-8" 14 | mastheadfile = "mh_ftchinese.gif" 15 | coverfile = "cv_ftchinese.jpg" 16 | oldest_article = 1 17 | 18 | feeds = [ 19 | (u'每日新闻', 'http://www.ftchinese.com/rss/feed'), 20 | ] 21 | 22 | def fetcharticle(self, url, opener, decoder): 23 | #每个URL都增加一个后缀full=y,如果有分页则自动获取全部分页 24 | url += '?full=y' 25 | return BaseFeedBook.fetcharticle(self,url,opener,decoder) 26 | -------------------------------------------------------------------------------- /lib/web/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """web.py: makes web apps (http://webpy.org)""" 3 | 4 | from __future__ import generators 5 | 6 | __version__ = "0.37" 7 | __author__ = [ 8 | "Aaron Swartz ", 9 | "Anand Chitipothu " 10 | ] 11 | __license__ = "public domain" 12 | __contributors__ = "see http://webpy.org/changes" 13 | 14 | import utils, db, net, wsgi, http, webapi, httpserver, debugerror 15 | import template, form 16 | 17 | import session 18 | 19 | from utils import * 20 | from db import * 21 | from net import * 22 | from wsgi import * 23 | from http import * 24 | from webapi import * 25 | from httpserver import * 26 | from debugerror import * 27 | from application import * 28 | from browser import * 29 | try: 30 | import webopenid as openid 31 | except ImportError: 32 | pass # requires openid module 33 | 34 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/writer8/cleanup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | from calibre.ebooks.oeb.base import XPath 11 | 12 | class CSSCleanup(object): 13 | 14 | def __init__(self, log, opts): 15 | self.log, self.opts = log, opts 16 | 17 | def __call__(self, item, stylizer): 18 | if 
not hasattr(item.data, 'xpath'): return 19 | 20 | # The Kindle touch displays all black pages if the height is set on 21 | # body 22 | for body in XPath('//h:body')(item.data): 23 | style = stylizer.style(body) 24 | style.drop('height') 25 | 26 | -------------------------------------------------------------------------------- /app.yaml: -------------------------------------------------------------------------------- 1 | runtime: python27 2 | api_version: 1 3 | threadsafe: true 4 | instance_class: F1 5 | #automatic_scaling: 6 | # min_idle_instances: 1 7 | # max_idle_instances: automatic # default value 8 | # min_pending_latency: automatic # default value 9 | # max_pending_latency: 30ms 10 | # max_concurrent_requests: 50 11 | 12 | libraries: 13 | - name: lxml 14 | version: "latest" 15 | 16 | - name: PIL 17 | version: "latest" 18 | 19 | - name: jinja2 20 | version: "latest" 21 | 22 | - name: pycrypto 23 | version: "latest" 24 | 25 | inbound_services: 26 | - mail 27 | 28 | handlers: 29 | - url: /favicon.ico 30 | static_files: static/favicon.ico 31 | upload: static/favicon.ico 32 | mime_type: image/x-icon 33 | 34 | - url: /static 35 | static_dir: static 36 | application_readable: true 37 | 38 | - url: /_ah/mail/.+ 39 | script: apps.handlemail.appmail 40 | 41 | - url: /.* 42 | script: apps.module_front.app 43 | 44 | -------------------------------------------------------------------------------- /lib/calibre/utils/resources.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import with_statement 4 | 5 | __license__ = 'GPL v3' 6 | __copyright__ = '2009, Kovid Goyal ' 7 | __docformat__ = 'restructuredtext en' 8 | 9 | 10 | import __builtin__ 11 | 12 | from calibre import config_dir 13 | 14 | #_resolver = PathResolver() 15 | 16 | def get_path(path, data=False, allow_user_override=True): 17 | #fpath = _resolver(path, allow_user_override=allow_user_override) 18 | fpath = path 19 | if data: 20 | with open(fpath, 'rb') as f: 21 | return f.read() 22 | return fpath 23 | 24 | def get_image_path(path, data=False, allow_user_override=True): 25 | if not path: 26 | return get_path('images') 27 | return get_path('images/'+path, data=data) 28 | 29 | __builtin__.__dict__['P'] = get_path 30 | __builtin__.__dict__['I'] = get_image_path 31 | -------------------------------------------------------------------------------- /lib/readability/debug.py: -------------------------------------------------------------------------------- 1 | def save_to_file(text, filename): 2 | f = open(filename, 'wt') 3 | f.write('') 4 | f.write(text.encode('utf-8')) 5 | f.close() 6 | 7 | uids = {} 8 | def describe(node, depth=2): 9 | if not hasattr(node, 'tag'): 10 | return "[%s]" % type(node) 11 | name = node.tag 12 | if node.get('id', ''): name += '#'+node.get('id') 13 | if node.get('class', ''): 14 | name += '.' 
+ node.get('class').replace(' ','.') 15 | if name[:4] in ['div#', 'div.']: 16 | name = name[3:] 17 | if name in ['tr', 'td', 'div', 'p']: 18 | if not node in uids: 19 | uid = uids[node] = len(uids)+1 20 | else: 21 | uid = uids.get(node) 22 | name += "%02d" % (uid) 23 | if depth and node.getparent() is not None: 24 | return name+' - '+describe(node.getparent(), depth-1) 25 | return name 26 | -------------------------------------------------------------------------------- /books/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | import os 5 | 6 | _booksclasses = [] 7 | def RegisterBook(book): 8 | if book.title: 9 | _booksclasses.append(book) 10 | 11 | def BookClasses(): 12 | return _booksclasses 13 | 14 | def BookClass(title): 15 | for bk in _booksclasses: 16 | if bk.title == title: 17 | return bk 18 | return None 19 | 20 | #def LoadBooks(): 21 | for bkfile in os.listdir(os.path.dirname(__file__)): 22 | if bkfile.endswith('.py') and not bkfile.startswith('__') and not bkfile.endswith("base.py"): 23 | bookname = os.path.splitext(bkfile)[0] 24 | try: 25 | mbook = __import__("books." + bookname, fromlist='*') 26 | bk = mbook.getBook() 27 | #globals()[bk.__name__] = getattr(bk, bk.__name__) 28 | RegisterBook(bk) 29 | except Exception as e: 30 | default_log.warn("Book '%s' import failed : %s" % (bookname,e)) 31 | 32 | #LoadBooks() 33 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/conversion/__init__.py: -------------------------------------------------------------------------------- 1 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 2 | from __future__ import (unicode_literals, division, absolute_import, 3 | print_function) 4 | 5 | __license__ = 'GPL v3' 6 | __copyright__ = '2011, Kovid Goyal ' 7 | __docformat__ = 'restructuredtext en' 8 | 9 | 10 | class ConversionUserFeedBack(Exception): 11 | 12 | def __init__(self, title, msg, level='info', det_msg=''): 13 | ''' Show a simple message to the user 14 | 15 | :param title: The title (very short description) 16 | :param msg: The message to show the user 17 | :param level: Must be one of 'info', 'warn' or 'error' 18 | :param det_msg: Optional detailed message to show the user 19 | ''' 20 | import json 21 | Exception.__init__(self, json.dumps({'msg':msg, 'level':level, 22 | 'det_msg':det_msg, 'title':title})) 23 | self.title, self.msg, self.det_msg = title, msg, det_msg 24 | self.level = level 25 | 26 | -------------------------------------------------------------------------------- /books/Dapenti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from base import BaseFeedBook 4 | 5 | def getBook(): 6 | return Dapenti 7 | 8 | class Dapenti(BaseFeedBook): 9 | title = u'喷嚏图卦' 10 | description = u'每天一图卦,让我们更清楚地了解这个世界' 11 | language = 'zh-cn' 12 | feed_encoding = "utf-8" 13 | page_encoding = "utf-8" 14 | max_articles_per_feed = 1 15 | oldest_article = 2 16 | mastheadfile = "mh_dapenti.gif" 17 | coverfile = "cv_dapenti.jpg" 18 | network_timeout = 60 19 | fetch_img_via_ssl = False 20 | feeds = [ 21 | (u'喷嚏图卦', 'http://tugua.daoapp.io/rss.xml', True), 22 | ] 23 | 24 | def soupbeforeimage(self, soup): 25 | #更换另一个图库,因为RSS中的图库已经被封 26 | for img in soup.find_all('img', attrs={'src':True}): 27 | if img['src'].startswith('http://ptimg.org:88'): 28 | img['src'] = 
img['src'].replace('http://ptimg.org:88','http://pic.yupoo.com') 29 | 30 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/tweak.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | class Error(ValueError): 11 | pass 12 | 13 | def ask_cli_question(msg): 14 | pass 15 | 16 | def mobi_exploder(path, tdir, question=lambda x:True): 17 | pass 18 | 19 | def zip_exploder(path, tdir, question=lambda x:True): 20 | pass 21 | 22 | def zip_rebuilder(tdir, path): 23 | pass 24 | 25 | def get_tools(fmt): 26 | fmt = fmt.lower() 27 | 28 | if fmt in {'mobi', 'azw', 'azw3'}: 29 | from calibre.ebooks.mobi.tweak import rebuild 30 | ans = mobi_exploder, rebuild 31 | elif fmt in {'epub', 'htmlz'}: 32 | ans = zip_exploder, zip_rebuilder 33 | else: 34 | ans = None, None 35 | 36 | return ans 37 | 38 | def tweak(ebook_file): 39 | pass 40 | 41 | -------------------------------------------------------------------------------- /lib/calibre/constants.py: -------------------------------------------------------------------------------- 1 | __license__ = 'GPL v3' 2 | __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' 3 | __docformat__ = 'restructuredtext en' 4 | __appname__ = u'calibre' 5 | numeric_version = (1, 0, 0) 6 | __version__ = u'.'.join(map(unicode, numeric_version)) 7 | __author__ = u"Kovid Goyal " 8 | 9 | ''' 10 | Various run time constants. 11 | ''' 12 | 13 | import sys, codecs 14 | 15 | iswindows = False 16 | ispy3 = False 17 | 18 | #try: 19 | # preferred_encoding = locale.getpreferredencoding() 20 | # codecs.lookup(preferred_encoding) 21 | #except: 22 | preferred_encoding = 'utf-8' 23 | 24 | winerror = None 25 | _osx_ver = None 26 | 27 | filesystem_encoding = sys.getfilesystemencoding() 28 | if filesystem_encoding is None: 29 | filesystem_encoding = 'utf-8' 30 | else: 31 | try: 32 | if codecs.lookup(filesystem_encoding).name == 'ascii': 33 | filesystem_encoding = 'utf-8' 34 | except: 35 | filesystem_encoding = 'utf-8' 36 | 37 | DEBUG = False 38 | plugins = None 39 | CONFIG_DIR_MODE = 0700 40 | config_dir = "" 41 | -------------------------------------------------------------------------------- /lib/calibre/startup.py: -------------------------------------------------------------------------------- 1 | __license__ = 'GPL v3' 2 | __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' 3 | __docformat__ = 'restructuredtext en' 4 | 5 | ''' 6 | Perform various initialization tasks. 
7 | ''' 8 | 9 | import locale, sys, os, re 10 | 11 | # Default translation is NOOP 12 | import __builtin__ 13 | __builtin__.__dict__['_'] = lambda s: s 14 | 15 | # For strings which belong in the translation tables, but which shouldn't be 16 | # immediately translated to the environment language 17 | __builtin__.__dict__['__'] = lambda s: s 18 | __builtin__.__dict__['P'] = lambda s: s 19 | __builtin__.__dict__['I'] = lambda s: s 20 | __builtin__.__dict__['lopen'] = open 21 | __builtin__.__dict__['icu_lower'] = lambda x:x.lower() 22 | __builtin__.__dict__['icu_upper'] = lambda x:x.upper() 23 | __builtin__.__dict__['icu_title'] = lambda x:x.capitalize() 24 | __builtin__.__dict__['dynamic_property'] = lambda func: func(None) 25 | 26 | from calibre.constants import * 27 | 28 | _run_once = False 29 | winutil = winutilerror = None 30 | _base_dir = "." 31 | 32 | if not _run_once: 33 | _run_once = True 34 | 35 | -------------------------------------------------------------------------------- /lib/bs4/tests/test_htmlparser.py: -------------------------------------------------------------------------------- 1 | """Tests to ensure that the html.parser tree builder generates good 2 | trees.""" 3 | 4 | from pdb import set_trace 5 | import pickle 6 | from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest 7 | from bs4.builder import HTMLParserTreeBuilder 8 | 9 | class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): 10 | 11 | @property 12 | def default_builder(self): 13 | return HTMLParserTreeBuilder() 14 | 15 | def test_namespaced_system_doctype(self): 16 | # html.parser can't handle namespaced doctypes, so skip this one. 17 | pass 18 | 19 | def test_namespaced_public_doctype(self): 20 | # html.parser can't handle namespaced doctypes, so skip this one. 21 | pass 22 | 23 | def test_builder_is_pickled(self): 24 | """Unlike most tree builders, HTMLParserTreeBuilder and will 25 | be restored after pickling. 26 | """ 27 | tree = self.soup("foo") 28 | dumped = pickle.dumps(tree, 2) 29 | loaded = pickle.loads(dumped) 30 | self.assertTrue(isinstance(loaded.builder, type(tree.builder))) 31 | 32 | 33 | -------------------------------------------------------------------------------- /templates/adminmgrpwd.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menubar -%} 3 | {% endblock -%} 4 | {% block content -%} 5 | {% if tips -%} 6 |

{{tips}}

7 | {% endif -%} 8 |
9 |
10 | 11 | 13 |
14 |
15 | 16 | 17 |
18 |
19 | 20 | 21 |
22 |
23 | 24 |
25 |
26 | {% endblock -%} -------------------------------------------------------------------------------- /templates/advwhitelist.html: -------------------------------------------------------------------------------- 1 | {% extends "advbase.html" %} 2 | {% block advcontent -%} 3 |
4 |
5 | {{_("White List")}} 6 |

7 | {{_("mail sent to %sxxx@appid.appspotmail.com will be transfered to your email.")|format('' if user.name=="admin" else user.name+"__")}} 8 |

9 | {% if user.whitelist.count() -%} 10 | 11 | {% for lst in user.whitelist -%} 12 | 13 | 14 | 15 | 16 | {% endfor -%} 17 |
{{lst.mail}}{{_("Delete")}}
18 | {% endif -%} 19 |
20 | 21 |
22 | 23 |
24 |
25 | {% endblock -%} -------------------------------------------------------------------------------- /templates/advurlfilter.html: -------------------------------------------------------------------------------- 1 | {% extends "advbase.html" %} 2 | {% block advcontent -%} 3 |
4 |
5 | {{_("Url Filter")}} 6 |

7 | {{_("Urls in list would not be downloaded.")}} 8 |

9 | {% if user.urlfilter.count() -%} 10 | 11 | {% for urlflt in user.urlfilter -%} 12 | 13 | 16 | 17 | 18 | {% endfor -%} 19 |
14 | {%if urlflt.url|length>70%}{{urlflt.url[:70]}}...{{urlflt.url}}{%else%}{{urlflt.url}}{%endif%} 15 | {{_("Delete")}}
20 | {% endif -%} 21 |
22 | 23 |
24 | 25 |
26 |
27 | {% endblock -%} -------------------------------------------------------------------------------- /lib/bs4/tests/test_docs.py: -------------------------------------------------------------------------------- 1 | "Test harness for doctests." 2 | 3 | # pylint: disable-msg=E0611,W0142 4 | 5 | __metaclass__ = type 6 | __all__ = [ 7 | 'additional_tests', 8 | ] 9 | 10 | import atexit 11 | import doctest 12 | import os 13 | #from pkg_resources import ( 14 | # resource_filename, resource_exists, resource_listdir, cleanup_resources) 15 | import unittest 16 | 17 | DOCTEST_FLAGS = ( 18 | doctest.ELLIPSIS | 19 | doctest.NORMALIZE_WHITESPACE | 20 | doctest.REPORT_NDIFF) 21 | 22 | 23 | # def additional_tests(): 24 | # "Run the doc tests (README.txt and docs/*, if any exist)" 25 | # doctest_files = [ 26 | # os.path.abspath(resource_filename('bs4', 'README.txt'))] 27 | # if resource_exists('bs4', 'docs'): 28 | # for name in resource_listdir('bs4', 'docs'): 29 | # if name.endswith('.txt'): 30 | # doctest_files.append( 31 | # os.path.abspath( 32 | # resource_filename('bs4', 'docs/%s' % name))) 33 | # kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) 34 | # atexit.register(cleanup_resources) 35 | # return unittest.TestSuite(( 36 | # doctest.DocFileSuite(*doctest_files, **kwargs))) 37 | -------------------------------------------------------------------------------- /lib/memcachestore.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | """GAE中不能直接使用WEB.PY的session,使用此Store代替dbstore""" 4 | from web.session import Store 5 | from google.appengine.api import memcache 6 | import web 7 | import time 8 | 9 | class MemcacheStore(Store): 10 | def __init__(self, memcache): 11 | self.memcache = memcache 12 | 13 | def __contains__(self, key): 14 | data = self.memcache.get(key) 15 | return bool(data) 16 | 17 | def __getitem__(self, key): 18 | now = time.time() 19 | value = self.memcache.get(key) 20 | if not value: 21 | raise KeyError 22 | else: 23 | value['attime'] = now 24 | self.memcache.replace(key,value) 25 | return value 26 | 27 | def __setitem__(self, key, value): 28 | now = time.time() 29 | value['attime'] = now 30 | s = self.memcache.get(key) 31 | if s: 32 | self.memcache.replace(key,value) 33 | else: 34 | self.memcache.add(key,value,web.config.session_parameters['timeout']) 35 | 36 | def __delitem__(self, key): 37 | self.memcache.delete(key) 38 | 39 | def cleanup(self, timeout): 40 | pass 41 | -------------------------------------------------------------------------------- /lib/cssutils/stylesheets/stylesheetlist.py: -------------------------------------------------------------------------------- 1 | """StyleSheetList implements DOM Level 2 Style Sheets StyleSheetList.""" 2 | __all__ = ['StyleSheetList'] 3 | __docformat__ = 'restructuredtext' 4 | __version__ = '$Id$' 5 | 6 | class StyleSheetList(list): 7 | """Interface `StyleSheetList` (introduced in DOM Level 2) 8 | 9 | The `StyleSheetList` interface provides the abstraction of an ordered 10 | collection of :class:`~cssutils.stylesheets.StyleSheet` objects. 11 | 12 | The items in the `StyleSheetList` are accessible via an integral index, 13 | starting from 0. 14 | 15 | This Python implementation is based on a standard Python list so e.g. 16 | allows ``examplelist[index]`` usage. 17 | """ 18 | def item(self, index): 19 | """ 20 | Used to retrieve a style sheet by ordinal `index`. 
If `index` is 21 | greater than or equal to the number of style sheets in the list, 22 | this returns ``None``. 23 | """ 24 | try: 25 | return self[index] 26 | except IndexError: 27 | return None 28 | 29 | length = property(lambda self: len(self), 30 | doc="The number of :class:`StyleSheet` objects in the list. The range" 31 | " of valid child stylesheet indices is 0 to length-1 inclusive.") 32 | 33 | -------------------------------------------------------------------------------- /lib/chardet/compat.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # Contributor(s): 3 | # Ian Cordasco - port to Python 4 | # 5 | # This library is free software; you can redistribute it and/or 6 | # modify it under the terms of the GNU Lesser General Public 7 | # License as published by the Free Software Foundation; either 8 | # version 2.1 of the License, or (at your option) any later version. 9 | # 10 | # This library is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | # Lesser General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU Lesser General Public 16 | # License along with this library; if not, write to the Free Software 17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 18 | # 02110-1301 USA 19 | ######################### END LICENSE BLOCK ######################### 20 | 21 | import sys 22 | 23 | 24 | if sys.version_info < (3, 0): 25 | base_str = (str, unicode) 26 | else: 27 | base_str = (bytes, str) 28 | 29 | 30 | def wrap_ord(a): 31 | if sys.version_info < (3, 0) and isinstance(a, base_str): 32 | return ord(a) 33 | else: 34 | return a 35 | -------------------------------------------------------------------------------- /lib/calibre/utils/mreplace.py: -------------------------------------------------------------------------------- 1 | #multiple replace from dictionnary : http://code.activestate.com/recipes/81330/ 2 | __license__ = 'GPL v3' 3 | __copyright__ = '2010, sengian ' 4 | __docformat__ = 'restructuredtext en' 5 | 6 | import re 7 | from UserDict import UserDict 8 | 9 | class MReplace(UserDict): 10 | 11 | def __init__(self, data=None, case_sensitive=True): 12 | UserDict.__init__(self, data) 13 | self.re = None 14 | self.regex = None 15 | self.case_sensitive = case_sensitive 16 | self.compile_regex() 17 | 18 | def compile_regex(self): 19 | if len(self.data) > 0: 20 | keys = sorted(self.data.keys(), key=len, reverse=True) 21 | tmp = "(%s)" % "|".join(map(re.escape, keys)) 22 | if self.re != tmp: 23 | self.re = tmp 24 | if self.case_sensitive: 25 | self.regex = re.compile(self.re) 26 | else: 27 | self.regex = re.compile(self.re, re.I) 28 | 29 | def __call__(self, mo): 30 | return self[mo.string[mo.start():mo.end()]] 31 | 32 | def mreplace(self, text): 33 | #Replace without regex compile 34 | if len(self.data) < 1 or self.re is None: 35 | return text 36 | return self.regex.sub(self, text) 37 | 38 | -------------------------------------------------------------------------------- /books/Readme.txt: -------------------------------------------------------------------------------- 1 | 1. 
概述 2 | 此应用根目录下的books目录存放自定义RSS设置,每个文件为一本"书",对应推送到kindle的一本书。 3 | 应用启动后会自动读取此目录下的所有py文件,动态导入,并显示在网页“我的订阅”下,可以选择是否推送。 4 | books目录下的文件除了__init__.py和base.py,其他的文件都可以随意删除,如果你不需要的话。 5 | 在books目录下删除的“书籍”会在一天内从数据库中清除。 6 | 7 | 2. py文件格式 8 | ★py文件建议为UTF-8格式,特别是里面有中文的话。 9 | 所以每个py文件的头一行建议为: 10 | # -*- coding:utf-8 -*- 11 | 或者: 12 | #!/usr/bin/env python 13 | # -*- coding:utf-8 -*- 14 | 15 | ★每个py文件都要实现一个函数getBook(),返回书籍实际定义的"类"对象: 16 | def getBook(): 17 | return Qiushibaike 18 | 19 | ★每本书为一个类,必须实现的接口只有一个: 20 | Items(self, opts=None) 21 | 它是一个生成器或者返回一个迭代器。 22 | 每次返回一个元组: 23 | HTML元组:(节标题, URL, 文章标题, 文章内容,文章摘要) - 文章内容为字符串 24 | 图片元组:(图片MIME, URL, 图片文件名, 图片内容,None) -图片内容为字节串 25 | 其中图片MIME为:image/jpeg, image/gif 等 26 | 27 | ★上面已经说完了书籍定义的一切,所以如果你精通python,就可以自己写自己的书籍类了。 28 | 29 | ★不过如果你偷懒,也可以继承base模块中定义的两个书籍模板之一来定制自己的书籍类。 30 | 下一节介绍如何定制。 31 | 32 | 3. 书籍类定制方法 33 | 写过或看过calibre的recipe的基本上就直接会了。 34 | 因为calibre的recipe模块依赖挺多的,我时间不够,偷懒了,就不移植了,直接根据 35 | recipe的外形写了一个处理模块。 36 | ★根据RSS类型,从base模块中导入不同的书籍基类 37 | from base import BaseFeedBook/WebpageBook 38 | 如果你感兴趣的网站不提供RSS订阅,则可以继承WebpageBook直接连接网页提取信息。 39 | ★子类能定制的参数都在BaseFeedBook类的定义中,注释很详细。 40 | ★处理HTML的BeautifulSoup为4.x版本。 41 | 42 | 在此贴子里有更详细的说明:http://www.hi-pda.com/forum/viewthread.php?tid=1248204 43 | -------------------------------------------------------------------------------- /lib/readability/cleaners.py: -------------------------------------------------------------------------------- 1 | # strip out a set of nuisance html attributes that can mess up rendering in RSS feeds 2 | import re 3 | from lxml.html.clean import Cleaner 4 | 5 | bad_attrs = ['width', 'height', 'style', '[-a-z]*color', 'background[-a-z]*', 'on*'] 6 | single_quoted = "'[^']+'" 7 | double_quoted = '"[^"]+"' 8 | non_space = '[^ "\'>]+' 9 | htmlstrip = re.compile("<" # open 10 | "([^>]+) " # prefix 11 | "(?:%s) *" % ('|'.join(bad_attrs),) + # undesirable attributes 12 | '= *(?:%s|%s|%s)' % (non_space, single_quoted, double_quoted) + # value 13 | "([^>]*)" # postfix 14 | ">" # end 15 | , re.I) 16 | 17 | def clean_attributes(html): 18 | while htmlstrip.search(html): 19 | html = htmlstrip.sub('<\\1\\2>', html) 20 | return html 21 | 22 | def normalize_spaces(s): 23 | if not s: return '' 24 | """replace any sequence of whitespace 25 | characters with a single space""" 26 | return ' '.join(s.split()) 27 | 28 | html_cleaner = Cleaner(scripts=True, javascript=True, comments=True, 29 | style=True, links=True, meta=False, add_nofollow=False, 30 | page_structure=False, processing_instructions=True, embedded=False, 31 | frames=False, forms=False, annoying_tags=False, remove_tags=None, 32 | remove_unknown_tags=False, safe_attrs_only=False) 33 | -------------------------------------------------------------------------------- /lib/chardet/__init__.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 | # Lesser General Public License for more details. 
11 | # 12 | # You should have received a copy of the GNU Lesser General Public 13 | # License along with this library; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 15 | # 02110-1301 USA 16 | ######################### END LICENSE BLOCK ######################### 17 | 18 | __version__ = "2.3.0" 19 | from sys import version_info 20 | 21 | 22 | def detect(aBuf): 23 | if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or 24 | (version_info >= (3, 0) and not isinstance(aBuf, bytes))): 25 | raise ValueError('Expected a bytes object, not a unicode object') 26 | 27 | from . import universaldetector 28 | u = universaldetector.UniversalDetector() 29 | u.reset() 30 | u.feed(aBuf) 31 | u.close() 32 | return u.result 33 | -------------------------------------------------------------------------------- /apps/View/DbViewer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Contributors: 6 | # rexdf 7 | 8 | import web 9 | 10 | from apps.BaseHandler import BaseHandler 11 | from apps.dbModels import * 12 | 13 | from lib.autodecoder import UrlEncoding 14 | 15 | class DbViewer(BaseHandler): 16 | __url__ = "/dbviewer" 17 | def GET(self): 18 | self.login_required('admin') 19 | #可以修改UrlEncoding,如果chardet自动检测的编码错误的话 20 | action = web.input().get('action') 21 | if action == 'modurlenc': 22 | id = int(web.input().get('id', 0)) 23 | feedenc = web.input().get('feedenc') 24 | pageenc = web.input().get('pageenc') 25 | urlenc = UrlEncoding.get_by_id(id) 26 | if urlenc: 27 | if feedenc: urlenc.feedenc = feedenc 28 | if pageenc: urlenc.pageenc = pageenc 29 | urlenc.put() 30 | elif action == 'delurlenc': 31 | id = int(web.input().get('id', 0)) 32 | urlenc = UrlEncoding.get_by_id(id) 33 | if urlenc: 34 | urlenc.delete() 35 | return self.render('dbviewer.html', "DbViewer", 36 | books=Book.all(),users=KeUser.all(), 37 | feeds=Feed.all().order('book'),urlencs=UrlEncoding.all()) -------------------------------------------------------------------------------- /lib/web/python23.py: -------------------------------------------------------------------------------- 1 | """Python 2.3 compatabilty""" 2 | import threading 3 | 4 | class threadlocal(object): 5 | """Implementation of threading.local for python2.3. 6 | """ 7 | def __getattribute__(self, name): 8 | if name == "__dict__": 9 | return threadlocal._getd(self) 10 | else: 11 | try: 12 | return object.__getattribute__(self, name) 13 | except AttributeError: 14 | try: 15 | return self.__dict__[name] 16 | except KeyError: 17 | raise AttributeError, name 18 | 19 | def __setattr__(self, name, value): 20 | self.__dict__[name] = value 21 | 22 | def __delattr__(self, name): 23 | try: 24 | del self.__dict__[name] 25 | except KeyError: 26 | raise AttributeError, name 27 | 28 | def _getd(self): 29 | t = threading.currentThread() 30 | if not hasattr(t, '_d'): 31 | # using __dict__ of thread as thread local storage 32 | t._d = {} 33 | 34 | _id = id(self) 35 | # there could be multiple instances of threadlocal. 
36 | # use id(self) as key 37 | if _id not in t._d: 38 | t._d[_id] = {} 39 | return t._d[_id] 40 | 41 | if __name__ == '__main__': 42 | d = threadlocal() 43 | d.x = 1 44 | print d.__dict__ 45 | print d.x 46 | -------------------------------------------------------------------------------- /templates/booklogininfo.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menubar -%} 3 | {% endblock -%} 4 | {% block content -%} 5 |
6 |
7 | {{_("Input website login info for book '%s'")|format('' + bk.title + '')|safe}} 8 |
9 | 10 | 12 |
13 |
14 | 15 | 17 |
18 | 19 | {% if tips -%} 20 |

{{tips}}

21 | {% else -%} 22 |

{{_("Leave any field empty to delete info from database.")}}

23 | {% endif -%} 24 |
25 | 26 |
27 |
28 |
29 | {% endblock -%} -------------------------------------------------------------------------------- /templates/login.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block css -%} 3 | 15 | {% endblock -%} 16 | {% block bodytag -%} 17 | 18 | {% endblock -%} 19 | {% block header_loginfo -%} 20 | {% endblock %} 21 | {% block menubar -%} 22 | {% endblock -%} 23 | {% block content -%} 24 |
25 | {% if tips -%} 26 |

{{tips}}

27 | {% endif -%} 28 |
29 |
30 | 31 | 33 |
34 |
35 | 36 | 37 |
38 |
39 | 40 |
41 |
42 |
43 |

{{_("The website dont allow register, you can ask owner for a account.")}}

44 | {% endblock -%} -------------------------------------------------------------------------------- /lib/chardet/constants.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | _debug = 0 30 | 31 | eDetecting = 0 32 | eFoundIt = 1 33 | eNotMe = 2 34 | 35 | eStart = 0 36 | eError = 1 37 | eItsMe = 2 38 | 39 | SHORTCUT_THRESHOLD = 0.95 40 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/jadecoder.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | __license__ = 'GPL 3' 3 | __copyright__ = '2010, Hiroshi Miura ' 4 | __docformat__ = 'restructuredtext en' 5 | 6 | ''' 7 | Decode unicode text to an ASCII representation of the text for Japanese. 8 | Translate unicode string to ASCII roman string. 9 | 10 | API is based on the python unidecode, 11 | which is based on Ruby gem (http://rubyforge.org/projects/unidecode/) 12 | and perl module Text::Unidecode 13 | (http://search.cpan.org/~sburke/Text-Unidecode-0.04/). 14 | 15 | This functionality is owned by Kakasi Japanese processing engine. 
16 | 17 | Copyright (c) 2010 Hiroshi Miura 18 | ''' 19 | 20 | import re 21 | from calibre.ebooks.unihandecode.unidecoder import Unidecoder 22 | from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS 23 | from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES 24 | from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi 25 | 26 | class Jadecoder(Unidecoder): 27 | kakasi = None 28 | codepoints = {} 29 | 30 | def __init__(self): 31 | self.codepoints = CODEPOINTS 32 | self.codepoints.update(JACODES) 33 | self.kakasi = kakasi() 34 | 35 | def decode(self, text): 36 | try: 37 | result=self.kakasi.do(text) 38 | return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result) 39 | except: 40 | return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text) 41 | 42 | -------------------------------------------------------------------------------- /apps/Work/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | __all__ = [] 5 | 6 | import pkgutil 7 | import inspect 8 | 9 | #import main 10 | 11 | #Load all class with __url__ attribute in the directory 12 | 13 | for loader, name, is_pkg in pkgutil.walk_packages(__path__): 14 | module = loader.find_module(name).load_module(name) 15 | 16 | for name, value in inspect.getmembers(module): 17 | if name.startswith('__') or not inspect.isclass(value): 18 | continue 19 | url=getattr(value,'__url__',None) 20 | if not url: 21 | continue 22 | globals()[name] = value 23 | __all__.append(name) 24 | #main.log.info('debug: %s loaded'%name) 25 | 26 | try: 27 | main.urls += [url,name] 28 | except AttributeError: 29 | main.urls = [] 30 | main.log.info('First: %s loaded'%name) 31 | main.urls += [url,name] 32 | 33 | '''import os 34 | 35 | #def LoadWorker(): 36 | for works in os.listdir(os.path.dirname(__file__)): 37 | if works.endswith('.py') and not works.startswith('__'): 38 | workname = os.path.splitext(works)[0] 39 | try: 40 | mwork = __import__("apps.Work." + workname, fromlist='*') 41 | #bk = mbook.getBook() 42 | #globals()[bk.__name__] = getattr(bk, bk.__name__) 43 | #RegisterBook(bk) 44 | except Exception as e: 45 | default_log.warn("Worker '%s' import failed : %s" % (workname,e)) 46 | 47 | #LoadWorker()''' -------------------------------------------------------------------------------- /books/Lifeweek.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from base import BaseFeedBook 4 | import re 5 | 6 | def getBook(): 7 | return Lifeweek 8 | 9 | class Lifeweek(BaseFeedBook): 10 | title = u'三联生活周刊' 11 | description = u'秉承"倡导品质生活"的理念,提供优质新媒体内容与服务。每周六推送' 12 | language = 'zh-cn' 13 | feed_encoding = "utf-8" 14 | page_encoding = "utf-8" 15 | mastheadfile = "mh_lifeweek.gif" 16 | coverfile = "cv_lifeweek.jpg" 17 | oldest_article = 0 18 | deliver_days = ['Saturday'] 19 | 20 | feeds = [ 21 | (u'三联生活网', 'http://app.lifeweek.com.cn/?app=rss&controller=index&action=feed'), 22 | ] 23 | 24 | def processtitle(self, title): 25 | return title[:-6] if title.endswith(u'_三联生活网') else title 26 | 27 | def preprocess(self, content): 28 | #当文章有分页时,去除重复的首页 29 | 30 | #去除脚注,保留版权声明 31 | re_footer = re.compile(r'
]*>.*
') 32 | article = re_footer.sub('', content) 33 | 34 | #为了统一,去除“网络编辑“ 35 | re_editor = re.compile(r'

]*>.*

') 36 | article = re_editor.sub('', article) 37 | 38 | re_mce = re.compile(r'_mcePaste') 39 | if re_mce.search(content) is not None: 40 | #文章有分页,只处理一层嵌套 41 | re_first_page = re.compile(r']*>[^<>]*(<[^<>]*>[^<>]*]*>|<[^<>]*[/]>){,3}[^<>]*

') 42 | article = re_first_page.sub('', article) 43 | 44 | return article 45 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/pykakasi/jisyo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # jisyo.py 3 | # 4 | # Copyright 2011 Hiroshi Miura 5 | import cPickle, marshal 6 | from zlib import decompress 7 | 8 | class jisyo (object): 9 | kanwadict = None 10 | itaijidict = None 11 | kanadict = None 12 | jisyo_table = {} 13 | 14 | # this class is Borg 15 | _shared_state = {} 16 | 17 | def __new__(cls, *p, **k): 18 | self = object.__new__(cls, *p, **k) 19 | self.__dict__ = cls._shared_state 20 | return self 21 | 22 | def __init__(self): 23 | if self.kanwadict is None: 24 | self.kanwadict = cPickle.loads( 25 | P('localization/pykakasi/kanwadict2.pickle', data=True)) 26 | if self.itaijidict is None: 27 | self.itaijidict = cPickle.loads( 28 | P('localization/pykakasi/itaijidict2.pickle', data=True)) 29 | if self.kanadict is None: 30 | self.kanadict = cPickle.loads( 31 | P('localization/pykakasi/kanadict2.pickle', data=True)) 32 | 33 | def load_jisyo(self, char): 34 | try:#python2 35 | key = "%04x"%ord(unicode(char)) 36 | except:#python3 37 | key = "%04x"%ord(char) 38 | 39 | try: #already exist? 40 | table = self.jisyo_table[key] 41 | except: 42 | try: 43 | table = self.jisyo_table[key] = marshal.loads(decompress(self.kanwadict[key])) 44 | except: 45 | return None 46 | return table 47 | 48 | -------------------------------------------------------------------------------- /apps/View/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | __all__ = [] 5 | 6 | import pkgutil 7 | import inspect 8 | 9 | #import main 10 | 11 | #Load all class with __url__ attribute in the directory 12 | 13 | for loader, name, is_pkg in pkgutil.walk_packages(__path__): 14 | module = loader.find_module(name).load_module(name) 15 | 16 | for name, value in inspect.getmembers(module): 17 | if name.startswith('__') or not inspect.isclass(value): 18 | continue 19 | url=getattr(value,'__url__',None) 20 | if not url: 21 | continue 22 | globals()[name] = value 23 | __all__.append(name) 24 | #main.log.info('debug: %s loaded'%name) 25 | 26 | try: 27 | main.urls += [url,name] 28 | except AttributeError: 29 | main.urls = [] 30 | main.log.info('First: %s loaded'%name) 31 | main.urls += [url,name] 32 | ''' 33 | import os 34 | 35 | __all__ = [] 36 | 37 | #def LoadViews(): 38 | for views in os.listdir(os.path.dirname(__file__)): 39 | if views.endswith('.py') and not views.startswith('__'): 40 | viewname = os.path.splitext(views)[0] 41 | __all__.append(viewname) 42 | try: 43 | mview = __import__("apps.View." 
+ viewname, fromlist='*') 44 | #bk = mbook.getBook() 45 | #globals()[bk.__name__] = getattr(bk, bk.__name__) 46 | #RegisterBook(bk) 47 | except Exception as e: 48 | default_log.warn("View '%s' import failed : %s" % (viewname,e)) 49 | 50 | #LoadViews()''' -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/pykakasi/k2a.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # k2a.py 3 | # 4 | # Copyright 2011 Hiroshi Miura 5 | # 6 | # Original copyright: 7 | # * KAKASI (Kanji Kana Simple inversion program) 8 | # * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $ 9 | # * Copyright (C) 1992 10 | # * Hironobu Takahashi (takahasi@tiny.or.jp) 11 | # * 12 | # * This program is free software; you can redistribute it and/or modify 13 | # * it under the terms of the GNU General Public License as published by 14 | # * the Free Software Foundation; either versions 2, or (at your option) 15 | # * any later version. 16 | # * 17 | # * This program is distributed in the hope that it will be useful 18 | # * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # * GNU General Public License for more details. 21 | # * 22 | # */ 23 | 24 | from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo 25 | 26 | class K2a (object): 27 | 28 | kanwa = None 29 | 30 | def __init__(self): 31 | self.kanwa = jisyo() 32 | 33 | def isKatakana(self, char): 34 | return ( 0x30a0 < ord(char) and ord(char) < 0x30f7) 35 | 36 | def convert(self, text): 37 | Hstr = "" 38 | max_len = -1 39 | r = min(10, len(text)+1) 40 | for x in xrange(r): 41 | if text[:x] in self.kanwa.kanadict: 42 | if max_len < x: 43 | max_len = x 44 | Hstr = self.kanwa.kanadict[text[:x]] 45 | return (Hstr, max_len) 46 | 47 | -------------------------------------------------------------------------------- /lib/web/test.py: -------------------------------------------------------------------------------- 1 | """test utilities 2 | (part of web.py) 3 | """ 4 | import unittest 5 | import sys, os 6 | import web 7 | 8 | TestCase = unittest.TestCase 9 | TestSuite = unittest.TestSuite 10 | 11 | def load_modules(names): 12 | return [__import__(name, None, None, "x") for name in names] 13 | 14 | def module_suite(module, classnames=None): 15 | """Makes a suite from a module.""" 16 | if classnames: 17 | return unittest.TestLoader().loadTestsFromNames(classnames, module) 18 | elif hasattr(module, 'suite'): 19 | return module.suite() 20 | else: 21 | return unittest.TestLoader().loadTestsFromModule(module) 22 | 23 | def doctest_suite(module_names): 24 | """Makes a test suite from doctests.""" 25 | import doctest 26 | suite = TestSuite() 27 | for mod in load_modules(module_names): 28 | suite.addTest(doctest.DocTestSuite(mod)) 29 | return suite 30 | 31 | def suite(module_names): 32 | """Creates a suite from multiple modules.""" 33 | suite = TestSuite() 34 | for mod in load_modules(module_names): 35 | suite.addTest(module_suite(mod)) 36 | return suite 37 | 38 | def runTests(suite): 39 | runner = unittest.TextTestRunner() 40 | return runner.run(suite) 41 | 42 | def main(suite=None): 43 | if not suite: 44 | main_module = __import__('__main__') 45 | # allow command line switches 46 | args = [a for a in sys.argv[1:] if not a.startswith('-')] 47 | suite = module_suite(main_module, args or None) 48 | 49 | result = runTests(suite) 50 | sys.exit(not result.wasSuccessful()) 51 | 52 | 
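A quick illustration of the harness in lib/web/test.py above: web.test.main() collects a unittest suite from the calling module (or from class names given on the command line), runs it with TextTestRunner and exits non-zero on failure. The following is a minimal sketch only, assuming a hypothetical standalone test file; the module, class and test names are illustrative and not part of this repository.

#!/usr/bin/env python
# Hypothetical test module (editor's sketch) showing how lib/web/test.py is driven.
from web import test

class ExampleTest(test.TestCase):   # test.TestCase is a re-export of unittest.TestCase
    def test_truth(self):
        self.assertTrue(True)

if __name__ == '__main__':
    # main() builds a suite from this module via module_suite(), runs it with
    # runTests() (unittest.TextTestRunner) and exits non-zero if any test fails.
    test.main()

Running it as "python thisfile.py ExampleTest" restricts the run to the named classes, because main() forwards non-option arguments to module_suite().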
-------------------------------------------------------------------------------- /lib/cssutils/_fetch.py: -------------------------------------------------------------------------------- 1 | """Default URL reading functions""" 2 | __all__ = ['_defaultFetcher'] 3 | __docformat__ = 'restructuredtext' 4 | __version__ = '$Id: tokenize2.py 1547 2008-12-10 20:42:26Z cthedot $' 5 | 6 | import cssutils 7 | from cssutils import VERSION 8 | import encutils 9 | import errorhandler 10 | import urllib2 11 | 12 | log = errorhandler.ErrorHandler() 13 | 14 | def _defaultFetcher(url): 15 | """Retrieve data from ``url``. cssutils default implementation of fetch 16 | URL function. 17 | 18 | Returns ``(encoding, string)`` or ``None`` 19 | """ 20 | try: 21 | request = urllib2.Request(url) 22 | request.add_header('User-agent', 23 | 'cssutils %s (http://www.cthedot.de/cssutils/)' % VERSION) 24 | res = urllib2.urlopen(request) 25 | except OSError, e: 26 | # e.g if file URL and not found 27 | log.warn(e, error=OSError) 28 | except (OSError, ValueError), e: 29 | # invalid url, e.g. "1" 30 | log.warn(u'ValueError, %s' % e.args[0], error=ValueError) 31 | except urllib2.HTTPError, e: 32 | # http error, e.g. 404, e can be raised 33 | log.warn(u'HTTPError opening url=%s: %s %s' % 34 | (url, e.code, e.msg), error=e) 35 | except urllib2.URLError, e: 36 | # URLError like mailto: or other IO errors, e can be raised 37 | log.warn(u'URLError, %s' % e.reason, error=e) 38 | else: 39 | if res: 40 | mimeType, encoding = encutils.getHTTPInfo(res) 41 | if mimeType != u'text/css': 42 | log.error(u'Expected "text/css" mime type for url=%r but found: %r' % 43 | (url, mimeType), error=ValueError) 44 | return encoding, res.read() 45 | -------------------------------------------------------------------------------- /lib/readability/encoding.py: -------------------------------------------------------------------------------- 1 | import re 2 | import chardet 3 | 4 | def get_encoding(page): 5 | # Regex for XML and HTML Meta charset declaration 6 | charset_re = re.compile(r']', flags=re.I) 7 | pragma_re = re.compile(r']', flags=re.I) 8 | xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]') 9 | 10 | declared_encodings = (charset_re.findall(page) + 11 | pragma_re.findall(page) + 12 | xml_re.findall(page)) 13 | 14 | # Try any declared encodings 15 | if len(declared_encodings) > 0: 16 | for declared_encoding in declared_encodings: 17 | try: 18 | page.decode(custom_decode(declared_encoding)) 19 | return custom_decode(declared_encoding) 20 | except UnicodeDecodeError: 21 | pass 22 | 23 | # Fallback to chardet if declared encodings fail 24 | text = re.sub(']*>\s*', ' ', page) 25 | enc = 'utf-8' 26 | if not text.strip() or len(text) < 10: 27 | return enc # can't guess 28 | res = chardet.detect(text) 29 | enc = res['encoding'] 30 | #print '->', enc, "%.2f" % res['confidence'] 31 | enc = custom_decode(enc) 32 | return enc 33 | 34 | def custom_decode(encoding): 35 | """Overrides encoding when charset declaration 36 | or charset determination is a subset of a larger 37 | charset. 
Created because of issues with Chinese websites""" 38 | encoding = encoding.lower() 39 | alternates = { 40 | 'big5': 'big5hkscs', 41 | 'gb2312': 'gb18030', 42 | 'ascii': 'utf-8', 43 | 'MacCyrillic': 'cp1251', 44 | } 45 | if encoding in alternates: 46 | return alternates[encoding] 47 | else: 48 | return encoding -------------------------------------------------------------------------------- /lib/weixin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | import re, base64 5 | 6 | from Crypto.Cipher import AES 7 | 8 | def process_eqs(html): 9 | pattern = ( 10 | r'SogouEncrypt.setKv\("(\w+)","(\d)"\)' 11 | r'.*?' 12 | r'SogouEncrypt.encryptquery\("(\w+)","(\w+)"\)' 13 | ) 14 | m = re.findall(pattern, html, re.S) 15 | key, level, secret, setting = m[0] 16 | 17 | eqs = _cipher_eqs(key, secret, setting) 18 | 19 | return eqs, level 20 | 21 | 22 | def _cipher_eqs(key, secret, setting='sogou'): 23 | """ 24 | SogouEncrypt.encryptquery 25 | """ 26 | assert len(key) == 11 27 | 28 | ss = setting.split('-') 29 | 30 | # function g 31 | if len(ss) > 2: 32 | h = ss[2] 33 | else: 34 | h = ss[0] 35 | 36 | # function f 37 | if len(h) > 5: 38 | n = h[:-5] 39 | else: 40 | n = h + (5 - len(h)) * 's' 41 | 42 | key += n 43 | 44 | data = secret + 'hdq=' + setting 45 | # padding data 46 | length = 16 - (len(data) % 16) 47 | data += chr(length) * length 48 | 49 | IV = b'0000000000000000' 50 | cipher = AES.new(_to_bytes(key), AES.MODE_CBC, IV) 51 | # encrypt data 52 | data = cipher.encrypt(_to_bytes(data)) 53 | data = _to_unicode(base64.b64encode(data)) 54 | 55 | # function e 56 | rv = '' 57 | i = 0 58 | for m in range(len(data)): 59 | rv += data[m] 60 | if (m == pow(2, i)) and i < 5: 61 | rv += n[i] 62 | i += 1 63 | return rv 64 | 65 | 66 | def _to_bytes(text): 67 | if isinstance(text, bytes): 68 | return text 69 | return text.encode('utf-8') 70 | 71 | 72 | def _to_unicode(text): 73 | if isinstance(text, str): 74 | return text 75 | return text.decode('utf-8') -------------------------------------------------------------------------------- /lib/chardet/euctwprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCTWDistributionAnalysis 31 | from .mbcssm import EUCTWSMModel 32 | 33 | class EUCTWProber(MultiByteCharSetProber): 34 | def __init__(self): 35 | MultiByteCharSetProber.__init__(self) 36 | self._mCodingSM = CodingStateMachine(EUCTWSMModel) 37 | self._mDistributionAnalyzer = EUCTWDistributionAnalysis() 38 | self.reset() 39 | 40 | def get_charset_name(self): 41 | return "EUC-TW" 42 | -------------------------------------------------------------------------------- /lib/chardet/euckrprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import EUCKRSMModel 32 | 33 | 34 | class EUCKRProber(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(EUCKRSMModel) 38 | self._mDistributionAnalyzer = EUCKRDistributionAnalysis() 39 | self.reset() 40 | 41 | def get_charset_name(self): 42 | return "EUC-KR" 43 | -------------------------------------------------------------------------------- /lib/chardet/gb2312prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 
8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import GB2312DistributionAnalysis 31 | from .mbcssm import GB2312SMModel 32 | 33 | class GB2312Prober(MultiByteCharSetProber): 34 | def __init__(self): 35 | MultiByteCharSetProber.__init__(self) 36 | self._mCodingSM = CodingStateMachine(GB2312SMModel) 37 | self._mDistributionAnalyzer = GB2312DistributionAnalysis() 38 | self.reset() 39 | 40 | def get_charset_name(self): 41 | return "GB2312" 42 | -------------------------------------------------------------------------------- /lib/chardet/big5prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import Big5DistributionAnalysis 31 | from .mbcssm import Big5SMModel 32 | 33 | 34 | class Big5Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(Big5SMModel) 38 | self._mDistributionAnalyzer = Big5DistributionAnalysis() 39 | self.reset() 40 | 41 | def get_charset_name(self): 42 | return "Big5" 43 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/conversion/oeboutput.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | __license__ = 'GPL 3' 3 | __copyright__ = '2009, Kovid Goyal ' 4 | __docformat__ = 'restructuredtext en' 5 | 6 | import os, re 7 | 8 | 9 | from calibre import CurrentDir 10 | 11 | class OEBOutput: 12 | 13 | name = 'OEB Output' 14 | author = 'Kovid Goyal' 15 | file_type = 'oeb' 16 | 17 | def convert(self, oeb_book, output_path, input_plugin, opts, log): 18 | from urllib import unquote 19 | from lxml import etree 20 | 21 | self.log, self.opts = log, opts 22 | if not os.path.exists(output_path): 23 | os.makedirs(output_path) 24 | from calibre.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME 25 | with CurrentDir(output_path): 26 | results = oeb_book.to_opf2(page_map=True) 27 | for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME): 28 | href, root = results.pop(key, [None, None]) 29 | if root is not None: 30 | raw = etree.tostring(root, pretty_print=True, 31 | encoding='utf-8', xml_declaration=True) 32 | if key == OPF_MIME: 33 | # Needed as I can't get lxml to output opf:role and 34 | # not output as well 35 | raw = re.sub(r'(<[/]{0,1})opf:', r'\1', raw) 36 | with open(href, 'wb') as f: 37 | f.write(raw) 38 | 39 | for item in oeb_book.manifest: 40 | path = os.path.abspath(unquote(item.href)) 41 | dir = os.path.dirname(path) 42 | if not os.path.exists(dir): 43 | os.makedirs(dir) 44 | with open(path, 'wb') as f: 45 | f.write(str(item)) 46 | item.unload_data_from_memory(memory=path) 47 | -------------------------------------------------------------------------------- /lib/chardet/cp949prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 
16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import CP949SMModel 32 | 33 | 34 | class CP949Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(CP949SMModel) 38 | # NOTE: CP949 is a superset of EUC-KR, so the distribution should be 39 | # not different. 40 | self._mDistributionAnalyzer = EUCKRDistributionAnalysis() 41 | self.reset() 42 | 43 | def get_charset_name(self): 44 | return "CP949" 45 | -------------------------------------------------------------------------------- /apps/module_backend.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Author: 6 | # cdhigh 7 | #Contributors: 8 | # rexdf 9 | 10 | __Author__ = "cdhigh" 11 | 12 | import os, datetime, logging, __builtin__, hashlib, time 13 | 14 | # for debug 15 | # 本地启动调试服务器:python.exe dev_appserver.py c:\kindleear 16 | IsRunInLocal = (os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) 17 | log = logging.getLogger() 18 | __builtin__.__dict__['default_log'] = log 19 | __builtin__.__dict__['IsRunInLocal'] = IsRunInLocal 20 | 21 | supported_languages = ['en','zh-cn','tr-tr'] #不支持的语种则使用第一个语言 22 | #gettext.install('lang', 'i18n', unicode=True) #for calibre startup 23 | 24 | class Main_Var: 25 | urls = [] 26 | session = None 27 | jjenv = None 28 | supported_languages = None 29 | log = None 30 | __Version__ = None 31 | 32 | __builtin__.__dict__['main'] = Main_Var 33 | main.supported_languages = supported_languages 34 | main.log = log 35 | main.__Version__ = __Version__ 36 | log.setLevel(logging.INFO if IsRunInLocal else logging.WARN) 37 | 38 | import web 39 | import jinja2 40 | from google.appengine.api import memcache 41 | 42 | from lib.memcachestore import MemcacheStore 43 | 44 | from apps.Work import * 45 | 46 | from apps.utils import fix_filesizeformat 47 | 48 | application = web.application(main.urls, globals()) 49 | store = MemcacheStore(memcache) 50 | session = web.session.Session(application, store, initializer={'username':'', 'login':0, 'lang':'', 'pocket_request_token':''}) 51 | jjenv = jinja2.Environment(loader=jinja2.FileSystemLoader('templates'), 52 | extensions=["jinja2.ext.do",'jinja2.ext.i18n']) 53 | jjenv.filters['filesizeformat'] = fix_filesizeformat 54 | 55 | app = application.wsgifunc() 56 | 57 | web.config.debug = IsRunInLocal 58 | 59 | main.session = session 60 | main.jjenv = jjenv -------------------------------------------------------------------------------- /lib/opml.py: -------------------------------------------------------------------------------- 1 | # 
version : 0.5 2 | # https://pypi.python.org/pypi/opml 3 | import lxml.etree 4 | 5 | class OutlineElement(object): 6 | """A single outline object.""" 7 | 8 | def __init__(self, root): 9 | """Initialize from the root node.""" 10 | 11 | self._root = root 12 | 13 | def __getattr__(self, attr): 14 | 15 | if attr in self._root.attrib: 16 | return self._root.attrib[attr] 17 | else: 18 | return '' # added by cdhigh [2014.10.02] 19 | 20 | #raise AttributeError() 21 | 22 | @property 23 | def _outlines(self): 24 | """Return the available sub-outline objects as a seqeunce.""" 25 | 26 | return [OutlineElement(n) for n in self._root.xpath('./outline')] 27 | 28 | def __len__(self): 29 | return len(self._outlines) 30 | 31 | def __getitem__(self, index): 32 | return self._outlines[index] 33 | 34 | class Opml(object): 35 | """Python representation of an OPML file.""" 36 | 37 | def __init__(self, xml_tree): 38 | """Initialize the object using the parsed XML tree.""" 39 | 40 | self._tree = xml_tree 41 | 42 | def __getattr__(self, attr): 43 | """Fall back attribute handler -- attempt to find the attribute in 44 | the OPML .""" 45 | 46 | result = self._tree.xpath('/opml/head/%s/text()' % attr) 47 | if len(result) == 1: 48 | return result[0] 49 | 50 | raise AttributeError() 51 | 52 | @property 53 | def _outlines(self): 54 | """Return the available sub-outline objects as a seqeunce.""" 55 | 56 | return [OutlineElement(n) for n in self._tree.xpath( 57 | '/opml/body/outline')] 58 | 59 | def __len__(self): 60 | return len(self._outlines) 61 | 62 | def __getitem__(self, index): 63 | return self._outlines[index] 64 | 65 | def from_string(opml_text): 66 | 67 | return Opml(lxml.etree.fromstring(opml_text)) 68 | 69 | def parse(opml_url): 70 | 71 | return Opml(lxml.etree.parse(opml_url)) 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /templates/home.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block css -%} 3 | 40 | {% endblock -%} 41 | {% block bodytag -%} 42 | 43 | {% endblock -%} 44 | {% block content -%} 45 | 50 |
51 |
52 |
53 |

{{_("Inherited From Calibre")}}

54 |

{{_("Author modified and ported Calibre to generate mobi file in GAE without kindlegen tool of Amazon,")}} 55 | {{_("for periodical mobi file is a better format to represent news feeds.")}}

56 |
57 |
58 |
59 |
60 |

{{_("Share Your Idea")}}

61 |

{{_("With my")}} 62 | {{_("open source KindleEar application")}} 63 | {{_(", You can deploy your own server to push news feeds to your kindle dialy or share the service with your friends.")}}

64 |
65 |
66 |
67 | {% endblock -%} -------------------------------------------------------------------------------- /templates/advbase.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block css -%} 3 | 39 | {% endblock -%} 40 | {% block content -%} 41 |
42 |
43 |
44 |
45 | {% block advcontent -%} 46 | {% endblock -%} 47 |
48 |
49 | 50 |
51 |
52 |
53 | 79 |
80 |
81 |
82 |
83 |
84 | {% endblock -%} -------------------------------------------------------------------------------- /lib/chardet/charsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from . import constants 30 | import re 31 | 32 | 33 | class CharSetProber: 34 | def __init__(self): 35 | pass 36 | 37 | def reset(self): 38 | self._mState = constants.eDetecting 39 | 40 | def get_charset_name(self): 41 | return None 42 | 43 | def feed(self, aBuf): 44 | pass 45 | 46 | def get_state(self): 47 | return self._mState 48 | 49 | def get_confidence(self): 50 | return 0.0 51 | 52 | def filter_high_bit_only(self, aBuf): 53 | aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf) 54 | return aBuf 55 | 56 | def filter_without_english_letters(self, aBuf): 57 | aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf) 58 | return aBuf 59 | 60 | def filter_with_english_letters(self, aBuf): 61 | # TODO 62 | return aBuf 63 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __license__ = 'GPL 3' 4 | __copyright__ = '2010, Hiroshi Miura ' 5 | __docformat__ = 'restructuredtext en' 6 | __all__ = ["Unihandecoder"] 7 | 8 | ''' 9 | Decode unicode text to an ASCII representation of the text. 10 | Translate unicode characters to ASCII. 11 | 12 | Inspired from John Schember's unidecode library which was created as part 13 | of calibre. 14 | 15 | Copyright(c) 2009, John Schember 16 | 17 | Tranliterate the string from unicode characters to ASCII in Chinese and others. 
18 | 19 | ''' 20 | import unicodedata 21 | 22 | class Unihandecoder(object): 23 | preferred_encoding = None 24 | decoder = None 25 | 26 | def __init__(self, lang="zh", encoding='utf-8'): 27 | self.preferred_encoding = encoding 28 | lang = lang.lower() 29 | if lang[:2] == u'ja': 30 | from calibre.ebooks.unihandecode.jadecoder import Jadecoder 31 | self.decoder = Jadecoder() 32 | elif lang[:2] == u'kr' or lang == u'korean': 33 | from calibre.ebooks.unihandecode.krdecoder import Krdecoder 34 | self.decoder = Krdecoder() 35 | elif lang[:2] == u'vn' or lang == u'vietnum': 36 | from calibre.ebooks.unihandecode.vndecoder import Vndecoder 37 | self.decoder = Vndecoder() 38 | else: #zh and others 39 | from calibre.ebooks.unihandecode.unidecoder import Unidecoder 40 | self.decoder = Unidecoder() 41 | 42 | def decode(self, text): 43 | try: 44 | unicode # python2 45 | if not isinstance(text, unicode): 46 | try: 47 | text = unicode(text) 48 | except: 49 | try: 50 | text = text.decode(self.preferred_encoding) 51 | except: 52 | text = text.decode('utf-8', 'replace') 53 | except: # python3, str is unicode 54 | pass 55 | #at first unicode normalize it. (see Unicode standards) 56 | ntext = unicodedata.normalize('NFKC', text) 57 | return self.decoder.decode(ntext) 58 | -------------------------------------------------------------------------------- /lib/chardet/mbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 
23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetgroupprober import CharSetGroupProber 31 | from .utf8prober import UTF8Prober 32 | from .sjisprober import SJISProber 33 | from .eucjpprober import EUCJPProber 34 | from .gb2312prober import GB2312Prober 35 | from .euckrprober import EUCKRProber 36 | from .cp949prober import CP949Prober 37 | from .big5prober import Big5Prober 38 | from .euctwprober import EUCTWProber 39 | 40 | 41 | class MBCSGroupProber(CharSetGroupProber): 42 | def __init__(self): 43 | CharSetGroupProber.__init__(self) 44 | self._mProbers = [ 45 | UTF8Prober(), 46 | SJISProber(), 47 | EUCJPProber(), 48 | GB2312Prober(), 49 | EUCKRProber(), 50 | CP949Prober(), 51 | Big5Prober(), 52 | EUCTWProber() 53 | ] 54 | self.reset() 55 | -------------------------------------------------------------------------------- /apps/View/Logs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Contributors: 6 | # rexdf 7 | from operator import attrgetter 8 | import datetime 9 | from apps.BaseHandler import BaseHandler 10 | from apps.dbModels import * 11 | from apps.utils import etagged 12 | from google.appengine.api.datastore_errors import NeedIndexError 13 | 14 | class Mylogs(BaseHandler): 15 | __url__ = "/logs" 16 | @etagged() 17 | def GET(self): 18 | user = self.getcurrentuser() 19 | try: 20 | mylogs = DeliverLog.all().filter("username = ", user.name).order('-time').fetch(limit=10) 21 | except NeedIndexError: #很多人不会部署,经常出现没有建立索引的情况,干脆碰到这种情况直接消耗CPU时间自己排序得了 22 | mylogsAll = sorted(DeliverLog.all().filter("username = ", user.name), key=attrgetter('time'), reverse=True)[:10] 23 | logs = {} 24 | if user.name == 'admin': 25 | for u in KeUser.all().filter("name != ", 'admin'): 26 | try: 27 | ul = DeliverLog.all().filter("username = ", u.name).order('-time').fetch(limit=5) 28 | except NeedIndexError: 29 | ul = sorted(DeliverLog.all().filter("username = ", user.name), key=attrgetter('time'), reverse=True)[:5] 30 | if ul: 31 | logs[u.name] = ul 32 | return self.render('logs.html', "Deliver log", current='logs', 33 | mylogs=mylogs, logs=logs) 34 | 35 | class RemoveLogs(BaseHandler): 36 | __url__ = "/removelogs" 37 | def GET(self): 38 | # 停止过期用户的推送 39 | for user in KeUser.all().filter('enable_send = ', True): 40 | if user.expires and (user.expires < datetime.datetime.utcnow()): 41 | user.enable_send = False 42 | user.put() 43 | 44 | query = DeliverLog.all() 45 | query.filter('datetime < ', datetime.datetime.utcnow() - datetime.timedelta(days=25)) 46 | logs = query.fetch(1000) 47 | c = len(logs) 48 | db.delete(logs) 49 | 50 | return "%s lines log removed.
" % c -------------------------------------------------------------------------------- /lib/calibre/utils/cleantext.py: -------------------------------------------------------------------------------- 1 | __license__ = 'GPL 3' 2 | __copyright__ = '2010, sengian ' 3 | __docformat__ = 'restructuredtext en' 4 | 5 | import re, htmlentitydefs 6 | from future_builtins import map 7 | 8 | _ascii_pat = None 9 | 10 | def clean_ascii_chars(txt, charlist=None): 11 | r''' 12 | Remove ASCII control chars. 13 | This is all control chars except \t, \n and \r 14 | ''' 15 | if not txt: 16 | return '' 17 | global _ascii_pat 18 | if _ascii_pat is None: 19 | chars = set(xrange(32)) 20 | chars.add(127) 21 | for x in (9, 10, 13): 22 | chars.remove(x) 23 | _ascii_pat = re.compile(u'|'.join(map(unichr, chars))) 24 | 25 | if charlist is None: 26 | pat = _ascii_pat 27 | else: 28 | pat = re.compile(u'|'.join(map(unichr, charlist))) 29 | return pat.sub('', txt) 30 | 31 | def allowed(x): 32 | x = ord(x) 33 | return (x != 127 and (31 < x < 0xd7ff or x in (9, 10, 13))) or (0xe000 < x < 0xfffd) or (0x10000 < x < 0x10ffff) 34 | 35 | def clean_xml_chars(unicode_string): 36 | return u''.join(filter(allowed, unicode_string)) 37 | 38 | 39 | # Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html 40 | # Removes HTML or XML character references and entities from a text string. 41 | # 42 | # @param text The HTML (or XML) source text. 43 | # @return The plain text, as a Unicode string, if necessary. 44 | 45 | def unescape(text, rm=False, rchar=u''): 46 | def fixup(m, rm=rm, rchar=rchar): 47 | text = m.group(0) 48 | if text[:2] == "&#": 49 | # character reference 50 | try: 51 | if text[:3] == "&#x": 52 | return unichr(int(text[3:-1], 16)) 53 | else: 54 | return unichr(int(text[2:-1])) 55 | except ValueError: 56 | pass 57 | else: 58 | # named entity 59 | try: 60 | text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) 61 | except KeyError: 62 | pass 63 | if rm: 64 | return rchar # replace by char 65 | return text # leave as is 66 | return re.sub("&#?\w+;", fixup, text) 67 | 68 | -------------------------------------------------------------------------------- /lib/cssutils/css/__init__.py: -------------------------------------------------------------------------------- 1 | """Implements Document Object Model Level 2 CSS 2 | http://www.w3.org/TR/2000/PR-DOM-Level-2-Style-20000927/css.html 3 | 4 | currently implemented 5 | - CSSStyleSheet 6 | - CSSRuleList 7 | - CSSRule 8 | - CSSComment (cssutils addon) 9 | - CSSCharsetRule 10 | - CSSFontFaceRule 11 | - CSSImportRule 12 | - CSSMediaRule 13 | - CSSNamespaceRule (WD) 14 | - CSSPageRule 15 | - CSSStyleRule 16 | - CSSUnkownRule 17 | - Selector and SelectorList 18 | - CSSStyleDeclaration 19 | - CSS2Properties 20 | - CSSValue 21 | - CSSPrimitiveValue 22 | - CSSValueList 23 | - CSSVariablesRule 24 | - CSSVariablesDeclaration 25 | 26 | todo 27 | - RGBColor, Rect, Counter 28 | """ 29 | __all__ = [ 30 | 'CSSStyleSheet', 31 | 'CSSRuleList', 32 | 'CSSRule', 33 | 'CSSComment', 34 | 'CSSCharsetRule', 35 | 'CSSFontFaceRule' 36 | 'CSSImportRule', 37 | 'CSSMediaRule', 38 | 'CSSNamespaceRule', 39 | 'CSSPageRule', 40 | 'MarginRule', 41 | 'CSSStyleRule', 42 | 'CSSUnknownRule', 43 | 'CSSVariablesRule', 44 | 'CSSVariablesDeclaration', 45 | 'Selector', 'SelectorList', 46 | 'CSSStyleDeclaration', 'Property', 47 | #'CSSValue', 'CSSPrimitiveValue', 'CSSValueList' 48 | 'PropertyValue', 49 | 'Value', 50 | 'ColorValue', 51 | 'DimensionValue', 52 | 'URIValue', 53 | 'CSSFunction', 54 | 'CSSVariable', 55 | 
'MSValue' 56 | ] 57 | __docformat__ = 'restructuredtext' 58 | __version__ = '$Id$' 59 | 60 | from cssstylesheet import * 61 | from cssrulelist import * 62 | from cssrule import * 63 | from csscomment import * 64 | from csscharsetrule import * 65 | from cssfontfacerule import * 66 | from cssimportrule import * 67 | from cssmediarule import * 68 | from cssnamespacerule import * 69 | from csspagerule import * 70 | from marginrule import * 71 | from cssstylerule import * 72 | from cssvariablesrule import * 73 | from cssunknownrule import * 74 | from selector import * 75 | from selectorlist import * 76 | from cssstyledeclaration import * 77 | from cssvariablesdeclaration import * 78 | from property import * 79 | #from cssvalue import * 80 | from value import * 81 | -------------------------------------------------------------------------------- /lib/cssutils/css/cssrulelist.py: -------------------------------------------------------------------------------- 1 | """CSSRuleList implements DOM Level 2 CSS CSSRuleList. 2 | Partly also http://dev.w3.org/csswg/cssom/#the-cssrulelist.""" 3 | __all__ = ['CSSRuleList'] 4 | __docformat__ = 'restructuredtext' 5 | __version__ = '$Id$' 6 | 7 | class CSSRuleList(list): 8 | """The CSSRuleList object represents an (ordered) list of statements. 9 | 10 | The items in the CSSRuleList are accessible via an integral index, 11 | starting from 0. 12 | 13 | Subclasses a standard Python list so theoretically all standard list 14 | methods are available. Setting methods like ``__init__``, ``append``, 15 | ``extend`` or ``__setslice__`` are added later on instances of this 16 | class if so desired. 17 | E.g. CSSStyleSheet adds ``append`` which is not available in a simple 18 | instance of this class! 19 | """ 20 | def __init__(self, *ignored): 21 | "Nothing is set as this must also be defined later." 22 | pass 23 | 24 | def __notimplemented(self, *ignored): 25 | "Implemented in class using a CSSRuleList only." 26 | raise NotImplementedError( 27 | 'Must be implemented by class using an instance of this class.') 28 | 29 | append = extend = __setitem__ = __setslice__ = __notimplemented 30 | 31 | def item(self, index): 32 | """(DOM) Retrieve a CSS rule by ordinal `index`. The order in this 33 | collection represents the order of the rules in the CSS style 34 | sheet. If index is greater than or equal to the number of rules in 35 | the list, this returns None. 36 | 37 | Returns CSSRule, the style rule at the index position in the 38 | CSSRuleList, or None if that is not a valid index. 
39 | """ 40 | try: 41 | return self[index] 42 | except IndexError: 43 | return None 44 | 45 | length = property(lambda self: len(self), 46 | doc=u"(DOM) The number of CSSRules in the list.") 47 | 48 | def rulesOfType(self, type): 49 | """Yield the rules which have the given `type` only, one of the 50 | constants defined in :class:`cssutils.css.CSSRule`.""" 51 | for r in self: 52 | if r.type == type: 53 | yield r 54 | -------------------------------------------------------------------------------- /lib/cssutils/scripts/cssparse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """utility script to parse given filenames or string 3 | """ 4 | __docformat__ = 'restructuredtext' 5 | __version__ = '$Id$' 6 | 7 | import cssutils 8 | import logging 9 | import optparse 10 | import sys 11 | 12 | def main(args=None): 13 | """ 14 | Parses given filename(s) or string or URL (using optional encoding) and 15 | prints the parsed style sheet to stdout. 16 | 17 | Redirect stdout to save CSS. Redirect stderr to save parser log infos. 18 | """ 19 | usage = """usage: %prog [options] filename1.css [filename2.css ...] 20 | [>filename_combined.css] [2>parserinfo.log] """ 21 | p = optparse.OptionParser(usage=usage) 22 | p.add_option('-s', '--string', action='store_true', dest='string', 23 | help='parse given string') 24 | p.add_option('-u', '--url', action='store', dest='url', 25 | help='parse given url') 26 | p.add_option('-e', '--encoding', action='store', dest='encoding', 27 | help='encoding of the file or override encoding found') 28 | p.add_option('-m', '--minify', action='store_true', dest='minify', 29 | help='minify parsed CSS', default=False) 30 | p.add_option('-d', '--debug', action='store_true', dest='debug', 31 | help='activate debugging output') 32 | 33 | (options, params) = p.parse_args(args) 34 | 35 | if not params and not options.url: 36 | p.error("no filename given") 37 | 38 | if options.debug: 39 | p = cssutils.CSSParser(loglevel=logging.DEBUG) 40 | else: 41 | p = cssutils.CSSParser() 42 | 43 | if options.minify: 44 | cssutils.ser.prefs.useMinified() 45 | 46 | if options.string: 47 | sheet = p.parseString(u''.join(params), encoding=options.encoding) 48 | print sheet.cssText 49 | elif options.url: 50 | sheet = p.parseUrl(options.url, encoding=options.encoding) 51 | print sheet.cssText 52 | else: 53 | for filename in params: 54 | sys.stderr.write('=== CSS FILE: "%s" ===\n' % filename) 55 | sheet = p.parseFile(filename, encoding=options.encoding) 56 | print sheet.cssText 57 | print 58 | sys.stderr.write('\n') 59 | 60 | 61 | if __name__ == "__main__": 62 | sys.exit(main()) 63 | -------------------------------------------------------------------------------- /books/nfzm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from bs4 import BeautifulSoup 4 | from base import BaseFeedBook, URLOpener, string_of_tag 5 | 6 | def getBook(): 7 | return NFZM 8 | 9 | class NFZM(BaseFeedBook): 10 | title = u'南方周末' 11 | description = u'在这里读懂中国 | 每周五更新 | 需要登录' 12 | __author__ = 'mcfloundinho' 13 | language = 'zh-cn' 14 | feed_encoding = "utf-8" 15 | page_encoding = "utf-8" 16 | mastheadfile = "mh_nfzm.gif" 17 | coverfile = "cv_nfzm.jpg" 18 | deliver_days = ['Friday'] 19 | needs_subscription = True 20 | 21 | def ParseFeedUrls(self): 22 | login_url = "http://passport.infzm.com/passport/login" 23 | content_url = "http://www.infzm.com/enews/infzm" 24 | urls = [] 25 | 
opener = URLOpener(self.host, timeout=60) 26 | login_form = {"loginname":self.account, "password":self.password} 27 | login_response = opener.open(login_url, data=login_form) 28 | #opener.SaveCookies(login_response.header_msg.getheaders('Set-Cookie')) 29 | result = opener.open(content_url) 30 | content = result.content.decode(self.feed_encoding) 31 | soup = BeautifulSoup(content, "lxml") 32 | sec_titles = [] 33 | for sec_name in soup.find_all('h2'): 34 | sec_titles.append(sec_name.get_text()) 35 | for top_news in soup.find_all('dl', {'class': 'topnews'}): 36 | url = top_news.a['href'] 37 | feed_content = opener.open(url).content.decode(self.feed_encoding) 38 | feed_soup = BeautifulSoup(feed_content, "lxml") 39 | urls.append( 40 | (sec_titles[0], top_news.a['title'], url, feed_soup.find(id="articleContent"))) 41 | sec_count = 0 42 | for sec_content in soup.find_all('ul', {'class': 'relnews'}): 43 | for a in sec_content.find_all('a'): 44 | url = a['href'] 45 | feed_content = opener.open( 46 | url).content.decode(self.feed_encoding) 47 | feed_soup = BeautifulSoup(feed_content, "lxml") 48 | urls.append( 49 | (sec_titles[sec_count], a['title'], url, feed_soup.find(id="articleContent"))) 50 | sec_count += 1 51 | return urls 52 | -------------------------------------------------------------------------------- /lib/web/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI Utilities 3 | (from web.py) 4 | """ 5 | 6 | import os, sys 7 | 8 | import http 9 | import webapi as web 10 | from utils import listget 11 | from net import validaddr, validip 12 | import httpserver 13 | 14 | def runfcgi(func, addr=('localhost', 8000)): 15 | """Runs a WSGI function as a FastCGI server.""" 16 | import flup.server.fcgi as flups 17 | return flups.WSGIServer(func, multiplexed=True, bindAddress=addr, debug=False).run() 18 | 19 | def runscgi(func, addr=('localhost', 4000)): 20 | """Runs a WSGI function as an SCGI server.""" 21 | import flup.server.scgi as flups 22 | return flups.WSGIServer(func, bindAddress=addr, debug=False).run() 23 | 24 | def runwsgi(func): 25 | """ 26 | Runs a WSGI-compatible `func` using FCGI, SCGI, or a simple web server, 27 | as appropriate based on context and `sys.argv`. 28 | """ 29 | 30 | if os.environ.has_key('SERVER_SOFTWARE'): # cgi 31 | os.environ['FCGI_FORCE_CGI'] = 'Y' 32 | 33 | if (os.environ.has_key('PHP_FCGI_CHILDREN') #lighttpd fastcgi 34 | or os.environ.has_key('SERVER_SOFTWARE')): 35 | return runfcgi(func, None) 36 | 37 | if 'fcgi' in sys.argv or 'fastcgi' in sys.argv: 38 | args = sys.argv[1:] 39 | if 'fastcgi' in args: args.remove('fastcgi') 40 | elif 'fcgi' in args: args.remove('fcgi') 41 | if args: 42 | return runfcgi(func, validaddr(args[0])) 43 | else: 44 | return runfcgi(func, None) 45 | 46 | if 'scgi' in sys.argv: 47 | args = sys.argv[1:] 48 | args.remove('scgi') 49 | if args: 50 | return runscgi(func, validaddr(args[0])) 51 | else: 52 | return runscgi(func) 53 | 54 | return httpserver.runsimple(func, validip(listget(sys.argv, 1, ''))) 55 | 56 | def _is_dev_mode(): 57 | # Some embedded python interpreters won't have sys.arv 58 | # For details, see https://github.com/webpy/webpy/issues/87 59 | argv = getattr(sys, "argv", []) 60 | 61 | # quick hack to check if the program is running in dev mode. 
62 | if os.environ.has_key('SERVER_SOFTWARE') \ 63 | or os.environ.has_key('PHP_FCGI_CHILDREN') \ 64 | or 'fcgi' in argv or 'fastcgi' in argv \ 65 | or 'mod_wsgi' in argv: 66 | return False 67 | return True 68 | 69 | # When running the builtin-server, enable debug mode if not already set. 70 | web.config.setdefault('debug', _is_dev_mode()) 71 | -------------------------------------------------------------------------------- /lib/chardet/codingstatemachine.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .constants import eStart 29 | from .compat import wrap_ord 30 | 31 | 32 | class CodingStateMachine: 33 | def __init__(self, sm): 34 | self._mModel = sm 35 | self._mCurrentBytePos = 0 36 | self._mCurrentCharLen = 0 37 | self.reset() 38 | 39 | def reset(self): 40 | self._mCurrentState = eStart 41 | 42 | def next_state(self, c): 43 | # for each byte we get its class 44 | # if it is first byte, we also get byte length 45 | # PY3K: aBuf is a byte stream, so c is an int, not a byte 46 | byteCls = self._mModel['classTable'][wrap_ord(c)] 47 | if self._mCurrentState == eStart: 48 | self._mCurrentBytePos = 0 49 | self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] 50 | # from byte's class and stateTable, we get its next state 51 | curr_state = (self._mCurrentState * self._mModel['classFactor'] 52 | + byteCls) 53 | self._mCurrentState = self._mModel['stateTable'][curr_state] 54 | self._mCurrentBytePos += 1 55 | return self._mCurrentState 56 | 57 | def get_current_charlen(self): 58 | return self._mCurrentCharLen 59 | 60 | def get_coding_state_machine(self): 61 | return self._mModel['name'] 62 | -------------------------------------------------------------------------------- /lib/cssutils/scripts/csscapture.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Retrieve all CSS stylesheets including embedded for a given URL. 3 | Retrieve as StyleSheetList or save to disk - raw, parsed or minified version. 4 | 5 | TODO: 6 | - maybe use DOM 3 load/save? 7 | - logger class which handles all cases when no log is given... 8 | - saveto: why does urllib2 hang? 
9 | """ 10 | __all__ = ['CSSCapture'] 11 | __docformat__ = 'restructuredtext' 12 | __version__ = '$Id$' 13 | 14 | from cssutils.script import CSSCapture 15 | import logging 16 | import optparse 17 | import sys 18 | 19 | def main(args=None): 20 | usage = "usage: %prog [options] URL" 21 | parser = optparse.OptionParser(usage=usage) 22 | parser.add_option('-d', '--debug', action='store_true', dest='debug', 23 | help='show debug messages during capturing') 24 | parser.add_option('-m', '--minified', action='store_true', dest='minified', 25 | help='saves minified version of captured files') 26 | parser.add_option('-n', '--notsave', action='store_true', dest='notsave', 27 | help='if given files are NOT saved, only log is written') 28 | # parser.add_option('-r', '--saveraw', action='store_true', dest='saveraw', 29 | # help='if given saves raw css otherwise cssutils\' parsed files') 30 | parser.add_option('-s', '--saveto', action='store', dest='saveto', 31 | help='saving retrieved files to "saveto", defaults to "_CSSCapture_SAVED"') 32 | parser.add_option('-u', '--useragent', action='store', dest='ua', 33 | help='useragent to use for request of URL, default is urllib2s default') 34 | options, url = parser.parse_args() 35 | 36 | # TODO: 37 | options.saveraw = False 38 | 39 | if not url: 40 | parser.error('no URL given') 41 | else: 42 | url = url[0] 43 | 44 | if options.debug: 45 | level = logging.DEBUG 46 | else: 47 | level = logging.INFO 48 | 49 | # START 50 | c = CSSCapture(ua=options.ua, defaultloglevel=level) 51 | 52 | stylesheetlist = c.capture(url) 53 | 54 | if options.notsave is None or not options.notsave: 55 | if options.saveto: 56 | saveto = options.saveto 57 | else: 58 | saveto = u'_CSSCapture_SAVED' 59 | c.saveto(saveto, saveraw=options.saveraw, minified=options.minified) 60 | else: 61 | for i, s in enumerate(stylesheetlist): 62 | print u'''%s. 63 | encoding: %r 64 | title: %r 65 | href: %r''' % (i + 1, s.encoding, s.title, s.href) 66 | 67 | 68 | if __name__ == "__main__": 69 | sys.exit(main()) 70 | -------------------------------------------------------------------------------- /lib/bs4/tests/test_lxml.py: -------------------------------------------------------------------------------- 1 | """Tests to ensure that the lxml tree builder generates good trees.""" 2 | 3 | import re 4 | import warnings 5 | 6 | try: 7 | import lxml.etree 8 | LXML_PRESENT = True 9 | LXML_VERSION = lxml.etree.LXML_VERSION 10 | except ImportError, e: 11 | LXML_PRESENT = False 12 | LXML_VERSION = (0,) 13 | 14 | if LXML_PRESENT: 15 | from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML 16 | 17 | from bs4 import ( 18 | BeautifulSoup, 19 | BeautifulStoneSoup, 20 | ) 21 | from bs4.element import Comment, Doctype, SoupStrainer 22 | from bs4.testing import skipIf 23 | from bs4.tests import test_htmlparser 24 | from bs4.testing import ( 25 | HTMLTreeBuilderSmokeTest, 26 | XMLTreeBuilderSmokeTest, 27 | SoupTest, 28 | skipIf, 29 | ) 30 | 31 | @skipIf( 32 | not LXML_PRESENT, 33 | "lxml seems not to be present, not testing its tree builder.") 34 | class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): 35 | """See ``HTMLTreeBuilderSmokeTest``.""" 36 | 37 | @property 38 | def default_builder(self): 39 | return LXMLTreeBuilder() 40 | 41 | def test_out_of_range_entity(self): 42 | self.assertSoupEquals( 43 | "
<p>foo&#65533;bar</p>", "<p>foobar</p>") 44 | self.assertSoupEquals( 45 | "<p>foo&#65533;bar</p>", "<p>foobar</p>") 46 | self.assertSoupEquals( 47 | "<p>foo&#65533;bar</p>", "<p>foobar</p>
") 48 | 49 | # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this 50 | # test if an old version of lxml is installed. 51 | 52 | @skipIf( 53 | not LXML_PRESENT or LXML_VERSION < (2,3,5,0), 54 | "Skipping doctype test for old version of lxml to avoid segfault.") 55 | def test_empty_doctype(self): 56 | soup = self.soup("") 57 | doctype = soup.contents[0] 58 | self.assertEqual("", doctype.strip()) 59 | 60 | def test_beautifulstonesoup_is_xml_parser(self): 61 | # Make sure that the deprecated BSS class uses an xml builder 62 | # if one is installed. 63 | with warnings.catch_warnings(record=True) as w: 64 | soup = BeautifulStoneSoup("") 65 | self.assertEqual(u"", unicode(soup.b)) 66 | self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message)) 67 | 68 | @skipIf( 69 | not LXML_PRESENT, 70 | "lxml seems not to be present, not testing its XML tree builder.") 71 | class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest): 72 | """See ``HTMLTreeBuilderSmokeTest``.""" 73 | 74 | @property 75 | def default_builder(self): 76 | return LXMLTreeBuilderForXML() 77 | -------------------------------------------------------------------------------- /apps/View/Setting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Contributors: 6 | # rexdf 7 | 8 | import gettext 9 | 10 | import web 11 | 12 | from apps.BaseHandler import BaseHandler 13 | from apps.dbModels import * 14 | from apps.utils import etagged 15 | from config import * 16 | 17 | #import main 18 | 19 | class Setting(BaseHandler): 20 | __url__ = "/setting" 21 | @etagged() 22 | def GET(self, tips=None): 23 | user = self.getcurrentuser() 24 | return self.render('setting.html',"Setting", 25 | current='setting',user=user,mail_sender=SRC_EMAIL,tips=tips) 26 | 27 | def POST(self): 28 | user = self.getcurrentuser() 29 | kemail = web.input().get('kindleemail') 30 | mytitle = web.input().get("rt") 31 | if not kemail: 32 | tips = _("Kindle E-mail is requied!") 33 | elif not mytitle: 34 | tips = _("Title is requied!") 35 | else: 36 | user.kindle_email = kemail 37 | user.timezone = int(web.input().get('timezone', TIMEZONE)) 38 | user.send_time = int(web.input().get('sendtime')) 39 | user.enable_send = bool(web.input().get('enablesend')) 40 | user.book_type = web.input().get('booktype') 41 | user.device = web.input().get('devicetype') or 'kindle' 42 | user.use_title_in_feed = bool(web.input().get('titlefrom') == 'feed') 43 | user.titlefmt = web.input().get('titlefmt') 44 | alldays = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'] 45 | user.send_days = [day for day in alldays if web.input().get(day)] 46 | user.merge_books = bool(web.input().get('mergebooks')) 47 | user.put() 48 | 49 | myfeeds = user.ownfeeds 50 | myfeeds.language = web.input().get("lng") 51 | myfeeds.title = mytitle 52 | myfeeds.keep_image = bool(web.input().get("keepimage")) 53 | myfeeds.oldest_article = int(web.input().get('oldest', 7)) 54 | myfeeds.users = [user.name] if web.input().get("enablerss") else [] 55 | myfeeds.put() 56 | tips = _("Settings Saved!") 57 | 58 | return self.GET(tips) 59 | 60 | class SetLang(BaseHandler): 61 | __url__ = "/lang/(.*)" 62 | def GET(self, lang): 63 | lang = lang.lower() 64 | if lang not in main.supported_languages: 65 | return "language invalid!" 
66 | main.session.lang = lang 67 | raise web.seeother(r'/') -------------------------------------------------------------------------------- /books/Gongshi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | from bs4 import BeautifulSoup 5 | from base import BaseFeedBook, URLOpener 6 | 7 | 8 | def getBook(): 9 | return Gongshi 10 | 11 | 12 | class Gongshi(BaseFeedBook): 13 | title = u'共识网一周排行' 14 | description = u'共识网—在大变革时代寻找共识 | 每周六推送。' 15 | language = 'zh-cn' 16 | feed_encoding = "gbk" 17 | page_encoding = "gbk" 18 | mastheadfile = "mh_gongshi.gif" 19 | coverfile = 'cv_gongshi.jpg' 20 | deliver_days = ['Saturday'] 21 | 22 | def FetchDesc(self, url): 23 | opener = URLOpener(self.host, timeout=60) 24 | result = opener.open(url) 25 | if result.status_code != 200: 26 | self.log.warn('fetch article failed(%d):%s.' % (status_code, url)) 27 | return None 28 | content = result.content.decode(self.feed_encoding) 29 | soup = BeautifulSoup(content, 'lxml') 30 | abstract = unicode(soup.find('div', attrs={'class': 'zhaiyao'})) 31 | article = unicode(soup.find(id='contents')) 32 | pagelist = soup.find('ul', attrs={'class': 'pagelist'}) 33 | if pagelist and pagelist.find('li'): 34 | page_count_context = pagelist.a.text 35 | page_count = int( 36 | page_count_context[1:page_count_context.index(u'页')]) 37 | for i in range(2, page_count + 1): 38 | page_url = url[:-5] + "_%d.html" % i 39 | result = opener.open(page_url) 40 | if result.status_code != 200: 41 | self.log.warn( 42 | 'fetch page failed(%d):%s.' % (status_code, page_url)) 43 | return None 44 | content = result.content.decode(self.feed_encoding) 45 | pagesoup = BeautifulSoup(content, 'lxml') 46 | article += unicode(pagesoup.find(id='contents')) 47 | return abstract + article 48 | 49 | def ParseFeedUrls(self): 50 | mainurl = "http://www.21ccom.net/articles/china/" 51 | urls = [] 52 | opener = URLOpener(self.host, timeout=60) 53 | result = opener.open(mainurl) 54 | if result.status_code != 200: 55 | self.log.warn('fetch rss failed:%s' % mainurl) 56 | return [] 57 | content = result.content.decode(self.feed_encoding) 58 | soup = BeautifulSoup(content, "lxml") 59 | # Get the 2nd block 60 | ul = soup.find_all('ul', attrs={'class': ['m-list', 'list-tweet']})[1] 61 | for li in ul.find_all('li'): 62 | urls.append( 63 | (u'共识网一周排行', li.a.text, li.a['href'], self.FetchDesc(li.a['href']))) 64 | return urls 65 | -------------------------------------------------------------------------------- /lib/chardet/chardetect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Script which takes one or more file paths and reports on their detected 4 | encodings 5 | 6 | Example:: 7 | 8 | % chardetect somefile someotherfile 9 | somefile: windows-1252 with confidence 0.5 10 | someotherfile: ascii with confidence 1.0 11 | 12 | If no paths are provided, it takes its input from stdin. 13 | 14 | """ 15 | 16 | from __future__ import absolute_import, print_function, unicode_literals 17 | 18 | import argparse 19 | import sys 20 | from io import open 21 | 22 | from chardet import __version__ 23 | from chardet.universaldetector import UniversalDetector 24 | 25 | 26 | def description_of(lines, name='stdin'): 27 | """ 28 | Return a string describing the probable encoding of a file or 29 | list of strings. 30 | 31 | :param lines: The lines to get the encoding of. 
32 | :type lines: Iterable of bytes 33 | :param name: Name of file or collection of lines 34 | :type name: str 35 | """ 36 | u = UniversalDetector() 37 | for line in lines: 38 | u.feed(line) 39 | u.close() 40 | result = u.result 41 | if result['encoding']: 42 | return '{0}: {1} with confidence {2}'.format(name, result['encoding'], 43 | result['confidence']) 44 | else: 45 | return '{0}: no result'.format(name) 46 | 47 | 48 | def main(argv=None): 49 | ''' 50 | Handles command line arguments and gets things started. 51 | 52 | :param argv: List of arguments, as if specified on the command-line. 53 | If None, ``sys.argv[1:]`` is used instead. 54 | :type argv: list of str 55 | ''' 56 | # Get command line arguments 57 | parser = argparse.ArgumentParser( 58 | description="Takes one or more file paths and reports their detected \ 59 | encodings", 60 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 61 | conflict_handler='resolve') 62 | parser.add_argument('input', 63 | help='File whose encoding we would like to determine.', 64 | type=argparse.FileType('rb'), nargs='*', 65 | default=[sys.stdin]) 66 | parser.add_argument('--version', action='version', 67 | version='%(prog)s {0}'.format(__version__)) 68 | args = parser.parse_args(argv) 69 | 70 | for f in args.input: 71 | if f.isatty(): 72 | print("You are running chardetect interactively. Press " + 73 | "CTRL-D twice at the start of a blank line to signal the " + 74 | "end of your input. If you want help, run chardetect " + 75 | "--help\n", file=sys.stderr) 76 | print(description_of(f, f.name)) 77 | 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /lib/cssutils/_fetchgae.py: -------------------------------------------------------------------------------- 1 | """GAE specific URL reading functions""" 2 | __all__ = ['_defaultFetcher'] 3 | __docformat__ = 'restructuredtext' 4 | __version__ = '$Id: tokenize2.py 1547 2008-12-10 20:42:26Z cthedot $' 5 | 6 | # raises ImportError of not on GAE 7 | from google.appengine.api import urlfetch 8 | import cgi 9 | import errorhandler 10 | import util 11 | 12 | log = errorhandler.ErrorHandler() 13 | 14 | def _defaultFetcher(url): 15 | """ 16 | uses GoogleAppEngine (GAE) 17 | fetch(url, payload=None, method=GET, headers={}, allow_truncated=False) 18 | 19 | Response 20 | content 21 | The body content of the response. 22 | content_was_truncated 23 | True if the allow_truncated parameter to fetch() was True and 24 | the response exceeded the maximum response size. In this case, 25 | the content attribute contains the truncated response. 26 | status_code 27 | The HTTP status code. 28 | headers 29 | The HTTP response headers, as a mapping of names to values. 30 | 31 | Exceptions 32 | exception InvalidURLError() 33 | The URL of the request was not a valid URL, or it used an 34 | unsupported method. Only http and https URLs are supported. 35 | exception DownloadError() 36 | There was an error retrieving the data. 37 | 38 | This exception is not raised if the server returns an HTTP 39 | error code: In that case, the response data comes back intact, 40 | including the error code. 41 | 42 | exception ResponseTooLargeError() 43 | The response data exceeded the maximum allowed size, and the 44 | allow_truncated parameter passed to fetch() was False. 
45 | """ 46 | #from google.appengine.api import urlfetch 47 | try: 48 | r = urlfetch.fetch(url, method=urlfetch.GET) 49 | except urlfetch.Error, e: 50 | log.warn(u'Error opening url=%r: %s' % (url, e), 51 | error=IOError) 52 | else: 53 | if r.status_code == 200: 54 | # find mimetype and encoding 55 | mimetype = 'application/octet-stream' 56 | try: 57 | mimetype, params = cgi.parse_header(r.headers['content-type']) 58 | encoding = params['charset'] 59 | except KeyError: 60 | encoding = None 61 | if mimetype != u'text/css': 62 | log.error(u'Expected "text/css" mime type for url %r but found: %r' % 63 | (url, mimetype), error=ValueError) 64 | return encoding, r.content 65 | else: 66 | # TODO: 301 etc 67 | log.warn(u'Error opening url=%r: HTTP status %s' % 68 | (url, r.status_code), error=IOError) 69 | -------------------------------------------------------------------------------- /lib/web/wsgiserver/ssl_builtin.py: -------------------------------------------------------------------------------- 1 | """A library for integrating Python's builtin ``ssl`` library with CherryPy. 2 | 3 | The ssl module must be importable for SSL functionality. 4 | 5 | To use this module, set ``CherryPyWSGIServer.ssl_adapter`` to an instance of 6 | ``BuiltinSSLAdapter``. 7 | """ 8 | 9 | try: 10 | import ssl 11 | except ImportError: 12 | ssl = None 13 | 14 | from cherrypy import wsgiserver 15 | 16 | 17 | class BuiltinSSLAdapter(wsgiserver.SSLAdapter): 18 | """A wrapper for integrating Python's builtin ssl module with CherryPy.""" 19 | 20 | certificate = None 21 | """The filename of the server SSL certificate.""" 22 | 23 | private_key = None 24 | """The filename of the server's private key file.""" 25 | 26 | def __init__(self, certificate, private_key, certificate_chain=None): 27 | if ssl is None: 28 | raise ImportError("You must install the ssl module to use HTTPS.") 29 | self.certificate = certificate 30 | self.private_key = private_key 31 | self.certificate_chain = certificate_chain 32 | 33 | def bind(self, sock): 34 | """Wrap and return the given socket.""" 35 | return sock 36 | 37 | def wrap(self, sock): 38 | """Wrap and return the given socket, plus WSGI environ entries.""" 39 | try: 40 | s = ssl.wrap_socket(sock, do_handshake_on_connect=True, 41 | server_side=True, certfile=self.certificate, 42 | keyfile=self.private_key, ssl_version=ssl.PROTOCOL_SSLv23) 43 | except ssl.SSLError, e: 44 | if e.errno == ssl.SSL_ERROR_EOF: 45 | # This is almost certainly due to the cherrypy engine 46 | # 'pinging' the socket to assert it's connectable; 47 | # the 'ping' isn't SSL. 48 | return None, {} 49 | elif e.errno == ssl.SSL_ERROR_SSL: 50 | if e.args[1].endswith('http request'): 51 | # The client is speaking HTTP to an HTTPS server. 
52 | raise wsgiserver.NoSSLError 53 | raise 54 | return s, self.get_environ(s) 55 | 56 | # TODO: fill this out more with mod ssl env 57 | def get_environ(self, sock): 58 | """Create WSGI environ entries to be merged into each request.""" 59 | cipher = sock.cipher() 60 | ssl_environ = { 61 | "wsgi.url_scheme": "https", 62 | "HTTPS": "on", 63 | 'SSL_PROTOCOL': cipher[1], 64 | 'SSL_CIPHER': cipher[0] 65 | ## SSL_VERSION_INTERFACE string The mod_ssl program version 66 | ## SSL_VERSION_LIBRARY string The OpenSSL program version 67 | } 68 | return ssl_environ 69 | 70 | def makefile(self, sock, mode='r', bufsize=-1): 71 | return wsgiserver.CP_fileobject(sock, mode, bufsize) 72 | 73 | -------------------------------------------------------------------------------- /books/Xueqiu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 4 | 5 | from base import BaseFeedBook 6 | import re, urllib 7 | from lib.urlopener import URLOpener 8 | from bs4 import BeautifulSoup 9 | import json 10 | from config import SHARE_FUCK_GFW_SRV 11 | 12 | __author__ = 'henryouly' 13 | 14 | def getBook(): 15 | return Xueqiu 16 | 17 | class Xueqiu(BaseFeedBook): 18 | title = u'雪球今日话题' 19 | description = u'雪球是一个社交投资网络,「今日话题」是雪球用户每日发布的投资交流精选。' 20 | language = 'zh-cn' 21 | feed_encoding = "utf-8" 22 | page_encoding = "utf-8" 23 | mastheadfile = "mh_xueqiu.gif" 24 | coverfile = "cv_xueqiu.jpg" 25 | oldest_article = 1 26 | fulltext_by_readability = False 27 | 28 | remove_tags = ['meta'] 29 | remove_attrs = ['xmlns'] 30 | 31 | feeds = [ (u'今日话题', SHARE_FUCK_GFW_SRV % urllib.quote('http://xueqiu.com/hots/topic/rss'), True) ] 32 | 33 | def url4forwarder(self, url): 34 | #生成经过转发器的URL 35 | return SHARE_FUCK_GFW_SRV % urllib.quote(url) 36 | 37 | def fetcharticle(self, url, opener, decoder): 38 | #链接网页获取一篇文章 39 | return BaseFeedBook.fetcharticle(self, self.url4forwarder(url), opener, decoder) 40 | 41 | def soupbeforeimage(self, soup): 42 | for img in soup.find_all('img'): 43 | imgurl = img['src'] if 'src' in img.attrs else '' 44 | if imgurl.startswith('http'): 45 | img['src'] = self.url4forwarder(imgurl) 46 | 47 | def postprocess(self, content): 48 | pn = re.compile(ur'本话题在雪球有.*?条讨论,点击查看。', re.I) 49 | comment = '' 50 | mt = pn.search(content) 51 | url = mt.group(1) if mt else None 52 | if url: 53 | opener = URLOpener(url, timeout=self.timeout) 54 | result = opener.open(url) 55 | if result.status_code == 200 and result.content: 56 | if self.feed_encoding: 57 | try: 58 | comment = result.content.decode(self.feed_encoding) 59 | except UnicodeDecodeError: 60 | return content 61 | 62 | pn = re.compile(r'SNB.data.goodComments\ =\ ({.*?});', re.S | re.I) 63 | mt = pn.search(comment) 64 | if mt: 65 | comment_json = mt.group(1) 66 | j = json.loads(comment_json) 67 | soup = BeautifulSoup(content, "lxml") 68 | for c in j['comments']: 69 | u = c['user']['screen_name'] 70 | t = BeautifulSoup('
<p>@%s:%s</p>
' % (u, c['text'])) 71 | for img in t.find_all('img', alt=True): 72 | img.replace_with(t.new_string(img['alt'])) 73 | soup.html.body.append(t.p) 74 | 75 | content = unicode(soup) 76 | return content 77 | -------------------------------------------------------------------------------- /lib/calibre/utils/img.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | 4 | __license__ = 'GPL v3' 5 | __copyright__ = '2010, Kovid Goyal ' 6 | __docformat__ = 'restructuredtext en' 7 | 8 | import os 9 | from PIL import Image 10 | 11 | from calibre.utils.imghdr import what 12 | from StringIO import StringIO 13 | 14 | def identify_data(data): 15 | ''' 16 | Identify the image in data. Returns a 3-tuple 17 | (width, height, format) 18 | or raises an Exception if data is not an image. 19 | ''' 20 | if not isinstance(data, StringIO): 21 | data = StringIO(data) 22 | img = Image.open(data) 23 | width, height = img.size 24 | fmt = img.format 25 | return (width, height, fmt) 26 | 27 | 28 | def rescale_image(data, maxsizeb=4000000, dimen=None, 29 | png2jpg=False, graying=True, reduceto=(600,800)): 30 | ''' 31 | Convert image setting all transparent pixels to white and changing format 32 | to JPEG. Ensure the resultant image has a byte size less than 33 | maxsizeb. 34 | 35 | If dimen is not None, generate a thumbnail of 36 | width=dimen, height=dimen or width, height = dimen (depending on the type 37 | of dimen) 38 | 39 | Returns the image as a bytestring. 40 | ''' 41 | if not isinstance(data, StringIO): 42 | data = StringIO(data) 43 | img = Image.open(data) 44 | width, height = img.size 45 | fmt = img.format 46 | if graying and img.mode != "L": 47 | img = img.convert("L") 48 | 49 | reducewidth, reduceheight = reduceto 50 | 51 | if dimen is not None: 52 | if hasattr(dimen, '__len__'): 53 | width, height = dimen 54 | else: 55 | width = height = dimen 56 | img.thumbnail((width, height)) 57 | if png2jpg and fmt == 'PNG': 58 | fmt = 'JPEG' 59 | data = StringIO() 60 | img.save(data, fmt) 61 | elif width > reducewidth or height > reduceheight: 62 | ratio = min(float(reducewidth)/float(width), float(reduceheight)/float(height)) 63 | img = img.resize((int(width*ratio), int(height*ratio))) 64 | if png2jpg and fmt == 'PNG': 65 | fmt = 'JPEG' 66 | data = StringIO() 67 | img.save(data, fmt) 68 | elif png2jpg and fmt == 'PNG': 69 | data = StringIO() 70 | img.save(data, 'JPEG') 71 | else: 72 | data = StringIO() 73 | img.save(data, fmt) 74 | 75 | return data.getvalue() 76 | 77 | def mobify_image(data): 78 | 'Convert PNG images to GIF as the idiotic Kindle cannot display some PNG' 79 | fmt = what(None, data) 80 | 81 | if fmt == 'png': 82 | if not isinstance(data, StringIO): 83 | data = StringIO(data) 84 | im = Image.open(data) 85 | data = StringIO() 86 | im.save(data, 'GIF') 87 | data = data.getvalue() 88 | return data 89 | -------------------------------------------------------------------------------- /lib/dateutil/zoneinfo/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2005 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 
6 | """ 7 | from dateutil.tz import tzfile 8 | from tarfile import TarFile 9 | import os 10 | 11 | __author__ = "Gustavo Niemeyer " 12 | __license__ = "PSF License" 13 | 14 | __all__ = ["setcachesize", "gettz", "rebuild"] 15 | 16 | CACHE = [] 17 | CACHESIZE = 10 18 | 19 | class tzfile(tzfile): 20 | def __reduce__(self): 21 | return (gettz, (self._filename,)) 22 | 23 | def getzoneinfofile(): 24 | filenames = os.listdir(os.path.join(os.path.dirname(__file__))) 25 | filenames.sort() 26 | filenames.reverse() 27 | for entry in filenames: 28 | if entry.startswith("zoneinfo") and ".tar." in entry: 29 | return os.path.join(os.path.dirname(__file__), entry) 30 | return None 31 | 32 | ZONEINFOFILE = getzoneinfofile() 33 | 34 | del getzoneinfofile 35 | 36 | def setcachesize(size): 37 | global CACHESIZE, CACHE 38 | CACHESIZE = size 39 | del CACHE[size:] 40 | 41 | def gettz(name): 42 | tzinfo = None 43 | if ZONEINFOFILE: 44 | for cachedname, tzinfo in CACHE: 45 | if cachedname == name: 46 | break 47 | else: 48 | tf = TarFile.open(ZONEINFOFILE) 49 | try: 50 | zonefile = tf.extractfile(name) 51 | except KeyError: 52 | tzinfo = None 53 | else: 54 | tzinfo = tzfile(zonefile) 55 | tf.close() 56 | CACHE.insert(0, (name, tzinfo)) 57 | del CACHE[CACHESIZE:] 58 | return tzinfo 59 | 60 | def rebuild(filename, tag=None, format="gz"): 61 | import tempfile, shutil 62 | tmpdir = tempfile.mkdtemp() 63 | zonedir = os.path.join(tmpdir, "zoneinfo") 64 | moduledir = os.path.dirname(__file__) 65 | if tag: tag = "-"+tag 66 | targetname = "zoneinfo%s.tar.%s" % (tag, format) 67 | try: 68 | tf = TarFile.open(filename) 69 | for name in tf.getnames(): 70 | if not (name.endswith(".sh") or 71 | name.endswith(".tab") or 72 | name == "leapseconds"): 73 | tf.extract(name, tmpdir) 74 | filepath = os.path.join(tmpdir, name) 75 | os.system("zic -d %s %s" % (zonedir, filepath)) 76 | tf.close() 77 | target = os.path.join(moduledir, targetname) 78 | for entry in os.listdir(moduledir): 79 | if entry.startswith("zoneinfo") and ".tar." in entry: 80 | os.unlink(os.path.join(moduledir, entry)) 81 | tf = TarFile.open(target, "w:%s" % format) 82 | for entry in os.listdir(zonedir): 83 | entrypath = os.path.join(zonedir, entry) 84 | tf.add(entrypath, entry) 85 | tf.close() 86 | finally: 87 | shutil.rmtree(tmpdir) 88 | -------------------------------------------------------------------------------- /lib/chardet/utf8prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | from .charsetprober import CharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .mbcssm import UTF8SMModel 32 | 33 | ONE_CHAR_PROB = 0.5 34 | 35 | 36 | class UTF8Prober(CharSetProber): 37 | def __init__(self): 38 | CharSetProber.__init__(self) 39 | self._mCodingSM = CodingStateMachine(UTF8SMModel) 40 | self.reset() 41 | 42 | def reset(self): 43 | CharSetProber.reset(self) 44 | self._mCodingSM.reset() 45 | self._mNumOfMBChar = 0 46 | 47 | def get_charset_name(self): 48 | return "utf-8" 49 | 50 | def feed(self, aBuf): 51 | for c in aBuf: 52 | codingState = self._mCodingSM.next_state(c) 53 | if codingState == constants.eError: 54 | self._mState = constants.eNotMe 55 | break 56 | elif codingState == constants.eItsMe: 57 | self._mState = constants.eFoundIt 58 | break 59 | elif codingState == constants.eStart: 60 | if self._mCodingSM.get_current_charlen() >= 2: 61 | self._mNumOfMBChar += 1 62 | 63 | if self.get_state() == constants.eDetecting: 64 | if self.get_confidence() > constants.SHORTCUT_THRESHOLD: 65 | self._mState = constants.eFoundIt 66 | 67 | return self.get_state() 68 | 69 | def get_confidence(self): 70 | unlike = 0.99 71 | if self._mNumOfMBChar < 6: 72 | for i in range(0, self._mNumOfMBChar): 73 | unlike = unlike * ONE_CHAR_PROB 74 | return 1.0 - unlike 75 | else: 76 | return unlike 77 | -------------------------------------------------------------------------------- /books/FolhaDeSaopaulo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | import re 5 | from base import BaseFeedBook 6 | 7 | def getBook(): 8 | return FolhaDeSaopaulo 9 | 10 | class FolhaDeSaopaulo(BaseFeedBook): 11 | title = 'Folha' 12 | description = 'Folha de Sao paulo' 13 | language = 'pt-br' 14 | feed_encoding = "ISO-8859-1" 15 | page_encoding = "ISO-8859-1" 16 | mastheadfile = "mh_folha.gif" 17 | coverfile = 'cv_folha.jpg' 18 | oldest_article = 1 19 | fulltext_by_readability = False 20 | fulltext_by_instapaper = False 21 | host = r'http://www.folha.uol.com.br/' 22 | keep_only_tags = [dict(name='article', attrs={'class':'news'})] 23 | remove_classes = ['toolbar','advertising'] 24 | 25 | feeds = [ 26 | (u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml'), 27 | (u'Cotidiano', u'http://feeds.folha.uol.com.br/folha/cotidiano/rss091.xml'), 28 | (u'Brasil', u'http://feeds.folha.uol.com.br/folha/brasil/rss091.xml'), 29 | (u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml'), 30 | (u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml'), 31 | (u'Mercado', u'http://feeds.folha.uol.com.br/folha/dinheiro/rss091.xml'), 32 | (u'Saber', u'http://feeds.folha.uol.com.br/folha/educacao/rss091.xml'), 33 | (u'Tec', u'http://feeds.folha.uol.com.br/folha/informatica/rss091.xml'), 34 | (u'Ilustrada', u'http://feeds.folha.uol.com.br/folha/ilustrada/rss091.xml'), 35 | #(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml'), 36 | #(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml'), 37 | (u'Ciencia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml'), 38 | (u'Equilibrio e Saude', 
u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml'), 39 | #(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml'), 40 | #(u'Esporte', u'http://feeds.folha.uol.com.br/folha/esporte/rss091.xml'), 41 | ] 42 | 43 | #def fetcharticle(self, url, opener, decoder): 44 | # url = 'http://tools.folha.com.br/print?url=' + url 45 | # return BaseFeedBook.fetcharticle(self, url, opener, decoder) 46 | 47 | def processtitle(self, title): 48 | pn1 = re.compile(r'^(.*?) - \d\d/\d\d/\d\d\d\d - .*? - (Folha de S\.Paulo|F5)$', re.I) 49 | pn2 = re.compile(r'^Folha de S\.Paulo - .*? - .*? - (.*?) - \d\d/\d\d/\d\d\d\d$', re.I) 50 | mt1 = pn1.match(title) 51 | if mt1: 52 | return mt1.group(1) 53 | else: 54 | mt2 = pn2.match(title) 55 | if mt2: 56 | return mt2.group(1) 57 | 58 | if title.endswith('Folha de S.Paulo'): 59 | title = title.replace('Folha de S.Paulo', '') 60 | 61 | return title 62 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/writer8/header.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | import random 11 | from io import BytesIO 12 | from collections import OrderedDict 13 | from struct import pack 14 | 15 | from calibre.ebooks.mobi.utils import align_block 16 | 17 | NULL = 0xffffffff 18 | zeroes = lambda x: b'\0'*x 19 | nulls = lambda x: b'\xff'*x 20 | short = lambda x: pack(b'>H', x) 21 | 22 | class Header(OrderedDict): 23 | 24 | HEADER_NAME = b'' 25 | 26 | DEFINITION = ''' 27 | ''' 28 | 29 | ALIGN_BLOCK = False 30 | POSITIONS = {} # Mapping of position field to field whose position should 31 | # be stored in the position field 32 | SHORT_FIELDS = set() 33 | 34 | def __init__(self): 35 | OrderedDict.__init__(self) 36 | 37 | for line in self.DEFINITION.splitlines(): 38 | line = line.strip() 39 | if not line or line.startswith('#'): 40 | continue 41 | name, val = [x.strip() for x in line.partition('=')[0::2]] 42 | if val: 43 | val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None, 44 | 'nulls':nulls, 'short':short, 'random':random}) 45 | else: 46 | val = 0 47 | if name in self: 48 | raise ValueError('Duplicate field in definition: %r'%name) 49 | self[name] = val 50 | 51 | @property 52 | def dynamic_fields(self): 53 | return tuple(k for k, v in self.iteritems() if v is None) 54 | 55 | def __call__(self, **kwargs): 56 | positions = {} 57 | for name, val in kwargs.iteritems(): 58 | if name not in self: 59 | raise KeyError('Not a valid header field: %r'%name) 60 | self[name] = val 61 | 62 | buf = BytesIO() 63 | buf.write(bytes(self.HEADER_NAME)) 64 | for name, val in self.iteritems(): 65 | val = self.format_value(name, val) 66 | positions[name] = buf.tell() 67 | if val is None: 68 | raise ValueError('Dynamic field %r not set'%name) 69 | if isinstance(val, (int, long)): 70 | fmt = b'H' if name in self.SHORT_FIELDS else b'I' 71 | val = pack(b'>'+fmt, val) 72 | buf.write(val) 73 | 74 | for pos_field, field in self.POSITIONS.iteritems(): 75 | buf.seek(positions[pos_field]) 76 | buf.write(pack(b'>I', positions[field])) 77 | 78 | ans = buf.getvalue() 79 | if self.ALIGN_BLOCK: 80 | ans = align_block(ans) 81 | return ans 82 | 83 | def format_value(self, name, val): 84 | return val 85 | 86 | 87 | 88 
| -------------------------------------------------------------------------------- /books/wsj.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import re 4 | from base import BaseFeedBook 5 | 6 | def getBook(): 7 | return WSJ 8 | 9 | class WSJ(BaseFeedBook): 10 | title = u'华尔街日报' 11 | description = u'每天最重要的商业财经要闻及金融市场综述' 12 | language = 'zh-cn' 13 | feed_encoding = "utf-8" 14 | page_encoding = "GBK" 15 | mastheadfile = "mh_wsj.gif" 16 | coverfile = 'cv_wsj.jpg' 17 | oldest_article = 1 18 | network_timeout = 60 19 | fulltext_by_readability = False 20 | fulltext_by_instapaper = False 21 | host = r'http://cn.wsj.com/gb/' 22 | feeds = [ 23 | (u'要闻','http://cn.wsj.com.feedsportal.com/c/33121/f/538760/index.rss'), 24 | ] 25 | keep_only_tags = [dict(name='div', attrs={'id':'A'}),] 26 | 27 | def fetcharticle(self, url, opener, decoder): 28 | result = opener.open(url) 29 | status_code, content = result.status_code, result.content 30 | if status_code != 200 or not content: 31 | self.log.warn('fetch article failed(%d):%s.' % (status_code,url)) 32 | return None 33 | 34 | if self.page_encoding: 35 | try: 36 | content = content.decode('utf-8') 37 | except UnicodeDecodeError: 38 | content = decoder.decode(content,url,result.headers) 39 | else: 40 | content = decoder.decode(content,url,result.headers) 41 | 42 | m = re.search(r'', content) 43 | if m: 44 | newurl = m.group(1) 45 | result = opener.open(newurl) 46 | status_code, content = result.status_code, result.content 47 | if status_code != 200 or not content: 48 | self.log.warn('fetch article failed(%d):%s.' % (status_code,newurl)) 49 | return None 50 | 51 | if self.page_encoding: 52 | try: 53 | content = content.decode(self.page_encoding) 54 | except UnicodeDecodeError: 55 | content = decoder.decode(content,newurl,result.headers) 56 | else: 57 | content = decoder.decode(content,newurl,result.headers) 58 | 59 | return content 60 | 61 | def processtitle(self, title): 62 | title = BaseFeedBook.processtitle(self,title) 63 | if title.endswith(u'-华尔街日报'): 64 | return title.replace(u'-华尔街日报','') 65 | else: 66 | return title 67 | 68 | def soupprocessex(self, soup): 69 | ' 将首字div变成b ' 70 | content = soup.find('div',attrs={'id':'A'}) 71 | if content: 72 | firstdiv = content.find('div') 73 | if firstdiv and firstdiv.string and len(firstdiv.string) == 1: 74 | b = soup.new_tag('b') 75 | b.string = firstdiv.string 76 | firstdiv.replace_with(b) 77 | -------------------------------------------------------------------------------- /apps/module_front.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 
4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Author: 6 | # cdhigh 7 | #Contributors: 8 | # rexdf 9 | 10 | __Author__ = "cdhigh" 11 | 12 | import os, datetime, logging, __builtin__, hashlib, time 13 | 14 | # for debug 15 | # 本地启动调试服务器:python.exe dev_appserver.py c:\kindleear 16 | IsRunInLocal = (os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) 17 | log = logging.getLogger() 18 | __builtin__.__dict__['default_log'] = log 19 | __builtin__.__dict__['IsRunInLocal'] = IsRunInLocal 20 | 21 | supported_languages = ['en','zh-cn','tr-tr'] #不支持的语种则使用第一个语言 22 | #gettext.install('lang', 'i18n', unicode=True) #for calibre startup 23 | 24 | class Main_Var: 25 | urls = [] 26 | session = None 27 | jjenv = None 28 | supported_languages = None 29 | log = None 30 | __Version__ = None 31 | 32 | __builtin__.__dict__['main'] = Main_Var 33 | main.supported_languages = supported_languages 34 | main.log = log 35 | main.__Version__ = __Version__ 36 | log.setLevel(logging.INFO if IsRunInLocal else logging.WARN) 37 | 38 | import web 39 | import jinja2 40 | #from google.appengine.api import mail 41 | #from google.appengine.api import taskqueue 42 | from google.appengine.api import memcache 43 | 44 | from lib.memcachestore import MemcacheStore 45 | from books import BookClasses 46 | 47 | from apps.View import * 48 | 49 | from apps.dbModels import Book 50 | from apps.BaseHandler import BaseHandler 51 | from apps.utils import fix_filesizeformat 52 | 53 | #reload(sys) 54 | #sys.setdefaultencoding('utf-8') 55 | 56 | for book in BookClasses(): #添加内置书籍 57 | if memcache.get(book.title): #使用memcache加速 58 | continue 59 | b = Book.all().filter("title = ", book.title).get() 60 | if not b: 61 | b = Book(title=book.title, description=book.description, builtin=True, 62 | needs_subscription=book.needs_subscription, separate=False) 63 | b.put() 64 | memcache.add(book.title, book.description, 86400) 65 | 66 | class Test(BaseHandler): 67 | def GET(self): 68 | s = '' 69 | for d in os.environ: 70 | s += "
<br/>" + str(d).rjust(28) + " | " + str(os.environ[d]) + "<br/>
" 71 | return s 72 | 73 | main.urls += ["/test", "Test",] 74 | 75 | application = web.application(main.urls, globals()) 76 | store = MemcacheStore(memcache) 77 | session = web.session.Session(application, store, initializer={'username':'', 'login':0, 'lang':'', 'pocket_request_token':''}) 78 | jjenv = jinja2.Environment(loader=jinja2.FileSystemLoader('templates'), 79 | extensions=["jinja2.ext.do",'jinja2.ext.i18n']) 80 | jjenv.filters['filesizeformat'] = fix_filesizeformat 81 | 82 | app = application.wsgifunc() 83 | 84 | web.config.debug = IsRunInLocal 85 | 86 | main.session = session 87 | main.jjenv = jjenv -------------------------------------------------------------------------------- /lib/dateutil/easter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2007 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import datetime 11 | 12 | __all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"] 13 | 14 | EASTER_JULIAN = 1 15 | EASTER_ORTHODOX = 2 16 | EASTER_WESTERN = 3 17 | 18 | def easter(year, method=EASTER_WESTERN): 19 | """ 20 | This method was ported from the work done by GM Arts, 21 | on top of the algorithm by Claus Tondering, which was 22 | based in part on the algorithm of Ouding (1940), as 23 | quoted in "Explanatory Supplement to the Astronomical 24 | Almanac", P. Kenneth Seidelmann, editor. 25 | 26 | This algorithm implements three different easter 27 | calculation methods: 28 | 29 | 1 - Original calculation in Julian calendar, valid in 30 | dates after 326 AD 31 | 2 - Original method, with date converted to Gregorian 32 | calendar, valid in years 1583 to 4099 33 | 3 - Revised method, in Gregorian calendar, valid in 34 | years 1583 to 4099 as well 35 | 36 | These methods are represented by the constants: 37 | 38 | EASTER_JULIAN = 1 39 | EASTER_ORTHODOX = 2 40 | EASTER_WESTERN = 3 41 | 42 | The default method is method 3. 
43 | 44 | More about the algorithm may be found at: 45 | 46 | http://users.chariot.net.au/~gmarts/eastalg.htm 47 | 48 | and 49 | 50 | http://www.tondering.dk/claus/calendar.html 51 | 52 | """ 53 | 54 | if not (1 <= method <= 3): 55 | raise ValueError, "invalid method" 56 | 57 | # g - Golden year - 1 58 | # c - Century 59 | # h - (23 - Epact) mod 30 60 | # i - Number of days from March 21 to Paschal Full Moon 61 | # j - Weekday for PFM (0=Sunday, etc) 62 | # p - Number of days from March 21 to Sunday on or before PFM 63 | # (-6 to 28 methods 1 & 3, to 56 for method 2) 64 | # e - Extra days to add for method 2 (converting Julian 65 | # date to Gregorian date) 66 | 67 | y = year 68 | g = y % 19 69 | e = 0 70 | if method < 3: 71 | # Old method 72 | i = (19*g+15)%30 73 | j = (y+y//4+i)%7 74 | if method == 2: 75 | # Extra dates to convert Julian to Gregorian date 76 | e = 10 77 | if y > 1600: 78 | e = e+y//100-16-(y//100-16)//4 79 | else: 80 | # New method 81 | c = y//100 82 | h = (c-c//4-(8*c+13)//25+19*g+15)%30 83 | i = h-(h//28)*(1-(h//28)*(29//(h+1))*((21-g)//11)) 84 | j = (y+y//4+i+2-c+c//4)%7 85 | 86 | # p can be from -6 to 56 corresponding to dates 22 March to 23 May 87 | # (later dates apply to method 2, although 23 May never actually occurs) 88 | p = i-j+e 89 | d = 1+(p+27+(p+6)//40)%31 90 | m = 3+(p+26)//30 91 | return datetime.date(int(y),int(m),int(d)) 92 | 93 | -------------------------------------------------------------------------------- /templates/admin.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block css -%} 3 | 10 | {% endblock -%} 11 | {% block content -%} 12 |
13 |
14 | {{_("Change Password")}} 15 | {% if chpwdtips -%} 16 |

{{chpwdtips}}

17 | {% endif -%} 18 |
19 | 20 | 21 |
22 |
23 | 24 | 25 |
26 |
27 | 28 | 29 |
30 |
31 | 32 |
33 |
34 |
35 | {% if nickname == 'admin' -%} 36 |
37 |
38 | {{_("Add Account")}} 39 | {% if actips -%} 40 |

{{actips}}

41 | {% endif -%} 42 |

{{_("Note : No supports many accounts for limit of free account of GAE.")}}

43 |
44 | 45 | 46 |
47 |
48 | 49 | 50 |
51 |
52 | 53 | 54 |
55 |
56 | 57 |
58 |
59 |
60 |

{{_("Accounts")}}

61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | {% for u in users -%} 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | {% endfor -%} 78 | 79 |
{{_("No.")}}{{_("Username")}}{{_("Enable")}}{{_("Operation")}}
{{loop.index}}{{u.name}}{{u.enable_send}}{{_("Change")}}{{_("Delete")}}
80 | {% endif -%} 81 | {% endblock -%} 82 | -------------------------------------------------------------------------------- /books/Qiushibaike.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import re 4 | from base import * 5 | 6 | def getBook(): 7 | return Qiushibaike 8 | 9 | class Qiushibaike(BaseFeedBook): 10 | title = u'糗事百科' 11 | description = u'快乐就是要建立在别人的痛苦之上,额外赠送哈哈.MX' 12 | language = 'zh-cn' 13 | feed_encoding = "utf-8" 14 | page_encoding = "utf-8" 15 | mastheadfile = "mh_qiushibaike.gif" 16 | coverfile = "cv_qiushibaike.jpg" 17 | network_timeout = 30 18 | keep_only_tags = [dict(name='div', attrs={'class':'main'}),] # qiushibaike 19 | #dict(name='div',attrs={'class':'block joke-item'}), # haha.mx 20 | # ] 21 | remove_tags = [] 22 | remove_ids = ['bdshare',] 23 | remove_classes = ['sharebox','comment','share','up','down', #qiushibaike 24 | 'backtop','close','author','col2','sponsor','pagebar', #qiushibaike 25 | 'seconday-nav fl','toolkit fr','fr','info clearfix', # haha.mx 26 | 'joke-item-footer','pagination','pos-ab','praise-box',] # haha.mx 27 | remove_attrs = [] 28 | 29 | feeds = [ 30 | #(u'8小时最热', r'http://www.qiushibaike.com'), 31 | (u'24小时 Page1', r'http://www.qiushibaike.com/hot'), 32 | (u'24小时 Page2', r'http://www.qiushibaike.com/hot/page/2'), 33 | #(u'哈哈MX', r'http://www.haha.mx/'), 34 | (u'哈哈.MX Page1', r'http://www.haha.mx/good/day'), 35 | (u'哈哈.MX Page2', r'http://www.haha.mx/good/day/2'), 36 | ] 37 | 38 | def processtitle(self, title): 39 | title = re.sub(r'(\n)+', ' ', title) 40 | title = title.replace(u' :: 糗事百科 :: 快乐减压 健康生活', u'') 41 | return title.replace(u'——分享所有好笑的事情', u'') 42 | 43 | def soupbeforeimage(self, soup): 44 | if soup.html.head.title.string.find(u'哈哈') > 0: 45 | for img in list(soup.find_all('img')): #HAHA.MX切换为大图链接 46 | src = img['src'] 47 | if src.find(r'/small/') > 0: 48 | img['src'] = src.replace(r'/small/', r'/big/') 49 | 50 | def soupprocessex(self, soup): 51 | if u'小时' in soup.html.head.title.string: #qiushibaike 52 | for article in soup.find_all("a", attrs={"href":re.compile(r'^/article')}): 53 | p = soup.new_tag("p", style='color:grey;text-decoration:underline;') 54 | p.string = string_of_tag(article.string) 55 | article.replace_with(p) 56 | 57 | first = True 58 | for detail in soup.find_all("div", attrs={"class":"content"}): 59 | if not first: 60 | hr = soup.new_tag("hr") 61 | detail.insert(0, hr) 62 | first = False 63 | 64 | if soup.html.head.title.string.startswith(u'哈哈'): #haha.mx 65 | first = True 66 | for item in soup.find_all("div", attrs={"class":"block joke-item"}): 67 | if not first: 68 | hr = soup.new_tag("hr") 69 | item.insert(0, hr) 70 | first = False 71 | -------------------------------------------------------------------------------- /readme_EN.md: -------------------------------------------------------------------------------- 1 | #Brief Introduction 2 | KindleEar is a web application to aggregate RSS for generating periodical mobi/epub file with images and send it to your kindle or your email automatically. 3 | 4 | ## The features included: 5 | * Support calibre-like recipe file to aggress unlimited RSS or webpage. 6 | * Support custom RSS, only title/url are needed, don't need to program. 7 | * With account management, support several kindles. 8 | * Generate periodical mobi/epub file with images. 9 | * Deliver news feeds to your kindle dialy automatically. 10 | * Website support multi-languages. 
11 | * Powerful and convenient mail-transferring service. 12 | * Integration with Evernote/Pocket/Instapaper. 13 | 14 | #Deployment 15 | 1. [Create a Google account](https://accounts.google.com/SignUp) and [Turn on Access for less secure apps](https://www.google.com/settings/security/lesssecureapps). 16 | 17 | 2. [Create an application](https://console.developers.google.com/project). 18 | 19 | 3. Install [Python 2.7.x](https://www.python.org/downloads/). 20 | 21 | 4. Install the [GAE SDK](https://cloud.google.com/appengine/downloads). 22 | 23 | 5. [Download KindleEar](https://github.com/cdhigh/KindleEar/archive/master.zip) and uncompress it into a directory, for example *c:\kindleear*. 24 | 25 | 6. Modify some variables in app.yaml/module-worker.yaml/config.py. 26 | 27 | File | To be changed | Description | 28 | -------------------|-------------|-----------------------| 29 | app.yaml | application | Your Application Id | 30 | module-worker.yaml | application | Your Application Id | 31 | config.py | SRC_EMAIL | Your Gmail Address | 32 | config.py | DOMAIN | appid@appspot.com | 33 | config.py | TIMEZONE | Your timezone | 34 | 35 | 36 | 7. Execute two commands in the GAE SDK directory (default is *C:\Program Files\Google\google_appengine*): 37 | * `c:\python27\python.exe appcfg.py update KindleEarFolder\app.yaml KindleEarFolder\module-worker.yaml` 38 | * `c:\python27\python.exe appcfg.py update KindleEarFolder` 39 | 40 | 8. When the upload finishes, you can open the website *'http://appid.appspot.com'* (appid is the name of your application). 41 | For example the author's site: 42 | **The initial username is 'admin', the password is 'admin'; please change the password immediately after the first login.** 43 | 44 | 9. More details can be found in the [FAQ](http://htmlpreview.github.io/?https://github.com/cdhigh/KindleEar/blob/master/static/faq.html). 45 | 46 | #Deployment simplified 47 | If you don't want to install the GAE SDK and Python, there is another option. 48 | 49 | 1. [Download KindleEar](https://github.com/cdhigh/KindleEar/archive/master.zip) and uncompress it (change the folder name to 'KindleEar'). 50 | 2. [Download KindleEar-Uploader](https://drive.google.com/folderview?id=0ByRickMo9V_XNlJITzhYM3JOYW8&usp=sharing) and unzip it. 51 | 3. Put the KindleEar folder into the Uploader directory, then double-click uploader.bat to start the deployment process. 52 | 53 | #License 54 | KindleEar is licensed under the [AGPLv3](http://www.gnu.org/licenses/agpl-3.0.html) license. 55 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/pykakasi/j2h.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # j2h.py 3 | # 4 | # Copyright 2011 Hiroshi Miura 5 | # 6 | # Original Copyright: 7 | # * KAKASI (Kanji Kana Simple inversion program) 8 | # * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $ 9 | # * Copyright (C) 1992 10 | # * Hironobu Takahashi (takahasi@tiny.or.jp) 11 | # * 12 | # * This program is free software; you can redistribute it and/or modify 13 | # * it under the terms of the GNU General Public License as published by 14 | # * the Free Software Foundation; either versions 2, or (at your option) 15 | # * any later version. 16 | # * 17 | # * This program is distributed in the hope that it will be useful 18 | # * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # * GNU General Public License for more details.
21 | # * 22 | # */ 23 | 24 | from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo 25 | import re 26 | 27 | class J2H (object): 28 | 29 | kanwa = None 30 | 31 | cl_table = [ 32 | "","aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", 33 | "aiueow", "aiueow", "aiueow", "k", "g", "k", "g", "k", "g", "k", "g", "k", 34 | "g", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "t", "d", "tc", 35 | "d", "aiueokstchgzjfdbpw", "t", "d", "t", "d", "t", "d", "n", "n", "n", "n", 36 | "n", "h", "b", "p", "h", "b", "p", "hf", "b", "p", "h", "b", "p", "h", "b", 37 | "p", "m", "m", "m", "m", "m", "y", "y", "y", "y", "y", "y", "rl", "rl", 38 | "rl", "rl", "rl", "wiueo", "wiueo", "wiueo", "wiueo", "w", "n", "v", "k", 39 | "k", "", "", "", "", "", "", "", "", ""] 40 | 41 | def __init__(self): 42 | self.kanwa = jisyo() 43 | 44 | def isKanji(self, c): 45 | return ( 0x3400 <= ord(c) and ord(c) < 0xfa2e) 46 | 47 | def isCletter(self, l, c): 48 | if (ord(u"ぁ") <= ord(c) and ord(c) <= 0x309f) and ( l in self.cl_table[ord(c) - ord(u"ぁ")-1]): 49 | return True 50 | return False 51 | 52 | def itaiji_conv(self, text): 53 | r = [] 54 | for c in text: 55 | if c in self.kanwa.itaijidict: 56 | r.append(c) 57 | for c in r: 58 | text = re.sub(c, self.kanwa.itaijidict[c], text) 59 | return text 60 | 61 | def convert(self, text): 62 | max_len = 0 63 | Hstr = "" 64 | table = self.kanwa.load_jisyo(text[0]) 65 | if table is None: 66 | return ("", 0) 67 | for (k,v) in table.iteritems(): 68 | length = len(k) 69 | if len(text) >= length: 70 | if text.startswith(k): 71 | for (yomi, tail) in v: 72 | if tail is '': 73 | if max_len < length: 74 | Hstr = yomi 75 | max_len = length 76 | elif max_len < length+1 and len(text) > length and self.isCletter(tail, text[length]): 77 | Hstr=''.join([yomi,text[length]]) 78 | max_len = length+1 79 | return (Hstr, max_len) 80 | -------------------------------------------------------------------------------- /lib/cssutils/css/csscomment.py: -------------------------------------------------------------------------------- 1 | """CSSComment is not defined in DOM Level 2 at all but a cssutils defined 2 | class only. 3 | 4 | Implements CSSRule which is also extended for a CSSComment rule type. 5 | """ 6 | __all__ = ['CSSComment'] 7 | __docformat__ = 'restructuredtext' 8 | __version__ = '$Id$' 9 | 10 | import cssrule 11 | import cssutils 12 | import xml.dom 13 | 14 | class CSSComment(cssrule.CSSRule): 15 | """ 16 | Represents a CSS comment (cssutils only). 17 | 18 | Format:: 19 | 20 | /*...*/ 21 | """ 22 | def __init__(self, cssText=None, parentRule=None, 23 | parentStyleSheet=None, readonly=False): 24 | super(CSSComment, self).__init__(parentRule=parentRule, 25 | parentStyleSheet=parentStyleSheet) 26 | 27 | self._cssText = None 28 | if cssText: 29 | self._setCssText(cssText) 30 | 31 | self._readonly = readonly 32 | 33 | def __repr__(self): 34 | return u"cssutils.css.%s(cssText=%r)" % ( 35 | self.__class__.__name__, 36 | self.cssText) 37 | 38 | def __str__(self): 39 | return u"" % ( 40 | self.__class__.__name__, 41 | self.cssText, 42 | id(self)) 43 | 44 | def _getCssText(self): 45 | """Return serialized property cssText.""" 46 | return cssutils.ser.do_CSSComment(self) 47 | 48 | def _setCssText(self, cssText): 49 | """ 50 | :param cssText: 51 | textual text to set or tokenlist which is not tokenized 52 | anymore. 
May also be a single token for this rule 53 | 54 | :exceptions: 55 | - :exc:`~xml.dom.SyntaxErr`: 56 | Raised if the specified CSS string value has a syntax error and 57 | is unparsable. 58 | - :exc:`~xml.dom.InvalidModificationErr`: 59 | Raised if the specified CSS string value represents a different 60 | type of rule than the current one. 61 | - :exc:`~xml.dom.NoModificationAllowedErr`: 62 | Raised if the rule is readonly. 63 | """ 64 | super(CSSComment, self)._setCssText(cssText) 65 | tokenizer = self._tokenize2(cssText) 66 | 67 | commenttoken = self._nexttoken(tokenizer) 68 | unexpected = self._nexttoken(tokenizer) 69 | 70 | if not commenttoken or\ 71 | self._type(commenttoken) != self._prods.COMMENT or\ 72 | unexpected: 73 | self._log.error(u'CSSComment: Not a CSSComment: %r' % 74 | self._valuestr(cssText), 75 | error=xml.dom.InvalidModificationErr) 76 | else: 77 | self._cssText = self._tokenvalue(commenttoken) 78 | 79 | cssText = property(_getCssText, _setCssText, 80 | doc=u"The parsable textual representation of this rule.") 81 | 82 | type = property(lambda self: self.COMMENT, 83 | doc=u"The type of this rule, as defined by a CSSRule " 84 | u"type constant.") 85 | 86 | # constant but needed: 87 | wellformed = property(lambda self: True) 88 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | #Changelog for KindleEar 2 | 3 | ##1.23.1 4 | 1. 增强错误处理,规避部分网友部署后无法建立索引的问题。 5 | 2. Bugfix: 修正书籍设置中extra_css无效的问题。 6 | 3. Bugfix: 修正部分全文RSS XML中HTML链接格式不规范导致无法解析出内容的问题。 7 | 8 | ##1.23 9 | 1. 添加自动将超长图切割为多个小图片的功能,方便在kindle上看长图。 10 | 2. 进一步优化自动网页解码功能,减小乱码情况。 11 | 3. 修正导入含中文字符URL的OPML文件导致程序崩溃的问题。 12 | 13 | ##1.22.3 14 | 1. 修正遇到xml文件中的非法tag(中文tag)导致推送失败的问题。 15 | 16 | ##1.22.2 17 | 1. 邮件中转模块升级,在邮件标题中添加 !links则强制抓取文章链接,!article则强制发送文本。 18 | 2. Kindle邮件地址支持多个收件人地址,用分号分隔。 19 | 20 | ##1.22.1 21 | 1. 升级内部一些模块。 22 | 23 | ##1.22 24 | 1. 用AJAX技术重新实现“我的订阅”前后台,实现不重新加载页面增删订阅或RSS。 25 | 26 | ##1.21.1 27 | 1. 增加保存到Instapaper的归档功能。 28 | 29 | ##1.21 30 | 1. 增加保存到Pocket的归档功能。 31 | 32 | ##1.20.28 33 | 1. 《雪球网》屏蔽了gae的IP段的网络请求,此版本通过中转器获取。 34 | 35 | ##1.20.27 36 | 1. 邮件中转功能中引入字符串压缩,支持一封邮件内包含更多链接,以便更好的通过发送链接生成电子书。 37 | 38 | ##1.20.26 39 | 1. 引入html内嵌base64图像技术,减小http请求数量,提升网页加速效率。 40 | 41 | ##1.20.25 42 | 1. 增强网页解码,容忍部分字符解码错误,减小了网页乱码可能。 43 | 44 | ##1.20.24 45 | 1. 修改选择了合并推送但仅有自定义RSS推送时无封面问题。 46 | 47 | ##1.20.23 48 | 1. 去掉标题格式中的 “标题 日/月” 和 “标题 月/日” 格式,因为会导致推送失败。 49 | 50 | ##1.20.22 51 | 1. 解决URL中有unicode字符时无法导出OPML文件的文件。 52 | 2. 解决微信公众号获取失败后导致其他书籍推送异常的问题。 53 | 54 | ##1.20.21 55 | 1. 添加ETAG机制,减小需要的网络流量。 56 | 57 | ##1.20.20 58 | 1. 修正部分XML文件获取文件编码失败问题。 59 | 60 | ##1.20.19 61 | 1. 修正pycrypto模块加载失败问题。 62 | 63 | ##1.20.18 64 | 1. 微信公众号突破一次防爬取 [zhu327](https://github.com/zhu327/rss)。 65 | 2. 通过邮件链接抓取的文章不添加封面。 66 | 67 | ##1.20.17 68 | 1. 在目录中添加各个源的文章篇数。 69 | 2. 增加对书籍异常的处理,避免一本书籍的异常而影响其他书籍的推送。 70 | 71 | ##1.20.16 72 | 1. 增加一个选项,可选择使用网页标题还是feed标题做为文章标题。 73 | 74 | ##1.20.15 75 | 1. bugfix: weixinbase部分解码失败问题。 76 | 77 | ##1.20.14 78 | 1. bugfix: 喷嚏图啩原RSS地址已经失效,更换为另一个地址。 79 | 80 | ##1.20.13 81 | 1. bugfix: 修改有部分RSS将一个图片做为一篇文章(没有html包装)导致推送失败的问题。 82 | 83 | ##1.20.12 84 | 1. 导出opml时将url转义。 85 | 86 | ##1.20.11 87 | 1. "导入订阅列表"功能支持OPML元素Outline嵌套。 88 | 89 | ##1.20.10 90 | 1. bugfix: 修正有部分文章无法提取正文导致推送失败的问题。 91 | 92 | ##1.20.9 93 | 1. 增加从opml文件导入订阅列表的功能。 94 | 2. 网友seff增加特性:可选部分书籍单独推送。 95 | 96 | ##1.20.8 97 | 1. 网友mcfloundinho增加《共识网》 98 | 99 | ##1.20.7 100 | 1. 
bugfix:修正trigger@appid.appspotmail.com触发投递失败的问题。 101 | 102 | ##1.20.6 103 | 1. bugfix:继续修改urlopener处理cookie的一个bug. 104 | 105 | ##1.20.5 106 | 1. 增加网友mcfloundinho提供的《南方周末》。 107 | 2. bugfix:修改urlopener处理cookie的一个bug. 108 | 109 | ##1.20.4 110 | 1. 更新土耳其语翻译。 111 | 112 | ##1.20.3 113 | 1. 修改内置书籍TED渤海湾以适应其网站改版。 114 | 115 | ##1.20.2 116 | 1. 针对使用图片延迟加载技术的网页特殊处理,可以获取部分此类网页的图片。 117 | 118 | ##1.20.1 119 | 1. 新特性,在合并推送时将各书籍的封面拼贴起来。默认已经开启,如果你使用以前的config.py,请设置DEFAULT_COVER_BV=None,如果不喜欢此效果,可以设置DEFAULT_COVER_BV='cv_bound.jpg' 120 | 2. bugfix: 修正保存到evernote不成功的问题(1.13引入) 121 | 122 | ##1.20 123 | 1. 增加一个简单的正文提取模块,在readability失败后启用。 124 | 2. 增强的网页解码器,综合考虑http响应头/html文件头/chardet检测结果,效率更高,乱码更少。 125 | 3. 支持需要登陆才能查看文章的网站,请参照FAQ如何使用。 126 | 4. 针对一天推送多次的需求,书籍属性‘oldest_article’大于365则使用*秒*为单位。 127 | 5. 增强的密码安全,加salt然后md5,无法通过密码词典破解,在可接受的代价范围内无法暴力破解。 128 | (仅新账号启用,如果需要可以删掉admin然后重新登陆就会新建admin账号) 129 | 6. 整理文件夹结构,将相关库都放到lib目录下。 130 | 7. 其他一些小的可用性增强。 131 | > 升级注意:书籍的fetcharticle()增加了一个参数,如果你定制的书籍使用到此接口,需要修改。 132 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/conversion/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import with_statement 4 | 5 | __license__ = 'GPL v3' 6 | __copyright__ = '2009, Kovid Goyal ' 7 | __docformat__ = 'restructuredtext en' 8 | 9 | import os 10 | 11 | from calibre.utils.config import config_dir 12 | from calibre.utils.lock import ExclusiveFile 13 | from calibre import sanitize_file_name 14 | from calibre.customize.conversion import OptionRecommendation 15 | 16 | 17 | config_dir = os.path.join(config_dir, 'conversion') 18 | if not os.path.exists(config_dir): 19 | os.makedirs(config_dir) 20 | 21 | def name_to_path(name): 22 | return os.path.join(config_dir, sanitize_file_name(name)+'.py') 23 | 24 | def save_defaults(name, recs): 25 | path = name_to_path(name) 26 | raw = str(recs) 27 | with open(path, 'wb'): 28 | pass 29 | with ExclusiveFile(path) as f: 30 | f.write(raw) 31 | 32 | def load_defaults(name): 33 | path = name_to_path(name) 34 | if not os.path.exists(path): 35 | open(path, 'wb').close() 36 | with ExclusiveFile(path) as f: 37 | raw = f.read() 38 | r = GuiRecommendations() 39 | if raw: 40 | r.from_string(raw) 41 | return r 42 | 43 | def save_specifics(db, book_id, recs): 44 | raw = str(recs) 45 | db.set_conversion_options(book_id, 'PIPE', raw) 46 | 47 | def load_specifics(db, book_id): 48 | raw = db.conversion_options(book_id, 'PIPE') 49 | r = GuiRecommendations() 50 | if raw: 51 | r.from_string(raw) 52 | return r 53 | 54 | def delete_specifics(db, book_id): 55 | db.delete_conversion_options(book_id, 'PIPE') 56 | 57 | class GuiRecommendations(dict): 58 | 59 | def __new__(cls, *args): 60 | dict.__new__(cls) 61 | obj = super(GuiRecommendations, cls).__new__(cls, *args) 62 | obj.disabled_options = set([]) 63 | return obj 64 | 65 | def to_recommendations(self, level=OptionRecommendation.LOW): 66 | ans = [] 67 | for key, val in self.items(): 68 | ans.append((key, val, level)) 69 | return ans 70 | 71 | def __str__(self): 72 | ans = ['{'] 73 | for key, val in self.items(): 74 | ans.append('\t'+repr(key)+' : '+repr(val)+',') 75 | ans.append('}') 76 | return '\n'.join(ans) 77 | 78 | def from_string(self, raw): 79 | try: 80 | d = eval(raw) 81 | except SyntaxError: 82 | d = None 83 | if d: 84 | self.update(d) 85 | 86 | def merge_recommendations(self, get_option, level, options, 87 | 
only_existing=False): 88 | for name in options: 89 | if only_existing and name not in self: 90 | continue 91 | opt = get_option(name) 92 | if opt is None: continue 93 | if opt.level == OptionRecommendation.HIGH: 94 | self[name] = opt.recommended_value 95 | self.disabled_options.add(name) 96 | elif opt.level > level or name not in self: 97 | self[name] = opt.recommended_value 98 | 99 | 100 | -------------------------------------------------------------------------------- /lib/cssutils/scripts/csscombine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Combine all sheets referred to a given CSS *proxy* sheet 3 | into a single new sheet. 4 | 5 | - no ``url()`` values are adjusted so currently when using relative references 6 | for e.g. images it is best to have all sheets in a single folder 7 | - in @import rules only relative paths do work for now but should be used 8 | anyway 9 | - messages are send to stderr 10 | - output to stdout. 11 | 12 | Example:: 13 | 14 | csscombine sheets\csscombine-proxy.css -m -t ascii -s utf-8 15 | 1>combined.css 2>log.txt 16 | 17 | results in log.txt:: 18 | 19 | COMBINING sheets/csscombine-proxy.css 20 | USING SOURCE ENCODING: css 21 | * PROCESSING @import sheets\csscombine-1.css 22 | * PROCESSING @import sheets\csscombine-2.css 23 | INFO Nested @imports are not combined: @import "1.css"; 24 | SETTING TARGET ENCODING: ascii 25 | 26 | and combined.css:: 27 | 28 | @charset "ascii";@import"1.css";@namespaces2"uri";s2|sheet-1{top:1px}s2|sheet-2{top:2px}proxy{top:3px} 29 | 30 | or without option -m:: 31 | 32 | @charset "ascii"; 33 | @import "1.css"; 34 | @namespace s2 "uri"; 35 | @namespace other "other"; 36 | /* proxy sheet were imported sheets should be combined */ 37 | /* non-ascii chars: \F6 \E4 \FC */ 38 | /* @import "csscombine-1.css"; */ 39 | /* combined sheet 1 */ 40 | s2|sheet-1 { 41 | top: 1px 42 | } 43 | /* @import url(csscombine-2.css); */ 44 | /* combined sheet 2 */ 45 | s2|sheet-2 { 46 | top: 2px 47 | } 48 | proxy { 49 | top: 3px 50 | } 51 | 52 | """ 53 | __all__ = ['csscombine'] 54 | __docformat__ = 'restructuredtext' 55 | __version__ = '$Id$' 56 | 57 | from cssutils.script import csscombine 58 | import optparse 59 | import sys 60 | 61 | def main(args=None): 62 | usage = "usage: %prog [options] [path]" 63 | parser = optparse.OptionParser(usage=usage) 64 | parser.add_option('-u', '--url', action='store', 65 | dest='url', 66 | help='URL to parse (path is ignored if URL given)') 67 | parser.add_option('-s', '--sourceencoding', action='store', 68 | dest='sourceencoding', 69 | help='encoding of input, defaulting to "css". 
If given overwrites other encoding information like @charset declarations') 70 | parser.add_option('-t', '--targetencoding', action='store', 71 | dest='targetencoding', 72 | help='encoding of output, defaulting to "UTF-8"', default='utf-8') 73 | parser.add_option('-m', '--minify', action='store_true', dest='minify', 74 | default=False, 75 | help='saves minified version of combined files, defaults to False') 76 | options, path = parser.parse_args() 77 | 78 | if options.url: 79 | print csscombine(url=options.url, 80 | sourceencoding=options.sourceencoding, 81 | targetencoding=options.targetencoding, 82 | minify=options.minify) 83 | elif path: 84 | print csscombine(path=path[0], 85 | sourceencoding=options.sourceencoding, 86 | targetencoding=options.targetencoding, 87 | minify=options.minify) 88 | else: 89 | parser.error('no path or URL (-u) given') 90 | 91 | 92 | 93 | if __name__ == '__main__': 94 | sys.exit(main()) -------------------------------------------------------------------------------- /lib/calibre/ebooks/compression/palmdoc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | 4 | __license__ = 'GPL v3' 5 | __copyright__ = '2008, Kovid Goyal ' 6 | 7 | from cStringIO import StringIO 8 | from struct import pack 9 | 10 | #from calibre.constants import plugins 11 | #cPalmdoc = plugins['cPalmdoc'][0] 12 | #if not cPalmdoc: 13 | # raise RuntimeError(('Failed to load required cPalmdoc module: ' 14 | # '%s')%plugins['cPalmdoc'][1]) 15 | 16 | #def decompress_doc(data): 17 | # return cPalmdoc.decompress(data) 18 | 19 | #GAE dont support cmodule, using py module, it will be very very slow!!! 20 | def compress_doc(data): 21 | if not data: 22 | return u'' 23 | #return cPalmdoc.compress(data) 24 | return py_compress_doc(data) 25 | 26 | #def test(): 27 | # TESTS = [ 28 | # 'abc\x03\x04\x05\x06ms', # Test binary writing 29 | # 'a b c \xfed ', # Test encoding of spaces 30 | # '0123456789axyz2bxyz2cdfgfo9iuyerh', 31 | # '0123456789asd0123456789asd|yyzzxxffhhjjkk', 32 | # ('ciewacnaq eiu743 r787q 0w% ; sa fd\xef\ffdxosac wocjp acoiecowei ' 33 | # 'owaic jociowapjcivcjpoivjporeivjpoavca; p9aw8743y6r74%$^$^%8 ') 34 | # ] 35 | # for test in TESTS: 36 | # print 'Test:', repr(test) 37 | # print '\tTesting compression...' 38 | # good = py_compress_doc(test) 39 | # x = compress_doc(test) 40 | # print '\t\tgood:', repr(good) 41 | # print '\t\tx :', repr(x) 42 | # assert x == good 43 | # print '\tTesting decompression...' 
44 | # print '\t\t', repr(decompress_doc(x)) 45 | # assert decompress_doc(x) == test 46 | # print 47 | 48 | def py_compress_doc(data): 49 | out = StringIO() 50 | i = 0 51 | ldata = len(data) 52 | while i < ldata: 53 | if i > 10 and (ldata - i) > 10: 54 | chunk = '' 55 | match = -1 56 | for j in xrange(10, 2, -1): 57 | chunk = data[i:i+j] 58 | try: 59 | match = data.rindex(chunk, 0, i) 60 | except ValueError: 61 | continue 62 | if (i - match) <= 2047: 63 | break 64 | match = -1 65 | if match >= 0: 66 | n = len(chunk) 67 | m = i - match 68 | code = 0x8000 + ((m << 3) & 0x3ff8) + (n - 3) 69 | out.write(pack('>H', code)) 70 | i += n 71 | continue 72 | ch = data[i] 73 | och = ord(ch) 74 | i += 1 75 | if ch == ' ' and (i + 1) < ldata: 76 | onch = ord(data[i]) 77 | if onch >= 0x40 and onch < 0x80: 78 | out.write(pack('>B', onch ^ 0x80)) 79 | i += 1 80 | continue 81 | if och == 0 or (och > 8 and och < 0x80): 82 | out.write(ch) 83 | else: 84 | j = i 85 | binseq = [ch] 86 | while j < ldata and len(binseq) < 8: 87 | ch = data[j] 88 | och = ord(ch) 89 | if och == 0 or (och > 8 and och < 0x80): 90 | break 91 | binseq.append(ch) 92 | j += 1 93 | out.write(pack('>B', len(binseq))) 94 | out.write(''.join(binseq)) 95 | i += len(binseq) - 1 96 | return out.getvalue() 97 | 98 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/pykakasi/kakasi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # kakasi.py 3 | # 4 | # Copyright 2011 Hiroshi Miura 5 | # 6 | # Original Copyright: 7 | # * KAKASI (Kanji Kana Simple inversion program) 8 | # * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $ 9 | # * Copyright (C) 1992 10 | # * Hironobu Takahashi (takahasi@tiny.or.jp) 11 | # * 12 | # * This program is free software; you can redistribute it and/or modify 13 | # * it under the terms of the GNU General Public License as published by 14 | # * the Free Software Foundation; either versions 2, or (at your option) 15 | # * any later version. 16 | # * 17 | # * This program is distributed in the hope that it will be useful 18 | # * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # * GNU General Public License for more details. 
21 | # * 22 | # */ 23 | 24 | from calibre.ebooks.unihandecode.pykakasi.j2h import J2H 25 | from calibre.ebooks.unihandecode.pykakasi.h2a import H2a 26 | from calibre.ebooks.unihandecode.pykakasi.k2a import K2a 27 | 28 | class kakasi(object): 29 | 30 | j2h = None 31 | h2a = None 32 | k2a = None 33 | 34 | def __init__(self): 35 | self.j2h = J2H() 36 | self.h2a = H2a() 37 | self.k2a = K2a() 38 | 39 | 40 | def do(self, text): 41 | otext = '' 42 | i = 0 43 | while True: 44 | if i >= len(text): 45 | break 46 | 47 | if self.j2h.isKanji(text[i]): 48 | (t, l) = self.j2h.convert(text[i:]) 49 | if l <= 0: 50 | otext = otext + text[i] 51 | i = i + 1 52 | continue 53 | i = i + l 54 | m = 0 55 | tmptext = "" 56 | while True: 57 | if m >= len(t): 58 | break 59 | (s, n) = self.h2a.convert(t[m:]) 60 | if n <= 0: 61 | break 62 | m = m + n 63 | tmptext = tmptext+s 64 | if i >= len(text): 65 | otext = otext + tmptext.capitalize() 66 | else: 67 | otext = otext + tmptext.capitalize() +' ' 68 | elif self.h2a.isHiragana(text[i]): 69 | tmptext = '' 70 | while True: 71 | (t, l) = self.h2a.convert(text[i:]) 72 | tmptext = tmptext+t 73 | i = i + l 74 | if i >= len(text): 75 | otext = otext + tmptext 76 | break 77 | elif not self.h2a.isHiragana(text[i]): 78 | otext = otext + tmptext + ' ' 79 | break 80 | elif self.k2a.isKatakana(text[i]): 81 | tmptext = '' 82 | while True: 83 | (t, l) = self.k2a.convert(text[i:]) 84 | tmptext = tmptext+t 85 | i = i + l 86 | if i >= len(text): 87 | otext = otext + tmptext 88 | break 89 | elif not self.k2a.isKatakana(text[i]): 90 | otext = otext + tmptext + ' ' 91 | break 92 | else: 93 | otext = otext + text[i] 94 | i += 1 95 | 96 | return otext 97 | 98 | -------------------------------------------------------------------------------- /lib/calibre/utils/titlecase.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Original Perl version by: John Gruber http://daringfireball.net/ 10 May 2008 6 | Python version by Stuart Colville http://muffinresearch.co.uk 7 | License: http://www.opensource.org/licenses/mit-license.php 8 | """ 9 | 10 | import re 11 | 12 | from calibre.utils.icu import capitalize 13 | 14 | __all__ = ['titlecase'] 15 | __version__ = '0.5' 16 | 17 | SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?' 18 | PUNCT = r"""!"#$%&'‘()*+,\-‒–—―./:;?@[\\\]_`{|}~""" 19 | 20 | SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I) 21 | INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I) 22 | UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT) 23 | CAPFIRST = re.compile(r"^[%s]*?([A-Za-z])" % PUNCT) 24 | SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I) 25 | SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I) 26 | SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I) 27 | SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL) 28 | APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I) 29 | ALL_CAPS = re.compile(r'^[A-Z0-9\s%s]+$' % PUNCT) 30 | UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$") 31 | 32 | _lang = None 33 | 34 | def lang(): 35 | global _lang 36 | if _lang is None: 37 | from calibre.utils.localization import get_lang 38 | _lang = get_lang().lower() 39 | return _lang 40 | 41 | def titlecase(text): 42 | 43 | """ 44 | Titlecases input text 45 | 46 | This filter changes all words to Title Caps, and attempts to be clever 47 | about *un*capitalizing SMALL words like a/an/the in the input. 
48 | 49 | The list of "SMALL words" which are not capped comes from 50 | the New York Times Manual of Style, plus 'vs' and 'v'. 51 | 52 | """ 53 | 54 | all_caps = ALL_CAPS.match(text) 55 | 56 | words = re.split('\s+', text) 57 | line = [] 58 | for word in words: 59 | if all_caps: 60 | if UC_INITIALS.match(word): 61 | line.append(word) 62 | continue 63 | else: 64 | word = icu_lower(word) 65 | 66 | if APOS_SECOND.match(word): 67 | word = word.replace(word[0], icu_upper(word[0]), 1) 68 | word = word[:2] + icu_upper(word[2]) + word[3:] 69 | line.append(word) 70 | continue 71 | if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word): 72 | line.append(word) 73 | continue 74 | if SMALL_WORDS.match(word): 75 | line.append(icu_lower(word)) 76 | continue 77 | 78 | hyphenated = [] 79 | for item in word.split('-'): 80 | hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item)) 81 | line.append("-".join(hyphenated)) 82 | 83 | 84 | result = " ".join(line) 85 | 86 | result = SMALL_FIRST.sub(lambda m: '%s%s' % ( 87 | m.group(1), 88 | capitalize(m.group(2)) 89 | ), result) 90 | 91 | result = SMALL_AFTER_NUM.sub(lambda m: '%s%s' % (m.group(1), 92 | capitalize(m.group(2)) 93 | ), result) 94 | 95 | result = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), result) 96 | 97 | result = SUBPHRASE.sub(lambda m: '%s%s' % ( 98 | m.group(1), 99 | capitalize(m.group(2)) 100 | ), result) 101 | 102 | return result 103 | 104 | -------------------------------------------------------------------------------- /lib/chardet/escprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . 
import constants 29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, 30 | ISO2022KRSMModel) 31 | from .charsetprober import CharSetProber 32 | from .codingstatemachine import CodingStateMachine 33 | from .compat import wrap_ord 34 | 35 | 36 | class EscCharSetProber(CharSetProber): 37 | def __init__(self): 38 | CharSetProber.__init__(self) 39 | self._mCodingSM = [ 40 | CodingStateMachine(HZSMModel), 41 | CodingStateMachine(ISO2022CNSMModel), 42 | CodingStateMachine(ISO2022JPSMModel), 43 | CodingStateMachine(ISO2022KRSMModel) 44 | ] 45 | self.reset() 46 | 47 | def reset(self): 48 | CharSetProber.reset(self) 49 | for codingSM in self._mCodingSM: 50 | if not codingSM: 51 | continue 52 | codingSM.active = True 53 | codingSM.reset() 54 | self._mActiveSM = len(self._mCodingSM) 55 | self._mDetectedCharset = None 56 | 57 | def get_charset_name(self): 58 | return self._mDetectedCharset 59 | 60 | def get_confidence(self): 61 | if self._mDetectedCharset: 62 | return 0.99 63 | else: 64 | return 0.00 65 | 66 | def feed(self, aBuf): 67 | for c in aBuf: 68 | # PY3K: aBuf is a byte array, so c is an int, not a byte 69 | for codingSM in self._mCodingSM: 70 | if not codingSM: 71 | continue 72 | if not codingSM.active: 73 | continue 74 | codingState = codingSM.next_state(wrap_ord(c)) 75 | if codingState == constants.eError: 76 | codingSM.active = False 77 | self._mActiveSM -= 1 78 | if self._mActiveSM <= 0: 79 | self._mState = constants.eNotMe 80 | return self.get_state() 81 | elif codingState == constants.eItsMe: 82 | self._mState = constants.eFoundIt 83 | self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8 84 | return self.get_state() 85 | 86 | return self.get_state() 87 | -------------------------------------------------------------------------------- /lib/chardet/sbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 
22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetgroupprober import CharSetGroupProber 30 | from .sbcharsetprober import SingleByteCharSetProber 31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, 32 | Latin5CyrillicModel, MacCyrillicModel, 33 | Ibm866Model, Ibm855Model) 34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel 35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel 36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel 37 | from .langthaimodel import TIS620ThaiModel 38 | from .langhebrewmodel import Win1255HebrewModel 39 | from .hebrewprober import HebrewProber 40 | 41 | 42 | class SBCSGroupProber(CharSetGroupProber): 43 | def __init__(self): 44 | CharSetGroupProber.__init__(self) 45 | self._mProbers = [ 46 | SingleByteCharSetProber(Win1251CyrillicModel), 47 | SingleByteCharSetProber(Koi8rModel), 48 | SingleByteCharSetProber(Latin5CyrillicModel), 49 | SingleByteCharSetProber(MacCyrillicModel), 50 | SingleByteCharSetProber(Ibm866Model), 51 | SingleByteCharSetProber(Ibm855Model), 52 | SingleByteCharSetProber(Latin7GreekModel), 53 | SingleByteCharSetProber(Win1253GreekModel), 54 | SingleByteCharSetProber(Latin5BulgarianModel), 55 | SingleByteCharSetProber(Win1251BulgarianModel), 56 | SingleByteCharSetProber(Latin2HungarianModel), 57 | SingleByteCharSetProber(Win1250HungarianModel), 58 | SingleByteCharSetProber(TIS620ThaiModel), 59 | ] 60 | hebrewProber = HebrewProber() 61 | logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, 62 | False, hebrewProber) 63 | visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, 64 | hebrewProber) 65 | hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) 66 | self._mProbers.extend([hebrewProber, logicalHebrewProber, 67 | visualHebrewProber]) 68 | 69 | self.reset() 70 | --------------------------------------------------------------------------------
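The single-byte charset probers above are not normally instantiated by hand; chardet wires them into its top-level detection API. Below is a minimal, hypothetical sketch (not part of KindleEar) of how a page fetcher can fall back on chardet.detect() to decode downloaded bytes when the HTTP headers and meta tags give no usable charset; the fetch_and_decode helper, the example URL, and the utf-8 fallback are illustrative assumptions only.

```python
# Hypothetical usage sketch: decode raw page bytes with chardet as a fallback.
import urllib2  # Python 2, matching the rest of this code base
import chardet

def fetch_and_decode(url):
    raw = urllib2.urlopen(url, timeout=30).read()  # raw bytes of the page
    guess = chardet.detect(raw)                    # e.g. {'encoding': 'windows-1251', 'confidence': 0.98}
    encoding = guess.get('encoding') or 'utf-8'    # assumed fallback when detection fails
    return raw.decode(encoding, 'replace')         # tolerate the odd undecodable byte

if __name__ == '__main__':
    print fetch_and_decode('http://www.example.com/')
```

chardet.detect() feeds the buffer to a UniversalDetector, which runs the escape, multi-byte, and single-byte group probers (including the SBCSGroupProber defined above) and reports the most confident match.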