├── lib
    ├── __init__.py
    ├── web
    │   ├── contrib
    │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── python23.py
    │   ├── test.py
    │   ├── wsgi.py
    │   └── wsgiserver
    │   │   └── ssl_builtin.py
    ├── bs4
    │   └── tests
    │   │   ├── __init__.py
    │   │   ├── test_htmlparser.py
    │   │   ├── test_docs.py
    │   │   └── test_lxml.py
    ├── dateutil
    │   ├── parser.py
    │   ├── zoneinfo
    │   │   ├── zoneinfo-2010g.tar.gz
    │   │   └── __init__.py
    │   ├── __init__.py
    │   └── easter.py
    ├── calibre
    │   ├── ebooks
    │   │   ├── oeb
    │   │   │   ├── __init__.py
    │   │   │   └── transforms
    │   │   │   │   └── __init__.py
    │   │   ├── unihandecode
    │   │   │   ├── pykakasi
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── jisyo.py
    │   │   │   │   ├── k2a.py
    │   │   │   │   ├── j2h.py
    │   │   │   │   └── kakasi.py
    │   │   │   ├── vndecoder.py
    │   │   │   ├── krdecoder.py
    │   │   │   ├── jadecoder.py
    │   │   │   └── __init__.py
    │   │   ├── compression
    │   │   │   ├── __init__.py
    │   │   │   └── palmdoc.py
    │   │   ├── mobi
    │   │   │   ├── reader
    │   │   │   │   └── __init__.py
    │   │   │   ├── writer8
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── cleanup.py
    │   │   │   │   └── header.py
    │   │   │   ├── __init__.py
    │   │   │   ├── writer2
    │   │   │   │   └── __init__.py
    │   │   │   └── tweak.py
    │   │   ├── conversion
    │   │   │   ├── plugins
    │   │   │   │   └── __init__.py
    │   │   │   ├── __init__.py
    │   │   │   ├── oeboutput.py
    │   │   │   └── config.py
    │   │   └── tweak.py
    │   ├── library
    │   │   └── __init__.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── config_base.py
    │   │   ├── resources.py
    │   │   ├── mreplace.py
    │   │   ├── cleantext.py
    │   │   ├── img.py
    │   │   └── titlecase.py
    │   ├── constants.py
    │   └── startup.py
    ├── cssutils
    │   ├── scripts
    │   │   ├── __init__.py
    │   │   ├── cssparse.py
    │   │   ├── csscapture.py
    │   │   └── csscombine.py
    │   ├── stylesheets
    │   │   ├── __init__.py
    │   │   └── stylesheetlist.py
    │   ├── codec.py
    │   ├── settings.py
    │   ├── _fetch.py
    │   ├── css
    │   │   ├── __init__.py
    │   │   ├── cssrulelist.py
    │   │   └── csscomment.py
    │   └── _fetchgae.py
    ├── readability
    │   ├── __init__.py
    │   ├── debug.py
    │   ├── cleaners.py
    │   └── encoding.py
    ├── cssselect
    │   └── __init__.py
    ├── memcachestore.py
    ├── chardet
    │   ├── compat.py
    │   ├── __init__.py
    │   ├── constants.py
    │   ├── euctwprober.py
    │   ├── euckrprober.py
    │   ├── gb2312prober.py
    │   ├── big5prober.py
    │   ├── cp949prober.py
    │   ├── charsetprober.py
    │   ├── mbcsgroupprober.py
    │   ├── codingstatemachine.py
    │   ├── chardetect.py
    │   ├── utf8prober.py
    │   ├── escprober.py
    │   └── sbcsgroupprober.py
    ├── weixin.py
    └── opml.py
├── static
    ├── cn.gif
    ├── tr.gif
    ├── us.gif
    ├── book128.png
    ├── favicon.ico
    ├── favicon2.ico
    ├── fulltext.gif
    ├── separate.gif
    ├── periodical.gif
    ├── apple-touch-icon.png
    └── apple-touch-icon57.png
├── upload.sh
├── images
    ├── cv_wsj.jpg
    ├── mh_wsj.gif
    ├── cv_anbang.jpg
    ├── cv_bound.jpg
    ├── cv_folha.jpg
    ├── cv_nfzm.jpg
    ├── cv_xueqiu.jpg
    ├── mh_anbang.gif
    ├── mh_folha.gif
    ├── mh_nfzm.gif
    ├── mh_xueqiu.gif
    ├── cv_dapenti.jpg
    ├── cv_default.jpg
    ├── cv_gongshi.jpg
    ├── cv_lifeweek.jpg
    ├── mh_dapenti.gif
    ├── mh_default.gif
    ├── mh_gongshi.gif
    ├── mh_lifeweek.gif
    ├── cv_aisixiang.jpg
    ├── cv_chuansongme.jpg
    ├── cv_economist.jpg
    ├── cv_ftchinese.jpg
    ├── cv_qiushibaike.jpg
    ├── cv_zhihudaily.jpg
    ├── mh_aisixiang.gif
    ├── mh_chuansongme.gif
    ├── mh_economist.gif
    ├── mh_ftchinese.gif
    ├── mh_qiushibaike.gif
    └── mh_zhihudaily.gif
├── books
    ├── Economist.py
    ├── Xiaodaonews.py
    ├── ZhihuDailyRss.py
    ├── FTChinese.py
    ├── __init__.py
    ├── Dapenti.py
    ├── Readme.txt
    ├── Lifeweek.py
    ├── nfzm.py
    ├── Gongshi.py
    ├── Xueqiu.py
    ├── FolhaDeSaopaulo.py
    ├── wsj.py
    └── Qiushibaike.py
├── .gitattributes
├── i18n
    ├── en
    │   └── LC_MESSAGES
    │   │   ├── lang.mo
    │   │   └── lang.po
    ├── tr-tr
    │   └── LC_MESSAGES
    │   │   └── lang.mo
    └── zh-cn
    │   └── LC_MESSAGES
    │   │   └── lang.mo
├── .gitignore
├── mime.types
├── cron.yaml
├── queue.yaml
├── dispatch.yaml
├── templates
    ├── tipsandback.html
    ├── tipsback.html
    ├── autoback.html
    ├── delaccount.html
    ├── advimport.html
    ├── adminmgrpwd.html
    ├── advwhitelist.html
    ├── advurlfilter.html
    ├── booklogininfo.html
    ├── login.html
    ├── home.html
    ├── advbase.html
    └── admin.html
├── apps
    ├── View
    │   ├── Home.py
    │   ├── DbViewer.py
    │   ├── __init__.py
    │   ├── Logs.py
    │   └── Setting.py
    ├── __init__.py
    ├── Work
    │   └── __init__.py
    ├── module_backend.py
    └── module_front.py
├── index.yaml
├── module-worker.yaml
├── app.yaml
├── readme_EN.md
└── changelog.md
/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/web/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/bs4/tests/__init__.py: -------------------------------------------------------------------------------- 1 | "The beautifulsoup tests." 2 | -------------------------------------------------------------------------------- /static/cn.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/cn.gif -------------------------------------------------------------------------------- /static/tr.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/tr.gif -------------------------------------------------------------------------------- /static/us.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/us.gif -------------------------------------------------------------------------------- /upload.sh: -------------------------------------------------------------------------------- 1 | appcfg.py update app.yaml module-worker.yaml 2 | appcfg.py update . 
3 | -------------------------------------------------------------------------------- /images/cv_wsj.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_wsj.jpg -------------------------------------------------------------------------------- /images/mh_wsj.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_wsj.gif -------------------------------------------------------------------------------- /books/Economist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/books/Economist.py -------------------------------------------------------------------------------- /images/cv_anbang.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_anbang.jpg -------------------------------------------------------------------------------- /images/cv_bound.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_bound.jpg -------------------------------------------------------------------------------- /images/cv_folha.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_folha.jpg -------------------------------------------------------------------------------- /images/cv_nfzm.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_nfzm.jpg -------------------------------------------------------------------------------- /images/cv_xueqiu.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_xueqiu.jpg -------------------------------------------------------------------------------- /images/mh_anbang.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_anbang.gif -------------------------------------------------------------------------------- /images/mh_folha.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_folha.gif -------------------------------------------------------------------------------- /images/mh_nfzm.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_nfzm.gif -------------------------------------------------------------------------------- /images/mh_xueqiu.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_xueqiu.gif -------------------------------------------------------------------------------- /static/book128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/book128.png -------------------------------------------------------------------------------- /static/favicon.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/favicon.ico -------------------------------------------------------------------------------- /static/favicon2.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/favicon2.ico -------------------------------------------------------------------------------- /static/fulltext.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/fulltext.gif -------------------------------------------------------------------------------- /static/separate.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/separate.gif -------------------------------------------------------------------------------- /images/cv_dapenti.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_dapenti.jpg -------------------------------------------------------------------------------- /images/cv_default.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_default.jpg -------------------------------------------------------------------------------- /images/cv_gongshi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_gongshi.jpg -------------------------------------------------------------------------------- /images/cv_lifeweek.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_lifeweek.jpg -------------------------------------------------------------------------------- /images/mh_dapenti.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_dapenti.gif -------------------------------------------------------------------------------- /images/mh_default.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_default.gif -------------------------------------------------------------------------------- /images/mh_gongshi.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_gongshi.gif -------------------------------------------------------------------------------- /images/mh_lifeweek.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_lifeweek.gif -------------------------------------------------------------------------------- /lib/dateutil/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/lib/dateutil/parser.py -------------------------------------------------------------------------------- /static/periodical.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/speed/KindleEar/master/static/periodical.gif -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Disable LF normalization for all files 2 | * text=auto 3 | *.pyc -text 4 | *.py text -------------------------------------------------------------------------------- /images/cv_aisixiang.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_aisixiang.jpg -------------------------------------------------------------------------------- /images/cv_chuansongme.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_chuansongme.jpg -------------------------------------------------------------------------------- /images/cv_economist.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_economist.jpg -------------------------------------------------------------------------------- /images/cv_ftchinese.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_ftchinese.jpg -------------------------------------------------------------------------------- /images/cv_qiushibaike.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_qiushibaike.jpg -------------------------------------------------------------------------------- /images/cv_zhihudaily.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/cv_zhihudaily.jpg -------------------------------------------------------------------------------- /images/mh_aisixiang.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_aisixiang.gif -------------------------------------------------------------------------------- /images/mh_chuansongme.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_chuansongme.gif -------------------------------------------------------------------------------- /images/mh_economist.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_economist.gif -------------------------------------------------------------------------------- /images/mh_ftchinese.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_ftchinese.gif -------------------------------------------------------------------------------- /images/mh_qiushibaike.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_qiushibaike.gif -------------------------------------------------------------------------------- /images/mh_zhihudaily.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/speed/KindleEar/master/images/mh_zhihudaily.gif -------------------------------------------------------------------------------- /i18n/en/LC_MESSAGES/lang.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/i18n/en/LC_MESSAGES/lang.mo -------------------------------------------------------------------------------- /static/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/apple-touch-icon.png -------------------------------------------------------------------------------- /i18n/tr-tr/LC_MESSAGES/lang.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/i18n/tr-tr/LC_MESSAGES/lang.mo -------------------------------------------------------------------------------- /i18n/zh-cn/LC_MESSAGES/lang.mo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/i18n/zh-cn/LC_MESSAGES/lang.mo -------------------------------------------------------------------------------- /static/apple-touch-icon57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/static/apple-touch-icon57.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | Correio.py 4 | oGlobo.py 5 | mh_correio.gif 6 | mh_globo.gif 7 | cv_correio.jpg 8 | cv_globo.jpg 9 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/oeb/__init__.py: -------------------------------------------------------------------------------- 1 | __license__ = 'GPL v3' 2 | __copyright__ = '2008, Marshall T. 
Vandegrift ' 3 | -------------------------------------------------------------------------------- /lib/cssutils/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | from csscombine import csscombine 2 | __all__ = ["csscapture", "csscombine", "cssparse"] 3 | 4 | 5 | -------------------------------------------------------------------------------- /lib/dateutil/zoneinfo/zoneinfo-2010g.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/speed/KindleEar/master/lib/dateutil/zoneinfo/zoneinfo-2010g.tar.gz -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/pykakasi/__init__.py: -------------------------------------------------------------------------------- 1 | from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi 2 | kakasi 3 | 4 | __all__ = ["pykakasi"] 5 | 6 | -------------------------------------------------------------------------------- /lib/calibre/library/__init__.py: -------------------------------------------------------------------------------- 1 | __license__ = 'GPL v3' 2 | __copyright__ = '2008, Kovid Goyal ' 3 | ''' Code to manage ebook library''' 4 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/compression/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __license__ = 'GPL 3' 4 | __copyright__ = '2009, John Schember ' 5 | __docformat__ = 'restructuredtext en' 6 | -------------------------------------------------------------------------------- /mime.types: -------------------------------------------------------------------------------- 1 | application/epub+zip epub 2 | application/xhtml+xml xhtml 3 | text/css css 4 | application/x-dtbncx+xml ncx 5 | application/oebps-package+xml opf 6 | application/vnd.ms-opentype otf 7 | image/svg+xml svg -------------------------------------------------------------------------------- /cron.yaml: -------------------------------------------------------------------------------- 1 | cron: 2 | - description: add deliver task to queue 3 | url: /deliver 4 | schedule: every 1 hours 5 | 6 | - description: remove old logs 7 | url: /removelogs 8 | schedule: every day 03:00 9 | timezone: Asia/Shanghai 10 | -------------------------------------------------------------------------------- /lib/calibre/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | __license__ = 'GPL v3' 3 | __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' 4 | __docformat__ = 'restructuredtext en' 5 | 6 | ''' 7 | Miscellaneous utilities. 
8 | ''' 9 | 10 | -------------------------------------------------------------------------------- /queue.yaml: -------------------------------------------------------------------------------- 1 | queue: 2 | 3 | - name: deliverqueue1 4 | rate: 2/m 5 | bucket_size: 2 6 | retry_parameters: 7 | task_retry_limit: 2 8 | task_age_limit: 5m 9 | min_backoff_seconds: 60 10 | max_backoff_seconds: 600 11 | max_doublings: 4 12 | # target: worker -------------------------------------------------------------------------------- /lib/dateutil/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2010 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | __version__ = "1.5" 10 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/oeb/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import with_statement 4 | 5 | __license__ = 'GPL v3' 6 | __copyright__ = '2009, Kovid Goyal ' 7 | __docformat__ = 'restructuredtext en' 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /dispatch.yaml: -------------------------------------------------------------------------------- 1 | dispatch: 2 | # Default module serves the typical web resources and all static resources. 3 | - url: "*/favicon.ico" 4 | module: default 5 | 6 | - url: "*/worker*" 7 | module: worker 8 | 9 | - url: "*/url2book*" 10 | module: worker 11 | 12 | # - url: "*/_ah/mail/*" 13 | # module: worker -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/reader/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/writer8/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /templates/tipsandback.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menubar -%} 3 | {% endblock -%} 4 | {% block content -%} 5 | {% if tips -%} 6 |

{{tips}}

7 | {% endif -%} 8 |

{{_("Auto back to previous page after 5 seconds")}}...

9 |

{{_("Click to back")}}


10 | {% endblock -%} 11 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/conversion/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /templates/tipsback.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menubar -%} 3 | {% endblock -%} 4 | {% block content -%} 5 | {% if tips -%} 6 |

{{tips}}

7 | {% endif -%} 8 |

9 | 10 | {{_("Click to back")}} 11 | 12 |


13 | {% endblock -%} 14 | -------------------------------------------------------------------------------- /lib/cssutils/stylesheets/__init__.py: -------------------------------------------------------------------------------- 1 | """Implements Document Object Model Level 2 Style Sheets 2 | http://www.w3.org/TR/2000/PR-DOM-Level-2-Style-20000927/stylesheets.html 3 | """ 4 | __all__ = ['MediaList', 'MediaQuery', 'StyleSheet', 'StyleSheetList'] 5 | __docformat__ = 'restructuredtext' 6 | __version__ = '$Id$' 7 | 8 | from medialist import * 9 | from mediaquery import * 10 | from stylesheet import * 11 | from stylesheetlist import * 12 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | 4 | __license__ = 'GPL v3' 5 | __copyright__ = '2008, Kovid Goyal ' 6 | 7 | class MobiError(Exception): 8 | pass 9 | 10 | # That might be a bit small on the PW, but Amazon/KG 2.5 still uses these values, even when delivered to a PW 11 | MAX_THUMB_SIZE = 16 * 1024 12 | MAX_THUMB_DIMEN = (180, 240) 13 | 14 | 15 | -------------------------------------------------------------------------------- /lib/cssutils/codec.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Python codec for CSS.""" 3 | __docformat__ = 'restructuredtext' 4 | __author__ = 'Walter Doerwald' 5 | __version__ = '$Id: util.py 1114 2008-03-05 13:22:59Z cthedot $' 6 | 7 | import sys 8 | 9 | if sys.version_info < (3,): 10 | from _codec2 import * 11 | # for tests 12 | from _codec2 import _fixencoding 13 | else: 14 | from _codec3 import * 15 | # for tests 16 | from _codec3 import _fixencoding 17 | -------------------------------------------------------------------------------- /lib/readability/__init__.py: -------------------------------------------------------------------------------- 1 | # version : 0.3.0.5 2 | # https://pypi.python.org/pypi/readability-lxml 3 | from .readability import Document 4 | 5 | #Usage: 6 | 7 | #from readability.readability import Document 8 | #import urllib 9 | #html = urllib.urlopen(url).read() 10 | #readable_article = Document(html).summary() 11 | #readable_title = Document(html).short_title() 12 | #Command-line usage: 13 | 14 | #python -m readability.readability -u http://pypi.python.org/pypi/readability-lxml 15 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/writer2/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2011, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | 11 | UNCOMPRESSED = 1 12 | PALMDOC = 2 13 | HUFFDIC = 17480 14 | PALM_MAX_IMAGE_SIZE = 63 * 1024 15 | 16 | -------------------------------------------------------------------------------- /apps/View/Home.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 
4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Contributors: 6 | # rexdf 7 | 8 | from apps.BaseHandler import BaseHandler 9 | from apps.utils import etagged 10 | 11 | class Home(BaseHandler): 12 | __url__ = r"/" 13 | @etagged() 14 | def GET(self): 15 | return self.render('home.html',"Home") -------------------------------------------------------------------------------- /lib/calibre/utils/config_base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | 4 | __license__ = 'GPL v3' 5 | __copyright__ = '2011, Kovid Goyal ' 6 | __docformat__ = 'restructuredtext en' 7 | 8 | plugin_dir = "." 9 | 10 | tweaks = {"gui_timestamp_display_format":"dd MMM yyyy", 11 | "gui_pubdate_display_format":"MMM yyyy", 12 | "gui_last_modified_display_format":"dd MMM yyyy", 13 | "server_listen_on":"0.0.0.0",} #read_tweaks() 14 | 15 | 16 | -------------------------------------------------------------------------------- /i18n/en/LC_MESSAGES/lang.po: -------------------------------------------------------------------------------- 1 | # SOME DESCRIPTIVE TITLE. 2 | # Copyright (C) YEAR ORGANIZATION 3 | # Arroz , 2013. 4 | # 5 | msgid "" 6 | msgstr "" 7 | "Project-Id-Version: 1.5\n" 8 | "POT-Creation-Date: 2013-08-06 14:16+China Standard Time\n" 9 | "PO-Revision-Date: 2013-08-07 16:36+ZONE\n" 10 | "Last-Translator: Arroz \n" 11 | "Language-Team: Arroz \n" 12 | "MIME-Version: 1.0\n" 13 | "Content-Type: text/plain; charset=utf-8\n" 14 | "Content-Transfer-Encoding: utf-8\n" 15 | "Generated-By: pygettext.py 1.5\n" 16 | -------------------------------------------------------------------------------- /apps/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #中文讨论贴:http://www.hi-pda.com/forum/viewthread.php?tid=1213082 6 | #Author: 7 | # cdhigh 8 | #Contributors: 9 | # rexdf 10 | 11 | import __builtin__, sys 12 | from google.appengine.ext import vendor 13 | 14 | __Version__ = '1.23.2' 15 | 16 | __builtin__.__dict__['__Version__'] = __Version__ 17 | 18 | sys.path.insert(0, 'lib') 19 | vendor.add('lib') 20 | -------------------------------------------------------------------------------- /books/Xiaodaonews.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from weixinbase import WeixinBook 4 | 5 | def getBook(): 6 | return Xiaodaonews 7 | 8 | class Xiaodaonews(WeixinBook): 9 | title = u'微信公众号:小道消息' 10 | description = u'只有小道消息才能拯救中国互联网' 11 | language = 'zh-cn' 12 | feed_encoding = "utf-8" 13 | page_encoding = "utf-8" 14 | oldest_article = 7 15 | deliver_days = ['Friday'] 16 | feeds = [ 17 | (u'小道消息', 'http://weixin.sogou.com/gzh?openid=oIWsFt86NKeSGd_BQKp1GcDkYpv0'), 18 | ] 19 | -------------------------------------------------------------------------------- /lib/cssutils/settings.py: -------------------------------------------------------------------------------- 1 | """Experimental settings for special stuff.""" 2 | 3 | def set(key, value): 4 | """Call to enable special settings: 5 | 6 | ('DXImageTransform.Microsoft', True) 7 | enable support for parsing special MS only filter values 8 | 9 | Clears the tokenizer cache which holds the compiled productions! 
10 | """ 11 | if key == 'DXImageTransform.Microsoft' and value == True: 12 | import cssproductions 13 | import tokenize2 14 | tokenize2._TOKENIZER_CACHE.clear() 15 | cssproductions.PRODUCTIONS.insert(1, cssproductions._DXImageTransform) 16 | -------------------------------------------------------------------------------- /books/ZhihuDailyRss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from base import BaseFeedBook 4 | 5 | def getBook(): 6 | return ZhihuDaily 7 | 8 | class ZhihuDaily(BaseFeedBook): 9 | title = u'知乎日報' 10 | description = u'知乎日报全文RSS,不需要转发,排版图片正常。' 11 | language = 'zh-cn' 12 | feed_encoding = "utf-8" 13 | page_encoding = "utf-8" 14 | mastheadfile = "mh_zhihudaily.gif" 15 | coverfile = "cv_zhihudaily.jpg" 16 | oldest_article = 1 17 | feeds = [ 18 | (u'知乎日报', 'http://zhihudaily.dev.malash.net/', True) 19 | ] 20 | -------------------------------------------------------------------------------- /templates/autoback.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block js -%} 3 | 14 | {% endblock -%} 15 | {% block menubar -%} 16 | {% endblock -%} 17 | {% block content -%} 18 | {% if tips -%} 19 |

{{tips}}

20 | {% endif -%} 21 |

{{_("Auto back to previous page after 5 seconds")}}...

22 |

{{_("Click to back")}}


23 | {% endblock -%} 24 | -------------------------------------------------------------------------------- /lib/cssselect/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf8 2 | """ 3 | CSS Selectors based on XPath 4 | ============================ 5 | 6 | This module supports selecting XML/HTML elements based on CSS selectors. 7 | See the `CSSSelector` class for details. 8 | 9 | 10 | :copyright: (c) 2007-2012 Ian Bicking and contributors. 11 | See AUTHORS for more details. 12 | :license: BSD, see LICENSE for more details. 13 | 14 | """ 15 | 16 | from cssselect.parser import (parse, Selector, SelectorError, 17 | SelectorSyntaxError) 18 | from cssselect.xpath import GenericTranslator, HTMLTranslator, ExpressionError 19 | 20 | 21 | VERSION = '0.8' 22 | __version__ = VERSION 23 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/vndecoder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __license__ = 'GPL 3' 4 | __copyright__ = '2010, Hiroshi Miura ' 5 | __docformat__ = 'restructuredtext en' 6 | 7 | ''' 8 | Decode unicode text to an ASCII representation of the text in Vietnamese. 9 | 10 | ''' 11 | 12 | from calibre.ebooks.unihandecode.unidecoder import Unidecoder 13 | from calibre.ebooks.unihandecode.vncodepoints import CODEPOINTS as HANCODES 14 | from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS 15 | 16 | class Vndecoder(Unidecoder): 17 | 18 | codepoints = {} 19 | 20 | def __init__(self): 21 | self.codepoints = CODEPOINTS 22 | self.codepoints.update(HANCODES) 23 | 24 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/krdecoder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __license__ = 'GPL 3' 4 | __copyright__ = '2010, Hiroshi Miura ' 5 | __docformat__ = 'restructuredtext en' 6 | 7 | ''' 8 | Decode unicode text to an ASCII representation of the text in Korean. 9 | Based on unidecoder. 10 | 11 | ''' 12 | 13 | from calibre.ebooks.unihandecode.unidecoder import Unidecoder 14 | from calibre.ebooks.unihandecode.krcodepoints import CODEPOINTS as HANCODES 15 | from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS 16 | 17 | class Krdecoder(Unidecoder): 18 | 19 | codepoints = {} 20 | 21 | def __init__(self): 22 | self.codepoints = CODEPOINTS 23 | self.codepoints.update(HANCODES) 24 | 25 | -------------------------------------------------------------------------------- /templates/delaccount.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menubar -%} 3 | {% endblock -%} 4 | {% block content -%} 5 | {% if tips -%} 6 |

{{tips}}

7 | {% endif -%} 8 |
9 |
10 | 11 | 13 |
14 |
15 | 16 |
17 |
18 | {% endblock -%} -------------------------------------------------------------------------------- /index.yaml: -------------------------------------------------------------------------------- 1 | indexes: 2 | 3 | # AUTOGENERATED 4 | 5 | # This index.yaml is automatically updated whenever the dev_appserver 6 | # detects that a new type of query is run. If you want to manage the 7 | # index.yaml file manually, remove the above marker line (the line 8 | # saying "# AUTOGENERATED"). If you want to manage some indexes 9 | # manually, move them above the marker line. The index.yaml file is 10 | # automatically uploaded to the admin console when you next deploy 11 | # your application using appcfg.py. 12 | 13 | - kind: Book 14 | properties: 15 | - name: builtin 16 | - name: users 17 | 18 | - kind: DeliverLog 19 | properties: 20 | - name: username 21 | - name: time 22 | direction: desc 23 | 24 | - kind: Feed 25 | properties: 26 | - name: book 27 | - name: time 28 | -------------------------------------------------------------------------------- /module-worker.yaml: -------------------------------------------------------------------------------- 1 | module: worker 2 | runtime: python27 3 | api_version: 1 4 | threadsafe: true 5 | instance_class: B4 6 | basic_scaling: 7 | max_instances: 3 8 | idle_timeout: 30m 9 | 10 | libraries: 11 | - name: lxml 12 | version: "latest" 13 | 14 | - name: PIL 15 | version: "latest" 16 | 17 | - name: jinja2 18 | version: "latest" 19 | 20 | - name: pycrypto 21 | version: "latest" 22 | 23 | inbound_services: 24 | - mail 25 | 26 | handlers: 27 | - url: /favicon.ico 28 | static_files: static/favicon.ico 29 | upload: static/favicon.ico 30 | mime_type: image/x-icon 31 | 32 | - url: /static 33 | static_dir: static 34 | application_readable: true 35 | 36 | - url: /_ah/mail/.+ 37 | script: apps.handlemail.appmail 38 | 39 | - url: /.* 40 | script: apps.module_backend.app 41 | 42 | -------------------------------------------------------------------------------- /templates/advimport.html: -------------------------------------------------------------------------------- 1 | {% extends "advbase.html" %} 2 | {% block advcontent -%} 3 |
4 |
5 | {{_("Import Feeds")}} 6 |

7 | {{_("Import custom rss from a OPML file.")}} 8 |

9 |
10 | 11 |
12 | 13 | {{_("Download")}} 14 | {% if tips -%} 15 |
16 |

{{tips}}

17 | {% endif -%} 18 |
19 |
20 | {% endblock -%} -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/tweak.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | __license__ = 'GPL v3' 4 | __copyright__ = '2012, Kovid Goyal ' 5 | __docformat__ = 'restructuredtext en' 6 | 7 | class BadFormat(ValueError): 8 | pass 9 | 10 | def do_explode(path, dest): 11 | pass 12 | 13 | def explode(path, dest, question=lambda x:True): 14 | pass 15 | 16 | def set_cover(oeb): 17 | if 'cover' not in oeb.guide or oeb.metadata['cover']: 18 | return 19 | cover = oeb.guide['cover'] 20 | if cover.href in oeb.manifest.hrefs: 21 | item = oeb.manifest.hrefs[cover.href] 22 | oeb.metadata.clear('cover') 23 | oeb.metadata.add('cover', item.id) 24 | 25 | def do_rebuild(opf, dest_path): 26 | pass 27 | 28 | def rebuild(src_dir, dest_path): 29 | pass 30 | -------------------------------------------------------------------------------- /books/FTChinese.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from base import BaseFeedBook 4 | 5 | def getBook(): 6 | return FTChinese 7 | 8 | class FTChinese(BaseFeedBook): 9 | title = u'FT中文网' 10 | description = u'英国《金融时报》集团旗下唯一的中文商业财经网站。' 11 | language = 'zh-cn' 12 | feed_encoding = "utf-8" 13 | page_encoding = "utf-8" 14 | mastheadfile = "mh_ftchinese.gif" 15 | coverfile = "cv_ftchinese.jpg" 16 | oldest_article = 1 17 | 18 | feeds = [ 19 | (u'每日新闻', 'http://www.ftchinese.com/rss/feed'), 20 | ] 21 | 22 | def fetcharticle(self, url, opener, decoder): 23 | #每个URL都增加一个后缀full=y,如果有分页则自动获取全部分页 24 | url += '?full=y' 25 | return BaseFeedBook.fetcharticle(self,url,opener,decoder) 26 | -------------------------------------------------------------------------------- /lib/web/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """web.py: makes web apps (http://webpy.org)""" 3 | 4 | from __future__ import generators 5 | 6 | __version__ = "0.37" 7 | __author__ = [ 8 | "Aaron Swartz ", 9 | "Anand Chitipothu " 10 | ] 11 | __license__ = "public domain" 12 | __contributors__ = "see http://webpy.org/changes" 13 | 14 | import utils, db, net, wsgi, http, webapi, httpserver, debugerror 15 | import template, form 16 | 17 | import session 18 | 19 | from utils import * 20 | from db import * 21 | from net import * 22 | from wsgi import * 23 | from http import * 24 | from webapi import * 25 | from httpserver import * 26 | from debugerror import * 27 | from application import * 28 | from browser import * 29 | try: 30 | import webopenid as openid 31 | except ImportError: 32 | pass # requires openid module 33 | 34 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/writer8/cleanup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | from calibre.ebooks.oeb.base import XPath 11 | 12 | class CSSCleanup(object): 13 | 14 | def __init__(self, log, opts): 15 | self.log, self.opts = log, opts 16 | 17 | def __call__(self, item, stylizer): 18 | if 
not hasattr(item.data, 'xpath'): return 19 | 20 | # The Kindle touch displays all black pages if the height is set on 21 | # body 22 | for body in XPath('//h:body')(item.data): 23 | style = stylizer.style(body) 24 | style.drop('height') 25 | 26 | -------------------------------------------------------------------------------- /app.yaml: -------------------------------------------------------------------------------- 1 | runtime: python27 2 | api_version: 1 3 | threadsafe: true 4 | instance_class: F1 5 | #automatic_scaling: 6 | # min_idle_instances: 1 7 | # max_idle_instances: automatic # default value 8 | # min_pending_latency: automatic # default value 9 | # max_pending_latency: 30ms 10 | # max_concurrent_requests: 50 11 | 12 | libraries: 13 | - name: lxml 14 | version: "latest" 15 | 16 | - name: PIL 17 | version: "latest" 18 | 19 | - name: jinja2 20 | version: "latest" 21 | 22 | - name: pycrypto 23 | version: "latest" 24 | 25 | inbound_services: 26 | - mail 27 | 28 | handlers: 29 | - url: /favicon.ico 30 | static_files: static/favicon.ico 31 | upload: static/favicon.ico 32 | mime_type: image/x-icon 33 | 34 | - url: /static 35 | static_dir: static 36 | application_readable: true 37 | 38 | - url: /_ah/mail/.+ 39 | script: apps.handlemail.appmail 40 | 41 | - url: /.* 42 | script: apps.module_front.app 43 | 44 | -------------------------------------------------------------------------------- /lib/calibre/utils/resources.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import with_statement 4 | 5 | __license__ = 'GPL v3' 6 | __copyright__ = '2009, Kovid Goyal ' 7 | __docformat__ = 'restructuredtext en' 8 | 9 | 10 | import __builtin__ 11 | 12 | from calibre import config_dir 13 | 14 | #_resolver = PathResolver() 15 | 16 | def get_path(path, data=False, allow_user_override=True): 17 | #fpath = _resolver(path, allow_user_override=allow_user_override) 18 | fpath = path 19 | if data: 20 | with open(fpath, 'rb') as f: 21 | return f.read() 22 | return fpath 23 | 24 | def get_image_path(path, data=False, allow_user_override=True): 25 | if not path: 26 | return get_path('images') 27 | return get_path('images/'+path, data=data) 28 | 29 | __builtin__.__dict__['P'] = get_path 30 | __builtin__.__dict__['I'] = get_image_path 31 | -------------------------------------------------------------------------------- /lib/readability/debug.py: -------------------------------------------------------------------------------- 1 | def save_to_file(text, filename): 2 | f = open(filename, 'wt') 3 | f.write('') 4 | f.write(text.encode('utf-8')) 5 | f.close() 6 | 7 | uids = {} 8 | def describe(node, depth=2): 9 | if not hasattr(node, 'tag'): 10 | return "[%s]" % type(node) 11 | name = node.tag 12 | if node.get('id', ''): name += '#'+node.get('id') 13 | if node.get('class', ''): 14 | name += '.' 
+ node.get('class').replace(' ','.') 15 | if name[:4] in ['div#', 'div.']: 16 | name = name[3:] 17 | if name in ['tr', 'td', 'div', 'p']: 18 | if not node in uids: 19 | uid = uids[node] = len(uids)+1 20 | else: 21 | uid = uids.get(node) 22 | name += "%02d" % (uid) 23 | if depth and node.getparent() is not None: 24 | return name+' - '+describe(node.getparent(), depth-1) 25 | return name 26 | -------------------------------------------------------------------------------- /books/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | import os 5 | 6 | _booksclasses = [] 7 | def RegisterBook(book): 8 | if book.title: 9 | _booksclasses.append(book) 10 | 11 | def BookClasses(): 12 | return _booksclasses 13 | 14 | def BookClass(title): 15 | for bk in _booksclasses: 16 | if bk.title == title: 17 | return bk 18 | return None 19 | 20 | #def LoadBooks(): 21 | for bkfile in os.listdir(os.path.dirname(__file__)): 22 | if bkfile.endswith('.py') and not bkfile.startswith('__') and not bkfile.endswith("base.py"): 23 | bookname = os.path.splitext(bkfile)[0] 24 | try: 25 | mbook = __import__("books." + bookname, fromlist='*') 26 | bk = mbook.getBook() 27 | #globals()[bk.__name__] = getattr(bk, bk.__name__) 28 | RegisterBook(bk) 29 | except Exception as e: 30 | default_log.warn("Book '%s' import failed : %s" % (bookname,e)) 31 | 32 | #LoadBooks() 33 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/conversion/__init__.py: -------------------------------------------------------------------------------- 1 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 2 | from __future__ import (unicode_literals, division, absolute_import, 3 | print_function) 4 | 5 | __license__ = 'GPL v3' 6 | __copyright__ = '2011, Kovid Goyal ' 7 | __docformat__ = 'restructuredtext en' 8 | 9 | 10 | class ConversionUserFeedBack(Exception): 11 | 12 | def __init__(self, title, msg, level='info', det_msg=''): 13 | ''' Show a simple message to the user 14 | 15 | :param title: The title (very short description) 16 | :param msg: The message to show the user 17 | :param level: Must be one of 'info', 'warn' or 'error' 18 | :param det_msg: Optional detailed message to show the user 19 | ''' 20 | import json 21 | Exception.__init__(self, json.dumps({'msg':msg, 'level':level, 22 | 'det_msg':det_msg, 'title':title})) 23 | self.title, self.msg, self.det_msg = title, msg, det_msg 24 | self.level = level 25 | 26 | -------------------------------------------------------------------------------- /books/Dapenti.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from base import BaseFeedBook 4 | 5 | def getBook(): 6 | return Dapenti 7 | 8 | class Dapenti(BaseFeedBook): 9 | title = u'喷嚏图卦' 10 | description = u'每天一图卦,让我们更清楚地了解这个世界' 11 | language = 'zh-cn' 12 | feed_encoding = "utf-8" 13 | page_encoding = "utf-8" 14 | max_articles_per_feed = 1 15 | oldest_article = 2 16 | mastheadfile = "mh_dapenti.gif" 17 | coverfile = "cv_dapenti.jpg" 18 | network_timeout = 60 19 | fetch_img_via_ssl = False 20 | feeds = [ 21 | (u'喷嚏图卦', 'http://tugua.daoapp.io/rss.xml', True), 22 | ] 23 | 24 | def soupbeforeimage(self, soup): 25 | #更换另一个图库,因为RSS中的图库已经被封 26 | for img in soup.find_all('img', attrs={'src':True}): 27 | if img['src'].startswith('http://ptimg.org:88'): 28 | img['src'] = 
img['src'].replace('http://ptimg.org:88','http://pic.yupoo.com') 29 | 30 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/tweak.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | class Error(ValueError): 11 | pass 12 | 13 | def ask_cli_question(msg): 14 | pass 15 | 16 | def mobi_exploder(path, tdir, question=lambda x:True): 17 | pass 18 | 19 | def zip_exploder(path, tdir, question=lambda x:True): 20 | pass 21 | 22 | def zip_rebuilder(tdir, path): 23 | pass 24 | 25 | def get_tools(fmt): 26 | fmt = fmt.lower() 27 | 28 | if fmt in {'mobi', 'azw', 'azw3'}: 29 | from calibre.ebooks.mobi.tweak import rebuild 30 | ans = mobi_exploder, rebuild 31 | elif fmt in {'epub', 'htmlz'}: 32 | ans = zip_exploder, zip_rebuilder 33 | else: 34 | ans = None, None 35 | 36 | return ans 37 | 38 | def tweak(ebook_file): 39 | pass 40 | 41 | -------------------------------------------------------------------------------- /lib/calibre/constants.py: -------------------------------------------------------------------------------- 1 | __license__ = 'GPL v3' 2 | __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' 3 | __docformat__ = 'restructuredtext en' 4 | __appname__ = u'calibre' 5 | numeric_version = (1, 0, 0) 6 | __version__ = u'.'.join(map(unicode, numeric_version)) 7 | __author__ = u"Kovid Goyal " 8 | 9 | ''' 10 | Various run time constants. 11 | ''' 12 | 13 | import sys, codecs 14 | 15 | iswindows = False 16 | ispy3 = False 17 | 18 | #try: 19 | # preferred_encoding = locale.getpreferredencoding() 20 | # codecs.lookup(preferred_encoding) 21 | #except: 22 | preferred_encoding = 'utf-8' 23 | 24 | winerror = None 25 | _osx_ver = None 26 | 27 | filesystem_encoding = sys.getfilesystemencoding() 28 | if filesystem_encoding is None: 29 | filesystem_encoding = 'utf-8' 30 | else: 31 | try: 32 | if codecs.lookup(filesystem_encoding).name == 'ascii': 33 | filesystem_encoding = 'utf-8' 34 | except: 35 | filesystem_encoding = 'utf-8' 36 | 37 | DEBUG = False 38 | plugins = None 39 | CONFIG_DIR_MODE = 0700 40 | config_dir = "" 41 | -------------------------------------------------------------------------------- /lib/calibre/startup.py: -------------------------------------------------------------------------------- 1 | __license__ = 'GPL v3' 2 | __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' 3 | __docformat__ = 'restructuredtext en' 4 | 5 | ''' 6 | Perform various initialization tasks. 
7 | ''' 8 | 9 | import locale, sys, os, re 10 | 11 | # Default translation is NOOP 12 | import __builtin__ 13 | __builtin__.__dict__['_'] = lambda s: s 14 | 15 | # For strings which belong in the translation tables, but which shouldn't be 16 | # immediately translated to the environment language 17 | __builtin__.__dict__['__'] = lambda s: s 18 | __builtin__.__dict__['P'] = lambda s: s 19 | __builtin__.__dict__['I'] = lambda s: s 20 | __builtin__.__dict__['lopen'] = open 21 | __builtin__.__dict__['icu_lower'] = lambda x:x.lower() 22 | __builtin__.__dict__['icu_upper'] = lambda x:x.upper() 23 | __builtin__.__dict__['icu_title'] = lambda x:x.capitalize() 24 | __builtin__.__dict__['dynamic_property'] = lambda func: func(None) 25 | 26 | from calibre.constants import * 27 | 28 | _run_once = False 29 | winutil = winutilerror = None 30 | _base_dir = "." 31 | 32 | if not _run_once: 33 | _run_once = True 34 | 35 | -------------------------------------------------------------------------------- /lib/bs4/tests/test_htmlparser.py: -------------------------------------------------------------------------------- 1 | """Tests to ensure that the html.parser tree builder generates good 2 | trees.""" 3 | 4 | from pdb import set_trace 5 | import pickle 6 | from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest 7 | from bs4.builder import HTMLParserTreeBuilder 8 | 9 | class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): 10 | 11 | @property 12 | def default_builder(self): 13 | return HTMLParserTreeBuilder() 14 | 15 | def test_namespaced_system_doctype(self): 16 | # html.parser can't handle namespaced doctypes, so skip this one. 17 | pass 18 | 19 | def test_namespaced_public_doctype(self): 20 | # html.parser can't handle namespaced doctypes, so skip this one. 21 | pass 22 | 23 | def test_builder_is_pickled(self): 24 | """Unlike most tree builders, HTMLParserTreeBuilder and will 25 | be restored after pickling. 26 | """ 27 | tree = self.soup("foo") 28 | dumped = pickle.dumps(tree, 2) 29 | loaded = pickle.loads(dumped) 30 | self.assertTrue(isinstance(loaded.builder, type(tree.builder))) 31 | 32 | 33 | -------------------------------------------------------------------------------- /templates/adminmgrpwd.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menubar -%} 3 | {% endblock -%} 4 | {% block content -%} 5 | {% if tips -%} 6 |

{{tips}}

7 | {% endif -%} 8 |
9 |
10 | 11 | 13 |
14 |
15 | 16 | 17 |
18 |
19 | 20 | 21 |
22 |
23 | 24 |
25 |
26 | {% endblock -%} -------------------------------------------------------------------------------- /templates/advwhitelist.html: -------------------------------------------------------------------------------- 1 | {% extends "advbase.html" %} 2 | {% block advcontent -%} 3 |
4 |
5 | {{_("White List")}} 6 |

7 | {{_("mail sent to %sxxx@appid.appspotmail.com will be transfered to your email.")|format('' if user.name=="admin" else user.name+"__")}} 8 |

9 | {% if user.whitelist.count() -%} 10 | 11 | {% for lst in user.whitelist -%} 12 | 13 | 14 | 15 | 16 | {% endfor -%} 17 |
{{lst.mail}}{{_("Delete")}}
18 | {% endif -%} 19 |
20 | 21 |
22 | 23 |
24 |
25 | {% endblock -%} -------------------------------------------------------------------------------- /templates/advurlfilter.html: -------------------------------------------------------------------------------- 1 | {% extends "advbase.html" %} 2 | {% block advcontent -%} 3 |
4 |
5 | {{_("Url Filter")}} 6 |

7 | {{_("Urls in list would not be downloaded.")}} 8 |

9 | {% if user.urlfilter.count() -%} 10 | 11 | {% for urlflt in user.urlfilter -%} 12 | 13 | 16 | 17 | 18 | {% endfor -%} 19 |
14 | {%if urlflt.url|length>70%}{{urlflt.url[:70]}}...{{urlflt.url}}{%else%}{{urlflt.url}}{%endif%} 15 | {{_("Delete")}}
20 | {% endif -%} 21 |
22 | 23 |
24 | 25 |
26 |
27 | {% endblock -%} -------------------------------------------------------------------------------- /lib/bs4/tests/test_docs.py: -------------------------------------------------------------------------------- 1 | "Test harness for doctests." 2 | 3 | # pylint: disable-msg=E0611,W0142 4 | 5 | __metaclass__ = type 6 | __all__ = [ 7 | 'additional_tests', 8 | ] 9 | 10 | import atexit 11 | import doctest 12 | import os 13 | #from pkg_resources import ( 14 | # resource_filename, resource_exists, resource_listdir, cleanup_resources) 15 | import unittest 16 | 17 | DOCTEST_FLAGS = ( 18 | doctest.ELLIPSIS | 19 | doctest.NORMALIZE_WHITESPACE | 20 | doctest.REPORT_NDIFF) 21 | 22 | 23 | # def additional_tests(): 24 | # "Run the doc tests (README.txt and docs/*, if any exist)" 25 | # doctest_files = [ 26 | # os.path.abspath(resource_filename('bs4', 'README.txt'))] 27 | # if resource_exists('bs4', 'docs'): 28 | # for name in resource_listdir('bs4', 'docs'): 29 | # if name.endswith('.txt'): 30 | # doctest_files.append( 31 | # os.path.abspath( 32 | # resource_filename('bs4', 'docs/%s' % name))) 33 | # kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) 34 | # atexit.register(cleanup_resources) 35 | # return unittest.TestSuite(( 36 | # doctest.DocFileSuite(*doctest_files, **kwargs))) 37 | -------------------------------------------------------------------------------- /lib/memcachestore.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | """GAE中不能直接使用WEB.PY的session,使用此Store代替dbstore""" 4 | from web.session import Store 5 | from google.appengine.api import memcache 6 | import web 7 | import time 8 | 9 | class MemcacheStore(Store): 10 | def __init__(self, memcache): 11 | self.memcache = memcache 12 | 13 | def __contains__(self, key): 14 | data = self.memcache.get(key) 15 | return bool(data) 16 | 17 | def __getitem__(self, key): 18 | now = time.time() 19 | value = self.memcache.get(key) 20 | if not value: 21 | raise KeyError 22 | else: 23 | value['attime'] = now 24 | self.memcache.replace(key,value) 25 | return value 26 | 27 | def __setitem__(self, key, value): 28 | now = time.time() 29 | value['attime'] = now 30 | s = self.memcache.get(key) 31 | if s: 32 | self.memcache.replace(key,value) 33 | else: 34 | self.memcache.add(key,value,web.config.session_parameters['timeout']) 35 | 36 | def __delitem__(self, key): 37 | self.memcache.delete(key) 38 | 39 | def cleanup(self, timeout): 40 | pass 41 | -------------------------------------------------------------------------------- /lib/cssutils/stylesheets/stylesheetlist.py: -------------------------------------------------------------------------------- 1 | """StyleSheetList implements DOM Level 2 Style Sheets StyleSheetList.""" 2 | __all__ = ['StyleSheetList'] 3 | __docformat__ = 'restructuredtext' 4 | __version__ = '$Id$' 5 | 6 | class StyleSheetList(list): 7 | """Interface `StyleSheetList` (introduced in DOM Level 2) 8 | 9 | The `StyleSheetList` interface provides the abstraction of an ordered 10 | collection of :class:`~cssutils.stylesheets.StyleSheet` objects. 11 | 12 | The items in the `StyleSheetList` are accessible via an integral index, 13 | starting from 0. 14 | 15 | This Python implementation is based on a standard Python list so e.g. 16 | allows ``examplelist[index]`` usage. 17 | """ 18 | def item(self, index): 19 | """ 20 | Used to retrieve a style sheet by ordinal `index`. 
If `index` is 21 | greater than or equal to the number of style sheets in the list, 22 | this returns ``None``. 23 | """ 24 | try: 25 | return self[index] 26 | except IndexError: 27 | return None 28 | 29 | length = property(lambda self: len(self), 30 | doc="The number of :class:`StyleSheet` objects in the list. The range" 31 | " of valid child stylesheet indices is 0 to length-1 inclusive.") 32 | 33 | -------------------------------------------------------------------------------- /lib/chardet/compat.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # Contributor(s): 3 | # Ian Cordasco - port to Python 4 | # 5 | # This library is free software; you can redistribute it and/or 6 | # modify it under the terms of the GNU Lesser General Public 7 | # License as published by the Free Software Foundation; either 8 | # version 2.1 of the License, or (at your option) any later version. 9 | # 10 | # This library is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | # Lesser General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU Lesser General Public 16 | # License along with this library; if not, write to the Free Software 17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 18 | # 02110-1301 USA 19 | ######################### END LICENSE BLOCK ######################### 20 | 21 | import sys 22 | 23 | 24 | if sys.version_info < (3, 0): 25 | base_str = (str, unicode) 26 | else: 27 | base_str = (bytes, str) 28 | 29 | 30 | def wrap_ord(a): 31 | if sys.version_info < (3, 0) and isinstance(a, base_str): 32 | return ord(a) 33 | else: 34 | return a 35 | -------------------------------------------------------------------------------- /lib/calibre/utils/mreplace.py: -------------------------------------------------------------------------------- 1 | #multiple replace from dictionnary : http://code.activestate.com/recipes/81330/ 2 | __license__ = 'GPL v3' 3 | __copyright__ = '2010, sengian ' 4 | __docformat__ = 'restructuredtext en' 5 | 6 | import re 7 | from UserDict import UserDict 8 | 9 | class MReplace(UserDict): 10 | 11 | def __init__(self, data=None, case_sensitive=True): 12 | UserDict.__init__(self, data) 13 | self.re = None 14 | self.regex = None 15 | self.case_sensitive = case_sensitive 16 | self.compile_regex() 17 | 18 | def compile_regex(self): 19 | if len(self.data) > 0: 20 | keys = sorted(self.data.keys(), key=len, reverse=True) 21 | tmp = "(%s)" % "|".join(map(re.escape, keys)) 22 | if self.re != tmp: 23 | self.re = tmp 24 | if self.case_sensitive: 25 | self.regex = re.compile(self.re) 26 | else: 27 | self.regex = re.compile(self.re, re.I) 28 | 29 | def __call__(self, mo): 30 | return self[mo.string[mo.start():mo.end()]] 31 | 32 | def mreplace(self, text): 33 | #Replace without regex compile 34 | if len(self.data) < 1 or self.re is None: 35 | return text 36 | return self.regex.sub(self, text) 37 | 38 | -------------------------------------------------------------------------------- /books/Readme.txt: -------------------------------------------------------------------------------- 1 | 1. 
概述 2 | 此应用根目录下的books目录存放自定义RSS设置,每个文件为一本"书",对应推送到kindle的一本书。 3 | 应用启动后会自动读取此目录下的所有py文件,动态导入,并显示在网页“我的订阅”下,可以选择是否推送。 4 | books目录下的文件除了__init__.py和base.py,其他的文件都可以随意删除,如果你不需要的话。 5 | 在books目录下删除的“书籍”会在一天内从数据库中清除。 6 | 7 | 2. py文件格式 8 | ★py文件建议为UTF-8格式,特别是里面有中文的话。 9 | 所以每个py文件的头一行建议为: 10 | # -*- coding:utf-8 -*- 11 | 或者: 12 | #!/usr/bin/env python 13 | # -*- coding:utf-8 -*- 14 | 15 | ★每个py文件都要实现一个函数getBook(),返回书籍实际定义的"类"对象: 16 | def getBook(): 17 | return Qiushibaike 18 | 19 | ★每本书为一个类,必须实现的接口只有一个: 20 | Items(self, opts=None) 21 | 它是一个生成器或者返回一个迭代器。 22 | 每次返回一个元组: 23 | HTML元组:(节标题, URL, 文章标题, 文章内容,文章摘要) - 文章内容为字符串 24 | 图片元组:(图片MIME, URL, 图片文件名, 图片内容,None) -图片内容为字节串 25 | 其中图片MIME为:image/jpeg, image/gif 等 26 | 27 | ★上面已经说完了书籍定义的一切,所以如果你精通python,就可以自己写自己的书籍类了。 28 | 29 | ★不过如果你偷懒,也可以继承base模块中定义的两个书籍模板之一来定制自己的书籍类。 30 | 下一节介绍如何定制。 31 | 32 | 3. 书籍类定制方法 33 | 写过或看过calibre的recipe的基本上就直接会了。 34 | 因为calibre的recipe模块依赖挺多的,我时间不够,偷懒了,就不移植了,直接根据 35 | recipe的外形写了一个处理模块。 36 | ★根据RSS类型,从base模块中导入不同的书籍基类 37 | from base import BaseFeedBook/WebpageBook 38 | 如果你感兴趣的网站不提供RSS订阅,则可以继承WebpageBook直接连接网页提取信息。 39 | ★子类能定制的参数都在BaseFeedBook类的定义中,注释很详细。 40 | ★处理HTML的BeautifulSoup为4.x版本。 41 | 42 | 在此贴子里有更详细的说明:http://www.hi-pda.com/forum/viewthread.php?tid=1248204 43 | -------------------------------------------------------------------------------- /lib/readability/cleaners.py: -------------------------------------------------------------------------------- 1 | # strip out a set of nuisance html attributes that can mess up rendering in RSS feeds 2 | import re 3 | from lxml.html.clean import Cleaner 4 | 5 | bad_attrs = ['width', 'height', 'style', '[-a-z]*color', 'background[-a-z]*', 'on*'] 6 | single_quoted = "'[^']+'" 7 | double_quoted = '"[^"]+"' 8 | non_space = '[^ "\'>]+' 9 | htmlstrip = re.compile("<" # open 10 | "([^>]+) " # prefix 11 | "(?:%s) *" % ('|'.join(bad_attrs),) + # undesirable attributes 12 | '= *(?:%s|%s|%s)' % (non_space, single_quoted, double_quoted) + # value 13 | "([^>]*)" # postfix 14 | ">" # end 15 | , re.I) 16 | 17 | def clean_attributes(html): 18 | while htmlstrip.search(html): 19 | html = htmlstrip.sub('<\\1\\2>', html) 20 | return html 21 | 22 | def normalize_spaces(s): 23 | if not s: return '' 24 | """replace any sequence of whitespace 25 | characters with a single space""" 26 | return ' '.join(s.split()) 27 | 28 | html_cleaner = Cleaner(scripts=True, javascript=True, comments=True, 29 | style=True, links=True, meta=False, add_nofollow=False, 30 | page_structure=False, processing_instructions=True, embedded=False, 31 | frames=False, forms=False, annoying_tags=False, remove_tags=None, 32 | remove_unknown_tags=False, safe_attrs_only=False) 33 | -------------------------------------------------------------------------------- /lib/chardet/__init__.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 | # Lesser General Public License for more details. 
11 | # 12 | # You should have received a copy of the GNU Lesser General Public 13 | # License along with this library; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 15 | # 02110-1301 USA 16 | ######################### END LICENSE BLOCK ######################### 17 | 18 | __version__ = "2.3.0" 19 | from sys import version_info 20 | 21 | 22 | def detect(aBuf): 23 | if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or 24 | (version_info >= (3, 0) and not isinstance(aBuf, bytes))): 25 | raise ValueError('Expected a bytes object, not a unicode object') 26 | 27 | from . import universaldetector 28 | u = universaldetector.UniversalDetector() 29 | u.reset() 30 | u.feed(aBuf) 31 | u.close() 32 | return u.result 33 | -------------------------------------------------------------------------------- /apps/View/DbViewer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Contributors: 6 | # rexdf 7 | 8 | import web 9 | 10 | from apps.BaseHandler import BaseHandler 11 | from apps.dbModels import * 12 | 13 | from lib.autodecoder import UrlEncoding 14 | 15 | class DbViewer(BaseHandler): 16 | __url__ = "/dbviewer" 17 | def GET(self): 18 | self.login_required('admin') 19 | #可以修改UrlEncoding,如果chardet自动检测的编码错误的话 20 | action = web.input().get('action') 21 | if action == 'modurlenc': 22 | id = int(web.input().get('id', 0)) 23 | feedenc = web.input().get('feedenc') 24 | pageenc = web.input().get('pageenc') 25 | urlenc = UrlEncoding.get_by_id(id) 26 | if urlenc: 27 | if feedenc: urlenc.feedenc = feedenc 28 | if pageenc: urlenc.pageenc = pageenc 29 | urlenc.put() 30 | elif action == 'delurlenc': 31 | id = int(web.input().get('id', 0)) 32 | urlenc = UrlEncoding.get_by_id(id) 33 | if urlenc: 34 | urlenc.delete() 35 | return self.render('dbviewer.html', "DbViewer", 36 | books=Book.all(),users=KeUser.all(), 37 | feeds=Feed.all().order('book'),urlencs=UrlEncoding.all()) -------------------------------------------------------------------------------- /lib/web/python23.py: -------------------------------------------------------------------------------- 1 | """Python 2.3 compatabilty""" 2 | import threading 3 | 4 | class threadlocal(object): 5 | """Implementation of threading.local for python2.3. 6 | """ 7 | def __getattribute__(self, name): 8 | if name == "__dict__": 9 | return threadlocal._getd(self) 10 | else: 11 | try: 12 | return object.__getattribute__(self, name) 13 | except AttributeError: 14 | try: 15 | return self.__dict__[name] 16 | except KeyError: 17 | raise AttributeError, name 18 | 19 | def __setattr__(self, name, value): 20 | self.__dict__[name] = value 21 | 22 | def __delattr__(self, name): 23 | try: 24 | del self.__dict__[name] 25 | except KeyError: 26 | raise AttributeError, name 27 | 28 | def _getd(self): 29 | t = threading.currentThread() 30 | if not hasattr(t, '_d'): 31 | # using __dict__ of thread as thread local storage 32 | t._d = {} 33 | 34 | _id = id(self) 35 | # there could be multiple instances of threadlocal. 
36 | # use id(self) as key 37 | if _id not in t._d: 38 | t._d[_id] = {} 39 | return t._d[_id] 40 | 41 | if __name__ == '__main__': 42 | d = threadlocal() 43 | d.x = 1 44 | print d.__dict__ 45 | print d.x 46 | -------------------------------------------------------------------------------- /templates/booklogininfo.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block menubar -%} 3 | {% endblock -%} 4 | {% block content -%} 5 |
6 |
7 | {{_("Input website login info for book '%s'")|format('' + bk.title + '')|safe}} 8 |
9 | 10 | 12 |
13 |
14 | 15 | 17 |
18 | 19 | {% if tips -%} 20 |

{{tips}}

21 | {% else -%} 22 |

{{_("Leave any field empty to delete info from database.")}}

23 | {% endif -%} 24 |
25 | 26 |
27 |
28 |
29 | {% endblock -%} -------------------------------------------------------------------------------- /templates/login.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block css -%} 3 | 15 | {% endblock -%} 16 | {% block bodytag -%} 17 | 18 | {% endblock -%} 19 | {% block header_loginfo -%} 20 | {% endblock %} 21 | {% block menubar -%} 22 | {% endblock -%} 23 | {% block content -%} 24 |
25 | {% if tips -%} 26 |

{{tips}}

27 | {% endif -%} 28 |
29 |
30 | 31 | 33 |
34 |
35 | 36 | 37 |
38 |
39 | 40 |
41 |
42 |
43 |

{{_("The website dont allow register, you can ask owner for a account.")}}

44 | {% endblock -%} -------------------------------------------------------------------------------- /lib/chardet/constants.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | _debug = 0 30 | 31 | eDetecting = 0 32 | eFoundIt = 1 33 | eNotMe = 2 34 | 35 | eStart = 0 36 | eError = 1 37 | eItsMe = 2 38 | 39 | SHORTCUT_THRESHOLD = 0.95 40 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/jadecoder.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | __license__ = 'GPL 3' 3 | __copyright__ = '2010, Hiroshi Miura ' 4 | __docformat__ = 'restructuredtext en' 5 | 6 | ''' 7 | Decode unicode text to an ASCII representation of the text for Japanese. 8 | Translate unicode string to ASCII roman string. 9 | 10 | API is based on the python unidecode, 11 | which is based on Ruby gem (http://rubyforge.org/projects/unidecode/) 12 | and perl module Text::Unidecode 13 | (http://search.cpan.org/~sburke/Text-Unidecode-0.04/). 14 | 15 | This functionality is owned by Kakasi Japanese processing engine. 
16 | 17 | Copyright (c) 2010 Hiroshi Miura 18 | ''' 19 | 20 | import re 21 | from calibre.ebooks.unihandecode.unidecoder import Unidecoder 22 | from calibre.ebooks.unihandecode.unicodepoints import CODEPOINTS 23 | from calibre.ebooks.unihandecode.jacodepoints import CODEPOINTS as JACODES 24 | from calibre.ebooks.unihandecode.pykakasi.kakasi import kakasi 25 | 26 | class Jadecoder(Unidecoder): 27 | kakasi = None 28 | codepoints = {} 29 | 30 | def __init__(self): 31 | self.codepoints = CODEPOINTS 32 | self.codepoints.update(JACODES) 33 | self.kakasi = kakasi() 34 | 35 | def decode(self, text): 36 | try: 37 | result=self.kakasi.do(text) 38 | return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),result) 39 | except: 40 | return re.sub('[^\x00-\x7f]', lambda x: self.replace_point(x.group()),text) 41 | 42 | -------------------------------------------------------------------------------- /apps/Work/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | __all__ = [] 5 | 6 | import pkgutil 7 | import inspect 8 | 9 | #import main 10 | 11 | #Load all class with __url__ attribute in the directory 12 | 13 | for loader, name, is_pkg in pkgutil.walk_packages(__path__): 14 | module = loader.find_module(name).load_module(name) 15 | 16 | for name, value in inspect.getmembers(module): 17 | if name.startswith('__') or not inspect.isclass(value): 18 | continue 19 | url=getattr(value,'__url__',None) 20 | if not url: 21 | continue 22 | globals()[name] = value 23 | __all__.append(name) 24 | #main.log.info('debug: %s loaded'%name) 25 | 26 | try: 27 | main.urls += [url,name] 28 | except AttributeError: 29 | main.urls = [] 30 | main.log.info('First: %s loaded'%name) 31 | main.urls += [url,name] 32 | 33 | '''import os 34 | 35 | #def LoadWorker(): 36 | for works in os.listdir(os.path.dirname(__file__)): 37 | if works.endswith('.py') and not works.startswith('__'): 38 | workname = os.path.splitext(works)[0] 39 | try: 40 | mwork = __import__("apps.Work." + workname, fromlist='*') 41 | #bk = mbook.getBook() 42 | #globals()[bk.__name__] = getattr(bk, bk.__name__) 43 | #RegisterBook(bk) 44 | except Exception as e: 45 | default_log.warn("Worker '%s' import failed : %s" % (workname,e)) 46 | 47 | #LoadWorker()''' -------------------------------------------------------------------------------- /books/Lifeweek.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from base import BaseFeedBook 4 | import re 5 | 6 | def getBook(): 7 | return Lifeweek 8 | 9 | class Lifeweek(BaseFeedBook): 10 | title = u'三联生活周刊' 11 | description = u'秉承"倡导品质生活"的理念,提供优质新媒体内容与服务。每周六推送' 12 | language = 'zh-cn' 13 | feed_encoding = "utf-8" 14 | page_encoding = "utf-8" 15 | mastheadfile = "mh_lifeweek.gif" 16 | coverfile = "cv_lifeweek.jpg" 17 | oldest_article = 0 18 | deliver_days = ['Saturday'] 19 | 20 | feeds = [ 21 | (u'三联生活网', 'http://app.lifeweek.com.cn/?app=rss&controller=index&action=feed'), 22 | ] 23 | 24 | def processtitle(self, title): 25 | return title[:-6] if title.endswith(u'_三联生活网') else title 26 | 27 | def preprocess(self, content): 28 | #当文章有分页时,去除重复的首页 29 | 30 | #去除脚注,保留版权声明 31 | re_footer = re.compile(r'
]*>.*
') 32 | article = re_footer.sub('', content) 33 | 34 | #为了统一,去除“网络编辑“ 35 | re_editor = re.compile(r'

]*>.*

') 36 | article = re_editor.sub('', article) 37 | 38 | re_mce = re.compile(r'_mcePaste') 39 | if re_mce.search(content) is not None: 40 | #文章有分页,只处理一层嵌套 41 | re_first_page = re.compile(r']*>[^<>]*(<[^<>]*>[^<>]*]*>|<[^<>]*[/]>){,3}[^<>]*

') 42 | article = re_first_page.sub('', article) 43 | 44 | return article 45 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/pykakasi/jisyo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # jisyo.py 3 | # 4 | # Copyright 2011 Hiroshi Miura 5 | import cPickle, marshal 6 | from zlib import decompress 7 | 8 | class jisyo (object): 9 | kanwadict = None 10 | itaijidict = None 11 | kanadict = None 12 | jisyo_table = {} 13 | 14 | # this class is Borg 15 | _shared_state = {} 16 | 17 | def __new__(cls, *p, **k): 18 | self = object.__new__(cls, *p, **k) 19 | self.__dict__ = cls._shared_state 20 | return self 21 | 22 | def __init__(self): 23 | if self.kanwadict is None: 24 | self.kanwadict = cPickle.loads( 25 | P('localization/pykakasi/kanwadict2.pickle', data=True)) 26 | if self.itaijidict is None: 27 | self.itaijidict = cPickle.loads( 28 | P('localization/pykakasi/itaijidict2.pickle', data=True)) 29 | if self.kanadict is None: 30 | self.kanadict = cPickle.loads( 31 | P('localization/pykakasi/kanadict2.pickle', data=True)) 32 | 33 | def load_jisyo(self, char): 34 | try:#python2 35 | key = "%04x"%ord(unicode(char)) 36 | except:#python3 37 | key = "%04x"%ord(char) 38 | 39 | try: #already exist? 40 | table = self.jisyo_table[key] 41 | except: 42 | try: 43 | table = self.jisyo_table[key] = marshal.loads(decompress(self.kanwadict[key])) 44 | except: 45 | return None 46 | return table 47 | 48 | -------------------------------------------------------------------------------- /apps/View/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | __all__ = [] 5 | 6 | import pkgutil 7 | import inspect 8 | 9 | #import main 10 | 11 | #Load all class with __url__ attribute in the directory 12 | 13 | for loader, name, is_pkg in pkgutil.walk_packages(__path__): 14 | module = loader.find_module(name).load_module(name) 15 | 16 | for name, value in inspect.getmembers(module): 17 | if name.startswith('__') or not inspect.isclass(value): 18 | continue 19 | url=getattr(value,'__url__',None) 20 | if not url: 21 | continue 22 | globals()[name] = value 23 | __all__.append(name) 24 | #main.log.info('debug: %s loaded'%name) 25 | 26 | try: 27 | main.urls += [url,name] 28 | except AttributeError: 29 | main.urls = [] 30 | main.log.info('First: %s loaded'%name) 31 | main.urls += [url,name] 32 | ''' 33 | import os 34 | 35 | __all__ = [] 36 | 37 | #def LoadViews(): 38 | for views in os.listdir(os.path.dirname(__file__)): 39 | if views.endswith('.py') and not views.startswith('__'): 40 | viewname = os.path.splitext(views)[0] 41 | __all__.append(viewname) 42 | try: 43 | mview = __import__("apps.View." 
+ viewname, fromlist='*') 44 | #bk = mbook.getBook() 45 | #globals()[bk.__name__] = getattr(bk, bk.__name__) 46 | #RegisterBook(bk) 47 | except Exception as e: 48 | default_log.warn("View '%s' import failed : %s" % (viewname,e)) 49 | 50 | #LoadViews()''' -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/pykakasi/k2a.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # k2a.py 3 | # 4 | # Copyright 2011 Hiroshi Miura 5 | # 6 | # Original copyright: 7 | # * KAKASI (Kanji Kana Simple inversion program) 8 | # * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $ 9 | # * Copyright (C) 1992 10 | # * Hironobu Takahashi (takahasi@tiny.or.jp) 11 | # * 12 | # * This program is free software; you can redistribute it and/or modify 13 | # * it under the terms of the GNU General Public License as published by 14 | # * the Free Software Foundation; either versions 2, or (at your option) 15 | # * any later version. 16 | # * 17 | # * This program is distributed in the hope that it will be useful 18 | # * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # * GNU General Public License for more details. 21 | # * 22 | # */ 23 | 24 | from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo 25 | 26 | class K2a (object): 27 | 28 | kanwa = None 29 | 30 | def __init__(self): 31 | self.kanwa = jisyo() 32 | 33 | def isKatakana(self, char): 34 | return ( 0x30a0 < ord(char) and ord(char) < 0x30f7) 35 | 36 | def convert(self, text): 37 | Hstr = "" 38 | max_len = -1 39 | r = min(10, len(text)+1) 40 | for x in xrange(r): 41 | if text[:x] in self.kanwa.kanadict: 42 | if max_len < x: 43 | max_len = x 44 | Hstr = self.kanwa.kanadict[text[:x]] 45 | return (Hstr, max_len) 46 | 47 | -------------------------------------------------------------------------------- /lib/web/test.py: -------------------------------------------------------------------------------- 1 | """test utilities 2 | (part of web.py) 3 | """ 4 | import unittest 5 | import sys, os 6 | import web 7 | 8 | TestCase = unittest.TestCase 9 | TestSuite = unittest.TestSuite 10 | 11 | def load_modules(names): 12 | return [__import__(name, None, None, "x") for name in names] 13 | 14 | def module_suite(module, classnames=None): 15 | """Makes a suite from a module.""" 16 | if classnames: 17 | return unittest.TestLoader().loadTestsFromNames(classnames, module) 18 | elif hasattr(module, 'suite'): 19 | return module.suite() 20 | else: 21 | return unittest.TestLoader().loadTestsFromModule(module) 22 | 23 | def doctest_suite(module_names): 24 | """Makes a test suite from doctests.""" 25 | import doctest 26 | suite = TestSuite() 27 | for mod in load_modules(module_names): 28 | suite.addTest(doctest.DocTestSuite(mod)) 29 | return suite 30 | 31 | def suite(module_names): 32 | """Creates a suite from multiple modules.""" 33 | suite = TestSuite() 34 | for mod in load_modules(module_names): 35 | suite.addTest(module_suite(mod)) 36 | return suite 37 | 38 | def runTests(suite): 39 | runner = unittest.TextTestRunner() 40 | return runner.run(suite) 41 | 42 | def main(suite=None): 43 | if not suite: 44 | main_module = __import__('__main__') 45 | # allow command line switches 46 | args = [a for a in sys.argv[1:] if not a.startswith('-')] 47 | suite = module_suite(main_module, args or None) 48 | 49 | result = runTests(suite) 50 | sys.exit(not result.wasSuccessful()) 51 | 52 | 
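A quick illustration of the harness in lib/web/test.py above: web.test.main() collects a unittest suite from the calling module (or from class names given on the command line), runs it with TextTestRunner and exits non-zero on failure. The following is a minimal sketch only, assuming a hypothetical standalone test file; the module, class and test names are illustrative and not part of this repository.

#!/usr/bin/env python
# Hypothetical test module (editor's sketch) showing how lib/web/test.py is driven.
from web import test

class ExampleTest(test.TestCase):   # test.TestCase is a re-export of unittest.TestCase
    def test_truth(self):
        self.assertTrue(True)

if __name__ == '__main__':
    # main() builds a suite from this module via module_suite(), runs it with
    # runTests() (unittest.TextTestRunner) and exits non-zero if any test fails.
    test.main()

Running it as "python thisfile.py ExampleTest" restricts the run to the named classes, because main() forwards non-option arguments to module_suite().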
-------------------------------------------------------------------------------- /lib/cssutils/_fetch.py: -------------------------------------------------------------------------------- 1 | """Default URL reading functions""" 2 | __all__ = ['_defaultFetcher'] 3 | __docformat__ = 'restructuredtext' 4 | __version__ = '$Id: tokenize2.py 1547 2008-12-10 20:42:26Z cthedot $' 5 | 6 | import cssutils 7 | from cssutils import VERSION 8 | import encutils 9 | import errorhandler 10 | import urllib2 11 | 12 | log = errorhandler.ErrorHandler() 13 | 14 | def _defaultFetcher(url): 15 | """Retrieve data from ``url``. cssutils default implementation of fetch 16 | URL function. 17 | 18 | Returns ``(encoding, string)`` or ``None`` 19 | """ 20 | try: 21 | request = urllib2.Request(url) 22 | request.add_header('User-agent', 23 | 'cssutils %s (http://www.cthedot.de/cssutils/)' % VERSION) 24 | res = urllib2.urlopen(request) 25 | except OSError, e: 26 | # e.g if file URL and not found 27 | log.warn(e, error=OSError) 28 | except (OSError, ValueError), e: 29 | # invalid url, e.g. "1" 30 | log.warn(u'ValueError, %s' % e.args[0], error=ValueError) 31 | except urllib2.HTTPError, e: 32 | # http error, e.g. 404, e can be raised 33 | log.warn(u'HTTPError opening url=%s: %s %s' % 34 | (url, e.code, e.msg), error=e) 35 | except urllib2.URLError, e: 36 | # URLError like mailto: or other IO errors, e can be raised 37 | log.warn(u'URLError, %s' % e.reason, error=e) 38 | else: 39 | if res: 40 | mimeType, encoding = encutils.getHTTPInfo(res) 41 | if mimeType != u'text/css': 42 | log.error(u'Expected "text/css" mime type for url=%r but found: %r' % 43 | (url, mimeType), error=ValueError) 44 | return encoding, res.read() 45 | -------------------------------------------------------------------------------- /lib/readability/encoding.py: -------------------------------------------------------------------------------- 1 | import re 2 | import chardet 3 | 4 | def get_encoding(page): 5 | # Regex for XML and HTML Meta charset declaration 6 | charset_re = re.compile(r']', flags=re.I) 7 | pragma_re = re.compile(r']', flags=re.I) 8 | xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]') 9 | 10 | declared_encodings = (charset_re.findall(page) + 11 | pragma_re.findall(page) + 12 | xml_re.findall(page)) 13 | 14 | # Try any declared encodings 15 | if len(declared_encodings) > 0: 16 | for declared_encoding in declared_encodings: 17 | try: 18 | page.decode(custom_decode(declared_encoding)) 19 | return custom_decode(declared_encoding) 20 | except UnicodeDecodeError: 21 | pass 22 | 23 | # Fallback to chardet if declared encodings fail 24 | text = re.sub(']*>\s*', ' ', page) 25 | enc = 'utf-8' 26 | if not text.strip() or len(text) < 10: 27 | return enc # can't guess 28 | res = chardet.detect(text) 29 | enc = res['encoding'] 30 | #print '->', enc, "%.2f" % res['confidence'] 31 | enc = custom_decode(enc) 32 | return enc 33 | 34 | def custom_decode(encoding): 35 | """Overrides encoding when charset declaration 36 | or charset determination is a subset of a larger 37 | charset. 
Created because of issues with Chinese websites""" 38 | encoding = encoding.lower() 39 | alternates = { 40 | 'big5': 'big5hkscs', 41 | 'gb2312': 'gb18030', 42 | 'ascii': 'utf-8', 43 | 'MacCyrillic': 'cp1251', 44 | } 45 | if encoding in alternates: 46 | return alternates[encoding] 47 | else: 48 | return encoding -------------------------------------------------------------------------------- /lib/weixin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | import re, base64 5 | 6 | from Crypto.Cipher import AES 7 | 8 | def process_eqs(html): 9 | pattern = ( 10 | r'SogouEncrypt.setKv\("(\w+)","(\d)"\)' 11 | r'.*?' 12 | r'SogouEncrypt.encryptquery\("(\w+)","(\w+)"\)' 13 | ) 14 | m = re.findall(pattern, html, re.S) 15 | key, level, secret, setting = m[0] 16 | 17 | eqs = _cipher_eqs(key, secret, setting) 18 | 19 | return eqs, level 20 | 21 | 22 | def _cipher_eqs(key, secret, setting='sogou'): 23 | """ 24 | SogouEncrypt.encryptquery 25 | """ 26 | assert len(key) == 11 27 | 28 | ss = setting.split('-') 29 | 30 | # function g 31 | if len(ss) > 2: 32 | h = ss[2] 33 | else: 34 | h = ss[0] 35 | 36 | # function f 37 | if len(h) > 5: 38 | n = h[:-5] 39 | else: 40 | n = h + (5 - len(h)) * 's' 41 | 42 | key += n 43 | 44 | data = secret + 'hdq=' + setting 45 | # padding data 46 | length = 16 - (len(data) % 16) 47 | data += chr(length) * length 48 | 49 | IV = b'0000000000000000' 50 | cipher = AES.new(_to_bytes(key), AES.MODE_CBC, IV) 51 | # encrypt data 52 | data = cipher.encrypt(_to_bytes(data)) 53 | data = _to_unicode(base64.b64encode(data)) 54 | 55 | # function e 56 | rv = '' 57 | i = 0 58 | for m in range(len(data)): 59 | rv += data[m] 60 | if (m == pow(2, i)) and i < 5: 61 | rv += n[i] 62 | i += 1 63 | return rv 64 | 65 | 66 | def _to_bytes(text): 67 | if isinstance(text, bytes): 68 | return text 69 | return text.encode('utf-8') 70 | 71 | 72 | def _to_unicode(text): 73 | if isinstance(text, str): 74 | return text 75 | return text.decode('utf-8') -------------------------------------------------------------------------------- /lib/chardet/euctwprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCTWDistributionAnalysis 31 | from .mbcssm import EUCTWSMModel 32 | 33 | class EUCTWProber(MultiByteCharSetProber): 34 | def __init__(self): 35 | MultiByteCharSetProber.__init__(self) 36 | self._mCodingSM = CodingStateMachine(EUCTWSMModel) 37 | self._mDistributionAnalyzer = EUCTWDistributionAnalysis() 38 | self.reset() 39 | 40 | def get_charset_name(self): 41 | return "EUC-TW" 42 | -------------------------------------------------------------------------------- /lib/chardet/euckrprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import EUCKRSMModel 32 | 33 | 34 | class EUCKRProber(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(EUCKRSMModel) 38 | self._mDistributionAnalyzer = EUCKRDistributionAnalysis() 39 | self.reset() 40 | 41 | def get_charset_name(self): 42 | return "EUC-KR" 43 | -------------------------------------------------------------------------------- /lib/chardet/gb2312prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 
8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import GB2312DistributionAnalysis 31 | from .mbcssm import GB2312SMModel 32 | 33 | class GB2312Prober(MultiByteCharSetProber): 34 | def __init__(self): 35 | MultiByteCharSetProber.__init__(self) 36 | self._mCodingSM = CodingStateMachine(GB2312SMModel) 37 | self._mDistributionAnalyzer = GB2312DistributionAnalysis() 38 | self.reset() 39 | 40 | def get_charset_name(self): 41 | return "GB2312" 42 | -------------------------------------------------------------------------------- /lib/chardet/big5prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import Big5DistributionAnalysis 31 | from .mbcssm import Big5SMModel 32 | 33 | 34 | class Big5Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(Big5SMModel) 38 | self._mDistributionAnalyzer = Big5DistributionAnalysis() 39 | self.reset() 40 | 41 | def get_charset_name(self): 42 | return "Big5" 43 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/conversion/oeboutput.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | __license__ = 'GPL 3' 3 | __copyright__ = '2009, Kovid Goyal ' 4 | __docformat__ = 'restructuredtext en' 5 | 6 | import os, re 7 | 8 | 9 | from calibre import CurrentDir 10 | 11 | class OEBOutput: 12 | 13 | name = 'OEB Output' 14 | author = 'Kovid Goyal' 15 | file_type = 'oeb' 16 | 17 | def convert(self, oeb_book, output_path, input_plugin, opts, log): 18 | from urllib import unquote 19 | from lxml import etree 20 | 21 | self.log, self.opts = log, opts 22 | if not os.path.exists(output_path): 23 | os.makedirs(output_path) 24 | from calibre.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME 25 | with CurrentDir(output_path): 26 | results = oeb_book.to_opf2(page_map=True) 27 | for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME): 28 | href, root = results.pop(key, [None, None]) 29 | if root is not None: 30 | raw = etree.tostring(root, pretty_print=True, 31 | encoding='utf-8', xml_declaration=True) 32 | if key == OPF_MIME: 33 | # Needed as I can't get lxml to output opf:role and 34 | # not output as well 35 | raw = re.sub(r'(<[/]{0,1})opf:', r'\1', raw) 36 | with open(href, 'wb') as f: 37 | f.write(raw) 38 | 39 | for item in oeb_book.manifest: 40 | path = os.path.abspath(unquote(item.href)) 41 | dir = os.path.dirname(path) 42 | if not os.path.exists(dir): 43 | os.makedirs(dir) 44 | with open(path, 'wb') as f: 45 | f.write(str(item)) 46 | item.unload_data_from_memory(memory=path) 47 | -------------------------------------------------------------------------------- /lib/chardet/cp949prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 
16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import CP949SMModel 32 | 33 | 34 | class CP949Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(CP949SMModel) 38 | # NOTE: CP949 is a superset of EUC-KR, so the distribution should be 39 | # not different. 40 | self._mDistributionAnalyzer = EUCKRDistributionAnalysis() 41 | self.reset() 42 | 43 | def get_charset_name(self): 44 | return "CP949" 45 | -------------------------------------------------------------------------------- /apps/module_backend.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Author: 6 | # cdhigh 7 | #Contributors: 8 | # rexdf 9 | 10 | __Author__ = "cdhigh" 11 | 12 | import os, datetime, logging, __builtin__, hashlib, time 13 | 14 | # for debug 15 | # 本地启动调试服务器:python.exe dev_appserver.py c:\kindleear 16 | IsRunInLocal = (os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) 17 | log = logging.getLogger() 18 | __builtin__.__dict__['default_log'] = log 19 | __builtin__.__dict__['IsRunInLocal'] = IsRunInLocal 20 | 21 | supported_languages = ['en','zh-cn','tr-tr'] #不支持的语种则使用第一个语言 22 | #gettext.install('lang', 'i18n', unicode=True) #for calibre startup 23 | 24 | class Main_Var: 25 | urls = [] 26 | session = None 27 | jjenv = None 28 | supported_languages = None 29 | log = None 30 | __Version__ = None 31 | 32 | __builtin__.__dict__['main'] = Main_Var 33 | main.supported_languages = supported_languages 34 | main.log = log 35 | main.__Version__ = __Version__ 36 | log.setLevel(logging.INFO if IsRunInLocal else logging.WARN) 37 | 38 | import web 39 | import jinja2 40 | from google.appengine.api import memcache 41 | 42 | from lib.memcachestore import MemcacheStore 43 | 44 | from apps.Work import * 45 | 46 | from apps.utils import fix_filesizeformat 47 | 48 | application = web.application(main.urls, globals()) 49 | store = MemcacheStore(memcache) 50 | session = web.session.Session(application, store, initializer={'username':'', 'login':0, 'lang':'', 'pocket_request_token':''}) 51 | jjenv = jinja2.Environment(loader=jinja2.FileSystemLoader('templates'), 52 | extensions=["jinja2.ext.do",'jinja2.ext.i18n']) 53 | jjenv.filters['filesizeformat'] = fix_filesizeformat 54 | 55 | app = application.wsgifunc() 56 | 57 | web.config.debug = IsRunInLocal 58 | 59 | main.session = session 60 | main.jjenv = jjenv -------------------------------------------------------------------------------- /lib/opml.py: -------------------------------------------------------------------------------- 1 | # 
version : 0.5 2 | # https://pypi.python.org/pypi/opml 3 | import lxml.etree 4 | 5 | class OutlineElement(object): 6 | """A single outline object.""" 7 | 8 | def __init__(self, root): 9 | """Initialize from the root node.""" 10 | 11 | self._root = root 12 | 13 | def __getattr__(self, attr): 14 | 15 | if attr in self._root.attrib: 16 | return self._root.attrib[attr] 17 | else: 18 | return '' # added by cdhigh [2014.10.02] 19 | 20 | #raise AttributeError() 21 | 22 | @property 23 | def _outlines(self): 24 | """Return the available sub-outline objects as a seqeunce.""" 25 | 26 | return [OutlineElement(n) for n in self._root.xpath('./outline')] 27 | 28 | def __len__(self): 29 | return len(self._outlines) 30 | 31 | def __getitem__(self, index): 32 | return self._outlines[index] 33 | 34 | class Opml(object): 35 | """Python representation of an OPML file.""" 36 | 37 | def __init__(self, xml_tree): 38 | """Initialize the object using the parsed XML tree.""" 39 | 40 | self._tree = xml_tree 41 | 42 | def __getattr__(self, attr): 43 | """Fall back attribute handler -- attempt to find the attribute in 44 | the OPML .""" 45 | 46 | result = self._tree.xpath('/opml/head/%s/text()' % attr) 47 | if len(result) == 1: 48 | return result[0] 49 | 50 | raise AttributeError() 51 | 52 | @property 53 | def _outlines(self): 54 | """Return the available sub-outline objects as a seqeunce.""" 55 | 56 | return [OutlineElement(n) for n in self._tree.xpath( 57 | '/opml/body/outline')] 58 | 59 | def __len__(self): 60 | return len(self._outlines) 61 | 62 | def __getitem__(self, index): 63 | return self._outlines[index] 64 | 65 | def from_string(opml_text): 66 | 67 | return Opml(lxml.etree.fromstring(opml_text)) 68 | 69 | def parse(opml_url): 70 | 71 | return Opml(lxml.etree.parse(opml_url)) 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /templates/home.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block css -%} 3 | 40 | {% endblock -%} 41 | {% block bodytag -%} 42 | 43 | {% endblock -%} 44 | {% block content -%} 45 | 50 |
51 |
52 |
53 |

{{_("Inherited From Calibre")}}

54 |

{{_("Author modified and ported Calibre to generate mobi file in GAE without kindlegen tool of Amazon,")}} 55 | {{_("for periodical mobi file is a better format to represent news feeds.")}}

56 |
57 |
58 |
59 |
60 |

{{_("Share Your Idea")}}

61 |

{{_("With my")}} 62 | {{_("open source KindleEar application")}} 63 | {{_(", You can deploy your own server to push news feeds to your kindle dialy or share the service with your friends.")}}

64 |
65 |
66 |
67 | {% endblock -%} -------------------------------------------------------------------------------- /templates/advbase.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block css -%} 3 | 39 | {% endblock -%} 40 | {% block content -%} 41 |
42 |
43 |
44 |
45 | {% block advcontent -%} 46 | {% endblock -%} 47 |
48 |
49 | 50 |
51 |
52 |
53 | 79 |
80 |
81 |
82 |
83 |
84 | {% endblock -%} -------------------------------------------------------------------------------- /lib/chardet/charsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from . import constants 30 | import re 31 | 32 | 33 | class CharSetProber: 34 | def __init__(self): 35 | pass 36 | 37 | def reset(self): 38 | self._mState = constants.eDetecting 39 | 40 | def get_charset_name(self): 41 | return None 42 | 43 | def feed(self, aBuf): 44 | pass 45 | 46 | def get_state(self): 47 | return self._mState 48 | 49 | def get_confidence(self): 50 | return 0.0 51 | 52 | def filter_high_bit_only(self, aBuf): 53 | aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf) 54 | return aBuf 55 | 56 | def filter_without_english_letters(self, aBuf): 57 | aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf) 58 | return aBuf 59 | 60 | def filter_with_english_letters(self, aBuf): 61 | # TODO 62 | return aBuf 63 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __license__ = 'GPL 3' 4 | __copyright__ = '2010, Hiroshi Miura ' 5 | __docformat__ = 'restructuredtext en' 6 | __all__ = ["Unihandecoder"] 7 | 8 | ''' 9 | Decode unicode text to an ASCII representation of the text. 10 | Translate unicode characters to ASCII. 11 | 12 | Inspired from John Schember's unidecode library which was created as part 13 | of calibre. 14 | 15 | Copyright(c) 2009, John Schember 16 | 17 | Tranliterate the string from unicode characters to ASCII in Chinese and others. 
18 | 19 | ''' 20 | import unicodedata 21 | 22 | class Unihandecoder(object): 23 | preferred_encoding = None 24 | decoder = None 25 | 26 | def __init__(self, lang="zh", encoding='utf-8'): 27 | self.preferred_encoding = encoding 28 | lang = lang.lower() 29 | if lang[:2] == u'ja': 30 | from calibre.ebooks.unihandecode.jadecoder import Jadecoder 31 | self.decoder = Jadecoder() 32 | elif lang[:2] == u'kr' or lang == u'korean': 33 | from calibre.ebooks.unihandecode.krdecoder import Krdecoder 34 | self.decoder = Krdecoder() 35 | elif lang[:2] == u'vn' or lang == u'vietnum': 36 | from calibre.ebooks.unihandecode.vndecoder import Vndecoder 37 | self.decoder = Vndecoder() 38 | else: #zh and others 39 | from calibre.ebooks.unihandecode.unidecoder import Unidecoder 40 | self.decoder = Unidecoder() 41 | 42 | def decode(self, text): 43 | try: 44 | unicode # python2 45 | if not isinstance(text, unicode): 46 | try: 47 | text = unicode(text) 48 | except: 49 | try: 50 | text = text.decode(self.preferred_encoding) 51 | except: 52 | text = text.decode('utf-8', 'replace') 53 | except: # python3, str is unicode 54 | pass 55 | #at first unicode normalize it. (see Unicode standards) 56 | ntext = unicodedata.normalize('NFKC', text) 57 | return self.decoder.decode(ntext) 58 | -------------------------------------------------------------------------------- /lib/chardet/mbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 
23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetgroupprober import CharSetGroupProber 31 | from .utf8prober import UTF8Prober 32 | from .sjisprober import SJISProber 33 | from .eucjpprober import EUCJPProber 34 | from .gb2312prober import GB2312Prober 35 | from .euckrprober import EUCKRProber 36 | from .cp949prober import CP949Prober 37 | from .big5prober import Big5Prober 38 | from .euctwprober import EUCTWProber 39 | 40 | 41 | class MBCSGroupProber(CharSetGroupProber): 42 | def __init__(self): 43 | CharSetGroupProber.__init__(self) 44 | self._mProbers = [ 45 | UTF8Prober(), 46 | SJISProber(), 47 | EUCJPProber(), 48 | GB2312Prober(), 49 | EUCKRProber(), 50 | CP949Prober(), 51 | Big5Prober(), 52 | EUCTWProber() 53 | ] 54 | self.reset() 55 | -------------------------------------------------------------------------------- /apps/View/Logs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Contributors: 6 | # rexdf 7 | from operator import attrgetter 8 | import datetime 9 | from apps.BaseHandler import BaseHandler 10 | from apps.dbModels import * 11 | from apps.utils import etagged 12 | from google.appengine.api.datastore_errors import NeedIndexError 13 | 14 | class Mylogs(BaseHandler): 15 | __url__ = "/logs" 16 | @etagged() 17 | def GET(self): 18 | user = self.getcurrentuser() 19 | try: 20 | mylogs = DeliverLog.all().filter("username = ", user.name).order('-time').fetch(limit=10) 21 | except NeedIndexError: #很多人不会部署,经常出现没有建立索引的情况,干脆碰到这种情况直接消耗CPU时间自己排序得了 22 | mylogsAll = sorted(DeliverLog.all().filter("username = ", user.name), key=attrgetter('time'), reverse=True)[:10] 23 | logs = {} 24 | if user.name == 'admin': 25 | for u in KeUser.all().filter("name != ", 'admin'): 26 | try: 27 | ul = DeliverLog.all().filter("username = ", u.name).order('-time').fetch(limit=5) 28 | except NeedIndexError: 29 | ul = sorted(DeliverLog.all().filter("username = ", user.name), key=attrgetter('time'), reverse=True)[:5] 30 | if ul: 31 | logs[u.name] = ul 32 | return self.render('logs.html', "Deliver log", current='logs', 33 | mylogs=mylogs, logs=logs) 34 | 35 | class RemoveLogs(BaseHandler): 36 | __url__ = "/removelogs" 37 | def GET(self): 38 | # 停止过期用户的推送 39 | for user in KeUser.all().filter('enable_send = ', True): 40 | if user.expires and (user.expires < datetime.datetime.utcnow()): 41 | user.enable_send = False 42 | user.put() 43 | 44 | query = DeliverLog.all() 45 | query.filter('datetime < ', datetime.datetime.utcnow() - datetime.timedelta(days=25)) 46 | logs = query.fetch(1000) 47 | c = len(logs) 48 | db.delete(logs) 49 | 50 | return "%s lines log removed.
" % c -------------------------------------------------------------------------------- /lib/calibre/utils/cleantext.py: -------------------------------------------------------------------------------- 1 | __license__ = 'GPL 3' 2 | __copyright__ = '2010, sengian ' 3 | __docformat__ = 'restructuredtext en' 4 | 5 | import re, htmlentitydefs 6 | from future_builtins import map 7 | 8 | _ascii_pat = None 9 | 10 | def clean_ascii_chars(txt, charlist=None): 11 | r''' 12 | Remove ASCII control chars. 13 | This is all control chars except \t, \n and \r 14 | ''' 15 | if not txt: 16 | return '' 17 | global _ascii_pat 18 | if _ascii_pat is None: 19 | chars = set(xrange(32)) 20 | chars.add(127) 21 | for x in (9, 10, 13): 22 | chars.remove(x) 23 | _ascii_pat = re.compile(u'|'.join(map(unichr, chars))) 24 | 25 | if charlist is None: 26 | pat = _ascii_pat 27 | else: 28 | pat = re.compile(u'|'.join(map(unichr, charlist))) 29 | return pat.sub('', txt) 30 | 31 | def allowed(x): 32 | x = ord(x) 33 | return (x != 127 and (31 < x < 0xd7ff or x in (9, 10, 13))) or (0xe000 < x < 0xfffd) or (0x10000 < x < 0x10ffff) 34 | 35 | def clean_xml_chars(unicode_string): 36 | return u''.join(filter(allowed, unicode_string)) 37 | 38 | 39 | # Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html 40 | # Removes HTML or XML character references and entities from a text string. 41 | # 42 | # @param text The HTML (or XML) source text. 43 | # @return The plain text, as a Unicode string, if necessary. 44 | 45 | def unescape(text, rm=False, rchar=u''): 46 | def fixup(m, rm=rm, rchar=rchar): 47 | text = m.group(0) 48 | if text[:2] == "&#": 49 | # character reference 50 | try: 51 | if text[:3] == "&#x": 52 | return unichr(int(text[3:-1], 16)) 53 | else: 54 | return unichr(int(text[2:-1])) 55 | except ValueError: 56 | pass 57 | else: 58 | # named entity 59 | try: 60 | text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) 61 | except KeyError: 62 | pass 63 | if rm: 64 | return rchar # replace by char 65 | return text # leave as is 66 | return re.sub("&#?\w+;", fixup, text) 67 | 68 | -------------------------------------------------------------------------------- /lib/cssutils/css/__init__.py: -------------------------------------------------------------------------------- 1 | """Implements Document Object Model Level 2 CSS 2 | http://www.w3.org/TR/2000/PR-DOM-Level-2-Style-20000927/css.html 3 | 4 | currently implemented 5 | - CSSStyleSheet 6 | - CSSRuleList 7 | - CSSRule 8 | - CSSComment (cssutils addon) 9 | - CSSCharsetRule 10 | - CSSFontFaceRule 11 | - CSSImportRule 12 | - CSSMediaRule 13 | - CSSNamespaceRule (WD) 14 | - CSSPageRule 15 | - CSSStyleRule 16 | - CSSUnkownRule 17 | - Selector and SelectorList 18 | - CSSStyleDeclaration 19 | - CSS2Properties 20 | - CSSValue 21 | - CSSPrimitiveValue 22 | - CSSValueList 23 | - CSSVariablesRule 24 | - CSSVariablesDeclaration 25 | 26 | todo 27 | - RGBColor, Rect, Counter 28 | """ 29 | __all__ = [ 30 | 'CSSStyleSheet', 31 | 'CSSRuleList', 32 | 'CSSRule', 33 | 'CSSComment', 34 | 'CSSCharsetRule', 35 | 'CSSFontFaceRule' 36 | 'CSSImportRule', 37 | 'CSSMediaRule', 38 | 'CSSNamespaceRule', 39 | 'CSSPageRule', 40 | 'MarginRule', 41 | 'CSSStyleRule', 42 | 'CSSUnknownRule', 43 | 'CSSVariablesRule', 44 | 'CSSVariablesDeclaration', 45 | 'Selector', 'SelectorList', 46 | 'CSSStyleDeclaration', 'Property', 47 | #'CSSValue', 'CSSPrimitiveValue', 'CSSValueList' 48 | 'PropertyValue', 49 | 'Value', 50 | 'ColorValue', 51 | 'DimensionValue', 52 | 'URIValue', 53 | 'CSSFunction', 54 | 'CSSVariable', 55 | 
'MSValue' 56 | ] 57 | __docformat__ = 'restructuredtext' 58 | __version__ = '$Id$' 59 | 60 | from cssstylesheet import * 61 | from cssrulelist import * 62 | from cssrule import * 63 | from csscomment import * 64 | from csscharsetrule import * 65 | from cssfontfacerule import * 66 | from cssimportrule import * 67 | from cssmediarule import * 68 | from cssnamespacerule import * 69 | from csspagerule import * 70 | from marginrule import * 71 | from cssstylerule import * 72 | from cssvariablesrule import * 73 | from cssunknownrule import * 74 | from selector import * 75 | from selectorlist import * 76 | from cssstyledeclaration import * 77 | from cssvariablesdeclaration import * 78 | from property import * 79 | #from cssvalue import * 80 | from value import * 81 | -------------------------------------------------------------------------------- /lib/cssutils/css/cssrulelist.py: -------------------------------------------------------------------------------- 1 | """CSSRuleList implements DOM Level 2 CSS CSSRuleList. 2 | Partly also http://dev.w3.org/csswg/cssom/#the-cssrulelist.""" 3 | __all__ = ['CSSRuleList'] 4 | __docformat__ = 'restructuredtext' 5 | __version__ = '$Id$' 6 | 7 | class CSSRuleList(list): 8 | """The CSSRuleList object represents an (ordered) list of statements. 9 | 10 | The items in the CSSRuleList are accessible via an integral index, 11 | starting from 0. 12 | 13 | Subclasses a standard Python list so theoretically all standard list 14 | methods are available. Setting methods like ``__init__``, ``append``, 15 | ``extend`` or ``__setslice__`` are added later on instances of this 16 | class if so desired. 17 | E.g. CSSStyleSheet adds ``append`` which is not available in a simple 18 | instance of this class! 19 | """ 20 | def __init__(self, *ignored): 21 | "Nothing is set as this must also be defined later." 22 | pass 23 | 24 | def __notimplemented(self, *ignored): 25 | "Implemented in class using a CSSRuleList only." 26 | raise NotImplementedError( 27 | 'Must be implemented by class using an instance of this class.') 28 | 29 | append = extend = __setitem__ = __setslice__ = __notimplemented 30 | 31 | def item(self, index): 32 | """(DOM) Retrieve a CSS rule by ordinal `index`. The order in this 33 | collection represents the order of the rules in the CSS style 34 | sheet. If index is greater than or equal to the number of rules in 35 | the list, this returns None. 36 | 37 | Returns CSSRule, the style rule at the index position in the 38 | CSSRuleList, or None if that is not a valid index. 
39 | """ 40 | try: 41 | return self[index] 42 | except IndexError: 43 | return None 44 | 45 | length = property(lambda self: len(self), 46 | doc=u"(DOM) The number of CSSRules in the list.") 47 | 48 | def rulesOfType(self, type): 49 | """Yield the rules which have the given `type` only, one of the 50 | constants defined in :class:`cssutils.css.CSSRule`.""" 51 | for r in self: 52 | if r.type == type: 53 | yield r 54 | -------------------------------------------------------------------------------- /lib/cssutils/scripts/cssparse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """utility script to parse given filenames or string 3 | """ 4 | __docformat__ = 'restructuredtext' 5 | __version__ = '$Id$' 6 | 7 | import cssutils 8 | import logging 9 | import optparse 10 | import sys 11 | 12 | def main(args=None): 13 | """ 14 | Parses given filename(s) or string or URL (using optional encoding) and 15 | prints the parsed style sheet to stdout. 16 | 17 | Redirect stdout to save CSS. Redirect stderr to save parser log infos. 18 | """ 19 | usage = """usage: %prog [options] filename1.css [filename2.css ...] 20 | [>filename_combined.css] [2>parserinfo.log] """ 21 | p = optparse.OptionParser(usage=usage) 22 | p.add_option('-s', '--string', action='store_true', dest='string', 23 | help='parse given string') 24 | p.add_option('-u', '--url', action='store', dest='url', 25 | help='parse given url') 26 | p.add_option('-e', '--encoding', action='store', dest='encoding', 27 | help='encoding of the file or override encoding found') 28 | p.add_option('-m', '--minify', action='store_true', dest='minify', 29 | help='minify parsed CSS', default=False) 30 | p.add_option('-d', '--debug', action='store_true', dest='debug', 31 | help='activate debugging output') 32 | 33 | (options, params) = p.parse_args(args) 34 | 35 | if not params and not options.url: 36 | p.error("no filename given") 37 | 38 | if options.debug: 39 | p = cssutils.CSSParser(loglevel=logging.DEBUG) 40 | else: 41 | p = cssutils.CSSParser() 42 | 43 | if options.minify: 44 | cssutils.ser.prefs.useMinified() 45 | 46 | if options.string: 47 | sheet = p.parseString(u''.join(params), encoding=options.encoding) 48 | print sheet.cssText 49 | elif options.url: 50 | sheet = p.parseUrl(options.url, encoding=options.encoding) 51 | print sheet.cssText 52 | else: 53 | for filename in params: 54 | sys.stderr.write('=== CSS FILE: "%s" ===\n' % filename) 55 | sheet = p.parseFile(filename, encoding=options.encoding) 56 | print sheet.cssText 57 | print 58 | sys.stderr.write('\n') 59 | 60 | 61 | if __name__ == "__main__": 62 | sys.exit(main()) 63 | -------------------------------------------------------------------------------- /books/nfzm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | from bs4 import BeautifulSoup 4 | from base import BaseFeedBook, URLOpener, string_of_tag 5 | 6 | def getBook(): 7 | return NFZM 8 | 9 | class NFZM(BaseFeedBook): 10 | title = u'南方周末' 11 | description = u'在这里读懂中国 | 每周五更新 | 需要登录' 12 | __author__ = 'mcfloundinho' 13 | language = 'zh-cn' 14 | feed_encoding = "utf-8" 15 | page_encoding = "utf-8" 16 | mastheadfile = "mh_nfzm.gif" 17 | coverfile = "cv_nfzm.jpg" 18 | deliver_days = ['Friday'] 19 | needs_subscription = True 20 | 21 | def ParseFeedUrls(self): 22 | login_url = "http://passport.infzm.com/passport/login" 23 | content_url = "http://www.infzm.com/enews/infzm" 24 | urls = [] 25 | 
opener = URLOpener(self.host, timeout=60) 26 | login_form = {"loginname":self.account, "password":self.password} 27 | login_response = opener.open(login_url, data=login_form) 28 | #opener.SaveCookies(login_response.header_msg.getheaders('Set-Cookie')) 29 | result = opener.open(content_url) 30 | content = result.content.decode(self.feed_encoding) 31 | soup = BeautifulSoup(content, "lxml") 32 | sec_titles = [] 33 | for sec_name in soup.find_all('h2'): 34 | sec_titles.append(sec_name.get_text()) 35 | for top_news in soup.find_all('dl', {'class': 'topnews'}): 36 | url = top_news.a['href'] 37 | feed_content = opener.open(url).content.decode(self.feed_encoding) 38 | feed_soup = BeautifulSoup(feed_content, "lxml") 39 | urls.append( 40 | (sec_titles[0], top_news.a['title'], url, feed_soup.find(id="articleContent"))) 41 | sec_count = 0 42 | for sec_content in soup.find_all('ul', {'class': 'relnews'}): 43 | for a in sec_content.find_all('a'): 44 | url = a['href'] 45 | feed_content = opener.open( 46 | url).content.decode(self.feed_encoding) 47 | feed_soup = BeautifulSoup(feed_content, "lxml") 48 | urls.append( 49 | (sec_titles[sec_count], a['title'], url, feed_soup.find(id="articleContent"))) 50 | sec_count += 1 51 | return urls 52 | -------------------------------------------------------------------------------- /lib/web/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI Utilities 3 | (from web.py) 4 | """ 5 | 6 | import os, sys 7 | 8 | import http 9 | import webapi as web 10 | from utils import listget 11 | from net import validaddr, validip 12 | import httpserver 13 | 14 | def runfcgi(func, addr=('localhost', 8000)): 15 | """Runs a WSGI function as a FastCGI server.""" 16 | import flup.server.fcgi as flups 17 | return flups.WSGIServer(func, multiplexed=True, bindAddress=addr, debug=False).run() 18 | 19 | def runscgi(func, addr=('localhost', 4000)): 20 | """Runs a WSGI function as an SCGI server.""" 21 | import flup.server.scgi as flups 22 | return flups.WSGIServer(func, bindAddress=addr, debug=False).run() 23 | 24 | def runwsgi(func): 25 | """ 26 | Runs a WSGI-compatible `func` using FCGI, SCGI, or a simple web server, 27 | as appropriate based on context and `sys.argv`. 28 | """ 29 | 30 | if os.environ.has_key('SERVER_SOFTWARE'): # cgi 31 | os.environ['FCGI_FORCE_CGI'] = 'Y' 32 | 33 | if (os.environ.has_key('PHP_FCGI_CHILDREN') #lighttpd fastcgi 34 | or os.environ.has_key('SERVER_SOFTWARE')): 35 | return runfcgi(func, None) 36 | 37 | if 'fcgi' in sys.argv or 'fastcgi' in sys.argv: 38 | args = sys.argv[1:] 39 | if 'fastcgi' in args: args.remove('fastcgi') 40 | elif 'fcgi' in args: args.remove('fcgi') 41 | if args: 42 | return runfcgi(func, validaddr(args[0])) 43 | else: 44 | return runfcgi(func, None) 45 | 46 | if 'scgi' in sys.argv: 47 | args = sys.argv[1:] 48 | args.remove('scgi') 49 | if args: 50 | return runscgi(func, validaddr(args[0])) 51 | else: 52 | return runscgi(func) 53 | 54 | return httpserver.runsimple(func, validip(listget(sys.argv, 1, ''))) 55 | 56 | def _is_dev_mode(): 57 | # Some embedded python interpreters won't have sys.arv 58 | # For details, see https://github.com/webpy/webpy/issues/87 59 | argv = getattr(sys, "argv", []) 60 | 61 | # quick hack to check if the program is running in dev mode. 
62 | if os.environ.has_key('SERVER_SOFTWARE') \ 63 | or os.environ.has_key('PHP_FCGI_CHILDREN') \ 64 | or 'fcgi' in argv or 'fastcgi' in argv \ 65 | or 'mod_wsgi' in argv: 66 | return False 67 | return True 68 | 69 | # When running the builtin-server, enable debug mode if not already set. 70 | web.config.setdefault('debug', _is_dev_mode()) 71 | -------------------------------------------------------------------------------- /lib/chardet/codingstatemachine.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .constants import eStart 29 | from .compat import wrap_ord 30 | 31 | 32 | class CodingStateMachine: 33 | def __init__(self, sm): 34 | self._mModel = sm 35 | self._mCurrentBytePos = 0 36 | self._mCurrentCharLen = 0 37 | self.reset() 38 | 39 | def reset(self): 40 | self._mCurrentState = eStart 41 | 42 | def next_state(self, c): 43 | # for each byte we get its class 44 | # if it is first byte, we also get byte length 45 | # PY3K: aBuf is a byte stream, so c is an int, not a byte 46 | byteCls = self._mModel['classTable'][wrap_ord(c)] 47 | if self._mCurrentState == eStart: 48 | self._mCurrentBytePos = 0 49 | self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] 50 | # from byte's class and stateTable, we get its next state 51 | curr_state = (self._mCurrentState * self._mModel['classFactor'] 52 | + byteCls) 53 | self._mCurrentState = self._mModel['stateTable'][curr_state] 54 | self._mCurrentBytePos += 1 55 | return self._mCurrentState 56 | 57 | def get_current_charlen(self): 58 | return self._mCurrentCharLen 59 | 60 | def get_coding_state_machine(self): 61 | return self._mModel['name'] 62 | -------------------------------------------------------------------------------- /lib/cssutils/scripts/csscapture.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Retrieve all CSS stylesheets including embedded for a given URL. 3 | Retrieve as StyleSheetList or save to disk - raw, parsed or minified version. 4 | 5 | TODO: 6 | - maybe use DOM 3 load/save? 7 | - logger class which handles all cases when no log is given... 8 | - saveto: why does urllib2 hang? 
9 | """ 10 | __all__ = ['CSSCapture'] 11 | __docformat__ = 'restructuredtext' 12 | __version__ = '$Id$' 13 | 14 | from cssutils.script import CSSCapture 15 | import logging 16 | import optparse 17 | import sys 18 | 19 | def main(args=None): 20 | usage = "usage: %prog [options] URL" 21 | parser = optparse.OptionParser(usage=usage) 22 | parser.add_option('-d', '--debug', action='store_true', dest='debug', 23 | help='show debug messages during capturing') 24 | parser.add_option('-m', '--minified', action='store_true', dest='minified', 25 | help='saves minified version of captured files') 26 | parser.add_option('-n', '--notsave', action='store_true', dest='notsave', 27 | help='if given files are NOT saved, only log is written') 28 | # parser.add_option('-r', '--saveraw', action='store_true', dest='saveraw', 29 | # help='if given saves raw css otherwise cssutils\' parsed files') 30 | parser.add_option('-s', '--saveto', action='store', dest='saveto', 31 | help='saving retrieved files to "saveto", defaults to "_CSSCapture_SAVED"') 32 | parser.add_option('-u', '--useragent', action='store', dest='ua', 33 | help='useragent to use for request of URL, default is urllib2s default') 34 | options, url = parser.parse_args() 35 | 36 | # TODO: 37 | options.saveraw = False 38 | 39 | if not url: 40 | parser.error('no URL given') 41 | else: 42 | url = url[0] 43 | 44 | if options.debug: 45 | level = logging.DEBUG 46 | else: 47 | level = logging.INFO 48 | 49 | # START 50 | c = CSSCapture(ua=options.ua, defaultloglevel=level) 51 | 52 | stylesheetlist = c.capture(url) 53 | 54 | if options.notsave is None or not options.notsave: 55 | if options.saveto: 56 | saveto = options.saveto 57 | else: 58 | saveto = u'_CSSCapture_SAVED' 59 | c.saveto(saveto, saveraw=options.saveraw, minified=options.minified) 60 | else: 61 | for i, s in enumerate(stylesheetlist): 62 | print u'''%s. 63 | encoding: %r 64 | title: %r 65 | href: %r''' % (i + 1, s.encoding, s.title, s.href) 66 | 67 | 68 | if __name__ == "__main__": 69 | sys.exit(main()) 70 | -------------------------------------------------------------------------------- /lib/bs4/tests/test_lxml.py: -------------------------------------------------------------------------------- 1 | """Tests to ensure that the lxml tree builder generates good trees.""" 2 | 3 | import re 4 | import warnings 5 | 6 | try: 7 | import lxml.etree 8 | LXML_PRESENT = True 9 | LXML_VERSION = lxml.etree.LXML_VERSION 10 | except ImportError, e: 11 | LXML_PRESENT = False 12 | LXML_VERSION = (0,) 13 | 14 | if LXML_PRESENT: 15 | from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML 16 | 17 | from bs4 import ( 18 | BeautifulSoup, 19 | BeautifulStoneSoup, 20 | ) 21 | from bs4.element import Comment, Doctype, SoupStrainer 22 | from bs4.testing import skipIf 23 | from bs4.tests import test_htmlparser 24 | from bs4.testing import ( 25 | HTMLTreeBuilderSmokeTest, 26 | XMLTreeBuilderSmokeTest, 27 | SoupTest, 28 | skipIf, 29 | ) 30 | 31 | @skipIf( 32 | not LXML_PRESENT, 33 | "lxml seems not to be present, not testing its tree builder.") 34 | class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): 35 | """See ``HTMLTreeBuilderSmokeTest``.""" 36 | 37 | @property 38 | def default_builder(self): 39 | return LXMLTreeBuilder() 40 | 41 | def test_out_of_range_entity(self): 42 | self.assertSoupEquals( 43 | "
<p>foo&#65533;bar</p>", "<p>foobar</p>") 44 | self.assertSoupEquals( 45 | "<p>foo&#65533;bar</p>", "<p>foobar</p>") 46 | self.assertSoupEquals( 47 | "<p>foo&#65533;bar</p>", "<p>foobar</p>
") 48 | 49 | # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this 50 | # test if an old version of lxml is installed. 51 | 52 | @skipIf( 53 | not LXML_PRESENT or LXML_VERSION < (2,3,5,0), 54 | "Skipping doctype test for old version of lxml to avoid segfault.") 55 | def test_empty_doctype(self): 56 | soup = self.soup("") 57 | doctype = soup.contents[0] 58 | self.assertEqual("", doctype.strip()) 59 | 60 | def test_beautifulstonesoup_is_xml_parser(self): 61 | # Make sure that the deprecated BSS class uses an xml builder 62 | # if one is installed. 63 | with warnings.catch_warnings(record=True) as w: 64 | soup = BeautifulStoneSoup("") 65 | self.assertEqual(u"", unicode(soup.b)) 66 | self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message)) 67 | 68 | @skipIf( 69 | not LXML_PRESENT, 70 | "lxml seems not to be present, not testing its XML tree builder.") 71 | class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest): 72 | """See ``HTMLTreeBuilderSmokeTest``.""" 73 | 74 | @property 75 | def default_builder(self): 76 | return LXMLTreeBuilderForXML() 77 | -------------------------------------------------------------------------------- /apps/View/Setting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Contributors: 6 | # rexdf 7 | 8 | import gettext 9 | 10 | import web 11 | 12 | from apps.BaseHandler import BaseHandler 13 | from apps.dbModels import * 14 | from apps.utils import etagged 15 | from config import * 16 | 17 | #import main 18 | 19 | class Setting(BaseHandler): 20 | __url__ = "/setting" 21 | @etagged() 22 | def GET(self, tips=None): 23 | user = self.getcurrentuser() 24 | return self.render('setting.html',"Setting", 25 | current='setting',user=user,mail_sender=SRC_EMAIL,tips=tips) 26 | 27 | def POST(self): 28 | user = self.getcurrentuser() 29 | kemail = web.input().get('kindleemail') 30 | mytitle = web.input().get("rt") 31 | if not kemail: 32 | tips = _("Kindle E-mail is requied!") 33 | elif not mytitle: 34 | tips = _("Title is requied!") 35 | else: 36 | user.kindle_email = kemail 37 | user.timezone = int(web.input().get('timezone', TIMEZONE)) 38 | user.send_time = int(web.input().get('sendtime')) 39 | user.enable_send = bool(web.input().get('enablesend')) 40 | user.book_type = web.input().get('booktype') 41 | user.device = web.input().get('devicetype') or 'kindle' 42 | user.use_title_in_feed = bool(web.input().get('titlefrom') == 'feed') 43 | user.titlefmt = web.input().get('titlefmt') 44 | alldays = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday'] 45 | user.send_days = [day for day in alldays if web.input().get(day)] 46 | user.merge_books = bool(web.input().get('mergebooks')) 47 | user.put() 48 | 49 | myfeeds = user.ownfeeds 50 | myfeeds.language = web.input().get("lng") 51 | myfeeds.title = mytitle 52 | myfeeds.keep_image = bool(web.input().get("keepimage")) 53 | myfeeds.oldest_article = int(web.input().get('oldest', 7)) 54 | myfeeds.users = [user.name] if web.input().get("enablerss") else [] 55 | myfeeds.put() 56 | tips = _("Settings Saved!") 57 | 58 | return self.GET(tips) 59 | 60 | class SetLang(BaseHandler): 61 | __url__ = "/lang/(.*)" 62 | def GET(self, lang): 63 | lang = lang.lower() 64 | if lang not in main.supported_languages: 65 | return "language invalid!" 
66 | main.session.lang = lang 67 | raise web.seeother(r'/') -------------------------------------------------------------------------------- /books/Gongshi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | from bs4 import BeautifulSoup 5 | from base import BaseFeedBook, URLOpener 6 | 7 | 8 | def getBook(): 9 | return Gongshi 10 | 11 | 12 | class Gongshi(BaseFeedBook): 13 | title = u'共识网一周排行' 14 | description = u'共识网—在大变革时代寻找共识 | 每周六推送。' 15 | language = 'zh-cn' 16 | feed_encoding = "gbk" 17 | page_encoding = "gbk" 18 | mastheadfile = "mh_gongshi.gif" 19 | coverfile = 'cv_gongshi.jpg' 20 | deliver_days = ['Saturday'] 21 | 22 | def FetchDesc(self, url): 23 | opener = URLOpener(self.host, timeout=60) 24 | result = opener.open(url) 25 | if result.status_code != 200: 26 | self.log.warn('fetch article failed(%d):%s.' % (status_code, url)) 27 | return None 28 | content = result.content.decode(self.feed_encoding) 29 | soup = BeautifulSoup(content, 'lxml') 30 | abstract = unicode(soup.find('div', attrs={'class': 'zhaiyao'})) 31 | article = unicode(soup.find(id='contents')) 32 | pagelist = soup.find('ul', attrs={'class': 'pagelist'}) 33 | if pagelist and pagelist.find('li'): 34 | page_count_context = pagelist.a.text 35 | page_count = int( 36 | page_count_context[1:page_count_context.index(u'页')]) 37 | for i in range(2, page_count + 1): 38 | page_url = url[:-5] + "_%d.html" % i 39 | result = opener.open(page_url) 40 | if result.status_code != 200: 41 | self.log.warn( 42 | 'fetch page failed(%d):%s.' % (status_code, page_url)) 43 | return None 44 | content = result.content.decode(self.feed_encoding) 45 | pagesoup = BeautifulSoup(content, 'lxml') 46 | article += unicode(pagesoup.find(id='contents')) 47 | return abstract + article 48 | 49 | def ParseFeedUrls(self): 50 | mainurl = "http://www.21ccom.net/articles/china/" 51 | urls = [] 52 | opener = URLOpener(self.host, timeout=60) 53 | result = opener.open(mainurl) 54 | if result.status_code != 200: 55 | self.log.warn('fetch rss failed:%s' % mainurl) 56 | return [] 57 | content = result.content.decode(self.feed_encoding) 58 | soup = BeautifulSoup(content, "lxml") 59 | # Get the 2nd block 60 | ul = soup.find_all('ul', attrs={'class': ['m-list', 'list-tweet']})[1] 61 | for li in ul.find_all('li'): 62 | urls.append( 63 | (u'共识网一周排行', li.a.text, li.a['href'], self.FetchDesc(li.a['href']))) 64 | return urls 65 | -------------------------------------------------------------------------------- /lib/chardet/chardetect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Script which takes one or more file paths and reports on their detected 4 | encodings 5 | 6 | Example:: 7 | 8 | % chardetect somefile someotherfile 9 | somefile: windows-1252 with confidence 0.5 10 | someotherfile: ascii with confidence 1.0 11 | 12 | If no paths are provided, it takes its input from stdin. 13 | 14 | """ 15 | 16 | from __future__ import absolute_import, print_function, unicode_literals 17 | 18 | import argparse 19 | import sys 20 | from io import open 21 | 22 | from chardet import __version__ 23 | from chardet.universaldetector import UniversalDetector 24 | 25 | 26 | def description_of(lines, name='stdin'): 27 | """ 28 | Return a string describing the probable encoding of a file or 29 | list of strings. 30 | 31 | :param lines: The lines to get the encoding of. 
32 | :type lines: Iterable of bytes 33 | :param name: Name of file or collection of lines 34 | :type name: str 35 | """ 36 | u = UniversalDetector() 37 | for line in lines: 38 | u.feed(line) 39 | u.close() 40 | result = u.result 41 | if result['encoding']: 42 | return '{0}: {1} with confidence {2}'.format(name, result['encoding'], 43 | result['confidence']) 44 | else: 45 | return '{0}: no result'.format(name) 46 | 47 | 48 | def main(argv=None): 49 | ''' 50 | Handles command line arguments and gets things started. 51 | 52 | :param argv: List of arguments, as if specified on the command-line. 53 | If None, ``sys.argv[1:]`` is used instead. 54 | :type argv: list of str 55 | ''' 56 | # Get command line arguments 57 | parser = argparse.ArgumentParser( 58 | description="Takes one or more file paths and reports their detected \ 59 | encodings", 60 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 61 | conflict_handler='resolve') 62 | parser.add_argument('input', 63 | help='File whose encoding we would like to determine.', 64 | type=argparse.FileType('rb'), nargs='*', 65 | default=[sys.stdin]) 66 | parser.add_argument('--version', action='version', 67 | version='%(prog)s {0}'.format(__version__)) 68 | args = parser.parse_args(argv) 69 | 70 | for f in args.input: 71 | if f.isatty(): 72 | print("You are running chardetect interactively. Press " + 73 | "CTRL-D twice at the start of a blank line to signal the " + 74 | "end of your input. If you want help, run chardetect " + 75 | "--help\n", file=sys.stderr) 76 | print(description_of(f, f.name)) 77 | 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /lib/cssutils/_fetchgae.py: -------------------------------------------------------------------------------- 1 | """GAE specific URL reading functions""" 2 | __all__ = ['_defaultFetcher'] 3 | __docformat__ = 'restructuredtext' 4 | __version__ = '$Id: tokenize2.py 1547 2008-12-10 20:42:26Z cthedot $' 5 | 6 | # raises ImportError of not on GAE 7 | from google.appengine.api import urlfetch 8 | import cgi 9 | import errorhandler 10 | import util 11 | 12 | log = errorhandler.ErrorHandler() 13 | 14 | def _defaultFetcher(url): 15 | """ 16 | uses GoogleAppEngine (GAE) 17 | fetch(url, payload=None, method=GET, headers={}, allow_truncated=False) 18 | 19 | Response 20 | content 21 | The body content of the response. 22 | content_was_truncated 23 | True if the allow_truncated parameter to fetch() was True and 24 | the response exceeded the maximum response size. In this case, 25 | the content attribute contains the truncated response. 26 | status_code 27 | The HTTP status code. 28 | headers 29 | The HTTP response headers, as a mapping of names to values. 30 | 31 | Exceptions 32 | exception InvalidURLError() 33 | The URL of the request was not a valid URL, or it used an 34 | unsupported method. Only http and https URLs are supported. 35 | exception DownloadError() 36 | There was an error retrieving the data. 37 | 38 | This exception is not raised if the server returns an HTTP 39 | error code: In that case, the response data comes back intact, 40 | including the error code. 41 | 42 | exception ResponseTooLargeError() 43 | The response data exceeded the maximum allowed size, and the 44 | allow_truncated parameter passed to fetch() was False. 
45 | """ 46 | #from google.appengine.api import urlfetch 47 | try: 48 | r = urlfetch.fetch(url, method=urlfetch.GET) 49 | except urlfetch.Error, e: 50 | log.warn(u'Error opening url=%r: %s' % (url, e), 51 | error=IOError) 52 | else: 53 | if r.status_code == 200: 54 | # find mimetype and encoding 55 | mimetype = 'application/octet-stream' 56 | try: 57 | mimetype, params = cgi.parse_header(r.headers['content-type']) 58 | encoding = params['charset'] 59 | except KeyError: 60 | encoding = None 61 | if mimetype != u'text/css': 62 | log.error(u'Expected "text/css" mime type for url %r but found: %r' % 63 | (url, mimetype), error=ValueError) 64 | return encoding, r.content 65 | else: 66 | # TODO: 301 etc 67 | log.warn(u'Error opening url=%r: HTTP status %s' % 68 | (url, r.status_code), error=IOError) 69 | -------------------------------------------------------------------------------- /lib/web/wsgiserver/ssl_builtin.py: -------------------------------------------------------------------------------- 1 | """A library for integrating Python's builtin ``ssl`` library with CherryPy. 2 | 3 | The ssl module must be importable for SSL functionality. 4 | 5 | To use this module, set ``CherryPyWSGIServer.ssl_adapter`` to an instance of 6 | ``BuiltinSSLAdapter``. 7 | """ 8 | 9 | try: 10 | import ssl 11 | except ImportError: 12 | ssl = None 13 | 14 | from cherrypy import wsgiserver 15 | 16 | 17 | class BuiltinSSLAdapter(wsgiserver.SSLAdapter): 18 | """A wrapper for integrating Python's builtin ssl module with CherryPy.""" 19 | 20 | certificate = None 21 | """The filename of the server SSL certificate.""" 22 | 23 | private_key = None 24 | """The filename of the server's private key file.""" 25 | 26 | def __init__(self, certificate, private_key, certificate_chain=None): 27 | if ssl is None: 28 | raise ImportError("You must install the ssl module to use HTTPS.") 29 | self.certificate = certificate 30 | self.private_key = private_key 31 | self.certificate_chain = certificate_chain 32 | 33 | def bind(self, sock): 34 | """Wrap and return the given socket.""" 35 | return sock 36 | 37 | def wrap(self, sock): 38 | """Wrap and return the given socket, plus WSGI environ entries.""" 39 | try: 40 | s = ssl.wrap_socket(sock, do_handshake_on_connect=True, 41 | server_side=True, certfile=self.certificate, 42 | keyfile=self.private_key, ssl_version=ssl.PROTOCOL_SSLv23) 43 | except ssl.SSLError, e: 44 | if e.errno == ssl.SSL_ERROR_EOF: 45 | # This is almost certainly due to the cherrypy engine 46 | # 'pinging' the socket to assert it's connectable; 47 | # the 'ping' isn't SSL. 48 | return None, {} 49 | elif e.errno == ssl.SSL_ERROR_SSL: 50 | if e.args[1].endswith('http request'): 51 | # The client is speaking HTTP to an HTTPS server. 
52 | raise wsgiserver.NoSSLError 53 | raise 54 | return s, self.get_environ(s) 55 | 56 | # TODO: fill this out more with mod ssl env 57 | def get_environ(self, sock): 58 | """Create WSGI environ entries to be merged into each request.""" 59 | cipher = sock.cipher() 60 | ssl_environ = { 61 | "wsgi.url_scheme": "https", 62 | "HTTPS": "on", 63 | 'SSL_PROTOCOL': cipher[1], 64 | 'SSL_CIPHER': cipher[0] 65 | ## SSL_VERSION_INTERFACE string The mod_ssl program version 66 | ## SSL_VERSION_LIBRARY string The OpenSSL program version 67 | } 68 | return ssl_environ 69 | 70 | def makefile(self, sock, mode='r', bufsize=-1): 71 | return wsgiserver.CP_fileobject(sock, mode, bufsize) 72 | 73 | -------------------------------------------------------------------------------- /books/Xueqiu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 4 | 5 | from base import BaseFeedBook 6 | import re, urllib 7 | from lib.urlopener import URLOpener 8 | from bs4 import BeautifulSoup 9 | import json 10 | from config import SHARE_FUCK_GFW_SRV 11 | 12 | __author__ = 'henryouly' 13 | 14 | def getBook(): 15 | return Xueqiu 16 | 17 | class Xueqiu(BaseFeedBook): 18 | title = u'雪球今日话题' 19 | description = u'雪球是一个社交投资网络,「今日话题」是雪球用户每日发布的投资交流精选。' 20 | language = 'zh-cn' 21 | feed_encoding = "utf-8" 22 | page_encoding = "utf-8" 23 | mastheadfile = "mh_xueqiu.gif" 24 | coverfile = "cv_xueqiu.jpg" 25 | oldest_article = 1 26 | fulltext_by_readability = False 27 | 28 | remove_tags = ['meta'] 29 | remove_attrs = ['xmlns'] 30 | 31 | feeds = [ (u'今日话题', SHARE_FUCK_GFW_SRV % urllib.quote('http://xueqiu.com/hots/topic/rss'), True) ] 32 | 33 | def url4forwarder(self, url): 34 | #生成经过转发器的URL 35 | return SHARE_FUCK_GFW_SRV % urllib.quote(url) 36 | 37 | def fetcharticle(self, url, opener, decoder): 38 | #链接网页获取一篇文章 39 | return BaseFeedBook.fetcharticle(self, self.url4forwarder(url), opener, decoder) 40 | 41 | def soupbeforeimage(self, soup): 42 | for img in soup.find_all('img'): 43 | imgurl = img['src'] if 'src' in img.attrs else '' 44 | if imgurl.startswith('http'): 45 | img['src'] = self.url4forwarder(imgurl) 46 | 47 | def postprocess(self, content): 48 | pn = re.compile(ur'本话题在雪球有.*?条讨论,点击查看。', re.I) 49 | comment = '' 50 | mt = pn.search(content) 51 | url = mt.group(1) if mt else None 52 | if url: 53 | opener = URLOpener(url, timeout=self.timeout) 54 | result = opener.open(url) 55 | if result.status_code == 200 and result.content: 56 | if self.feed_encoding: 57 | try: 58 | comment = result.content.decode(self.feed_encoding) 59 | except UnicodeDecodeError: 60 | return content 61 | 62 | pn = re.compile(r'SNB.data.goodComments\ =\ ({.*?});', re.S | re.I) 63 | mt = pn.search(comment) 64 | if mt: 65 | comment_json = mt.group(1) 66 | j = json.loads(comment_json) 67 | soup = BeautifulSoup(content, "lxml") 68 | for c in j['comments']: 69 | u = c['user']['screen_name'] 70 | t = BeautifulSoup('
<p>@%s:%s</p>
' % (u, c['text'])) 71 | for img in t.find_all('img', alt=True): 72 | img.replace_with(t.new_string(img['alt'])) 73 | soup.html.body.append(t.p) 74 | 75 | content = unicode(soup) 76 | return content 77 | -------------------------------------------------------------------------------- /lib/calibre/utils/img.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | 4 | __license__ = 'GPL v3' 5 | __copyright__ = '2010, Kovid Goyal ' 6 | __docformat__ = 'restructuredtext en' 7 | 8 | import os 9 | from PIL import Image 10 | 11 | from calibre.utils.imghdr import what 12 | from StringIO import StringIO 13 | 14 | def identify_data(data): 15 | ''' 16 | Identify the image in data. Returns a 3-tuple 17 | (width, height, format) 18 | or raises an Exception if data is not an image. 19 | ''' 20 | if not isinstance(data, StringIO): 21 | data = StringIO(data) 22 | img = Image.open(data) 23 | width, height = img.size 24 | fmt = img.format 25 | return (width, height, fmt) 26 | 27 | 28 | def rescale_image(data, maxsizeb=4000000, dimen=None, 29 | png2jpg=False, graying=True, reduceto=(600,800)): 30 | ''' 31 | Convert image setting all transparent pixels to white and changing format 32 | to JPEG. Ensure the resultant image has a byte size less than 33 | maxsizeb. 34 | 35 | If dimen is not None, generate a thumbnail of 36 | width=dimen, height=dimen or width, height = dimen (depending on the type 37 | of dimen) 38 | 39 | Returns the image as a bytestring. 40 | ''' 41 | if not isinstance(data, StringIO): 42 | data = StringIO(data) 43 | img = Image.open(data) 44 | width, height = img.size 45 | fmt = img.format 46 | if graying and img.mode != "L": 47 | img = img.convert("L") 48 | 49 | reducewidth, reduceheight = reduceto 50 | 51 | if dimen is not None: 52 | if hasattr(dimen, '__len__'): 53 | width, height = dimen 54 | else: 55 | width = height = dimen 56 | img.thumbnail((width, height)) 57 | if png2jpg and fmt == 'PNG': 58 | fmt = 'JPEG' 59 | data = StringIO() 60 | img.save(data, fmt) 61 | elif width > reducewidth or height > reduceheight: 62 | ratio = min(float(reducewidth)/float(width), float(reduceheight)/float(height)) 63 | img = img.resize((int(width*ratio), int(height*ratio))) 64 | if png2jpg and fmt == 'PNG': 65 | fmt = 'JPEG' 66 | data = StringIO() 67 | img.save(data, fmt) 68 | elif png2jpg and fmt == 'PNG': 69 | data = StringIO() 70 | img.save(data, 'JPEG') 71 | else: 72 | data = StringIO() 73 | img.save(data, fmt) 74 | 75 | return data.getvalue() 76 | 77 | def mobify_image(data): 78 | 'Convert PNG images to GIF as the idiotic Kindle cannot display some PNG' 79 | fmt = what(None, data) 80 | 81 | if fmt == 'png': 82 | if not isinstance(data, StringIO): 83 | data = StringIO(data) 84 | im = Image.open(data) 85 | data = StringIO() 86 | im.save(data, 'GIF') 87 | data = data.getvalue() 88 | return data 89 | -------------------------------------------------------------------------------- /lib/dateutil/zoneinfo/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2005 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 
6 | """ 7 | from dateutil.tz import tzfile 8 | from tarfile import TarFile 9 | import os 10 | 11 | __author__ = "Gustavo Niemeyer " 12 | __license__ = "PSF License" 13 | 14 | __all__ = ["setcachesize", "gettz", "rebuild"] 15 | 16 | CACHE = [] 17 | CACHESIZE = 10 18 | 19 | class tzfile(tzfile): 20 | def __reduce__(self): 21 | return (gettz, (self._filename,)) 22 | 23 | def getzoneinfofile(): 24 | filenames = os.listdir(os.path.join(os.path.dirname(__file__))) 25 | filenames.sort() 26 | filenames.reverse() 27 | for entry in filenames: 28 | if entry.startswith("zoneinfo") and ".tar." in entry: 29 | return os.path.join(os.path.dirname(__file__), entry) 30 | return None 31 | 32 | ZONEINFOFILE = getzoneinfofile() 33 | 34 | del getzoneinfofile 35 | 36 | def setcachesize(size): 37 | global CACHESIZE, CACHE 38 | CACHESIZE = size 39 | del CACHE[size:] 40 | 41 | def gettz(name): 42 | tzinfo = None 43 | if ZONEINFOFILE: 44 | for cachedname, tzinfo in CACHE: 45 | if cachedname == name: 46 | break 47 | else: 48 | tf = TarFile.open(ZONEINFOFILE) 49 | try: 50 | zonefile = tf.extractfile(name) 51 | except KeyError: 52 | tzinfo = None 53 | else: 54 | tzinfo = tzfile(zonefile) 55 | tf.close() 56 | CACHE.insert(0, (name, tzinfo)) 57 | del CACHE[CACHESIZE:] 58 | return tzinfo 59 | 60 | def rebuild(filename, tag=None, format="gz"): 61 | import tempfile, shutil 62 | tmpdir = tempfile.mkdtemp() 63 | zonedir = os.path.join(tmpdir, "zoneinfo") 64 | moduledir = os.path.dirname(__file__) 65 | if tag: tag = "-"+tag 66 | targetname = "zoneinfo%s.tar.%s" % (tag, format) 67 | try: 68 | tf = TarFile.open(filename) 69 | for name in tf.getnames(): 70 | if not (name.endswith(".sh") or 71 | name.endswith(".tab") or 72 | name == "leapseconds"): 73 | tf.extract(name, tmpdir) 74 | filepath = os.path.join(tmpdir, name) 75 | os.system("zic -d %s %s" % (zonedir, filepath)) 76 | tf.close() 77 | target = os.path.join(moduledir, targetname) 78 | for entry in os.listdir(moduledir): 79 | if entry.startswith("zoneinfo") and ".tar." in entry: 80 | os.unlink(os.path.join(moduledir, entry)) 81 | tf = TarFile.open(target, "w:%s" % format) 82 | for entry in os.listdir(zonedir): 83 | entrypath = os.path.join(zonedir, entry) 84 | tf.add(entrypath, entry) 85 | tf.close() 86 | finally: 87 | shutil.rmtree(tmpdir) 88 | -------------------------------------------------------------------------------- /lib/chardet/utf8prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | from .charsetprober import CharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .mbcssm import UTF8SMModel 32 | 33 | ONE_CHAR_PROB = 0.5 34 | 35 | 36 | class UTF8Prober(CharSetProber): 37 | def __init__(self): 38 | CharSetProber.__init__(self) 39 | self._mCodingSM = CodingStateMachine(UTF8SMModel) 40 | self.reset() 41 | 42 | def reset(self): 43 | CharSetProber.reset(self) 44 | self._mCodingSM.reset() 45 | self._mNumOfMBChar = 0 46 | 47 | def get_charset_name(self): 48 | return "utf-8" 49 | 50 | def feed(self, aBuf): 51 | for c in aBuf: 52 | codingState = self._mCodingSM.next_state(c) 53 | if codingState == constants.eError: 54 | self._mState = constants.eNotMe 55 | break 56 | elif codingState == constants.eItsMe: 57 | self._mState = constants.eFoundIt 58 | break 59 | elif codingState == constants.eStart: 60 | if self._mCodingSM.get_current_charlen() >= 2: 61 | self._mNumOfMBChar += 1 62 | 63 | if self.get_state() == constants.eDetecting: 64 | if self.get_confidence() > constants.SHORTCUT_THRESHOLD: 65 | self._mState = constants.eFoundIt 66 | 67 | return self.get_state() 68 | 69 | def get_confidence(self): 70 | unlike = 0.99 71 | if self._mNumOfMBChar < 6: 72 | for i in range(0, self._mNumOfMBChar): 73 | unlike = unlike * ONE_CHAR_PROB 74 | return 1.0 - unlike 75 | else: 76 | return unlike 77 | -------------------------------------------------------------------------------- /books/FolhaDeSaopaulo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | 4 | import re 5 | from base import BaseFeedBook 6 | 7 | def getBook(): 8 | return FolhaDeSaopaulo 9 | 10 | class FolhaDeSaopaulo(BaseFeedBook): 11 | title = 'Folha' 12 | description = 'Folha de Sao paulo' 13 | language = 'pt-br' 14 | feed_encoding = "ISO-8859-1" 15 | page_encoding = "ISO-8859-1" 16 | mastheadfile = "mh_folha.gif" 17 | coverfile = 'cv_folha.jpg' 18 | oldest_article = 1 19 | fulltext_by_readability = False 20 | fulltext_by_instapaper = False 21 | host = r'http://www.folha.uol.com.br/' 22 | keep_only_tags = [dict(name='article', attrs={'class':'news'})] 23 | remove_classes = ['toolbar','advertising'] 24 | 25 | feeds = [ 26 | (u'Em cima da hora', u'http://feeds.folha.uol.com.br/emcimadahora/rss091.xml'), 27 | (u'Cotidiano', u'http://feeds.folha.uol.com.br/folha/cotidiano/rss091.xml'), 28 | (u'Brasil', u'http://feeds.folha.uol.com.br/folha/brasil/rss091.xml'), 29 | (u'Mundo', u'http://feeds.folha.uol.com.br/mundo/rss091.xml'), 30 | (u'Poder', u'http://feeds.folha.uol.com.br/poder/rss091.xml'), 31 | (u'Mercado', u'http://feeds.folha.uol.com.br/folha/dinheiro/rss091.xml'), 32 | (u'Saber', u'http://feeds.folha.uol.com.br/folha/educacao/rss091.xml'), 33 | (u'Tec', u'http://feeds.folha.uol.com.br/folha/informatica/rss091.xml'), 34 | (u'Ilustrada', u'http://feeds.folha.uol.com.br/folha/ilustrada/rss091.xml'), 35 | #(u'Ambiente', u'http://feeds.folha.uol.com.br/ambiente/rss091.xml'), 36 | #(u'Bichos', u'http://feeds.folha.uol.com.br/bichos/rss091.xml'), 37 | (u'Ciencia', u'http://feeds.folha.uol.com.br/ciencia/rss091.xml'), 38 | (u'Equilibrio e Saude', 
u'http://feeds.folha.uol.com.br/equilibrioesaude/rss091.xml'), 39 | #(u'Turismo', u'http://feeds.folha.uol.com.br/folha/turismo/rss091.xml'), 40 | #(u'Esporte', u'http://feeds.folha.uol.com.br/folha/esporte/rss091.xml'), 41 | ] 42 | 43 | #def fetcharticle(self, url, opener, decoder): 44 | # url = 'http://tools.folha.com.br/print?url=' + url 45 | # return BaseFeedBook.fetcharticle(self, url, opener, decoder) 46 | 47 | def processtitle(self, title): 48 | pn1 = re.compile(r'^(.*?) - \d\d/\d\d/\d\d\d\d - .*? - (Folha de S\.Paulo|F5)$', re.I) 49 | pn2 = re.compile(r'^Folha de S\.Paulo - .*? - .*? - (.*?) - \d\d/\d\d/\d\d\d\d$', re.I) 50 | mt1 = pn1.match(title) 51 | if mt1: 52 | return mt1.group(1) 53 | else: 54 | mt2 = pn2.match(title) 55 | if mt2: 56 | return mt2.group(1) 57 | 58 | if title.endswith('Folha de S.Paulo'): 59 | title = title.replace('Folha de S.Paulo', '') 60 | 61 | return title 62 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/mobi/writer8/header.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import (unicode_literals, division, absolute_import, 4 | print_function) 5 | 6 | __license__ = 'GPL v3' 7 | __copyright__ = '2012, Kovid Goyal ' 8 | __docformat__ = 'restructuredtext en' 9 | 10 | import random 11 | from io import BytesIO 12 | from collections import OrderedDict 13 | from struct import pack 14 | 15 | from calibre.ebooks.mobi.utils import align_block 16 | 17 | NULL = 0xffffffff 18 | zeroes = lambda x: b'\0'*x 19 | nulls = lambda x: b'\xff'*x 20 | short = lambda x: pack(b'>H', x) 21 | 22 | class Header(OrderedDict): 23 | 24 | HEADER_NAME = b'' 25 | 26 | DEFINITION = ''' 27 | ''' 28 | 29 | ALIGN_BLOCK = False 30 | POSITIONS = {} # Mapping of position field to field whose position should 31 | # be stored in the position field 32 | SHORT_FIELDS = set() 33 | 34 | def __init__(self): 35 | OrderedDict.__init__(self) 36 | 37 | for line in self.DEFINITION.splitlines(): 38 | line = line.strip() 39 | if not line or line.startswith('#'): 40 | continue 41 | name, val = [x.strip() for x in line.partition('=')[0::2]] 42 | if val: 43 | val = eval(val, {'zeroes':zeroes, 'NULL':NULL, 'DYN':None, 44 | 'nulls':nulls, 'short':short, 'random':random}) 45 | else: 46 | val = 0 47 | if name in self: 48 | raise ValueError('Duplicate field in definition: %r'%name) 49 | self[name] = val 50 | 51 | @property 52 | def dynamic_fields(self): 53 | return tuple(k for k, v in self.iteritems() if v is None) 54 | 55 | def __call__(self, **kwargs): 56 | positions = {} 57 | for name, val in kwargs.iteritems(): 58 | if name not in self: 59 | raise KeyError('Not a valid header field: %r'%name) 60 | self[name] = val 61 | 62 | buf = BytesIO() 63 | buf.write(bytes(self.HEADER_NAME)) 64 | for name, val in self.iteritems(): 65 | val = self.format_value(name, val) 66 | positions[name] = buf.tell() 67 | if val is None: 68 | raise ValueError('Dynamic field %r not set'%name) 69 | if isinstance(val, (int, long)): 70 | fmt = b'H' if name in self.SHORT_FIELDS else b'I' 71 | val = pack(b'>'+fmt, val) 72 | buf.write(val) 73 | 74 | for pos_field, field in self.POSITIONS.iteritems(): 75 | buf.seek(positions[pos_field]) 76 | buf.write(pack(b'>I', positions[field])) 77 | 78 | ans = buf.getvalue() 79 | if self.ALIGN_BLOCK: 80 | ans = align_block(ans) 81 | return ans 82 | 83 | def format_value(self, name, val): 84 | return val 85 | 86 | 87 | 88 
| -------------------------------------------------------------------------------- /books/wsj.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import re 4 | from base import BaseFeedBook 5 | 6 | def getBook(): 7 | return WSJ 8 | 9 | class WSJ(BaseFeedBook): 10 | title = u'华尔街日报' 11 | description = u'每天最重要的商业财经要闻及金融市场综述' 12 | language = 'zh-cn' 13 | feed_encoding = "utf-8" 14 | page_encoding = "GBK" 15 | mastheadfile = "mh_wsj.gif" 16 | coverfile = 'cv_wsj.jpg' 17 | oldest_article = 1 18 | network_timeout = 60 19 | fulltext_by_readability = False 20 | fulltext_by_instapaper = False 21 | host = r'http://cn.wsj.com/gb/' 22 | feeds = [ 23 | (u'要闻','http://cn.wsj.com.feedsportal.com/c/33121/f/538760/index.rss'), 24 | ] 25 | keep_only_tags = [dict(name='div', attrs={'id':'A'}),] 26 | 27 | def fetcharticle(self, url, opener, decoder): 28 | result = opener.open(url) 29 | status_code, content = result.status_code, result.content 30 | if status_code != 200 or not content: 31 | self.log.warn('fetch article failed(%d):%s.' % (status_code,url)) 32 | return None 33 | 34 | if self.page_encoding: 35 | try: 36 | content = content.decode('utf-8') 37 | except UnicodeDecodeError: 38 | content = decoder.decode(content,url,result.headers) 39 | else: 40 | content = decoder.decode(content,url,result.headers) 41 | 42 | m = re.search(r'', content) 43 | if m: 44 | newurl = m.group(1) 45 | result = opener.open(newurl) 46 | status_code, content = result.status_code, result.content 47 | if status_code != 200 or not content: 48 | self.log.warn('fetch article failed(%d):%s.' % (status_code,newurl)) 49 | return None 50 | 51 | if self.page_encoding: 52 | try: 53 | content = content.decode(self.page_encoding) 54 | except UnicodeDecodeError: 55 | content = decoder.decode(content,newurl,result.headers) 56 | else: 57 | content = decoder.decode(content,newurl,result.headers) 58 | 59 | return content 60 | 61 | def processtitle(self, title): 62 | title = BaseFeedBook.processtitle(self,title) 63 | if title.endswith(u'-华尔街日报'): 64 | return title.replace(u'-华尔街日报','') 65 | else: 66 | return title 67 | 68 | def soupprocessex(self, soup): 69 | ' 将首字div变成b ' 70 | content = soup.find('div',attrs={'id':'A'}) 71 | if content: 72 | firstdiv = content.find('div') 73 | if firstdiv and firstdiv.string and len(firstdiv.string) == 1: 74 | b = soup.new_tag('b') 75 | b.string = firstdiv.string 76 | firstdiv.replace_with(b) 77 | -------------------------------------------------------------------------------- /apps/module_front.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | #A GAE web application to aggregate rss and send it to your kindle. 
4 | #Visit https://github.com/cdhigh/KindleEar for the latest version 5 | #Author: 6 | # cdhigh 7 | #Contributors: 8 | # rexdf 9 | 10 | __Author__ = "cdhigh" 11 | 12 | import os, datetime, logging, __builtin__, hashlib, time 13 | 14 | # for debug 15 | # 本地启动调试服务器:python.exe dev_appserver.py c:\kindleear 16 | IsRunInLocal = (os.environ.get('SERVER_SOFTWARE', '').startswith('Development')) 17 | log = logging.getLogger() 18 | __builtin__.__dict__['default_log'] = log 19 | __builtin__.__dict__['IsRunInLocal'] = IsRunInLocal 20 | 21 | supported_languages = ['en','zh-cn','tr-tr'] #不支持的语种则使用第一个语言 22 | #gettext.install('lang', 'i18n', unicode=True) #for calibre startup 23 | 24 | class Main_Var: 25 | urls = [] 26 | session = None 27 | jjenv = None 28 | supported_languages = None 29 | log = None 30 | __Version__ = None 31 | 32 | __builtin__.__dict__['main'] = Main_Var 33 | main.supported_languages = supported_languages 34 | main.log = log 35 | main.__Version__ = __Version__ 36 | log.setLevel(logging.INFO if IsRunInLocal else logging.WARN) 37 | 38 | import web 39 | import jinja2 40 | #from google.appengine.api import mail 41 | #from google.appengine.api import taskqueue 42 | from google.appengine.api import memcache 43 | 44 | from lib.memcachestore import MemcacheStore 45 | from books import BookClasses 46 | 47 | from apps.View import * 48 | 49 | from apps.dbModels import Book 50 | from apps.BaseHandler import BaseHandler 51 | from apps.utils import fix_filesizeformat 52 | 53 | #reload(sys) 54 | #sys.setdefaultencoding('utf-8') 55 | 56 | for book in BookClasses(): #添加内置书籍 57 | if memcache.get(book.title): #使用memcache加速 58 | continue 59 | b = Book.all().filter("title = ", book.title).get() 60 | if not b: 61 | b = Book(title=book.title, description=book.description, builtin=True, 62 | needs_subscription=book.needs_subscription, separate=False) 63 | b.put() 64 | memcache.add(book.title, book.description, 86400) 65 | 66 | class Test(BaseHandler): 67 | def GET(self): 68 | s = '' 69 | for d in os.environ: 70 | s += "
<br/>" + str(d).rjust(28) + " | " + str(os.environ[d]) + "<br/>
" 71 | return s 72 | 73 | main.urls += ["/test", "Test",] 74 | 75 | application = web.application(main.urls, globals()) 76 | store = MemcacheStore(memcache) 77 | session = web.session.Session(application, store, initializer={'username':'', 'login':0, 'lang':'', 'pocket_request_token':''}) 78 | jjenv = jinja2.Environment(loader=jinja2.FileSystemLoader('templates'), 79 | extensions=["jinja2.ext.do",'jinja2.ext.i18n']) 80 | jjenv.filters['filesizeformat'] = fix_filesizeformat 81 | 82 | app = application.wsgifunc() 83 | 84 | web.config.debug = IsRunInLocal 85 | 86 | main.session = session 87 | main.jjenv = jjenv -------------------------------------------------------------------------------- /lib/dateutil/easter.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2003-2007 Gustavo Niemeyer 3 | 4 | This module offers extensions to the standard python 2.3+ 5 | datetime module. 6 | """ 7 | __author__ = "Gustavo Niemeyer " 8 | __license__ = "PSF License" 9 | 10 | import datetime 11 | 12 | __all__ = ["easter", "EASTER_JULIAN", "EASTER_ORTHODOX", "EASTER_WESTERN"] 13 | 14 | EASTER_JULIAN = 1 15 | EASTER_ORTHODOX = 2 16 | EASTER_WESTERN = 3 17 | 18 | def easter(year, method=EASTER_WESTERN): 19 | """ 20 | This method was ported from the work done by GM Arts, 21 | on top of the algorithm by Claus Tondering, which was 22 | based in part on the algorithm of Ouding (1940), as 23 | quoted in "Explanatory Supplement to the Astronomical 24 | Almanac", P. Kenneth Seidelmann, editor. 25 | 26 | This algorithm implements three different easter 27 | calculation methods: 28 | 29 | 1 - Original calculation in Julian calendar, valid in 30 | dates after 326 AD 31 | 2 - Original method, with date converted to Gregorian 32 | calendar, valid in years 1583 to 4099 33 | 3 - Revised method, in Gregorian calendar, valid in 34 | years 1583 to 4099 as well 35 | 36 | These methods are represented by the constants: 37 | 38 | EASTER_JULIAN = 1 39 | EASTER_ORTHODOX = 2 40 | EASTER_WESTERN = 3 41 | 42 | The default method is method 3. 
43 | 44 | More about the algorithm may be found at: 45 | 46 | http://users.chariot.net.au/~gmarts/eastalg.htm 47 | 48 | and 49 | 50 | http://www.tondering.dk/claus/calendar.html 51 | 52 | """ 53 | 54 | if not (1 <= method <= 3): 55 | raise ValueError, "invalid method" 56 | 57 | # g - Golden year - 1 58 | # c - Century 59 | # h - (23 - Epact) mod 30 60 | # i - Number of days from March 21 to Paschal Full Moon 61 | # j - Weekday for PFM (0=Sunday, etc) 62 | # p - Number of days from March 21 to Sunday on or before PFM 63 | # (-6 to 28 methods 1 & 3, to 56 for method 2) 64 | # e - Extra days to add for method 2 (converting Julian 65 | # date to Gregorian date) 66 | 67 | y = year 68 | g = y % 19 69 | e = 0 70 | if method < 3: 71 | # Old method 72 | i = (19*g+15)%30 73 | j = (y+y//4+i)%7 74 | if method == 2: 75 | # Extra dates to convert Julian to Gregorian date 76 | e = 10 77 | if y > 1600: 78 | e = e+y//100-16-(y//100-16)//4 79 | else: 80 | # New method 81 | c = y//100 82 | h = (c-c//4-(8*c+13)//25+19*g+15)%30 83 | i = h-(h//28)*(1-(h//28)*(29//(h+1))*((21-g)//11)) 84 | j = (y+y//4+i+2-c+c//4)%7 85 | 86 | # p can be from -6 to 56 corresponding to dates 22 March to 23 May 87 | # (later dates apply to method 2, although 23 May never actually occurs) 88 | p = i-j+e 89 | d = 1+(p+27+(p+6)//40)%31 90 | m = 3+(p+26)//30 91 | return datetime.date(int(y),int(m),int(d)) 92 | 93 | -------------------------------------------------------------------------------- /templates/admin.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block css -%} 3 | 10 | {% endblock -%} 11 | {% block content -%} 12 |
13 |
14 | {{_("Change Password")}} 15 | {% if chpwdtips -%} 16 |

{{chpwdtips}}

17 | {% endif -%} 18 |
19 | 20 | 21 |
22 |
23 | 24 | 25 |
26 |
27 | 28 | 29 |
30 |
31 | 32 |
33 |
34 |
35 | {% if nickname == 'admin' -%} 36 |
37 |
38 | {{_("Add Account")}} 39 | {% if actips -%} 40 |

{{actips}}

41 | {% endif -%} 42 |

{{_("Note : No supports many accounts for limit of free account of GAE.")}}

43 |
44 | 45 | 46 |
47 |
48 | 49 | 50 |
51 |
52 | 53 | 54 |
55 |
56 | 57 |
58 |
59 |
60 |

{{_("Accounts")}}

61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | {% for u in users -%} 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | {% endfor -%} 78 | 79 |
{{_("No.")}}{{_("Username")}}{{_("Enable")}}{{_("Operation")}}
{{loop.index}}{{u.name}}{{u.enable_send}}{{_("Change")}}{{_("Delete")}}
80 | {% endif -%} 81 | {% endblock -%} 82 | -------------------------------------------------------------------------------- /books/Qiushibaike.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import re 4 | from base import * 5 | 6 | def getBook(): 7 | return Qiushibaike 8 | 9 | class Qiushibaike(BaseFeedBook): 10 | title = u'糗事百科' 11 | description = u'快乐就是要建立在别人的痛苦之上,额外赠送哈哈.MX' 12 | language = 'zh-cn' 13 | feed_encoding = "utf-8" 14 | page_encoding = "utf-8" 15 | mastheadfile = "mh_qiushibaike.gif" 16 | coverfile = "cv_qiushibaike.jpg" 17 | network_timeout = 30 18 | keep_only_tags = [dict(name='div', attrs={'class':'main'}),] # qiushibaike 19 | #dict(name='div',attrs={'class':'block joke-item'}), # haha.mx 20 | # ] 21 | remove_tags = [] 22 | remove_ids = ['bdshare',] 23 | remove_classes = ['sharebox','comment','share','up','down', #qiushibaike 24 | 'backtop','close','author','col2','sponsor','pagebar', #qiushibaike 25 | 'seconday-nav fl','toolkit fr','fr','info clearfix', # haha.mx 26 | 'joke-item-footer','pagination','pos-ab','praise-box',] # haha.mx 27 | remove_attrs = [] 28 | 29 | feeds = [ 30 | #(u'8小时最热', r'http://www.qiushibaike.com'), 31 | (u'24小时 Page1', r'http://www.qiushibaike.com/hot'), 32 | (u'24小时 Page2', r'http://www.qiushibaike.com/hot/page/2'), 33 | #(u'哈哈MX', r'http://www.haha.mx/'), 34 | (u'哈哈.MX Page1', r'http://www.haha.mx/good/day'), 35 | (u'哈哈.MX Page2', r'http://www.haha.mx/good/day/2'), 36 | ] 37 | 38 | def processtitle(self, title): 39 | title = re.sub(r'(\n)+', ' ', title) 40 | title = title.replace(u' :: 糗事百科 :: 快乐减压 健康生活', u'') 41 | return title.replace(u'——分享所有好笑的事情', u'') 42 | 43 | def soupbeforeimage(self, soup): 44 | if soup.html.head.title.string.find(u'哈哈') > 0: 45 | for img in list(soup.find_all('img')): #HAHA.MX切换为大图链接 46 | src = img['src'] 47 | if src.find(r'/small/') > 0: 48 | img['src'] = src.replace(r'/small/', r'/big/') 49 | 50 | def soupprocessex(self, soup): 51 | if u'小时' in soup.html.head.title.string: #qiushibaike 52 | for article in soup.find_all("a", attrs={"href":re.compile(r'^/article')}): 53 | p = soup.new_tag("p", style='color:grey;text-decoration:underline;') 54 | p.string = string_of_tag(article.string) 55 | article.replace_with(p) 56 | 57 | first = True 58 | for detail in soup.find_all("div", attrs={"class":"content"}): 59 | if not first: 60 | hr = soup.new_tag("hr") 61 | detail.insert(0, hr) 62 | first = False 63 | 64 | if soup.html.head.title.string.startswith(u'哈哈'): #haha.mx 65 | first = True 66 | for item in soup.find_all("div", attrs={"class":"block joke-item"}): 67 | if not first: 68 | hr = soup.new_tag("hr") 69 | item.insert(0, hr) 70 | first = False 71 | -------------------------------------------------------------------------------- /readme_EN.md: -------------------------------------------------------------------------------- 1 | #Brief Introduction 2 | KindleEar is a web application to aggregate RSS for generating periodical mobi/epub file with images and send it to your kindle or your email automatically. 3 | 4 | ## The features included: 5 | * Support calibre-like recipe file to aggress unlimited RSS or webpage. 6 | * Support custom RSS, only title/url are needed, don't need to program. 7 | * With account management, support several kindles. 8 | * Generate periodical mobi/epub file with images. 9 | * Deliver news feeds to your kindle dialy automatically. 10 | * Website support multi-languages. 
11 | * Powerful and convenient mail-transferring service. 12 | * Integration with Evernote/Pocket/Instapaper. 13 | 14 | #Deployment 15 | 1. [Create a Google account](https://accounts.google.com/SignUp) and [Turn on Access for less secure apps](https://www.google.com/settings/security/lesssecureapps). 16 | 17 | 2. [Create an application](https://console.developers.google.com/project). 18 | 19 | 3. Install [Python 2.7.x](https://www.python.org/downloads/). 20 | 21 | 4. Install the [GAE SDK](https://cloud.google.com/appengine/downloads). 22 | 23 | 5. [Download KindleEar](https://github.com/cdhigh/KindleEar/archive/master.zip) and uncompress it into a directory, for example *c:\kindleear*. 24 | 25 | 6. Modify some variables in app.yaml/module-worker.yaml/config.py. 26 | 27 | File | To be changed | Description | 28 | -------------------|-------------|-----------------------| 29 | app.yaml | application | Your Application Id | 30 | module-worker.yaml | application | Your Application Id | 31 | config.py | SRC_EMAIL | Your Gmail Address | 32 | config.py | DOMAIN | appid@appspot.com | 33 | config.py | TIMEZONE | Your timezone | 34 | 35 | 36 | 7. Execute two commands in the GAE SDK directory (default is *C:\Program Files\Google\google_appengine*): 37 | * `c:\python27\python.exe appcfg.py update KindleEarFolder\app.yaml KindleEarFolder\module-worker.yaml` 38 | * `c:\python27\python.exe appcfg.py update KindleEarFolder` 39 | 40 | 8. When the upload finishes, you can open the website *'http://appid.appspot.com'* (appid is the name of your application). 41 | For example the author's site: 42 | **The initial username is 'admin', the password is 'admin'; please change the password immediately after the first login.** 43 | 44 | 9. More details can be found in the [FAQ](http://htmlpreview.github.io/?https://github.com/cdhigh/KindleEar/blob/master/static/faq.html). 45 | 46 | #Deployment simplified 47 | If you don't want to install the GAE SDK and Python, there is another option. 48 | 49 | 1. [Download KindleEar](https://github.com/cdhigh/KindleEar/archive/master.zip) and uncompress it (change the folder name to 'KindleEar'). 50 | 2. [Download KindleEar-Uploader](https://drive.google.com/folderview?id=0ByRickMo9V_XNlJITzhYM3JOYW8&usp=sharing) and unzip it. 51 | 3. Put the KindleEar folder into the Uploader directory, then double-click uploader.bat to start the deployment process. 52 | 53 | #License 54 | KindleEar is licensed under the [AGPLv3](http://www.gnu.org/licenses/agpl-3.0.html) license. 55 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/pykakasi/j2h.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # j2h.py 3 | # 4 | # Copyright 2011 Hiroshi Miura 5 | # 6 | # Original Copyright: 7 | # * KAKASI (Kanji Kana Simple inversion program) 8 | # * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $ 9 | # * Copyright (C) 1992 10 | # * Hironobu Takahashi (takahasi@tiny.or.jp) 11 | # * 12 | # * This program is free software; you can redistribute it and/or modify 13 | # * it under the terms of the GNU General Public License as published by 14 | # * the Free Software Foundation; either versions 2, or (at your option) 15 | # * any later version. 16 | # * 17 | # * This program is distributed in the hope that it will be useful 18 | # * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # * GNU General Public License for more details.
21 | # * 22 | # */ 23 | 24 | from calibre.ebooks.unihandecode.pykakasi.jisyo import jisyo 25 | import re 26 | 27 | class J2H (object): 28 | 29 | kanwa = None 30 | 31 | cl_table = [ 32 | "","aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", "aiueow", 33 | "aiueow", "aiueow", "aiueow", "k", "g", "k", "g", "k", "g", "k", "g", "k", 34 | "g", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "s", "zj", "t", "d", "tc", 35 | "d", "aiueokstchgzjfdbpw", "t", "d", "t", "d", "t", "d", "n", "n", "n", "n", 36 | "n", "h", "b", "p", "h", "b", "p", "hf", "b", "p", "h", "b", "p", "h", "b", 37 | "p", "m", "m", "m", "m", "m", "y", "y", "y", "y", "y", "y", "rl", "rl", 38 | "rl", "rl", "rl", "wiueo", "wiueo", "wiueo", "wiueo", "w", "n", "v", "k", 39 | "k", "", "", "", "", "", "", "", "", ""] 40 | 41 | def __init__(self): 42 | self.kanwa = jisyo() 43 | 44 | def isKanji(self, c): 45 | return ( 0x3400 <= ord(c) and ord(c) < 0xfa2e) 46 | 47 | def isCletter(self, l, c): 48 | if (ord(u"ぁ") <= ord(c) and ord(c) <= 0x309f) and ( l in self.cl_table[ord(c) - ord(u"ぁ")-1]): 49 | return True 50 | return False 51 | 52 | def itaiji_conv(self, text): 53 | r = [] 54 | for c in text: 55 | if c in self.kanwa.itaijidict: 56 | r.append(c) 57 | for c in r: 58 | text = re.sub(c, self.kanwa.itaijidict[c], text) 59 | return text 60 | 61 | def convert(self, text): 62 | max_len = 0 63 | Hstr = "" 64 | table = self.kanwa.load_jisyo(text[0]) 65 | if table is None: 66 | return ("", 0) 67 | for (k,v) in table.iteritems(): 68 | length = len(k) 69 | if len(text) >= length: 70 | if text.startswith(k): 71 | for (yomi, tail) in v: 72 | if tail is '': 73 | if max_len < length: 74 | Hstr = yomi 75 | max_len = length 76 | elif max_len < length+1 and len(text) > length and self.isCletter(tail, text[length]): 77 | Hstr=''.join([yomi,text[length]]) 78 | max_len = length+1 79 | return (Hstr, max_len) 80 | -------------------------------------------------------------------------------- /lib/cssutils/css/csscomment.py: -------------------------------------------------------------------------------- 1 | """CSSComment is not defined in DOM Level 2 at all but a cssutils defined 2 | class only. 3 | 4 | Implements CSSRule which is also extended for a CSSComment rule type. 5 | """ 6 | __all__ = ['CSSComment'] 7 | __docformat__ = 'restructuredtext' 8 | __version__ = '$Id$' 9 | 10 | import cssrule 11 | import cssutils 12 | import xml.dom 13 | 14 | class CSSComment(cssrule.CSSRule): 15 | """ 16 | Represents a CSS comment (cssutils only). 17 | 18 | Format:: 19 | 20 | /*...*/ 21 | """ 22 | def __init__(self, cssText=None, parentRule=None, 23 | parentStyleSheet=None, readonly=False): 24 | super(CSSComment, self).__init__(parentRule=parentRule, 25 | parentStyleSheet=parentStyleSheet) 26 | 27 | self._cssText = None 28 | if cssText: 29 | self._setCssText(cssText) 30 | 31 | self._readonly = readonly 32 | 33 | def __repr__(self): 34 | return u"cssutils.css.%s(cssText=%r)" % ( 35 | self.__class__.__name__, 36 | self.cssText) 37 | 38 | def __str__(self): 39 | return u"" % ( 40 | self.__class__.__name__, 41 | self.cssText, 42 | id(self)) 43 | 44 | def _getCssText(self): 45 | """Return serialized property cssText.""" 46 | return cssutils.ser.do_CSSComment(self) 47 | 48 | def _setCssText(self, cssText): 49 | """ 50 | :param cssText: 51 | textual text to set or tokenlist which is not tokenized 52 | anymore. 
May also be a single token for this rule 53 | 54 | :exceptions: 55 | - :exc:`~xml.dom.SyntaxErr`: 56 | Raised if the specified CSS string value has a syntax error and 57 | is unparsable. 58 | - :exc:`~xml.dom.InvalidModificationErr`: 59 | Raised if the specified CSS string value represents a different 60 | type of rule than the current one. 61 | - :exc:`~xml.dom.NoModificationAllowedErr`: 62 | Raised if the rule is readonly. 63 | """ 64 | super(CSSComment, self)._setCssText(cssText) 65 | tokenizer = self._tokenize2(cssText) 66 | 67 | commenttoken = self._nexttoken(tokenizer) 68 | unexpected = self._nexttoken(tokenizer) 69 | 70 | if not commenttoken or\ 71 | self._type(commenttoken) != self._prods.COMMENT or\ 72 | unexpected: 73 | self._log.error(u'CSSComment: Not a CSSComment: %r' % 74 | self._valuestr(cssText), 75 | error=xml.dom.InvalidModificationErr) 76 | else: 77 | self._cssText = self._tokenvalue(commenttoken) 78 | 79 | cssText = property(_getCssText, _setCssText, 80 | doc=u"The parsable textual representation of this rule.") 81 | 82 | type = property(lambda self: self.COMMENT, 83 | doc=u"The type of this rule, as defined by a CSSRule " 84 | u"type constant.") 85 | 86 | # constant but needed: 87 | wellformed = property(lambda self: True) 88 | -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | #Changelog for KindleEar 2 | 3 | ##1.23.1 4 | 1. 增强错误处理,规避部分网友部署后无法建立索引的问题。 5 | 2. Bugfix: 修正书籍设置中extra_css无效的问题。 6 | 3. Bugfix: 修正部分全文RSS XML中HTML链接格式不规范导致无法解析出内容的问题。 7 | 8 | ##1.23 9 | 1. 添加自动将超长图切割为多个小图片的功能,方便在kindle上看长图。 10 | 2. 进一步优化自动网页解码功能,减小乱码情况。 11 | 3. 修正导入含中文字符URL的OPML文件导致程序崩溃的问题。 12 | 13 | ##1.22.3 14 | 1. 修正遇到xml文件中的非法tag(中文tag)导致推送失败的问题。 15 | 16 | ##1.22.2 17 | 1. 邮件中转模块升级,在邮件标题中添加 !links则强制抓取文章链接,!article则强制发送文本。 18 | 2. Kindle邮件地址支持多个收件人地址,用分号分隔。 19 | 20 | ##1.22.1 21 | 1. 升级内部一些模块。 22 | 23 | ##1.22 24 | 1. 用AJAX技术重新实现“我的订阅”前后台,实现不重新加载页面增删订阅或RSS。 25 | 26 | ##1.21.1 27 | 1. 增加保存到Instapaper的归档功能。 28 | 29 | ##1.21 30 | 1. 增加保存到Pocket的归档功能。 31 | 32 | ##1.20.28 33 | 1. 《雪球网》屏蔽了gae的IP段的网络请求,此版本通过中转器获取。 34 | 35 | ##1.20.27 36 | 1. 邮件中转功能中引入字符串压缩,支持一封邮件内包含更多链接,以便更好的通过发送链接生成电子书。 37 | 38 | ##1.20.26 39 | 1. 引入html内嵌base64图像技术,减小http请求数量,提升网页加速效率。 40 | 41 | ##1.20.25 42 | 1. 增强网页解码,容忍部分字符解码错误,减小了网页乱码可能。 43 | 44 | ##1.20.24 45 | 1. 修改选择了合并推送但仅有自定义RSS推送时无封面问题。 46 | 47 | ##1.20.23 48 | 1. 去掉标题格式中的 “标题 日/月” 和 “标题 月/日” 格式,因为会导致推送失败。 49 | 50 | ##1.20.22 51 | 1. 解决URL中有unicode字符时无法导出OPML文件的文件。 52 | 2. 解决微信公众号获取失败后导致其他书籍推送异常的问题。 53 | 54 | ##1.20.21 55 | 1. 添加ETAG机制,减小需要的网络流量。 56 | 57 | ##1.20.20 58 | 1. 修正部分XML文件获取文件编码失败问题。 59 | 60 | ##1.20.19 61 | 1. 修正pycrypto模块加载失败问题。 62 | 63 | ##1.20.18 64 | 1. 微信公众号突破一次防爬取 [zhu327](https://github.com/zhu327/rss)。 65 | 2. 通过邮件链接抓取的文章不添加封面。 66 | 67 | ##1.20.17 68 | 1. 在目录中添加各个源的文章篇数。 69 | 2. 增加对书籍异常的处理,避免一本书籍的异常而影响其他书籍的推送。 70 | 71 | ##1.20.16 72 | 1. 增加一个选项,可选择使用网页标题还是feed标题做为文章标题。 73 | 74 | ##1.20.15 75 | 1. bugfix: weixinbase部分解码失败问题。 76 | 77 | ##1.20.14 78 | 1. bugfix: 喷嚏图啩原RSS地址已经失效,更换为另一个地址。 79 | 80 | ##1.20.13 81 | 1. bugfix: 修改有部分RSS将一个图片做为一篇文章(没有html包装)导致推送失败的问题。 82 | 83 | ##1.20.12 84 | 1. 导出opml时将url转义。 85 | 86 | ##1.20.11 87 | 1. "导入订阅列表"功能支持OPML元素Outline嵌套。 88 | 89 | ##1.20.10 90 | 1. bugfix: 修正有部分文章无法提取正文导致推送失败的问题。 91 | 92 | ##1.20.9 93 | 1. 增加从opml文件导入订阅列表的功能。 94 | 2. 网友seff增加特性:可选部分书籍单独推送。 95 | 96 | ##1.20.8 97 | 1. 网友mcfloundinho增加《共识网》 98 | 99 | ##1.20.7 100 | 1. 
bugfix:修正trigger@appid.appspotmail.com触发投递失败的问题。 101 | 102 | ##1.20.6 103 | 1. bugfix:继续修改urlopener处理cookie的一个bug. 104 | 105 | ##1.20.5 106 | 1. 增加网友mcfloundinho提供的《南方周末》。 107 | 2. bugfix:修改urlopener处理cookie的一个bug. 108 | 109 | ##1.20.4 110 | 1. 更新土耳其语翻译。 111 | 112 | ##1.20.3 113 | 1. 修改内置书籍TED渤海湾以适应其网站改版。 114 | 115 | ##1.20.2 116 | 1. 针对使用图片延迟加载技术的网页特殊处理,可以获取部分此类网页的图片。 117 | 118 | ##1.20.1 119 | 1. 新特性,在合并推送时将各书籍的封面拼贴起来。默认已经开启,如果你使用以前的config.py,请设置DEFAULT_COVER_BV=None,如果不喜欢此效果,可以设置DEFAULT_COVER_BV='cv_bound.jpg' 120 | 2. bugfix: 修正保存到evernote不成功的问题(1.13引入) 121 | 122 | ##1.20 123 | 1. 增加一个简单的正文提取模块,在readability失败后启用。 124 | 2. 增强的网页解码器,综合考虑http响应头/html文件头/chardet检测结果,效率更高,乱码更少。 125 | 3. 支持需要登陆才能查看文章的网站,请参照FAQ如何使用。 126 | 4. 针对一天推送多次的需求,书籍属性‘oldest_article’大于365则使用*秒*为单位。 127 | 5. 增强的密码安全,加salt然后md5,无法通过密码词典破解,在可接受的代价范围内无法暴力破解。 128 | (仅新账号启用,如果需要可以删掉admin然后重新登陆就会新建admin账号) 129 | 6. 整理文件夹结构,将相关库都放到lib目录下。 130 | 7. 其他一些小的可用性增强。 131 | > 升级注意:书籍的fetcharticle()增加了一个参数,如果你定制的书籍使用到此接口,需要修改。 132 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/conversion/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | from __future__ import with_statement 4 | 5 | __license__ = 'GPL v3' 6 | __copyright__ = '2009, Kovid Goyal ' 7 | __docformat__ = 'restructuredtext en' 8 | 9 | import os 10 | 11 | from calibre.utils.config import config_dir 12 | from calibre.utils.lock import ExclusiveFile 13 | from calibre import sanitize_file_name 14 | from calibre.customize.conversion import OptionRecommendation 15 | 16 | 17 | config_dir = os.path.join(config_dir, 'conversion') 18 | if not os.path.exists(config_dir): 19 | os.makedirs(config_dir) 20 | 21 | def name_to_path(name): 22 | return os.path.join(config_dir, sanitize_file_name(name)+'.py') 23 | 24 | def save_defaults(name, recs): 25 | path = name_to_path(name) 26 | raw = str(recs) 27 | with open(path, 'wb'): 28 | pass 29 | with ExclusiveFile(path) as f: 30 | f.write(raw) 31 | 32 | def load_defaults(name): 33 | path = name_to_path(name) 34 | if not os.path.exists(path): 35 | open(path, 'wb').close() 36 | with ExclusiveFile(path) as f: 37 | raw = f.read() 38 | r = GuiRecommendations() 39 | if raw: 40 | r.from_string(raw) 41 | return r 42 | 43 | def save_specifics(db, book_id, recs): 44 | raw = str(recs) 45 | db.set_conversion_options(book_id, 'PIPE', raw) 46 | 47 | def load_specifics(db, book_id): 48 | raw = db.conversion_options(book_id, 'PIPE') 49 | r = GuiRecommendations() 50 | if raw: 51 | r.from_string(raw) 52 | return r 53 | 54 | def delete_specifics(db, book_id): 55 | db.delete_conversion_options(book_id, 'PIPE') 56 | 57 | class GuiRecommendations(dict): 58 | 59 | def __new__(cls, *args): 60 | dict.__new__(cls) 61 | obj = super(GuiRecommendations, cls).__new__(cls, *args) 62 | obj.disabled_options = set([]) 63 | return obj 64 | 65 | def to_recommendations(self, level=OptionRecommendation.LOW): 66 | ans = [] 67 | for key, val in self.items(): 68 | ans.append((key, val, level)) 69 | return ans 70 | 71 | def __str__(self): 72 | ans = ['{'] 73 | for key, val in self.items(): 74 | ans.append('\t'+repr(key)+' : '+repr(val)+',') 75 | ans.append('}') 76 | return '\n'.join(ans) 77 | 78 | def from_string(self, raw): 79 | try: 80 | d = eval(raw) 81 | except SyntaxError: 82 | d = None 83 | if d: 84 | self.update(d) 85 | 86 | def merge_recommendations(self, get_option, level, options, 87 | 
only_existing=False): 88 | for name in options: 89 | if only_existing and name not in self: 90 | continue 91 | opt = get_option(name) 92 | if opt is None: continue 93 | if opt.level == OptionRecommendation.HIGH: 94 | self[name] = opt.recommended_value 95 | self.disabled_options.add(name) 96 | elif opt.level > level or name not in self: 97 | self[name] = opt.recommended_value 98 | 99 | 100 | -------------------------------------------------------------------------------- /lib/cssutils/scripts/csscombine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Combine all sheets referred to a given CSS *proxy* sheet 3 | into a single new sheet. 4 | 5 | - no ``url()`` values are adjusted so currently when using relative references 6 | for e.g. images it is best to have all sheets in a single folder 7 | - in @import rules only relative paths do work for now but should be used 8 | anyway 9 | - messages are send to stderr 10 | - output to stdout. 11 | 12 | Example:: 13 | 14 | csscombine sheets\csscombine-proxy.css -m -t ascii -s utf-8 15 | 1>combined.css 2>log.txt 16 | 17 | results in log.txt:: 18 | 19 | COMBINING sheets/csscombine-proxy.css 20 | USING SOURCE ENCODING: css 21 | * PROCESSING @import sheets\csscombine-1.css 22 | * PROCESSING @import sheets\csscombine-2.css 23 | INFO Nested @imports are not combined: @import "1.css"; 24 | SETTING TARGET ENCODING: ascii 25 | 26 | and combined.css:: 27 | 28 | @charset "ascii";@import"1.css";@namespaces2"uri";s2|sheet-1{top:1px}s2|sheet-2{top:2px}proxy{top:3px} 29 | 30 | or without option -m:: 31 | 32 | @charset "ascii"; 33 | @import "1.css"; 34 | @namespace s2 "uri"; 35 | @namespace other "other"; 36 | /* proxy sheet were imported sheets should be combined */ 37 | /* non-ascii chars: \F6 \E4 \FC */ 38 | /* @import "csscombine-1.css"; */ 39 | /* combined sheet 1 */ 40 | s2|sheet-1 { 41 | top: 1px 42 | } 43 | /* @import url(csscombine-2.css); */ 44 | /* combined sheet 2 */ 45 | s2|sheet-2 { 46 | top: 2px 47 | } 48 | proxy { 49 | top: 3px 50 | } 51 | 52 | """ 53 | __all__ = ['csscombine'] 54 | __docformat__ = 'restructuredtext' 55 | __version__ = '$Id$' 56 | 57 | from cssutils.script import csscombine 58 | import optparse 59 | import sys 60 | 61 | def main(args=None): 62 | usage = "usage: %prog [options] [path]" 63 | parser = optparse.OptionParser(usage=usage) 64 | parser.add_option('-u', '--url', action='store', 65 | dest='url', 66 | help='URL to parse (path is ignored if URL given)') 67 | parser.add_option('-s', '--sourceencoding', action='store', 68 | dest='sourceencoding', 69 | help='encoding of input, defaulting to "css". 
If given overwrites other encoding information like @charset declarations') 70 | parser.add_option('-t', '--targetencoding', action='store', 71 | dest='targetencoding', 72 | help='encoding of output, defaulting to "UTF-8"', default='utf-8') 73 | parser.add_option('-m', '--minify', action='store_true', dest='minify', 74 | default=False, 75 | help='saves minified version of combined files, defaults to False') 76 | options, path = parser.parse_args() 77 | 78 | if options.url: 79 | print csscombine(url=options.url, 80 | sourceencoding=options.sourceencoding, 81 | targetencoding=options.targetencoding, 82 | minify=options.minify) 83 | elif path: 84 | print csscombine(path=path[0], 85 | sourceencoding=options.sourceencoding, 86 | targetencoding=options.targetencoding, 87 | minify=options.minify) 88 | else: 89 | parser.error('no path or URL (-u) given') 90 | 91 | 92 | 93 | if __name__ == '__main__': 94 | sys.exit(main()) -------------------------------------------------------------------------------- /lib/calibre/ebooks/compression/palmdoc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai 3 | 4 | __license__ = 'GPL v3' 5 | __copyright__ = '2008, Kovid Goyal ' 6 | 7 | from cStringIO import StringIO 8 | from struct import pack 9 | 10 | #from calibre.constants import plugins 11 | #cPalmdoc = plugins['cPalmdoc'][0] 12 | #if not cPalmdoc: 13 | # raise RuntimeError(('Failed to load required cPalmdoc module: ' 14 | # '%s')%plugins['cPalmdoc'][1]) 15 | 16 | #def decompress_doc(data): 17 | # return cPalmdoc.decompress(data) 18 | 19 | #GAE dont support cmodule, using py module, it will be very very slow!!! 20 | def compress_doc(data): 21 | if not data: 22 | return u'' 23 | #return cPalmdoc.compress(data) 24 | return py_compress_doc(data) 25 | 26 | #def test(): 27 | # TESTS = [ 28 | # 'abc\x03\x04\x05\x06ms', # Test binary writing 29 | # 'a b c \xfed ', # Test encoding of spaces 30 | # '0123456789axyz2bxyz2cdfgfo9iuyerh', 31 | # '0123456789asd0123456789asd|yyzzxxffhhjjkk', 32 | # ('ciewacnaq eiu743 r787q 0w% ; sa fd\xef\ffdxosac wocjp acoiecowei ' 33 | # 'owaic jociowapjcivcjpoivjporeivjpoavca; p9aw8743y6r74%$^$^%8 ') 34 | # ] 35 | # for test in TESTS: 36 | # print 'Test:', repr(test) 37 | # print '\tTesting compression...' 38 | # good = py_compress_doc(test) 39 | # x = compress_doc(test) 40 | # print '\t\tgood:', repr(good) 41 | # print '\t\tx :', repr(x) 42 | # assert x == good 43 | # print '\tTesting decompression...' 
44 | # print '\t\t', repr(decompress_doc(x)) 45 | # assert decompress_doc(x) == test 46 | # print 47 | 48 | def py_compress_doc(data): 49 | out = StringIO() 50 | i = 0 51 | ldata = len(data) 52 | while i < ldata: 53 | if i > 10 and (ldata - i) > 10: 54 | chunk = '' 55 | match = -1 56 | for j in xrange(10, 2, -1): 57 | chunk = data[i:i+j] 58 | try: 59 | match = data.rindex(chunk, 0, i) 60 | except ValueError: 61 | continue 62 | if (i - match) <= 2047: 63 | break 64 | match = -1 65 | if match >= 0: 66 | n = len(chunk) 67 | m = i - match 68 | code = 0x8000 + ((m << 3) & 0x3ff8) + (n - 3) 69 | out.write(pack('>H', code)) 70 | i += n 71 | continue 72 | ch = data[i] 73 | och = ord(ch) 74 | i += 1 75 | if ch == ' ' and (i + 1) < ldata: 76 | onch = ord(data[i]) 77 | if onch >= 0x40 and onch < 0x80: 78 | out.write(pack('>B', onch ^ 0x80)) 79 | i += 1 80 | continue 81 | if och == 0 or (och > 8 and och < 0x80): 82 | out.write(ch) 83 | else: 84 | j = i 85 | binseq = [ch] 86 | while j < ldata and len(binseq) < 8: 87 | ch = data[j] 88 | och = ord(ch) 89 | if och == 0 or (och > 8 and och < 0x80): 90 | break 91 | binseq.append(ch) 92 | j += 1 93 | out.write(pack('>B', len(binseq))) 94 | out.write(''.join(binseq)) 95 | i += len(binseq) - 1 96 | return out.getvalue() 97 | 98 | -------------------------------------------------------------------------------- /lib/calibre/ebooks/unihandecode/pykakasi/kakasi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # kakasi.py 3 | # 4 | # Copyright 2011 Hiroshi Miura 5 | # 6 | # Original Copyright: 7 | # * KAKASI (Kanji Kana Simple inversion program) 8 | # * $Id: jj2.c,v 1.7 2001-04-12 05:57:34 rug Exp $ 9 | # * Copyright (C) 1992 10 | # * Hironobu Takahashi (takahasi@tiny.or.jp) 11 | # * 12 | # * This program is free software; you can redistribute it and/or modify 13 | # * it under the terms of the GNU General Public License as published by 14 | # * the Free Software Foundation; either versions 2, or (at your option) 15 | # * any later version. 16 | # * 17 | # * This program is distributed in the hope that it will be useful 18 | # * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # * GNU General Public License for more details. 
21 | # * 22 | # */ 23 | 24 | from calibre.ebooks.unihandecode.pykakasi.j2h import J2H 25 | from calibre.ebooks.unihandecode.pykakasi.h2a import H2a 26 | from calibre.ebooks.unihandecode.pykakasi.k2a import K2a 27 | 28 | class kakasi(object): 29 | 30 | j2h = None 31 | h2a = None 32 | k2a = None 33 | 34 | def __init__(self): 35 | self.j2h = J2H() 36 | self.h2a = H2a() 37 | self.k2a = K2a() 38 | 39 | 40 | def do(self, text): 41 | otext = '' 42 | i = 0 43 | while True: 44 | if i >= len(text): 45 | break 46 | 47 | if self.j2h.isKanji(text[i]): 48 | (t, l) = self.j2h.convert(text[i:]) 49 | if l <= 0: 50 | otext = otext + text[i] 51 | i = i + 1 52 | continue 53 | i = i + l 54 | m = 0 55 | tmptext = "" 56 | while True: 57 | if m >= len(t): 58 | break 59 | (s, n) = self.h2a.convert(t[m:]) 60 | if n <= 0: 61 | break 62 | m = m + n 63 | tmptext = tmptext+s 64 | if i >= len(text): 65 | otext = otext + tmptext.capitalize() 66 | else: 67 | otext = otext + tmptext.capitalize() +' ' 68 | elif self.h2a.isHiragana(text[i]): 69 | tmptext = '' 70 | while True: 71 | (t, l) = self.h2a.convert(text[i:]) 72 | tmptext = tmptext+t 73 | i = i + l 74 | if i >= len(text): 75 | otext = otext + tmptext 76 | break 77 | elif not self.h2a.isHiragana(text[i]): 78 | otext = otext + tmptext + ' ' 79 | break 80 | elif self.k2a.isKatakana(text[i]): 81 | tmptext = '' 82 | while True: 83 | (t, l) = self.k2a.convert(text[i:]) 84 | tmptext = tmptext+t 85 | i = i + l 86 | if i >= len(text): 87 | otext = otext + tmptext 88 | break 89 | elif not self.k2a.isKatakana(text[i]): 90 | otext = otext + tmptext + ' ' 91 | break 92 | else: 93 | otext = otext + text[i] 94 | i += 1 95 | 96 | return otext 97 | 98 | -------------------------------------------------------------------------------- /lib/calibre/utils/titlecase.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Original Perl version by: John Gruber http://daringfireball.net/ 10 May 2008 6 | Python version by Stuart Colville http://muffinresearch.co.uk 7 | License: http://www.opensource.org/licenses/mit-license.php 8 | """ 9 | 10 | import re 11 | 12 | from calibre.utils.icu import capitalize 13 | 14 | __all__ = ['titlecase'] 15 | __version__ = '0.5' 16 | 17 | SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?' 18 | PUNCT = r"""!"#$%&'‘()*+,\-‒–—―./:;?@[\\\]_`{|}~""" 19 | 20 | SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I) 21 | INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I) 22 | UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT) 23 | CAPFIRST = re.compile(r"^[%s]*?([A-Za-z])" % PUNCT) 24 | SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I) 25 | SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I) 26 | SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I) 27 | SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL) 28 | APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I) 29 | ALL_CAPS = re.compile(r'^[A-Z0-9\s%s]+$' % PUNCT) 30 | UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$") 31 | 32 | _lang = None 33 | 34 | def lang(): 35 | global _lang 36 | if _lang is None: 37 | from calibre.utils.localization import get_lang 38 | _lang = get_lang().lower() 39 | return _lang 40 | 41 | def titlecase(text): 42 | 43 | """ 44 | Titlecases input text 45 | 46 | This filter changes all words to Title Caps, and attempts to be clever 47 | about *un*capitalizing SMALL words like a/an/the in the input. 
48 | 49 | The list of "SMALL words" which are not capped comes from 50 | the New York Times Manual of Style, plus 'vs' and 'v'. 51 | 52 | """ 53 | 54 | all_caps = ALL_CAPS.match(text) 55 | 56 | words = re.split('\s+', text) 57 | line = [] 58 | for word in words: 59 | if all_caps: 60 | if UC_INITIALS.match(word): 61 | line.append(word) 62 | continue 63 | else: 64 | word = icu_lower(word) 65 | 66 | if APOS_SECOND.match(word): 67 | word = word.replace(word[0], icu_upper(word[0]), 1) 68 | word = word[:2] + icu_upper(word[2]) + word[3:] 69 | line.append(word) 70 | continue 71 | if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word): 72 | line.append(word) 73 | continue 74 | if SMALL_WORDS.match(word): 75 | line.append(icu_lower(word)) 76 | continue 77 | 78 | hyphenated = [] 79 | for item in word.split('-'): 80 | hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item)) 81 | line.append("-".join(hyphenated)) 82 | 83 | 84 | result = " ".join(line) 85 | 86 | result = SMALL_FIRST.sub(lambda m: '%s%s' % ( 87 | m.group(1), 88 | capitalize(m.group(2)) 89 | ), result) 90 | 91 | result = SMALL_AFTER_NUM.sub(lambda m: '%s%s' % (m.group(1), 92 | capitalize(m.group(2)) 93 | ), result) 94 | 95 | result = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), result) 96 | 97 | result = SUBPHRASE.sub(lambda m: '%s%s' % ( 98 | m.group(1), 99 | capitalize(m.group(2)) 100 | ), result) 101 | 102 | return result 103 | 104 | -------------------------------------------------------------------------------- /lib/chardet/escprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . 
import constants 29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, 30 | ISO2022KRSMModel) 31 | from .charsetprober import CharSetProber 32 | from .codingstatemachine import CodingStateMachine 33 | from .compat import wrap_ord 34 | 35 | 36 | class EscCharSetProber(CharSetProber): 37 | def __init__(self): 38 | CharSetProber.__init__(self) 39 | self._mCodingSM = [ 40 | CodingStateMachine(HZSMModel), 41 | CodingStateMachine(ISO2022CNSMModel), 42 | CodingStateMachine(ISO2022JPSMModel), 43 | CodingStateMachine(ISO2022KRSMModel) 44 | ] 45 | self.reset() 46 | 47 | def reset(self): 48 | CharSetProber.reset(self) 49 | for codingSM in self._mCodingSM: 50 | if not codingSM: 51 | continue 52 | codingSM.active = True 53 | codingSM.reset() 54 | self._mActiveSM = len(self._mCodingSM) 55 | self._mDetectedCharset = None 56 | 57 | def get_charset_name(self): 58 | return self._mDetectedCharset 59 | 60 | def get_confidence(self): 61 | if self._mDetectedCharset: 62 | return 0.99 63 | else: 64 | return 0.00 65 | 66 | def feed(self, aBuf): 67 | for c in aBuf: 68 | # PY3K: aBuf is a byte array, so c is an int, not a byte 69 | for codingSM in self._mCodingSM: 70 | if not codingSM: 71 | continue 72 | if not codingSM.active: 73 | continue 74 | codingState = codingSM.next_state(wrap_ord(c)) 75 | if codingState == constants.eError: 76 | codingSM.active = False 77 | self._mActiveSM -= 1 78 | if self._mActiveSM <= 0: 79 | self._mState = constants.eNotMe 80 | return self.get_state() 81 | elif codingState == constants.eItsMe: 82 | self._mState = constants.eFoundIt 83 | self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8 84 | return self.get_state() 85 | 86 | return self.get_state() 87 | -------------------------------------------------------------------------------- /lib/chardet/sbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 
22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetgroupprober import CharSetGroupProber 30 | from .sbcharsetprober import SingleByteCharSetProber 31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, 32 | Latin5CyrillicModel, MacCyrillicModel, 33 | Ibm866Model, Ibm855Model) 34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel 35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel 36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel 37 | from .langthaimodel import TIS620ThaiModel 38 | from .langhebrewmodel import Win1255HebrewModel 39 | from .hebrewprober import HebrewProber 40 | 41 | 42 | class SBCSGroupProber(CharSetGroupProber): 43 | def __init__(self): 44 | CharSetGroupProber.__init__(self) 45 | self._mProbers = [ 46 | SingleByteCharSetProber(Win1251CyrillicModel), 47 | SingleByteCharSetProber(Koi8rModel), 48 | SingleByteCharSetProber(Latin5CyrillicModel), 49 | SingleByteCharSetProber(MacCyrillicModel), 50 | SingleByteCharSetProber(Ibm866Model), 51 | SingleByteCharSetProber(Ibm855Model), 52 | SingleByteCharSetProber(Latin7GreekModel), 53 | SingleByteCharSetProber(Win1253GreekModel), 54 | SingleByteCharSetProber(Latin5BulgarianModel), 55 | SingleByteCharSetProber(Win1251BulgarianModel), 56 | SingleByteCharSetProber(Latin2HungarianModel), 57 | SingleByteCharSetProber(Win1250HungarianModel), 58 | SingleByteCharSetProber(TIS620ThaiModel), 59 | ] 60 | hebrewProber = HebrewProber() 61 | logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, 62 | False, hebrewProber) 63 | visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, 64 | hebrewProber) 65 | hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) 66 | self._mProbers.extend([hebrewProber, logicalHebrewProber, 67 | visualHebrewProber]) 68 | 69 | self.reset() 70 | --------------------------------------------------------------------------------
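The single-byte charset probers above are not normally instantiated by hand; chardet wires them into its top-level detection API. Below is a minimal, hypothetical sketch (not part of KindleEar) of how a page fetcher can fall back on chardet.detect() to decode downloaded bytes when the HTTP headers and meta tags give no usable charset; the fetch_and_decode helper, the example URL, and the utf-8 fallback are illustrative assumptions only.

```python
# Hypothetical usage sketch: decode raw page bytes with chardet as a fallback.
import urllib2  # Python 2, matching the rest of this code base
import chardet

def fetch_and_decode(url):
    raw = urllib2.urlopen(url, timeout=30).read()  # raw bytes of the page
    guess = chardet.detect(raw)                    # e.g. {'encoding': 'windows-1251', 'confidence': 0.98}
    encoding = guess.get('encoding') or 'utf-8'    # assumed fallback when detection fails
    return raw.decode(encoding, 'replace')         # tolerate the odd undecodable byte

if __name__ == '__main__':
    print fetch_and_decode('http://www.example.com/')
```

chardet.detect() feeds the buffer to a UniversalDetector, which runs the escape, multi-byte, and single-byte group probers (including the SBCSGroupProber defined above) and reports the most confident match.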