├── bs4 ├── tests │ ├── __init__.py │ ├── test_htmlparser.py │ ├── test_docs.py │ ├── test_html5lib.py │ ├── test_lxml.py │ ├── test_builder_registry.py │ └── test_soup.py ├── diagnose.py ├── builder │ ├── _lxml.py │ ├── _htmlparser.py │ ├── _html5lib.py │ └── __init__.py ├── __init__.py ├── testing.py └── dammit.py ├── .gitignore ├── CHANGELOG ├── getcookie.user.coffee ├── getcookie.user.js ├── LICENSE ├── README.md └── tenkou.py /bs4/tests/__init__.py: -------------------------------------------------------------------------------- 1 | "The beautifulsoup tests." 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.css 3 | *.htm 4 | *.txt 5 | *auth* -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | v0.0.2 2 | + 从本地文件批量导入至bangumi(恢复功能) 3 | 4 | v0.0.1 5 | + 导出bangumi条目到本地 6 | + 批量删除条目 -------------------------------------------------------------------------------- /bs4/tests/test_htmlparser.py: -------------------------------------------------------------------------------- 1 | """Tests to ensure that the html.parser tree builder generates good 2 | trees.""" 3 | 4 | from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest 5 | from bs4.builder import HTMLParserTreeBuilder 6 | 7 | class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): 8 | 9 | @property 10 | def default_builder(self): 11 | return HTMLParserTreeBuilder() 12 | 13 | def test_namespaced_system_doctype(self): 14 | # html.parser can't handle namespaced doctypes, so skip this one. 15 | pass 16 | 17 | def test_namespaced_public_doctype(self): 18 | # html.parser can't handle namespaced doctypes, so skip this one. 19 | pass 20 | -------------------------------------------------------------------------------- /getcookie.user.coffee: -------------------------------------------------------------------------------- 1 | ### 2 | // ==UserScript== 3 | // @name getbgmcookie 4 | // @namespace https://github.com/hentaiPanda 5 | // @author niR 6 | // @version 0.0.1 7 | // @license MIT License 8 | // @encoding utf-8 9 | // @grant GM_setClipboard 10 | // @grant GM_registerMenuCommand 11 | // @include http://bangumi.tv/* 12 | // @include http://bgm.tv/* 13 | // @include http://chii.in/* 14 | // ==/UserScript== 15 | ### 16 | 17 | 18 | show = -> 19 | # alert(document.cookie) 20 | cks = document.cookie.split(';') 21 | for i in cks 22 | i = i.trim() 23 | if i.indexOf('chii_auth') is 0 24 | auth = i.split('=')[1] 25 | break 26 | ua = navigator.userAgent 27 | data = ua + '\n' + auth 28 | alert(data) 29 | console.log(data) 30 | GM_setClipboard(data) 31 | alert('已复制到剪贴板') 32 | 33 | 34 | GM_registerMenuCommand('显示UA和AUTH', show) -------------------------------------------------------------------------------- /getcookie.user.js: -------------------------------------------------------------------------------- 1 | /* 2 | // ==UserScript== 3 | // @name getbgmcookie 4 | // @namespace https://github.com/hentaiPanda 5 | // @author niR 6 | // @version 0.0.1 7 | // @license MIT License 8 | // @encoding utf-8 9 | // @grant GM_setClipboard 10 | // @grant GM_registerMenuCommand 11 | // @include http://bangumi.tv/* 12 | // @include http://bgm.tv/* 13 | // @include http://chii.in/* 14 | // ==/UserScript== 15 | */ 16 | var show; 17 | 18 | show = function() { 19 | var auth, cks, data, i, ua, _i, _len; 20 | cks = document.cookie.split(';'); 21 | for (_i = 0, _len = cks.length; _i < _len; _i++) { 22 | i = cks[_i]; 23 | i = i.trim(); 24 | if (i.indexOf('chii_auth') === 0) { 25 | auth = i.split('=')[1]; 26 | break; 27 | } 28 | } 29 | ua = navigator.userAgent; 30 | data = ua + '\n' + auth; 31 | alert(data); 32 | console.log(data); 33 | GM_setClipboard(data); 34 | return alert('已复制到剪贴板'); 35 | }; 36 | 37 | GM_registerMenuCommand('显示UA和AUTH', show); -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 niR 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /bs4/tests/test_docs.py: -------------------------------------------------------------------------------- 1 | "Test harness for doctests." 2 | 3 | # pylint: disable-msg=E0611,W0142 4 | 5 | __metaclass__ = type 6 | __all__ = [ 7 | 'additional_tests', 8 | ] 9 | 10 | import atexit 11 | import doctest 12 | import os 13 | #from pkg_resources import ( 14 | # resource_filename, resource_exists, resource_listdir, cleanup_resources) 15 | import unittest 16 | 17 | DOCTEST_FLAGS = ( 18 | doctest.ELLIPSIS | 19 | doctest.NORMALIZE_WHITESPACE | 20 | doctest.REPORT_NDIFF) 21 | 22 | 23 | # def additional_tests(): 24 | # "Run the doc tests (README.txt and docs/*, if any exist)" 25 | # doctest_files = [ 26 | # os.path.abspath(resource_filename('bs4', 'README.txt'))] 27 | # if resource_exists('bs4', 'docs'): 28 | # for name in resource_listdir('bs4', 'docs'): 29 | # if name.endswith('.txt'): 30 | # doctest_files.append( 31 | # os.path.abspath( 32 | # resource_filename('bs4', 'docs/%s' % name))) 33 | # kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) 34 | # atexit.register(cleanup_resources) 35 | # return unittest.TestSuite(( 36 | # doctest.DocFileSuite(*doctest_files, **kwargs))) 37 | -------------------------------------------------------------------------------- /bs4/tests/test_html5lib.py: -------------------------------------------------------------------------------- 1 | """Tests to ensure that the html5lib tree builder generates good trees.""" 2 | 3 | import warnings 4 | 5 | try: 6 | from bs4.builder import HTML5TreeBuilder 7 | HTML5LIB_PRESENT = True 8 | except ImportError as e: 9 | HTML5LIB_PRESENT = False 10 | from bs4.element import SoupStrainer 11 | from bs4.testing import ( 12 | HTML5TreeBuilderSmokeTest, 13 | SoupTest, 14 | skipIf, 15 | ) 16 | 17 | @skipIf( 18 | not HTML5LIB_PRESENT, 19 | "html5lib seems not to be present, not testing its tree builder.") 20 | class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): 21 | """See ``HTML5TreeBuilderSmokeTest``.""" 22 | 23 | @property 24 | def default_builder(self): 25 | return HTML5TreeBuilder() 26 | 27 | def test_soupstrainer(self): 28 | # The html5lib tree builder does not support SoupStrainers. 29 | strainer = SoupStrainer("b") 30 | markup = "
A bold statement.
" 31 | with warnings.catch_warnings(record=True) as w: 32 | soup = self.soup(markup, parse_only=strainer) 33 | self.assertEqual( 34 | soup.decode(), self.document_for(markup)) 35 | 36 | self.assertTrue( 37 | "the html5lib tree builder doesn't support parse_only" in 38 | str(w[0].message)) 39 | 40 | def test_correctly_nested_tables(self): 41 | """html5lib inserts tags where other parsers don't.""" 42 | markup = ('Here's another table:"
45 | '
| ')
48 |
49 | self.assertSoupEquals(
50 | markup,
51 | '
Here\'s another table:'
52 | '
|
| Foo |
| Bar |
| Baz |
foo
68 | 69 | ''' 70 | soup = self.soup(markup) 71 | # Verify that we can reach thetag; this means the tree is connected. 72 | self.assertEqual(b"
foo
", soup.p.encode()) 73 | 74 | def test_reparented_markup(self): 75 | markup = 'foo
\n' 76 | soup = self.soup(markup) 77 | self.assertEqual("foo
\n", soup.body.decode()) 78 | self.assertEqual(2, len(soup.find_all('p'))) 79 | 80 | 81 | def test_reparented_markup_ends_with_whitespace(self): 82 | markup = 'foo
\n\n' 83 | soup = self.soup(markup) 84 | self.assertEqual("foo
\n\n", soup.body.decode()) 85 | self.assertEqual(2, len(soup.find_all('p'))) 86 | -------------------------------------------------------------------------------- /bs4/tests/test_lxml.py: -------------------------------------------------------------------------------- 1 | """Tests to ensure that the lxml tree builder generates good trees.""" 2 | 3 | import re 4 | import warnings 5 | 6 | try: 7 | import lxml.etree 8 | LXML_PRESENT = True 9 | LXML_VERSION = lxml.etree.LXML_VERSION 10 | except ImportError as e: 11 | LXML_PRESENT = False 12 | LXML_VERSION = (0,) 13 | 14 | if LXML_PRESENT: 15 | from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML 16 | 17 | from bs4 import ( 18 | BeautifulSoup, 19 | BeautifulStoneSoup, 20 | ) 21 | from bs4.element import Comment, Doctype, SoupStrainer 22 | from bs4.testing import skipIf 23 | from bs4.tests import test_htmlparser 24 | from bs4.testing import ( 25 | HTMLTreeBuilderSmokeTest, 26 | XMLTreeBuilderSmokeTest, 27 | SoupTest, 28 | skipIf, 29 | ) 30 | 31 | @skipIf( 32 | not LXML_PRESENT, 33 | "lxml seems not to be present, not testing its tree builder.") 34 | class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): 35 | """See ``HTMLTreeBuilderSmokeTest``.""" 36 | 37 | @property 38 | def default_builder(self): 39 | return LXMLTreeBuilder() 40 | 41 | def test_out_of_range_entity(self): 42 | self.assertSoupEquals( 43 | "foobar
", "foobar
") 44 | self.assertSoupEquals( 45 | "foobar
", "foobar
") 46 | self.assertSoupEquals( 47 | "foobar
", "foobar
") 48 | 49 | # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this 50 | # test if an old version of lxml is installed. 51 | 52 | @skipIf( 53 | not LXML_PRESENT or LXML_VERSION < (2,3,5,0), 54 | "Skipping doctype test for old version of lxml to avoid segfault.") 55 | def test_empty_doctype(self): 56 | soup = self.soup("") 57 | doctype = soup.contents[0] 58 | self.assertEqual("", doctype.strip()) 59 | 60 | def test_beautifulstonesoup_is_xml_parser(self): 61 | # Make sure that the deprecated BSS class uses an xml builder 62 | # if one is installed. 63 | with warnings.catch_warnings(record=True) as w: 64 | soup = BeautifulStoneSoup("") 65 | self.assertEqual("", str(soup.b)) 66 | self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message)) 67 | 68 | def test_real_xhtml_document(self): 69 | """lxml strips the XML definition from an XHTML doc, which is fine.""" 70 | markup = b""" 71 | 72 | 73 |tag to be 108 | an empty-element tag (it's not in 109 | HTMLBuilder.empty_element_tags). This means an empty
tag 110 | will be presented as "
", not "". 111 | 112 | The default implementation has no opinion about which tags are 113 | empty-element tags, so a tag will be presented as an 114 | empty-element tag if and only if it has no contents. 115 | "foo
' 75 | soup = self.soup(markup) 76 | return doctype, soup 77 | 78 | def test_normal_doctypes(self): 79 | """Make sure normal, everyday HTML doctypes are handled correctly.""" 80 | self.assertDoctypeHandled("html") 81 | self.assertDoctypeHandled( 82 | 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"') 83 | 84 | def test_empty_doctype(self): 85 | soup = self.soup("") 86 | doctype = soup.contents[0] 87 | self.assertEqual("", doctype.strip()) 88 | 89 | def test_public_doctype_with_url(self): 90 | doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"' 91 | self.assertDoctypeHandled(doctype) 92 | 93 | def test_system_doctype(self): 94 | self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"') 95 | 96 | def test_namespaced_system_doctype(self): 97 | # We can handle a namespaced doctype with a system ID. 98 | self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"') 99 | 100 | def test_namespaced_public_doctype(self): 101 | # Test a namespaced doctype with a public id. 102 | self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"') 103 | 104 | def test_real_xhtml_document(self): 105 | """A real XHTML document should come out more or less the same as it went in.""" 106 | markup = b""" 107 | 108 | 109 |tag is never designated as an empty-element tag. 127 | 128 | Even if the markup shows it as an empty-element tag, it 129 | shouldn't be presented that way. 130 | """ 131 | soup = self.soup("
") 132 | self.assertFalse(soup.p.is_empty_element) 133 | self.assertEqual(str(soup.p), "") 134 | 135 | def test_unclosed_tags_get_closed(self): 136 | """A tag that's not closed by the end of the document should be closed. 137 | 138 | This applies to all tags except empty-element tags. 139 | """ 140 | self.assertSoupEquals("", "
") 141 | self.assertSoupEquals("", "") 142 | 143 | self.assertSoupEquals("foobaz
" 161 | self.assertSoupEquals(markup) 162 | 163 | soup = self.soup(markup) 164 | comment = soup.find(text="foobar") 165 | self.assertEqual(comment.__class__, Comment) 166 | 167 | # The comment is properly integrated into the tree. 168 | foo = soup.find(text="foo") 169 | self.assertEqual(comment, foo.next_element) 170 | baz = soup.find(text="baz") 171 | self.assertEqual(comment, baz.previous_element) 172 | 173 | def test_preserved_whitespace_in_pre_and_textarea(self): 174 | """Whitespace must be preserved inand