├── .gitignore ├── requirements.txt ├── BappModules ├── jsbeautifier │ ├── core │ │ ├── __init__.py │ │ ├── token.py │ │ ├── directives.py │ │ ├── tokenstream.py │ │ ├── whitespacepattern.py │ │ ├── pattern.py │ │ ├── tokenizer.py │ │ ├── inputscanner.py │ │ ├── templatablepattern.py │ │ ├── options.py │ │ └── output.py │ ├── tests │ │ ├── __init__.py │ │ ├── generated │ │ │ └── __init__.py │ │ └── testindentation.py │ ├── __version__.py │ ├── javascript │ │ ├── __init__.py │ │ ├── options.py │ │ └── acorn.py │ └── unpackers │ │ ├── tests │ │ ├── __init__.py │ │ ├── testurlencode.py │ │ ├── testmyobfuscate.py │ │ ├── testjavascriptobfuscator.py │ │ └── testpacker.py │ │ ├── urlencode.py │ │ ├── evalbased.py │ │ ├── javascriptobfuscator.py │ │ ├── __init__.py │ │ ├── myobfuscate.py │ │ └── packer.py ├── bs4 │ ├── tests │ │ ├── __init__.py │ │ ├── test_docs.py │ │ ├── test_htmlparser.py │ │ ├── test_lxml.py │ │ ├── test_builder_registry.py │ │ └── test_html5lib.py │ ├── check_block.py │ ├── formatter.py │ ├── diagnose.py │ └── builder │ │ ├── _lxml.py │ │ └── _htmlparser.py ├── backports │ ├── __init__.py │ └── functools_lru_cache.py ├── editorconfig │ ├── compat.py │ ├── __init__.py │ ├── exceptions.py │ ├── versiontools.py │ ├── main.py │ ├── handler.py │ ├── ini.py │ └── fnmatch.py └── soupsieve │ ├── __init__.py │ ├── util.py │ ├── __meta__.py │ └── css_types.py ├── imgs ├── options.png ├── beautifierTab.png └── messageEditorTab.png ├── BappManifest.bmf ├── BappDescription.html └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jsbeautifier 2 | beautifulsoup4 -------------------------------------------------------------------------------- /BappModules/jsbeautifier/core/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Empty file :) 2 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Empty file :) 2 | -------------------------------------------------------------------------------- /BappModules/bs4/tests/__init__.py: -------------------------------------------------------------------------------- 1 | "The beautifulsoup tests." 2 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.10.2' 2 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/javascript/__init__.py: -------------------------------------------------------------------------------- 1 | # Empty file :) 2 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/tests/generated/__init__.py: -------------------------------------------------------------------------------- 1 | # Empty file :) 2 | -------------------------------------------------------------------------------- /imgs/options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PortSwigger/burp-beautifier/master/imgs/options.png -------------------------------------------------------------------------------- /BappModules/backports/__init__.py: -------------------------------------------------------------------------------- 1 | __path__ = __import__('pkgutil').extend_path(__path__, __name__) 2 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/unpackers/tests/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Empty file :) 2 | # pylint: disable=C0111 3 | -------------------------------------------------------------------------------- /imgs/beautifierTab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PortSwigger/burp-beautifier/master/imgs/beautifierTab.png -------------------------------------------------------------------------------- /imgs/messageEditorTab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PortSwigger/burp-beautifier/master/imgs/messageEditorTab.png -------------------------------------------------------------------------------- /BappModules/bs4/check_block.py: -------------------------------------------------------------------------------- 1 | import requests 2 | data = requests.get("https://www.crummy.com/").content 3 | from bs4 import _s 4 | data = [x for x in _s(data).block_text()] 5 | -------------------------------------------------------------------------------- /BappManifest.bmf: -------------------------------------------------------------------------------- 1 | Uuid: a005a6a8fba34a8893ec649f76a8d5a7 2 | ExtensionType: 2 3 | Name: Burp Beautifier 4 | RepoName: burp-beautifier 5 | ScreenVersion: 1.0 6 | SerialVersion: 2 7 | MinPlatformVersion: 2 8 | ProOnly: False 9 | Author: Ovi3 10 | ShortDescription: BurpBeautifier is a Burpsuite extension for beautifying request/response body, supporting JS, JSON, HTML, XML format, writing in Jython 2.7. 
11 | EntryPoint: beautifier.py 12 | BuildCommand: 13 | -------------------------------------------------------------------------------- /BappModules/editorconfig/compat.py: -------------------------------------------------------------------------------- 1 | """EditorConfig Python2/Python3 compatibility utilities""" 2 | import sys 3 | 4 | 5 | __all__ = ['force_unicode', 'u'] 6 | 7 | 8 | if sys.version_info[0] == 2: 9 | text_type = unicode 10 | else: 11 | text_type = str 12 | 13 | 14 | def force_unicode(string): 15 | if not isinstance(string, text_type): 16 | string = text_type(string, encoding='utf-8') 17 | return string 18 | 19 | 20 | if sys.version_info[0] == 2: 21 | import codecs 22 | u = lambda s: codecs.unicode_escape_decode(s)[0] 23 | else: 24 | u = lambda s: s 25 | -------------------------------------------------------------------------------- /BappModules/editorconfig/__init__.py: -------------------------------------------------------------------------------- 1 | """EditorConfig Python Core""" 2 | 3 | from editorconfig.versiontools import join_version 4 | 5 | VERSION = (0, 12, 2, "final") 6 | 7 | __all__ = ['get_properties', 'EditorConfigError', 'exceptions'] 8 | 9 | __version__ = join_version(VERSION) 10 | 11 | 12 | def get_properties(filename): 13 | """Locate and parse EditorConfig files for the given filename""" 14 | handler = EditorConfigHandler(filename) 15 | return handler.get_configurations() 16 | 17 | 18 | from editorconfig.handler import EditorConfigHandler 19 | from editorconfig.exceptions import * 20 | -------------------------------------------------------------------------------- /BappModules/editorconfig/exceptions.py: -------------------------------------------------------------------------------- 1 | """EditorConfig exception classes 2 | 3 | Licensed under Simplified BSD License (see LICENSE.BSD file). 
4 | 5 | """ 6 | 7 | 8 | class EditorConfigError(Exception): 9 | """Parent class of all exceptions raised by EditorConfig""" 10 | 11 | 12 | try: 13 | from ConfigParser import ParsingError as _ParsingError 14 | except: 15 | from configparser import ParsingError as _ParsingError 16 | 17 | 18 | class ParsingError(_ParsingError, EditorConfigError): 19 | """Error raised if an EditorConfig file could not be parsed""" 20 | 21 | 22 | class PathError(ValueError, EditorConfigError): 23 | """Error raised if invalid filepath is specified""" 24 | 25 | 26 | class VersionError(ValueError, EditorConfigError): 27 | """Error raised if invalid version number is specified""" 28 | -------------------------------------------------------------------------------- /BappModules/editorconfig/versiontools.py: -------------------------------------------------------------------------------- 1 | """EditorConfig version tools 2 | 3 | Provides ``join_version`` and ``split_version`` classes for converting 4 | __version__ strings to VERSION tuples and vice versa. 
5 | 6 | """ 7 | 8 | import re 9 | 10 | 11 | __all__ = ['join_version', 'split_version'] 12 | 13 | 14 | _version_re = re.compile(r'^(\d+)\.(\d+)\.(\d+)(\..*)?$', re.VERBOSE) 15 | 16 | 17 | def join_version(version_tuple): 18 | """Return a string representation of version from given VERSION tuple""" 19 | version = "%s.%s.%s" % version_tuple[:3] 20 | if version_tuple[3] != "final": 21 | version += "-%s" % version_tuple[3] 22 | return version 23 | 24 | 25 | def split_version(version): 26 | """Return VERSION tuple for given string representation of version""" 27 | match = _version_re.search(version) 28 | if not match: 29 | return None 30 | else: 31 | split_version = list(match.groups()) 32 | if split_version[3] is None: 33 | split_version[3] = "final" 34 | split_version = list(map(int, split_version[:3])) + split_version[3:] 35 | return tuple(split_version) 36 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/unpackers/urlencode.py: -------------------------------------------------------------------------------- 1 | # 2 | # Trivial bookmarklet/escaped script detector for the javascript beautifier 3 | # written by Einar Lielmanis 4 | # rewritten in Python by Stefano Sanfilippo 5 | # 6 | # Will always return valid javascript: if `detect()` is false, `code` is 7 | # returned, unmodified. 8 | # 9 | # usage: 10 | # 11 | # some_string = urlencode.unpack(some_string) 12 | # 13 | 14 | """Bookmarklet/escaped script unpacker.""" 15 | 16 | # Python 2 retrocompatibility 17 | # pylint: disable=F0401 18 | # pylint: disable=E0611 19 | try: 20 | from urllib import unquote_plus 21 | except ImportError: 22 | from urllib.parse import unquote_plus 23 | 24 | PRIORITY = 0 25 | 26 | 27 | def detect(code): 28 | """Detects if a scriptlet is urlencoded.""" 29 | # the fact that script doesn't contain any space, but has %20 instead 30 | # should be sufficient check for now. 
31 | return ' ' not in code and ('%20' in code or code.count('%') > 3) 32 | 33 | 34 | def unpack(code): 35 | """URL decode `code` source string.""" 36 | return unquote_plus(code) if detect(code) else code 37 | -------------------------------------------------------------------------------- /BappModules/bs4/tests/test_docs.py: -------------------------------------------------------------------------------- 1 | "Test harness for doctests." 2 | 3 | # pylint: disable-msg=E0611,W0142 4 | 5 | __metaclass__ = type 6 | __all__ = [ 7 | 'additional_tests', 8 | ] 9 | 10 | import atexit 11 | import doctest 12 | import os 13 | #from pkg_resources import ( 14 | # resource_filename, resource_exists, resource_listdir, cleanup_resources) 15 | import unittest 16 | 17 | DOCTEST_FLAGS = ( 18 | doctest.ELLIPSIS | 19 | doctest.NORMALIZE_WHITESPACE | 20 | doctest.REPORT_NDIFF) 21 | 22 | 23 | # def additional_tests(): 24 | # "Run the doc tests (README.txt and docs/*, if any exist)" 25 | # doctest_files = [ 26 | # os.path.abspath(resource_filename('bs4', 'README.txt'))] 27 | # if resource_exists('bs4', 'docs'): 28 | # for name in resource_listdir('bs4', 'docs'): 29 | # if name.endswith('.txt'): 30 | # doctest_files.append( 31 | # os.path.abspath( 32 | # resource_filename('bs4', 'docs/%s' % name))) 33 | # kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) 34 | # atexit.register(cleanup_resources) 35 | # return unittest.TestSuite(( 36 | # doctest.DocFileSuite(*doctest_files, **kwargs))) 37 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/unpackers/tests/testurlencode.py: -------------------------------------------------------------------------------- 1 | # 2 | # written by Stefano Sanfilippo 3 | # 4 | 5 | """Tests for urlencoded unpacker.""" 6 | 7 | import unittest 8 | 9 | from jsbeautifier.unpackers.urlencode import detect, unpack 10 | 11 | # pylint: disable=R0904 12 | 13 | 14 | class TestUrlencode(unittest.TestCase): 
15 | """urlencode test case.""" 16 | 17 | def test_detect(self): 18 | """Test detect() function.""" 19 | def encoded(source): return self.assertTrue(detect(source)) 20 | 21 | def unencoded(source): return self.assertFalse(detect(source)) 22 | 23 | unencoded('') 24 | unencoded('var a = b') 25 | encoded('var%20a+=+b') 26 | encoded('var%20a=b') 27 | encoded('var%20%21%22') 28 | 29 | def test_unpack(self): 30 | """Test unpack function.""" 31 | def equals( 32 | source, 33 | result): return self.assertEqual( 34 | unpack(source), 35 | result) 36 | 37 | equals('', '') 38 | equals('abcd', 'abcd') 39 | equals('var a = b', 'var a = b') 40 | equals('var%20a=b', 'var a=b') 41 | equals('var%20a+=+b', 'var a = b') 42 | 43 | 44 | if __name__ == '__main__': 45 | unittest.main() 46 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/unpackers/evalbased.py: -------------------------------------------------------------------------------- 1 | # 2 | # Unpacker for eval() based packers, a part of javascript beautifier 3 | # by Einar Lielmanis 4 | # 5 | # written by Stefano Sanfilippo 6 | # 7 | # usage: 8 | # 9 | # if detect(some_string): 10 | # unpacked = unpack(some_string) 11 | # 12 | 13 | """Unpacker for eval() based packers: runs JS code and returns result. 14 | Works only if a JS interpreter (e.g. Mozilla's Rhino) is installed and 15 | properly set up on host.""" 16 | 17 | from subprocess import PIPE, Popen 18 | 19 | PRIORITY = 3 20 | 21 | 22 | def detect(source): 23 | """Detects if source is likely to be eval() packed.""" 24 | return source.strip().lower().startswith('eval(function(') 25 | 26 | 27 | def unpack(source): 28 | """Runs source and return resulting code.""" 29 | return jseval('print %s;' % source[4:]) if detect(source) else source 30 | 31 | # In case of failure, we'll just return the original, without crashing on user. 
32 | 33 | 34 | def jseval(script): 35 | """Run code in the JS interpreter and return output.""" 36 | try: 37 | interpreter = Popen(['js'], stdin=PIPE, stdout=PIPE) 38 | except OSError: 39 | return script 40 | result, errors = interpreter.communicate(script) 41 | if interpreter.poll() or errors: 42 | return script 43 | return result 44 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/unpackers/tests/testmyobfuscate.py: -------------------------------------------------------------------------------- 1 | # 2 | # written by Stefano Sanfilippo 3 | # 4 | 5 | """Tests for MyObfuscate unpacker.""" 6 | 7 | import unittest 8 | import os 9 | from jsbeautifier.unpackers.myobfuscate import detect, unpack 10 | from jsbeautifier.unpackers.tests import __path__ as path 11 | 12 | INPUT = os.path.join(path[0], 'test-myobfuscate-input.js') 13 | OUTPUT = os.path.join(path[0], 'test-myobfuscate-output.js') 14 | 15 | # pylint: disable=R0904 16 | 17 | 18 | class TestMyObfuscate(unittest.TestCase): 19 | # pylint: disable=C0103 20 | """MyObfuscate obfuscator testcase.""" 21 | @classmethod 22 | def setUpClass(cls): 23 | """Load source files (encoded and decoded version) for tests.""" 24 | with open(INPUT, 'r') as data: 25 | cls.input = data.read() 26 | with open(OUTPUT, 'r') as data: 27 | cls.output = data.read() 28 | 29 | def test_detect(self): 30 | """Test detect() function.""" 31 | def detected(source): return self.assertTrue(detect(source)) 32 | 33 | detected(self.input) 34 | 35 | def test_unpack(self): 36 | """Test unpack() function.""" 37 | def check(inp, out): return self.assertEqual(unpack(inp), out) 38 | 39 | check(self.input, self.output) 40 | 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/tests/testindentation.py: -------------------------------------------------------------------------------- 1 | 
class TestJSBeautifierIndentation(unittest.TestCase):
    """Checks jsbeautifier.beautify() output when indenting with tabs."""

    @classmethod
    def setUpClass(cls):
        """Create the options object shared by all tests in this class."""
        options = jsbeautifier.default_options()
        options.indent_size = 4
        options.indent_char = ' '
        options.preserve_newlines = True
        options.jslint_happy = False
        options.keep_array_indentation = False
        options.brace_style = 'collapse'
        options.indent_level = 0

        cls.options = options
        cls.wrapregex = re.compile('^(.+)$', re.MULTILINE)

    def decodesto(self, input, expectation=None):
        """Assert that beautifying `input` gives `expectation`.

        When `expectation` is omitted (or falsy) the input is expected to
        come back unchanged.
        """
        self.assertEqual(
            jsbeautifier.beautify(input, self.options), expectation or input)

    def test_tabs(self):
        test_fragment = self.decodesto

        self.options.indent_with_tabs = 1
        test_fragment('{tabs()}', "{\n\ttabs()\n}")

    def test_function_indent(self):
        test_fragment = self.decodesto

        self.options.indent_with_tabs = 1
        self.options.keep_function_indentation = 1
        test_fragment(
            'var foo = function(){ bar() }();',
            "var foo = function() {\n\tbar()\n}();")

        # The expected output below is tab-indented, so set the same option
        # used above; the previous ``tabs`` attribute is not referenced
        # anywhere else in this test module.
        self.options.indent_with_tabs = 1
        self.options.keep_function_indentation = 0
        test_fragment(
            'var foo = function(){ baz() }();',
            "var foo = function() {\n\tbaz()\n}();")


if __name__ == '__main__':
    unittest.main()

BurpBeautifier is a Burpsuite extension, written in Jython 2.7, for beautifying request/response bodies. It supports the JS, JSON, HTML and XML formats.

2 |

Options

3 |
    4 |
  • Enable in MessageEditorTab: By default, JavaScript and JSON are enabled, and HTML and XML are disabled because Burpsuite already beautifies these.
  • 5 |
  • Replace PROXY Response: Enable it if you want the proxy response to be beautified and replaced with the beautified version.
  • 6 |
7 |

Features

8 |
    9 |
  • 10 |

    Supported Formats

    11 |
      12 |
    • JS: uses the jsbeautifier module
    • 13 |
    • JSON: uses the jsbeautifier module
    • 14 |
    • HTML: uses the beautifulsoup4 module, and the jsbeautifier module for JS code in HTML
    • 15 |
    • XML: use xml module
    • 16 |
    17 |
  • 18 |
  • Written in Jython, so the source code is easy to modify. E.g. modify the beautify function to change how data is beautified.
  • 19 |
  • Beautify and replace response body.
  • 20 |
  • Option setting persistence.
  • 21 |
  • Send Request/Response body to Beautifier Panel
  • 22 |
23 |

Notice

24 |
    25 |
  • It may modify data. For example, beautifying XML adds the <?xml version="1.0" encoding="UTF-8"?> header, and beautifying HTML repairs broken HTML tags.
  • 26 |
  • The request/response body is assumed to be UTF-8, so if the body is not UTF-8 encoded or UTF-8 compatible, it may not work well.
  • 27 |
  • gzip/deflate compression is not handled, so Proxy->Options->Miscellaneous->Unpack gzip/deflate in response must be enabled (it is ON by default).
  • 28 |
29 |

Limitations

30 |
    31 |
  • Installation is less convenient than for an extension written in Java, which only requires adding a jar file.
  • 32 |
  • Jython is slow, so data is not beautified if its size exceeds dataMaxSize (default is 680KB).
  • 33 |
34 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/core/token.py: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | # 3 | # Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. 4 | # 5 | # Permission is hereby granted, free of charge, to any person 6 | # obtaining a copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be 14 | # included in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 
class Token:
    """A single lexical token plus its links into the token stream."""

    def __init__(self, type, text, newlines=0, whitespace_before=''):
        # What the token is and the exact text it covers.
        self.type, self.text = type, text

        # Layout information recorded in front of the token.
        self.newlines = newlines
        self.whitespace_before = whitespace_before

        # Everything below starts unset and is filled in by the code that
        # builds and walks the token stream.
        self.comments_before = None
        self.parent = self.next = self.previous = None
        self.opened = self.closed = None
        self.directives = None
def smartsplit(code):
    """Split `code` at " symbol, only if it is not escaped.

    Returns the list of double-quoted strings found in `code`, each still
    wrapped in its quotes and with backslash escapes kept verbatim. Text
    outside the quotes is ignored. An unterminated string (including one
    that ends on a lone backslash) is returned with what was collected.
    """
    strings = []
    length = len(code)
    pos = 0
    while pos < length:
        if code[pos] == '"':
            chars = []  # characters of the string being collected
            pos += 1
            while pos < length and code[pos] != '"':
                if code[pos] == '\\':
                    # Keep the backslash and take the escaped character
                    # with it, so an escaped quote does not end the string.
                    chars.append('\\')
                    pos += 1
                    if pos >= length:
                        # Trailing backslash: nothing left to escape.
                        break
                chars.append(code[pos])
                pos += 1
            strings.append('"%s"' % ''.join(chars))
        pos += 1
    return strings


def detect(code):
    """Detects if `code` is JavascriptObfuscator.com packed."""
    # prefer `is not` idiom, so that a true boolean is returned
    return (re.search(r'^var _0x[a-f0-9]+ ?\= ?\[', code) is not None)


def unpack(code):
    """Unpacks JavascriptObfuscator.com packed code.

    The leading ``var _0x…=[…];`` string table is removed and every
    ``_0x…[index]`` reference in the rest of the code is replaced by the
    matching string. Code that detect() does not recognise is returned
    unchanged.
    """
    if not detect(code):
        return code
    matches = re.search(r'var (_0x[a-f\d]+) ?\= ?\[(.*?)\];', code)
    if not matches:
        return code
    variable = matches.group(1)
    dictionary = smartsplit(matches.group(2))
    # detect() anchors the declaration at the start of the code, so
    # dropping the length of the match removes exactly the string table.
    code = code[len(matches.group(0)):]
    for key, value in enumerate(dictionary):
        code = code.replace(r'%s[%s]' % (variable, key), value)
    return code
class Directives:
    """Recognises ``beautify name:value`` directive comments."""

    def __init__(self, start_block_pattern, end_block_pattern):
        """Compile the patterns from the given comment delimiters.

        Both arguments are regular-expression source strings for the
        opening and closing delimiter of a block comment.
        """
        # A whole comment that carries one or more directives.
        block_source = (
            start_block_pattern
            + r' beautify( \w+[:]\w+)+ '
            + end_block_pattern)
        # The comment that ends an ignored region.
        end_ignore_source = (
            start_block_pattern
            + r'\sbeautify\signore:end\s'
            + end_block_pattern)

        self.__directives_block_pattern = re.compile(block_source)
        self.__directive_pattern = re.compile(r' (\w+)[:](\w+)')
        self.__directives_end_ignore_pattern = re.compile(end_ignore_source)

    def get_directives(self, text):
        """Return a ``{name: value}`` dict for a directive comment.

        Returns None when `text` does not start with a directive block.
        """
        if self.__directives_block_pattern.match(text) is None:
            return None

        # finditer walks the non-overlapping ``name:value`` pairs left to
        # right; a repeated name keeps its last value.
        return dict(
            (found.group(1), found.group(2))
            for found in self.__directive_pattern.finditer(text))

    def readIgnored(self, input):
        """Read from `input` until after the ``ignore:end`` comment."""
        end_marker = self.__directives_end_ignore_pattern
        return input.readUntilAfter(end_marker)
20 | pass 21 | 22 | def test_builder_is_pickled(self): 23 | """Unlike most tree builders, HTMLParserTreeBuilder and will 24 | be restored after pickling. 25 | """ 26 | tree = self.soup("foo") 27 | dumped = pickle.dumps(tree, 2) 28 | loaded = pickle.loads(dumped) 29 | self.assertTrue(isinstance(loaded.builder, type(tree.builder))) 30 | 31 | def test_redundant_empty_element_closing_tags(self): 32 | self.assertSoupEquals('





', "


") 33 | self.assertSoupEquals('


', "") 34 | 35 | def test_empty_element(self): 36 | # This verifies that any buffered data present when the parser 37 | # finishes working is handled. 38 | self.assertSoupEquals("foo &# bar", "foo &# bar") 39 | 40 | def test_tracking_line_numbers(self): 41 | # The html.parser TreeBuilder keeps track of line number and 42 | # position of each element. 43 | markup = "\n

\n\n\ntext

" 44 | soup = self.soup(markup) 45 | self.assertEqual(2, soup.p.sourceline) 46 | self.assertEqual(3, soup.p.sourcepos) 47 | self.assertEqual("sourceline", soup.p.find('sourceline').name) 48 | 49 | # You can deactivate this behavior. 50 | soup = self.soup(markup, store_line_numbers=False) 51 | self.assertEqual("sourceline", soup.p.sourceline.name) 52 | self.assertEqual("sourcepos", soup.p.sourcepos.name) 53 | 54 | 55 | class TestHTMLParserSubclass(SoupTest): 56 | def test_error(self): 57 | """Verify that our HTMLParser subclass implements error() in a way 58 | that doesn't cause a crash. 59 | """ 60 | parser = BeautifulSoupHTMLParser() 61 | parser.error("don't crash") 62 | -------------------------------------------------------------------------------- /BappModules/jsbeautifier/unpackers/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # General code for JSBeautifier unpackers infrastructure. See README.specs 3 | # written by Stefano Sanfilippo 4 | # 5 | 6 | """General code for JSBeautifier unpackers infrastructure.""" 7 | 8 | import pkgutil 9 | import re 10 | from jsbeautifier.unpackers import evalbased 11 | 12 | # NOTE: AT THE MOMENT, IT IS DEACTIVATED FOR YOUR SECURITY: it runs js! 13 | BLACKLIST = ['jsbeautifier.unpackers.evalbased'] 14 | 15 | 16 | class UnpackingError(Exception): 17 | """Badly packed source or general error. Argument is a 18 | meaningful description.""" 19 | pass 20 | 21 | 22 | def getunpackers(): 23 | """Scans the unpackers dir, finds unpackers and add them to UNPACKERS list. 24 | An unpacker will be loaded only if it is a valid python module (name must 25 | adhere to naming conventions) and it is not blacklisted (i.e. inserted 26 | into BLACKLIST.""" 27 | path = __path__ 28 | prefix = __name__ + '.' 
def filtercomments(source):
    """NOT USED: strips leading comments and puts them at the top.

    Consecutive ``/* ... */`` and ``// ...`` comments at the very start of
    `source` are collected, one per line, followed by the remaining code on
    a new line. A source without leading comments, or one that starts with
    an unterminated block comment, is returned unchanged.
    """
    trailing_comments = []

    while True:
        if re.search(r'^\s*\/\*', source):
            end = source.find('*/')
            if end == -1:
                # Unterminated block comment: leave the rest untouched.
                break
            # A slice is required here; indexing with ``[0, n]`` is a tuple
            # index and raises TypeError on a string.
            comment = source[:end + 2]
        else:
            # Capture the whole comment line, not just the ``//`` marker.
            line_comment = re.search(r'^\s*\/\/.*', source)
            if not line_comment:
                break
            comment = line_comment.group(0)

        source = re.sub(r'^\s+', '', source[len(comment):])
        trailing_comments.append(comment.strip())

    if not trailing_comments:
        return source
    # The newline keeps a trailing ``//`` comment from swallowing the code.
    return '\n'.join(trailing_comments) + '\n' + source
4 | # 5 | # Permission is hereby granted, free of charge, to any person 6 | # obtaining a copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be 14 | # included in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 
class TokenStream:
    """An append-only list of tokens with a read cursor."""

    def __init__(self, parent_token=None):
        self.__tokens = []
        self.__tokens_length = len(self.__tokens)
        self.__position = 0
        self.__parent_token = parent_token

    def restart(self):
        """Move the cursor back to the first token."""
        self.__position = 0

    def isEmpty(self):
        """Return True when no token has been added."""
        return not self.__tokens_length

    def hasNext(self):
        """Return True while the cursor is before the end of the stream."""
        return self.__tokens_length > self.__position

    def next(self):
        """Return the token at the cursor and advance past it.

        Raises StopIteration once the stream is exhausted.
        """
        if not self.hasNext():
            raise StopIteration
        current = self.__tokens[self.__position]
        self.__position += 1
        return current

    def peek(self, index=0):
        """Return the token `index` places from the cursor, or None.

        The cursor is not moved; negative offsets look backwards.
        """
        target = self.__position + index
        if 0 <= target < self.__tokens_length:
            return self.__tokens[target]
        return None

    def add(self, token):
        """Append `token`, tagging it with the stream's parent if any."""
        if self.__parent_token:
            token.parent = self.__parent_token

        self.__tokens.append(token)
        self.__tokens_length += 1

    def __iter__(self):
        # Iterating always starts again from the first token.
        self.restart()
        return self

    def __next__(self):
        return self.next()
4 | 5 | """ 6 | 7 | import getopt 8 | import sys 9 | 10 | from editorconfig import VERSION, __version__ 11 | from editorconfig.compat import force_unicode 12 | from editorconfig.exceptions import ParsingError, PathError, VersionError 13 | from editorconfig.handler import EditorConfigHandler 14 | from editorconfig.versiontools import split_version 15 | 16 | 17 | def version(): 18 | print("EditorConfig Python Core Version %s" % __version__) 19 | 20 | 21 | def usage(command, error=False): 22 | if error: 23 | out = sys.stderr 24 | else: 25 | out = sys.stdout 26 | out.write("%s [OPTIONS] FILENAME\n" % command) 27 | out.write('-f ' 28 | 'Specify conf filename other than ".editorconfig".\n') 29 | out.write("-b " 30 | "Specify version (used by devs to test compatibility).\n") 31 | out.write("-h OR --help Print this help message.\n") 32 | out.write("-v OR --version Display version information.\n") 33 | 34 | 35 | def main(): 36 | command_name = sys.argv[0] 37 | try: 38 | opts, args = getopt.getopt(list(map(force_unicode, sys.argv[1:])), 39 | "vhb:f:", ["version", "help"]) 40 | except getopt.GetoptError as e: 41 | print(str(e)) 42 | usage(command_name, error=True) 43 | sys.exit(2) 44 | 45 | version_tuple = VERSION 46 | conf_filename = '.editorconfig' 47 | 48 | for option, arg in opts: 49 | if option in ('-h', '--help'): 50 | usage(command_name) 51 | sys.exit() 52 | if option in ('-v', '--version'): 53 | version() 54 | sys.exit() 55 | if option == '-f': 56 | conf_filename = arg 57 | if option == '-b': 58 | version_tuple = split_version(arg) 59 | if version_tuple is None: 60 | sys.exit("Invalid version number: %s" % arg) 61 | 62 | if len(args) < 1: 63 | usage(command_name, error=True) 64 | sys.exit(2) 65 | filenames = args 66 | multiple_files = len(args) > 1 67 | 68 | for filename in filenames: 69 | handler = EditorConfigHandler(filename, conf_filename, version_tuple) 70 | try: 71 | options = handler.get_configurations() 72 | except (ParsingError, PathError, VersionError) as e: 
73 | print(str(e)) 74 | sys.exit(2) 75 | if multiple_files: 76 | print("[%s]" % filename) 77 | for key, value in options.items(): 78 | print("%s=%s" % (key, value)) 79 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BurpBeautifier is a Burpsuite extension for beautifying request/response body, supporting JS, JSON, HTML, XML format, writing in Jython 2.7. 2 | 3 | 4 | ### Installation 5 | Install from BApp: 6 | 1. Visit [Jython Offical Site](https://www.jython.org/download), and download Jython Standalone jar file. 7 | 2. Open Burpsuite, goto Extender -> Options -> Python Environment, set the Location of Jython Jar. And goto Extender -> BApp Store, find Burp Beautifier and install it. 8 | 9 | 10 | Manual Install: 11 | 1. Visit [Jython Offical Site](https://www.jython.org/download), and download Jython Installer. 12 | 2. Open Jython Installer to install Jython(In standard mode, make sure it will install pip). Assume it has be installed in `D:\jython2.7.1` directory. 13 | 3. Install Jython modules: `D:\jython2.7.1\bin\jython.exe -m pip install -r requirements.txt`. 14 | 4. Open Burpsuite, goto Extender -> Options -> Python Environment, set the Location of Jython Jar is `D:\jython2.7.1\jython.jar`, and the Folder for loading modules is `D:\jython2.7.1`. 15 | 5. Download this tool, Open Burpsuite, goto Extender -> Extensions -> Add -> Choose the beautifier.py. 16 | 17 | ### Screenshot 18 | MessageEditorTab: 19 | ![messageEditorTab](imgs/messageEditorTab.png) 20 | 21 | Standalone beautifier panel: 22 | ![beautifierTab](imgs/beautifierTab.png) 23 | 24 | Options panel: 25 | ![options](imgs/options.png) 26 | 27 | Options setting: 28 | 1. `General Options`: Set Max Size of data that should be beautified. 29 | 2. 
`Enable in MessageEditorTab`: By default, JavaScript and JSON is enable, and HTML and XML is disable cause Burpsuite already beautify these. 30 | 3. `Replace PROXY Response`: Enable it if you want to beautify response and replace it. 31 | 32 | 33 | 34 | ### Features 35 | 1. Supportting Format 36 | - JS: use jsbeautify module 37 | - JSON: use jsbeautify module 38 | - HTML: use beautifulsoup4 module, and jsbeautify module for js code in HTML 39 | - XML: use xml module 40 | 2. Writing in Jython, modify source code conveniently. eg: Modify the beautify function to change how to beautify. 41 | 3. Beautify and replace response body. 42 | 4. Option setting persistence. 43 | 5. Send Resquest/Response body to Beautifier Panel. 44 | 45 | ### Notice 46 | 1. It may modify data, such as: beautifying xml will add the `` header, beautifying html will repair html tag. 47 | 2. It assume the request/response body is utf-8, so if the body is not utf8 encoding or compatible with utf8, it may not work well. 48 | 3. Not considering gzip/deflate compress/decompress, so need the Proxy->Options->Miscellaneous->Unpack gzip/deflate in response is enabled(Enabled by default). 49 | 50 | ### Limitations 51 | 1. Jython is slow. So It will not beautify if size excess dataMaxSize(default is 680KB). -------------------------------------------------------------------------------- /BappModules/jsbeautifier/core/whitespacepattern.py: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | # 3 | # Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors. 
4 | # 5 | # Permission is hereby granted, free of charge, to any person 6 | # obtaining a copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be 14 | # included in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 
class WhitespacePattern(Pattern):
    """Pattern that reads a run of whitespace and counts its newlines."""

    def __init__(self, input_scanner, parent=None):
        Pattern.__init__(self, input_scanner, parent)

        if parent is None:
            # No parent to copy from: start with the default character sets.
            self.__set_whitespace_patterns('', '')
        else:
            self._newline_regexp = \
                self._input.get_regexp(parent._newline_regexp)

        self.newline_count = 0
        self.whitespace_before_token = ''

    def __set_whitespace_patterns(self, whitespace_chars, newline_chars):
        """Build the match and newline regexps from the character sets.

        Tab/space and LF/CR are always added to the given sets.
        """
        all_whitespace = whitespace_chars + '\\t '
        all_newlines = newline_chars + '\\n\\r'

        self._match_pattern = self._input.get_regexp(
            '[%s%s]+' % (all_whitespace, all_newlines))
        self._newline_regexp = self._input.get_regexp(
            '\\r\\n|[%s]' % all_newlines)

    def read(self):
        """Consume whitespace from the input and return it.

        Updates `newline_count` and `whitespace_before_token` to describe
        what was read.
        """
        self.newline_count = 0
        self.whitespace_before_token = ''

        consumed = self._input.read(self._match_pattern)
        if consumed == ' ':
            # Most common case: one space, no need to split.
            self.whitespace_before_token = ' '
            return consumed
        if consumed:
            pieces = self._newline_regexp.split(consumed)
            self.newline_count = len(pieces) - 1
            self.whitespace_before_token = pieces[-1]
        return consumed

    def matching(self, whitespace_chars, newline_chars):
        """Return a derived pattern that also accepts the given characters."""
        derived = self._create()
        derived.__set_whitespace_patterns(whitespace_chars, newline_chars)
        derived._update()
        return derived

    def _create(self):
        return WhitespacePattern(self._input, self)
| # unpacked = unpack(some_string) 11 | # 12 | 13 | # CAVEAT by Einar Lielmanis 14 | 15 | # 16 | # You really don't want to obfuscate your scripts there: they're tracking 17 | # your unpackings, your script gets turned into something like this, 18 | # as of 2011-08-26: 19 | # 20 | # var _escape = 'your_script_escaped'; 21 | # var _111 = document.createElement('script'); 22 | # _111.src = 'http://api.www.myobfuscate.com/?getsrc=ok' + 23 | # '&ref=' + encodeURIComponent(document.referrer) + 24 | # '&url=' + encodeURIComponent(document.URL); 25 | # var 000 = document.getElementsByTagName('head')[0]; 26 | # 000.appendChild(_111); 27 | # document.write(unescape(_escape)); 28 | # 29 | 30 | """Deobfuscator for scripts messed up with MyObfuscate.com""" 31 | 32 | import re 33 | import base64 34 | 35 | # Python 2 retrocompatibility 36 | # pylint: disable=F0401 37 | # pylint: disable=E0611 38 | try: 39 | from urllib import unquote 40 | except ImportError: 41 | from urllib.parse import unquote 42 | 43 | from jsbeautifier.unpackers import UnpackingError 44 | 45 | PRIORITY = 1 46 | 47 | CAVEAT = """// 48 | // Unpacker warning: be careful when using myobfuscate.com for your projects: 49 | // scripts obfuscated by the free online version call back home. 
50 | // 51 | 52 | """ 53 | 54 | SIGNATURE = ( 55 | r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F' 56 | r'\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65' 57 | r'\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75' 58 | r'\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B' 59 | r'\x2F\x3D","","\x63\x68\x61\x72\x41\x74","\x69\x6E\x64\x65\x78' 60 | r'\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","' 61 | r'\x6C\x65\x6E\x67\x74\x68"]') 62 | 63 | 64 | def detect(source): 65 | """Detects MyObfuscate.com packer.""" 66 | return SIGNATURE in source 67 | 68 | 69 | def unpack(source): 70 | """Unpacks js code packed with MyObfuscate.com""" 71 | if not detect(source): 72 | return source 73 | payload = unquote(_filter(source)) 74 | match = re.search(r"^var _escape\='

hello

143 | """ 144 | soup = self.soup(markup) 145 | [s.extract() for s in soup('script')] 146 | [s.extract() for s in soup('style')] 147 | 148 | self.assertEqual(len(soup.find_all("p")), 1) 149 | 150 | def test_empty_comment(self): 151 | """ 152 | Test that empty comment does not break structure. 153 | 154 | https://bugs.launchpad.net/beautifulsoup/+bug/1806598 155 | """ 156 | 157 | markup = """ 158 | 159 | 160 |
161 | 162 |
163 | 164 | 165 | """ 166 | soup = self.soup(markup) 167 | inputs = [] 168 | for form in soup.find_all('form'): 169 | inputs.extend(form.find_all('input')) 170 | self.assertEqual(len(inputs), 1) 171 | 172 | def test_tracking_line_numbers(self): 173 | # The html.parser TreeBuilder keeps track of line number and 174 | # position of each element. 175 | markup = "\n

\n\n\ntext

" 176 | soup = self.soup(markup) 177 | self.assertEqual(2, soup.p.sourceline) 178 | self.assertEqual(5, soup.p.sourcepos) 179 | self.assertEqual("sourceline", soup.p.find('sourceline').name) 180 | 181 | # You can deactivate this behavior. 182 | soup = self.soup(markup, store_line_numbers=False) 183 | self.assertEqual("sourceline", soup.p.sourceline.name) 184 | self.assertEqual("sourcepos", soup.p.sourcepos.name) 185 | -------------------------------------------------------------------------------- /BappModules/soupsieve/__meta__.py: -------------------------------------------------------------------------------- 1 | """Meta related things.""" 2 | from __future__ import unicode_literals 3 | from collections import namedtuple 4 | import re 5 | 6 | RE_VER = re.compile( 7 | r'''(?x) 8 | (?P\d+)(?:\.(?P\d+))?(?:\.(?P\d+))? 9 | (?:(?Pa|b|rc)(?P
\d+))?
 10 |     (?:\.post(?P<post>\d+))?
 11 |     (?:\.dev(?P<dev>\d+))?
 12 |     '''
 13 | )
 14 | 
 15 | REL_MAP = {
 16 |     ".dev": "",
 17 |     ".dev-alpha": "a",
 18 |     ".dev-beta": "b",
 19 |     ".dev-candidate": "rc",
 20 |     "alpha": "a",
 21 |     "beta": "b",
 22 |     "candidate": "rc",
 23 |     "final": ""
 24 | }
 25 | 
 26 | DEV_STATUS = {
 27 |     ".dev": "2 - Pre-Alpha",
 28 |     ".dev-alpha": "2 - Pre-Alpha",
 29 |     ".dev-beta": "2 - Pre-Alpha",
 30 |     ".dev-candidate": "2 - Pre-Alpha",
 31 |     "alpha": "3 - Alpha",
 32 |     "beta": "4 - Beta",
 33 |     "candidate": "4 - Beta",
 34 |     "final": "5 - Production/Stable"
 35 | }
 36 | 
 37 | PRE_REL_MAP = {"a": 'alpha', "b": 'beta', "rc": 'candidate'}
 38 | 
 39 | 
 40 | class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre", "post", "dev"])):
 41 |     """
 42 |     Get the version (PEP 440).
 43 | 
 44 |     A biased approach to the PEP 440 semantic version.
 45 | 
 46 |     Provides a tuple structure which is sorted for comparisons `v1 > v2` etc.
 47 |       (major, minor, micro, release type, pre-release build, post-release build, development release build)
 48 |     Release types are named in is such a way they are comparable with ease.
 49 |     Accessors to check if a development, pre-release, or post-release build. Also provides accessor to get
 50 |     development status for setup files.
 51 | 
 52 |     How it works (currently):
 53 | 
 54 |     - You must specify a release type as either `final`, `alpha`, `beta`, or `candidate`.
 55 |     - To define a development release, you can use either `.dev`, `.dev-alpha`, `.dev-beta`, or `.dev-candidate`.
 56 |       The dot is used to ensure all development specifiers are sorted before `alpha`.
 57 |       You can specify a `dev` number for development builds, but do not have to as implicit development releases
 58 |       are allowed.
 59 |     - You must specify a `pre` value greater than zero if using a prerelease as this project (not PEP 440) does not
 60 |       allow implicit prereleases.
 61 |     - You can optionally set `post` to a value greater than zero to make the build a post release. While post releases
 62 |       are technically allowed in prereleases, it is strongly discouraged, so we are rejecting them. It should be
 63 |       noted that we do not allow `post0` even though PEP 440 does not restrict this. This project specifically
 64 |       does not allow implicit post releases.
 65 |     - It should be noted that we do not support epochs `1!` or local versions `+some-custom.version-1`.
 66 | 
 67 |     Acceptable version releases:
 68 | 
 69 |     ```
 70 |     Version(1, 0, 0, "final")                    1.0
 71 |     Version(1, 2, 0, "final")                    1.2
 72 |     Version(1, 2, 3, "final")                    1.2.3
 73 |     Version(1, 2, 0, ".dev-alpha", pre=4)        1.2a4
 74 |     Version(1, 2, 0, ".dev-beta", pre=4)         1.2b4
 75 |     Version(1, 2, 0, ".dev-candidate", pre=4)    1.2rc4
 76 |     Version(1, 2, 0, "final", post=1)            1.2.post1
 77 |     Version(1, 2, 3, ".dev")                     1.2.3.dev0
 78 |     Version(1, 2, 3, ".dev", dev=1)              1.2.3.dev1
 79 |     ```
 80 | 
 81 |     """
 82 | 
 83 |     def __new__(cls, major, minor, micro, release="final", pre=0, post=0, dev=0):
 84 |         """Validate version info."""
 85 | 
 86 |         # Ensure all parts are positive integers.
 87 |         for value in (major, minor, micro, pre, post):
 88 |             if not (isinstance(value, int) and value >= 0):
 89 |                 raise ValueError("All version parts except 'release' should be integers.")
 90 | 
 91 |         if release not in REL_MAP:
 92 |             raise ValueError("'{}' is not a valid release type.".format(release))
 93 | 
 94 |         # Ensure valid pre-release (we do not allow implicit pre-releases).
 95 |         if ".dev-candidate" < release < "final":
 96 |             if pre == 0:
 97 |                 raise ValueError("Implicit pre-releases not allowed.")
 98 |             elif dev:
 99 |                 raise ValueError("Version is not a development release.")
100 |             elif post:
101 |                 raise ValueError("Post-releases are not allowed with pre-releases.")
102 | 
103 |         # Ensure valid development or development/pre release
104 |         elif release < "alpha":
105 |             if release > ".dev" and pre == 0:
106 |                 raise ValueError("Implicit pre-release not allowed.")
107 |             elif post:
108 |                 raise ValueError("Post-releases are not allowed with pre-releases.")
109 | 
110 |         # Ensure a valid normal release
111 |         else:
112 |             if pre:
113 |                 raise ValueError("Version is not a pre-release.")
114 |             elif dev:
115 |                 raise ValueError("Version is not a development release.")
116 | 
117 |         return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)
118 | 
119 |     def _is_pre(self):
120 |         """Is prerelease."""
121 | 
122 |         return self.pre > 0
123 | 
124 |     def _is_dev(self):
125 |         """Is development."""
126 | 
127 |         return bool(self.release < "alpha")
128 | 
129 |     def _is_post(self):
130 |         """Is post."""
131 | 
132 |         return self.post > 0
133 | 
134 |     def _get_dev_status(self):  # pragma: no cover
135 |         """Get development status string."""
136 | 
137 |         return DEV_STATUS[self.release]
138 | 
139 |     def _get_canonical(self):
140 |         """Get the canonical output string."""
141 | 
142 |         # Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed..
143 |         if self.micro == 0:
144 |             ver = "{}.{}".format(self.major, self.minor)
145 |         else:
146 |             ver = "{}.{}.{}".format(self.major, self.minor, self.micro)
147 |         if self._is_pre():
148 |             ver += '{}{}'.format(REL_MAP[self.release], self.pre)
149 |         if self._is_post():
150 |             ver += ".post{}".format(self.post)
151 |         if self._is_dev():
152 |             ver += ".dev{}".format(self.dev)
153 | 
154 |         return ver
155 | 
156 | 
def parse_version(ver, pre=False):
    """
    Parse version into a comparable Version tuple.

    `ver` is matched against `RE_VER` from the start of the string. The `pre` argument is ignored (its value
    is always derived from `ver`); it is kept only so existing callers that pass it keep working.

    Raises `ValueError` if `ver` does not start with a recognizable version, or if the parsed parts are
    rejected by `Version`.
    """

    m = RE_VER.match(ver)
    if m is None:
        # Fail with a clear message instead of an AttributeError on `None`.
        raise ValueError("'{}' is not a valid version string.".format(ver))

    # Handle major, minor, micro
    major = int(m.group('major'))
    minor = int(m.group('minor')) if m.group('minor') else 0
    micro = int(m.group('micro')) if m.group('micro') else 0

    # Handle pre releases
    if m.group('type'):
        release = PRE_REL_MAP[m.group('type')]
        pre = int(m.group('pre'))
    else:
        release = "final"
        pre = 0

    # Handle development releases: a `.devN` suffix turns the release type into
    # its `.dev`-prefixed counterpart.
    if m.group('dev'):
        dev = int(m.group('dev'))
        release = '.dev-' + release if pre else '.dev'
    else:
        dev = 0

    # Handle post
    post = int(m.group('post')) if m.group('post') else 0

    return Version(major, minor, micro, release, pre, post, dev)


# Version of this package, as a comparable tuple and as its canonical string.
__version_info__ = Version(1, 9, 5, "final")
__version__ = __version_info__._get_canonical()
191 | 


--------------------------------------------------------------------------------
/BappModules/jsbeautifier/core/templatablepattern.py:
--------------------------------------------------------------------------------
  1 | # The MIT License (MIT)
  2 | #
  3 | # Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
  4 | #
  5 | # Permission is hereby granted, free of charge, to any person
  6 | # obtaining a copy of this software and associated documentation files
  7 | # (the "Software"), to deal in the Software without restriction,
  8 | # including without limitation the rights to use, copy, modify, merge,
  9 | # publish, distribute, sublicense, and/or sell copies of the Software,
 10 | # and to permit persons to whom the Software is furnished to do so,
 11 | # subject to the following conditions:
 12 | #
 13 | # The above copyright notice and this permission notice shall be
 14 | # included in all copies or substantial portions of the Software.
 15 | #
 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 20 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 21 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 23 | # SOFTWARE.
 24 | 
 25 | import copy
 26 | from ..core.pattern import Pattern
 27 | 
 28 | __all__ = ["TemplatablePattern"]
 29 | 
 30 | class TemplateNames:
 31 |     def __init__(self):
 32 |         self.django = False
 33 |         self.erb = False
 34 |         self.handlebars = False
 35 |         self.php = False
 36 | 
 37 | class TemplatePatterns:
 38 |     def __init__(self, input_scanner):
 39 |         pattern = Pattern(input_scanner)
 40 |         self.handlebars_comment = pattern.starting_with(r'{{!--').until_after(r'--}}')
 41 |         self.handlebars_unescaped = pattern.starting_with(r'{{{').until_after(r'}}}')
 42 |         self.handlebars = pattern.starting_with(r'{{').until_after(r'}}')
 43 |         self.php = pattern.starting_with(r'<\?(?:[=]|php)').until_after(r'\?>')
 44 |         self.erb = pattern.starting_with(r'<%[^%]').until_after(r'[^%]%>')
 45 |         # django coflicts with handlebars a bit.
 46 |         self.django = pattern.starting_with(r'{%').until_after(r'%}')
 47 |         self.django_value = pattern.starting_with(r'{{').until_after(r'}}')
 48 |         self.django_comment = pattern.starting_with(r'{#').until_after(r'#}')
 49 | 
 50 | class TemplatablePattern(Pattern):
 51 | 
 52 |     def __init__(self, input_scanner, parent=None):
 53 |         Pattern.__init__(self, input_scanner, parent)
 54 |         self.__template_pattern = None
 55 |         self._disabled = TemplateNames()
 56 |         self._excluded = TemplateNames()
 57 | 
 58 |         if parent is not None:
 59 |             self.__template_pattern = \
 60 |                 self._input.get_regexp(parent.__template_pattern)
 61 |             self._disabled = copy.copy(parent._disabled)
 62 |             self._excluded = copy.copy(parent._excluded)
 63 | 
 64 |         self.__patterns = TemplatePatterns(input_scanner)
 65 | 
 66 |     def _create(self):
 67 |         return TemplatablePattern(self._input, self)
 68 | 
 69 |     def _update(self):
 70 |         self.__set_templated_pattern()
 71 | 
 72 |     def read_options(self, options):
 73 |         result = self._create()
 74 |         for language in ['django', 'erb', 'handlebars', 'php']:
 75 |             setattr(result._disabled, language,
 76 |                 not (language in options.templating))
 77 |         result._update()
 78 |         return result
 79 | 
 80 |     def disable(self, language):
 81 |         result = self._create()
 82 |         setattr(result._disabled, language, True)
 83 |         result._update()
 84 |         return result
 85 | 
 86 |     def exclude(self, language):
 87 |         result = self._create()
 88 |         setattr(result._excluded, language, True)
 89 |         result._update()
 90 |         return result
 91 | 
 92 |     def read(self):
 93 |         result = ''
 94 |         if bool(self._match_pattern):
 95 |             result = self._input.read(self._starting_pattern)
 96 |         else:
 97 |             result = self._input.read(self._starting_pattern,
 98 |                 self.__template_pattern)
 99 | 
100 |         next = self._read_template()
101 | 
102 |         while (bool(next)):
103 |             if self._match_pattern is not None:
104 |                 next += self._input.read(self._match_pattern)
105 |             else:
106 |                 next += self._input.readUntil(self.__template_pattern)
107 | 
108 |             result += next
109 |             next = self._read_template()
110 | 
111 |         if self._until_after:
112 |             result += self._input.readUntilAfter(self._until_after)
113 | 
114 |         return result
115 | 
116 |     def __set_templated_pattern(self):
117 |         items = list()
118 | 
119 |         if not self._disabled.php:
120 |             items.append(self.__patterns.php._starting_pattern.pattern)
121 | 
122 |         if not self._disabled.handlebars:
123 |             items.append(self.__patterns.handlebars._starting_pattern.pattern)
124 | 
125 |         if not self._disabled.erb:
126 |             items.append(self.__patterns.erb._starting_pattern.pattern)
127 | 
128 |         if not self._disabled.django:
129 |             items.append(self.__patterns.django._starting_pattern.pattern)
130 |             items.append(self.__patterns.django_value._starting_pattern.pattern)
131 |             items.append(self.__patterns.django_comment._starting_pattern.pattern)
132 | 
133 |         if self._until_pattern:
134 |             items.append(self._until_pattern.pattern)
135 | 
136 |         self.__template_pattern = self._input.get_regexp(
137 |             r'(?:' + '|'.join(items) + ')')
138 | 
139 |     def _read_template(self):
140 |         resulting_string = ''
141 |         c = self._input.peek()
142 |         if c == '<':
143 |             peek1 = self._input.peek(1)
144 |             if not self._disabled.php and \
145 |                     not self._excluded.php and \
146 |                     peek1 == '?':
147 |                 resulting_string = resulting_string or \
148 |                     self.__patterns.php.read()
149 | 
150 |             if not self._disabled.erb and \
151 |                     not self._excluded.erb and \
152 |                     peek1 == '%':
153 |                 resulting_string = resulting_string or \
154 |                     self.__patterns.erb.read()
155 |         elif c == '{':
156 |             if not self._disabled.handlebars and \
157 |                     not self._excluded.handlebars:
158 |                 resulting_string = resulting_string or \
159 |                     self.__patterns.handlebars_comment.read()
160 |                 resulting_string = resulting_string or \
161 |                     self.__patterns.handlebars_unescaped.read()
162 |                 resulting_string = resulting_string or \
163 |                     self.__patterns.handlebars.read()
164 |             if not self._disabled.django:
165 |                 # django coflicts with handlebars a bit.
166 |                 if not self._excluded.django and \
167 |                         not self._excluded.handlebars:
168 |                     resulting_string = resulting_string or \
169 |                         self.__patterns.django_value.read()
170 |                 if not self._excluded.django:
171 | 
172 |                     resulting_string = resulting_string or \
173 |                         self.__patterns.django_comment.read()
174 |                     resulting_string = resulting_string or \
175 |                         self.__patterns.django.read()
176 | 
177 |         return resulting_string
178 | 


--------------------------------------------------------------------------------
/BappModules/editorconfig/fnmatch.py:
--------------------------------------------------------------------------------
  1 | """Filename matching with shell patterns.
  2 | 
  3 | fnmatch(FILENAME, PATTERN) matches according to the local convention.
  4 | fnmatchcase(FILENAME, PATTERN) always takes case in account.
  5 | 
  6 | The functions operate by translating the pattern into a regular
  7 | expression.  They cache the compiled regular expressions for speed.
  8 | 
  9 | The function translate(PATTERN) returns a regular expression
 10 | corresponding to PATTERN.  (It does not compile it.)
 11 | 
 12 | Based on code from fnmatch.py file distributed with Python 2.6.
 13 | 
 14 | Licensed under PSF License (see LICENSE.PSF file).
 15 | 
 16 | Changes to original fnmatch module:
 17 | - translate function supports ``*`` and ``**`` similarly to fnmatch C library
 18 | """
 19 | 
 20 | import os
 21 | import re
 22 | 
 23 | 
 24 | __all__ = ["fnmatch", "fnmatchcase", "translate"]
 25 | 
 26 | _cache = {}
 27 | 
# Matches a "{" that is not preceded by a backslash (the preceding character,
# if any, is part of the match). translate() counts these matches.
LEFT_BRACE = re.compile(
    r"""

    (?: ^ | [^\\] )     # Beginning of string or a character besides "\"

    \{                  # "{"

    """, re.VERBOSE
)

# Same as LEFT_BRACE, for "}". translate() compares both counts to decide
# whether the braces in a pattern balance.
RIGHT_BRACE = re.compile(
    r"""

    (?: ^ | [^\\] )     # Beginning of string or a character besides "\"

    \}                  # "}"

    """, re.VERBOSE
)

# Matches "<number>..<number>" and captures both (optionally signed) numbers.
NUMERIC_RANGE = re.compile(
    r"""
    (               # Capture a number
        [+-] ?      # Zero or one "+" or "-" characters
        \d +        # One or more digits
    )

    \.\.            # ".."

    (               # Capture a number
        [+-] ?      # Zero or one "+" or "-" characters
        \d +        # One or more digits
    )
    """, re.VERBOSE
)
 63 | 
 64 | 
 65 | def fnmatch(name, pat):
 66 |     """Test whether FILENAME matches PATTERN.
 67 | 
 68 |     Patterns are Unix shell style:
 69 | 
 70 |     - ``*``             matches everything except path separator
 71 |     - ``**``            matches everything
 72 |     - ``?``             matches any single character
 73 |     - ``[seq]``         matches any character in seq
 74 |     - ``[!seq]``        matches any char not in seq
 75 |     - ``{s1,s2,s3}``    matches any of the strings given (separated by commas)
 76 | 
 77 |     An initial period in FILENAME is not special.
 78 |     Both FILENAME and PATTERN are first case-normalized
 79 |     if the operating system requires it.
 80 |     If you don't want this, use fnmatchcase(FILENAME, PATTERN).
 81 |     """
 82 | 
 83 |     name = os.path.normpath(name).replace(os.sep, "/")
 84 |     return fnmatchcase(name, pat)
 85 | 
 86 | 
 87 | def cached_translate(pat):
 88 |     if not pat in _cache:
 89 |         res, num_groups = translate(pat)
 90 |         regex = re.compile(res)
 91 |         _cache[pat] = regex, num_groups
 92 |     return _cache[pat]
 93 | 
 94 | 
 95 | def fnmatchcase(name, pat):
 96 |     """Test whether FILENAME matches PATTERN, including case.
 97 | 
 98 |     This is a version of fnmatch() which doesn't case-normalize
 99 |     its arguments.
100 |     """
101 | 
102 |     regex, num_groups = cached_translate(pat)
103 |     match = regex.match(name)
104 |     if not match:
105 |         return False
106 |     pattern_matched = True
107 |     for (num, (min_num, max_num)) in zip(match.groups(), num_groups):
108 |         if num[0] == '0' or not (min_num <= int(num) <= max_num):
109 |             pattern_matched = False
110 |             break
111 |     return pattern_matched
112 | 
113 | 
def translate(pat, nested=False):
    """Translate a shell PATTERN to a regular expression.

    Returns a tuple ``(regex_source, numeric_groups)``. ``regex_source`` is
    an uncompiled regular expression string and ``numeric_groups`` holds one
    ``(low, high)`` integer tuple per ``{num1..num2}`` range, in the order of
    the capture groups emitted for them.

    When ``nested`` is true (used for the recursive translation of a brace
    body) the flags prefix and the end-of-string anchor are not added.
    """

    index, length = 0, len(pat)  # Current index and length of pattern
    brace_level = 0
    in_brackets = False
    result = ''
    is_escaped = False
    # Braces are only treated as alternation groups when they balance.
    matching_braces = (len(LEFT_BRACE.findall(pat)) ==
                       len(RIGHT_BRACE.findall(pat)))
    numeric_groups = []
    while index < length:
        current_char = pat[index]
        index += 1
        if current_char == '*':
            pos = index
            if pos < length and pat[pos] == '*':
                result += '.*'
            else:
                result += '[^/]*'
        elif current_char == '?':
            result += '.'
        elif current_char == '[':
            if in_brackets:
                result += '\\['
            else:
                # A bracket expression containing an unescaped "/" is
                # emitted as literal text instead of a character class.
                pos = index
                has_slash = False
                while pos < length and pat[pos] != ']':
                    if pat[pos] == '/' and pat[pos-1] != '\\':
                        has_slash = True
                        break
                    pos += 1
                if has_slash:
                    result += '\\[' + pat[index:(pos + 1)] + '\\]'
                    index = pos + 2
                else:
                    if index < length and pat[index] in '!^':
                        index += 1
                        result += '[^'
                    else:
                        result += '['
                    in_brackets = True
        elif current_char == '-':
            if in_brackets:
                result += current_char
            else:
                result += '\\' + current_char
        elif current_char == ']':
            result += current_char
            in_brackets = False
        elif current_char == '{':
            # Look ahead for a comma before the closing brace.
            pos = index
            has_comma = False
            while pos < length and (pat[pos] != '}' or is_escaped):
                if pat[pos] == ',' and not is_escaped:
                    has_comma = True
                    break
                is_escaped = pat[pos] == '\\' and not is_escaped
                pos += 1
            if not has_comma and pos < length:
                num_range = NUMERIC_RANGE.match(pat[index:pos])
                if num_range:
                    # Store a tuple: the result is cached and re-read on every
                    # match, which a one-shot map() iterator cannot support.
                    numeric_groups.append(
                        tuple(int(bound) for bound in num_range.groups()))
                    result += r"([+-]?\d+)"
                else:
                    inner_result, inner_groups = translate(pat[index:pos],
                                                           nested=True)
                    result += '\\{%s\\}' % (inner_result,)
                    numeric_groups += inner_groups
                index = pos + 1
            elif matching_braces:
                result += '(?:'
                brace_level += 1
            else:
                result += '\\{'
        elif current_char == ',':
            if brace_level > 0 and not is_escaped:
                result += '|'
            else:
                result += '\\,'
        elif current_char == '}':
            if brace_level > 0 and not is_escaped:
                result += ')'
                brace_level -= 1
            else:
                result += '\\}'
        elif current_char == '/':
            if pat[index:(index + 3)] == "**/":
                result += "(?:/|/.*/)"
                index += 3
            else:
                result += '/'
        elif current_char != '\\':
            result += re.escape(current_char)
        # Track backslash escapes: a lone backslash emits nothing and marks
        # the next character as escaped; a doubled one emits a literal "\".
        if current_char == '\\':
            if is_escaped:
                result += re.escape(current_char)
            is_escaped = not is_escaped
        else:
            is_escaped = False
    if not nested:
        # Global inline flags must lead the expression; placing them after
        # the pattern is rejected by newer versions of the re module.
        result = '(?ms)' + result + r'\Z'
    return result, numeric_groups
221 | 


--------------------------------------------------------------------------------
/BappModules/bs4/diagnose.py:
--------------------------------------------------------------------------------
  1 | """Diagnostic functions, mainly for use when doing tech support."""
  2 | 
  3 | # Use of this source code is governed by the MIT license.
  4 | __license__ = "MIT"
  5 | 
  6 | import cProfile
  7 | from StringIO import StringIO
  8 | from HTMLParser import HTMLParser
  9 | import bs4
 10 | from bs4 import BeautifulSoup, __version__
 11 | from bs4.builder import builder_registry
 12 | 
 13 | import os
 14 | import pstats
 15 | import random
 16 | import tempfile
 17 | import time
 18 | import traceback
 19 | import sys
 20 | import cProfile
 21 | 
 22 | def diagnose(data):
 23 |     """Diagnostic suite for isolating common problems."""
 24 |     print "Diagnostic running on Beautiful Soup %s" % __version__
 25 |     print "Python version %s" % sys.version
 26 | 
 27 |     basic_parsers = ["html.parser", "html5lib", "lxml"]
 28 |     for name in basic_parsers:
 29 |         for builder in builder_registry.builders:
 30 |             if name in builder.features:
 31 |                 break
 32 |         else:
 33 |             basic_parsers.remove(name)
 34 |             print (
 35 |                 "I noticed that %s is not installed. Installing it may help." %
 36 |                 name)
 37 | 
 38 |     if 'lxml' in basic_parsers:
 39 |         basic_parsers.append("lxml-xml")
 40 |         try:
 41 |             from lxml import etree
 42 |             print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
 43 |         except ImportError, e:
 44 |             print (
 45 |                 "lxml is not installed or couldn't be imported.")
 46 | 
 47 | 
 48 |     if 'html5lib' in basic_parsers:
 49 |         try:
 50 |             import html5lib
 51 |             print "Found html5lib version %s" % html5lib.__version__
 52 |         except ImportError, e:
 53 |             print (
 54 |                 "html5lib is not installed or couldn't be imported.")
 55 | 
 56 |     if hasattr(data, 'read'):
 57 |         data = data.read()
 58 |     elif data.startswith("http:") or data.startswith("https:"):
 59 |         print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
 60 |         print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
 61 |         return
 62 |     else:
 63 |         try:
 64 |             if os.path.exists(data):
 65 |                 print '"%s" looks like a filename. Reading data from the file.' % data
 66 |                 with open(data) as fp:
 67 |                     data = fp.read()
 68 |         except ValueError:
 69 |             # This can happen on some platforms when the 'filename' is
 70 |             # too long. Assume it's data and not a filename.
 71 |             pass
 72 |         print
 73 | 
 74 |     for parser in basic_parsers:
 75 |         print "Trying to parse your markup with %s" % parser
 76 |         success = False
 77 |         try:
 78 |             soup = BeautifulSoup(data, features=parser)
 79 |             success = True
 80 |         except Exception, e:
 81 |             print "%s could not parse the markup." % parser
 82 |             traceback.print_exc()
 83 |         if success:
 84 |             print "Here's what %s did with the markup:" % parser
 85 |             print soup.prettify()
 86 | 
 87 |         print "-" * 80
 88 | 
 89 | def lxml_trace(data, html=True, **kwargs):
 90 |     """Print out the lxml events that occur during parsing.
 91 | 
 92 |     This lets you see how lxml parses a document when no Beautiful
 93 |     Soup code is running.
 94 |     """
 95 |     from lxml import etree
 96 |     for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
 97 |         print("%s, %4s, %s" % (event, element.tag, element.text))
 98 | 
 99 | class AnnouncingParser(HTMLParser):
100 |     """Announces HTMLParser parse events, without doing anything else."""
101 | 
102 |     def _p(self, s):
103 |         print(s)
104 | 
105 |     def handle_starttag(self, name, attrs):
106 |         self._p("%s START" % name)
107 | 
108 |     def handle_endtag(self, name):
109 |         self._p("%s END" % name)
110 | 
111 |     def handle_data(self, data):
112 |         self._p("%s DATA" % data)
113 | 
114 |     def handle_charref(self, name):
115 |         self._p("%s CHARREF" % name)
116 | 
117 |     def handle_entityref(self, name):
118 |         self._p("%s ENTITYREF" % name)
119 | 
120 |     def handle_comment(self, data):
121 |         self._p("%s COMMENT" % data)
122 | 
123 |     def handle_decl(self, data):
124 |         self._p("%s DECL" % data)
125 | 
126 |     def unknown_decl(self, data):
127 |         self._p("%s UNKNOWN-DECL" % data)
128 | 
129 |     def handle_pi(self, data):
130 |         self._p("%s PI" % data)
131 | 
def htmlparser_trace(data):
    """Print out the HTMLParser events that occur during parsing.

    Feeds `data` to a fresh AnnouncingParser, which lets you see how
    HTMLParser parses a document when no Beautiful Soup code is running.
    """
    AnnouncingParser().feed(data)
140 | 
_vowels = "aeiou"
_consonants = "bcdfghjklmnpqrstvwxyz"

def rword(length=5):
    "Generate a random word-like string."
    letters = []
    for position in range(length):
        # Even positions take a consonant, odd positions a vowel.
        alphabet = _vowels if position % 2 else _consonants
        letters.append(random.choice(alphabet))
    return ''.join(letters)
154 | 
def rsentence(length=4):
    "Generate a random sentence-like string."
    # `length` words of 4 to 9 letters each, separated by single spaces.
    words = [rword(random.randint(4, 9)) for _ in range(length)]
    return " ".join(words)
158 |         
def rdoc(num_elements=1000):
    """Randomly generate an invalid HTML document.

    Each of the `num_elements` steps randomly emits an opening tag, a random
    sentence, a closing tag, or nothing. Tags are opened and closed
    independently of each other, so the document is usually not well formed.
    """
    tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
    elements = []
    for i in range(num_elements):
        choice = random.randint(0,3)
        if choice == 0:
            # New tag.
            tag_name = random.choice(tag_names)
            elements.append("<%s>" % tag_name)
        elif choice == 1:
            elements.append(rsentence(random.randint(1,4)))
        elif choice == 2:
            # Close a tag. (The format string needs the "%s" placeholder;
            # formatting an empty string with an argument raises TypeError.)
            tag_name = random.choice(tag_names)
            elements.append("</%s>" % tag_name)
        # choice == 3 adds nothing for this step.
    return "<html>" + "\n".join(elements) + "</html>"
176 | 
def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark.

    Generates one random document with rdoc(), times Beautiful Soup with each
    parser configuration on it, then times lxml and html5lib on their own.
    All results are printed.
    """
    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
    data = rdoc(num_elements)
    print("Generated a large invalid HTML document (%d bytes)." % len(data))

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
            a = time.time()
            BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception:
            # Deliberately broad: report the failure and benchmark the rest.
            print("%s could not parse the markup." % parser)
            traceback.print_exc()
        if success:
            print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))

    from lxml import etree
    a = time.time()
    etree.HTML(data)
    b = time.time()
    print("Raw lxml parsed the markup in %.2fs." % (b-a))

    import html5lib
    parser = html5lib.HTMLParser()
    a = time.time()
    parser.parse(data)
    b = time.time()
    print("Raw html5lib parsed the markup in %.2fs." % (b-a))
208 | 
def profile(num_elements=100000, parser="lxml"):
    """Profile Beautiful Soup parsing a random document and print the stats.

    The profile data is written to a temporary file, loaded back with pstats,
    sorted by cumulative time and printed, restricted to entries matching
    '_html5lib|bs4' and to 50 lines.
    """
    data = rdoc(num_elements)
    namespace = dict(bs4=bs4, data=data, parser=parser)

    # The context manager closes the temporary file once the statistics
    # have been printed, instead of leaving the handle open.
    with tempfile.NamedTemporaryFile() as filehandle:
        filename = filehandle.name
        cProfile.runctx('bs4.BeautifulSoup(data, parser)', namespace, namespace, filename)

        stats = pstats.Stats(filename)
        stats.sort_stats("cumulative")
        stats.print_stats('_html5lib|bs4', 50)
222 | 
# Script entry point: hand everything read from standard input to diagnose().
if __name__ == '__main__':
    diagnose(sys.stdin.read())
225 | 


--------------------------------------------------------------------------------
/BappModules/backports/functools_lru_cache.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | 
  3 | import functools
  4 | from collections import namedtuple
  5 | from threading import RLock
  6 | 
  7 | _CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
  8 | 
  9 | 
 10 | @functools.wraps(functools.update_wrapper)
 11 | def update_wrapper(
 12 |     wrapper,
 13 |     wrapped,
 14 |     assigned=functools.WRAPPER_ASSIGNMENTS,
 15 |     updated=functools.WRAPPER_UPDATES,
 16 | ):
 17 |     """
 18 |     Patch two bugs in functools.update_wrapper.
 19 |     """
 20 |     # workaround for http://bugs.python.org/issue3445
 21 |     assigned = tuple(attr for attr in assigned if hasattr(wrapped, attr))
 22 |     wrapper = functools.update_wrapper(wrapper, wrapped, assigned, updated)
 23 |     # workaround for https://bugs.python.org/issue17482
 24 |     wrapper.__wrapped__ = wrapped
 25 |     return wrapper
 26 | 
 27 | 
 28 | class _HashedSeq(list):
 29 |     __slots__ = 'hashvalue'
 30 | 
 31 |     def __init__(self, tup, hash=hash):
 32 |         self[:] = tup
 33 |         self.hashvalue = hash(tup)
 34 | 
 35 |     def __hash__(self):
 36 |         return self.hashvalue
 37 | 
 38 | 
 39 | def _make_key(
 40 |     args,
 41 |     kwds,
 42 |     typed,
 43 |     kwd_mark=(object(),),
 44 |     fasttypes=set([int, str, frozenset, type(None)]),
 45 |     sorted=sorted,
 46 |     tuple=tuple,
 47 |     type=type,
 48 |     len=len,
 49 | ):
 50 |     'Make a cache key from optionally typed positional and keyword arguments'
 51 |     key = args
 52 |     if kwds:
 53 |         sorted_items = sorted(kwds.items())
 54 |         key += kwd_mark
 55 |         for item in sorted_items:
 56 |             key += item
 57 |     if typed:
 58 |         key += tuple(type(v) for v in args)
 59 |         if kwds:
 60 |             key += tuple(type(v) for k, v in sorted_items)
 61 |     elif len(key) == 1 and type(key[0]) in fasttypes:
 62 |         return key[0]
 63 |     return _HashedSeq(key)
 64 | 
 65 | 
 66 | def lru_cache(maxsize=100, typed=False):
 67 |     """Least-recently-used cache decorator.
 68 | 
 69 |     If *maxsize* is set to None, the LRU features are disabled and the cache
 70 |     can grow without bound.
 71 | 
 72 |     If *typed* is True, arguments of different types will be cached separately.
 73 |     For example, f(3.0) and f(3) will be treated as distinct calls with
 74 |     distinct results.
 75 | 
 76 |     Arguments to the cached function must be hashable.
 77 | 
 78 |     View the cache statistics named tuple (hits, misses, maxsize, currsize) with
 79 |     f.cache_info().  Clear the cache and statistics with f.cache_clear().
 80 |     Access the underlying function with f.__wrapped__.
 81 | 
 82 |     See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
 83 | 
 84 |     """
 85 | 
 86 |     # Users should only access the lru_cache through its public API:
 87 |     #       cache_info, cache_clear, and f.__wrapped__
 88 |     # The internals of the lru_cache are encapsulated for thread safety and
 89 |     # to allow the implementation to change (including a possible C version).
 90 | 
 91 |     def decorating_function(user_function):
 92 | 
 93 |         cache = dict()
 94 |         stats = [0, 0]  # make statistics updateable non-locally
 95 |         HITS, MISSES = 0, 1  # names for the stats fields
 96 |         make_key = _make_key
 97 |         cache_get = cache.get  # bound method to lookup key or return None
 98 |         _len = len  # localize the global len() function
 99 |         lock = RLock()  # because linkedlist updates aren't threadsafe
100 |         root = []  # root of the circular doubly linked list
101 |         root[:] = [root, root, None, None]  # initialize by pointing to self
102 |         nonlocal_root = [root]  # make updateable non-locally
103 |         PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields
104 | 
105 |         if maxsize == 0:
106 | 
107 |             def wrapper(*args, **kwds):
108 |                 # no caching, just do a statistics update after a successful call
109 |                 result = user_function(*args, **kwds)
110 |                 stats[MISSES] += 1
111 |                 return result
112 | 
113 |         elif maxsize is None:
114 | 
115 |             def wrapper(*args, **kwds):
116 |                 # simple caching without ordering or size limit
117 |                 key = make_key(args, kwds, typed)
118 |                 result = cache_get(
119 |                     key, root
120 |                 )  # root used here as a unique not-found sentinel
121 |                 if result is not root:
122 |                     stats[HITS] += 1
123 |                     return result
124 |                 result = user_function(*args, **kwds)
125 |                 cache[key] = result
126 |                 stats[MISSES] += 1
127 |                 return result
128 | 
129 |         else:
130 | 
131 |             def wrapper(*args, **kwds):
132 |                 # size limited caching that tracks accesses by recency
133 |                 key = make_key(args, kwds, typed) if kwds or typed else args
134 |                 with lock:
135 |                     link = cache_get(key)
136 |                     if link is not None:
137 |                         # record recent use of the key by moving it
138 |                         # to the front of the list
139 |                         root, = nonlocal_root
140 |                         link_prev, link_next, key, result = link
141 |                         link_prev[NEXT] = link_next
142 |                         link_next[PREV] = link_prev
143 |                         last = root[PREV]
144 |                         last[NEXT] = root[PREV] = link
145 |                         link[PREV] = last
146 |                         link[NEXT] = root
147 |                         stats[HITS] += 1
148 |                         return result
149 |                 result = user_function(*args, **kwds)
150 |                 with lock:
151 |                     root, = nonlocal_root
152 |                     if key in cache:
153 |                         # getting here means that this same key was added to the
154 |                         # cache while the lock was released.  since the link
155 |                         # update is already done, we need only return the
156 |                         # computed result and update the count of misses.
157 |                         pass
158 |                     elif _len(cache) >= maxsize:
159 |                         # use the old root to store the new key and result
160 |                         oldroot = root
161 |                         oldroot[KEY] = key
162 |                         oldroot[RESULT] = result
163 |                         # empty the oldest link and make it the new root
164 |                         root = nonlocal_root[0] = oldroot[NEXT]
165 |                         oldkey = root[KEY]
166 |                         root[KEY] = root[RESULT] = None
167 |                         # now update the cache dictionary for the new links
168 |                         del cache[oldkey]
169 |                         cache[key] = oldroot
170 |                     else:
171 |                         # put result in a new link at the front of the list
172 |                         last = root[PREV]
173 |                         link = [last, root, key, result]
174 |                         last[NEXT] = root[PREV] = cache[key] = link
175 |                     stats[MISSES] += 1
176 |                 return result
177 | 
178 |         def cache_info():
179 |             """Report cache statistics"""
180 |             with lock:
181 |                 return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
182 | 
183 |         def cache_clear():
184 |             """Clear the cache and cache statistics"""
185 |             with lock:
186 |                 cache.clear()
187 |                 root = nonlocal_root[0]
188 |                 root[:] = [root, root, None, None]
189 |                 stats[:] = [0, 0]
190 | 
191 |         wrapper.__wrapped__ = user_function
192 |         wrapper.cache_info = cache_info
193 |         wrapper.cache_clear = cache_clear
194 |         return update_wrapper(wrapper, user_function)
195 | 
196 |     return decorating_function
197 | 


--------------------------------------------------------------------------------
/BappModules/jsbeautifier/core/options.py:
--------------------------------------------------------------------------------
  1 | # The MIT License (MIT)
  2 | #
  3 | # Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
  4 | #
  5 | # Permission is hereby granted, free of charge, to any person
  6 | # obtaining a copy of this software and associated documentation files
  7 | # (the "Software"), to deal in the Software without restriction,
  8 | # including without limitation the rights to use, copy, modify, merge,
  9 | # publish, distribute, sublicense, and/or sell copies of the Software,
 10 | # and to permit persons to whom the Software is furnished to do so,
 11 | # subject to the following conditions:
 12 | #
 13 | # The above copyright notice and this permission notice shall be
 14 | # included in all copies or substantial portions of the Software.
 15 | #
 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 20 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 21 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 23 | # SOFTWARE.
 24 | 
 25 | import copy
 26 | import re
 27 | from collections import namedtuple
 28 | 
 29 | 
 30 | class Options:
 31 |     def __init__(self, options=None, merge_child_field=None):
 32 |         self.css = None
 33 |         self.js = None
 34 |         self.html = None
 35 | 
 36 |         self.raw_options = _mergeOpts(options, merge_child_field)
 37 | 
 38 |         # Support passing the source text back with no change
 39 |         self.disabled = self._get_boolean('disabled')
 40 | 
 41 |         self.eol = self._get_characters('eol', 'auto')
 42 |         self.end_with_newline = self._get_boolean('end_with_newline')
 43 |         self.indent_size = self._get_number('indent_size', 4)
 44 |         self.indent_char = self._get_characters('indent_char', ' ')
 45 |         self.indent_level = self._get_number('indent_level')
 46 | 
 47 |         self.preserve_newlines = self._get_boolean('preserve_newlines', True)
 48 |         self.max_preserve_newlines = self._get_number(
 49 |             'max_preserve_newlines', 32786)
 50 | 
 51 |         if not self.preserve_newlines:
 52 |             self.max_preserve_newlines = 0
 53 | 
 54 |         self.indent_with_tabs = self._get_boolean(
 55 |             'indent_with_tabs', self.indent_char == '\t')
 56 |         if self.indent_with_tabs:
 57 |             self.indent_char = '\t'
 58 | 
 59 |             # indent_size behavior changed after 1.8.6
 60 |             # It used to be that indent_size would be
 61 |             # set to 1 for indent_with_tabs. That is no longer needed and
 62 |             # actually doesn't make sense - why not use spaces? Further,
 63 |             # that might produce unexpected behavior - tabs being used
 64 |             # for single-column alignment. So, when indent_with_tabs is true
 65 |             # and indent_size is 1, reset indent_size to 4.
 66 |             if self.indent_size == 1:
 67 |                 self.indent_size = 4
 68 | 
 69 |         # Backwards compat with 1.3.x
 70 |         self.wrap_line_length = self._get_number(
 71 |             'wrap_line_length', self._get_number('max_char'))
 72 | 
 73 |         self.indent_empty_lines = self._get_boolean('indent_empty_lines')
 74 | 
 75 | 
 76 |         # valid templating languages ['django', 'erb', 'handlebars', 'php']
 77 |         # For now, 'auto' = all off for javascript, all on for html (and inline javascript).
 78 |         # other values ignored
 79 |         self.templating = self._get_selection_list('templating',
 80 |             ['auto', 'none', 'django', 'erb', 'handlebars', 'php'], ['auto'])
 81 | 
 82 | 
 83 |     def _get_array(self, name, default_value=[]):
 84 |         option_value = getattr(self.raw_options, name, default_value)
 85 |         result = []
 86 |         if isinstance(option_value, list):
 87 |             result = copy.copy(option_value)
 88 |         elif isinstance(option_value, str):
 89 |             result = re.compile(r"[^a-zA-Z0-9_/\-]+").split(option_value)
 90 | 
 91 |         return result
 92 | 
 93 |     def _get_boolean(self, name, default_value=False):
 94 |         option_value = getattr(self.raw_options, name, default_value)
 95 |         result = False
 96 |         try:
 97 |             result = bool(option_value)
 98 |         except ValueError:
 99 |             pass
100 | 
101 |         return result
102 | 
103 |     def _get_characters(self, name, default_value=''):
104 |         option_value = getattr(self.raw_options, name, default_value)
105 |         result = ''
106 |         if isinstance(option_value, str):
107 |             result = option_value.replace('\\r', '\r').replace(
108 |                 '\\n', '\n').replace('\\t', '\t')
109 | 
110 |         return result
111 | 
112 |     def _get_number(self, name, default_value=0):
113 |         option_value = getattr(self.raw_options, name, default_value)
114 |         result = 0
115 |         try:
116 |             result = int(option_value)
117 |         except ValueError:
118 |             pass
119 | 
120 |         return result
121 | 
122 |     def _get_selection(self, name, selection_list, default_value=None):
123 |         result = self._get_selection_list(name, selection_list, default_value)
124 |         if len(result) != 1:
125 |             raise ValueError(
126 |                 "Invalid Option Value: The option '" + name + "' can only be one of the following values:\n" +
127 |                 str(selection_list) +
128 |                 "\nYou passed in: '" +
129 |                 str(getattr(self.raw_options, name, None)) +
130 |                 "'")
131 | 
132 |         return result[0]
133 | 
134 |     def _get_selection_list(self, name, selection_list, default_value=None):
135 |         if not selection_list:
136 |             raise ValueError("Selection list cannot be empty.")
137 | 
138 |         default_value = default_value or [selection_list[0]]
139 | 
140 |         if not self._is_valid_selection(default_value, selection_list):
141 |             raise ValueError("Invalid Default Value!")
142 | 
143 |         result = self._get_array(name, default_value)
144 |         if not self._is_valid_selection(result, selection_list):
145 |             raise ValueError(
146 |                 "Invalid Option Value: The option '" + name + "' can contain only the following values:\n" +
147 |                 str(selection_list) +
148 |                 "\nYou passed in: '" +
149 |                 str(getattr(self.raw_options, name, None)) +
150 |                 "'")
151 | 
152 |         return result
153 | 
154 |     def _is_valid_selection(self, result, selection_list):
155 |         if len(result) == 0 or len(selection_list) == 0:
156 |             return False
157 | 
158 |         for item in result:
159 |             if item not in selection_list:
160 |                 return False
161 | 
162 |         return True
163 | 
164 | 
165 | # merges child options up with the parent options object
166 | # Example: obj = {a: 1, b: {a: 2}}
167 | #          mergeOpts(obj, 'b')
168 | #
169 | #          Returns: {a: 2}
170 | 
171 | 
def _mergeOpts(options, childFieldName):
    """Normalise *options* and fold the child group *childFieldName* into it.

    A dict (or a tuple convertible to one) comes back as a ``CustomOptions``
    namedtuple; an ``Options`` instance comes back as a copy with the child
    values set as attributes; anything else comes back as a normalised copy.
    """
    if options is None:
        options = {}
    elif isinstance(options, tuple):
        options = dict(options)

    options = _normalizeOpts(options)
    merged = copy.copy(options)

    if isinstance(options, dict):
        child = merged.get(childFieldName, None)
        if child:
            # Child values override the top-level entries of the same name.
            del merged[childFieldName]
            for name in child:
                merged[name] = child[name]
        record_type = namedtuple("CustomOptions", merged.keys())
        merged = record_type(*merged.values())

    if isinstance(options, Options):
        child = getattr(merged, childFieldName, None)
        if child:
            delattr(merged, childFieldName)
            for name in child:
                setattr(merged, name, child[name])

    return merged
198 | 
199 | 
200 | def _normalizeOpts(options):
201 |     convertedOpts = copy.copy(options)
202 |     if isinstance(convertedOpts, dict):
203 |         option_keys = list(convertedOpts.keys())
204 |         for key in option_keys:
205 |             if '-' in key:
206 |                 del convertedOpts[key]
207 |                 convertedOpts[key.replace('-', '_')] = options[key]
208 |     else:
209 |         option_keys = list(getattr(convertedOpts, '__dict__', {}))
210 |         for key in option_keys:
211 |             if '-' in key:
212 |                 delattr(convertedOpts, key)
213 |                 setattr(convertedOpts, key.replace(
214 |                     '-', '_'), getattr(options, key, None))
215 | 
216 |     return convertedOpts
217 | 


--------------------------------------------------------------------------------
/BappModules/jsbeautifier/javascript/acorn.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | # This section of code was translated to python from acorn (javascript).
 4 | #
 5 | # Acorn was written by Marijn Haverbeke and released under an MIT
 6 | # license. The Unicode regexps (for identifiers and whitespace) were
 7 | # taken from [Esprima](http://esprima.org) by Ariya Hidayat.
 8 | #
 9 | # Git repositories for Acorn are available at
10 | #
11 | #     http://marijnhaverbeke.nl/git/acorn
12 | #     https://github.com/marijnh/acorn.git
13 | 
# This is not pretty, but given how we did the version import
# it is the only way to do this without having setup.py fail on a missing
# six dependency.
six = __import__("six")

# ## Character categories

# acorn used char codes to squeeze the last bit of performance out.
# Beautifier is okay without that, so we're using regex character classes.
# Permit $ (36, \x24) and @ (64, \x40). @ is used in ES7 decorators.
# 65 through 90 (\x41-\x5a) are uppercase letters.
# Permit _ (95, \x5f).
# 97 through 122 (\x61-\x7a) are lowercase letters.
_baseASCIIidentifierStartChars = six.u(r"\x24\x40\x41-\x5a\x5f\x61-\x7a")

# Inside an identifier @ is not allowed but the digits 0-9 (\x30-\x39) are.
_baseASCIIidentifierChars = six.u(r"\x24\x30-\x39\x41-\x5a\x5f\x61-\x7a")
31 | 
32 | # Big ugly regular expressions that match characters in the
33 | # whitespace, identifier, and identifier-start categories. These
34 | # are only applied when a character is found to actually have a
35 | # code point above 128.
36 | # IMPORTANT: These strings must be run through six to handle \u chars
37 | _nonASCIIidentifierStartChars = six.u(r"\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2
-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\uf
b28\ufb2a-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc")
38 | _nonASCIIidentifierChars = six.u(r"\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u0620-\u0649\u0672-\u06d3\u06e7-\u06e8\u06fb-\u06fc\u0730-\u074a\u0800-\u0814\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0840-\u0857\u08e4-\u08fe\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09d7\u09df-\u09e0\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a81-\u0a83\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2-\u0ae3\u0ae6-\u0aef\u0b01-\u0b03\u0b3c\u0b3e-\u0b44\u0b47\u0b48\u0b4b-\u0b4d\u0b56\u0b57\u0b5f-\u0b60\u0b66-\u0b6f\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7\u0be6-\u0bef\u0c01-\u0c03\u0c46-\u0c48\u0c4a-\u0c4d\u0c55\u0c56\u0c62-\u0c63\u0c66-\u0c6f\u0c82\u0c83\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5\u0cd6\u0ce2-\u0ce3\u0ce6-\u0cef\u0d02\u0d03\u0d46-\u0d48\u0d57\u0d62-\u0d63\u0d66-\u0d6f\u0d82\u0d83\u0dca\u0dcf-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2\u0df3\u0e34-\u0e3a\u0e40-\u0e45\u0e50-\u0e59\u0eb4-\u0eb9\u0ec8-\u0ecd\u0ed0-\u0ed9\u0f18\u0f19\u0f20-\u0f29\u0f35\u0f37\u0f39\u0f41-\u0f47\u0f71-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u1000-\u1029\u1040-\u1049\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u109d\u135d-\u135f\u170e-\u1710\u1720-\u1730\u1740-\u1750\u1772\u1773\u1780-\u17b2\u17dd\u17e0-\u17e9\u180b-\u180d\u1810-\u1819\u1920-\u192b\u1930-\u193b\u1951-\u196d\u19b0-\u19c0\u19c8-\u19c9\u19d0-\u19d9\u1a00-\u1a15\u1a20-\u1a53\u1a60-\u1a7c\u1a7f-\u1a89\u1a90-\u1a99\u1b46-\u1b4b\u1b50-\u1b59\u1b6b-\u1b73\u1bb0-\u1bb9\u1be6-\u1bf3\u1c00-\u1c22\u1c40-\u1c49\u1c5b-\u1c7d\u1cd0-\u1cd2\u1d00-\u1dbe\u1e01-\u1f15\u200c\u200d\u203f\u2040\u2054\u20d0-\u20dc\u20e1\u20e5-\u20f0\u2d81-\u2d96\u2de0-\u2dff\u3021-\u3028\u3099\u309a\ua640-\ua66d\ua674-\ua67d\ua69f\ua6f0-\ua6f1\ua7f8-\ua800\ua806\ua80b\ua823-\ua827\ua880-\ua881\ua8b4-\ua8c4\ua8d0-\ua8d9\ua8f3-\ua8f7\ua900-\ua909\ua926-\ua92d\ua930-\ua945\ua980-
\ua983\ua9b3-\ua9c0\uaa00-\uaa27\uaa40-\uaa41\uaa4c-\uaa4d\uaa50-\uaa59\uaa7b\uaae0-\uaae9\uaaf2-\uaaf3\uabc0-\uabe1\uabec\uabed\uabf0-\uabf9\ufb20-\ufb28\ufe00-\ufe0f\ufe20-\ufe26\ufe33\ufe34\ufe4d-\ufe4f\uff10-\uff19\uff3f")
39 | #_nonASCIIidentifierStart = re.compile("[" + _nonASCIIidentifierStartChars + "]")
40 | #_nonASCIIidentifier = re.compile("[" + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]")
41 | 
# One identifier-start character: either a \uXXXX escape sequence or a
# character from the ASCII / non-ASCII identifier-start classes.
_identifierStart = six.u("").join((
    six.u(r"(?:\\u[0-9a-fA-F]{4}|["),
    _baseASCIIidentifierStartChars,
    _nonASCIIidentifierStartChars,
    six.u("])"),
))

# Zero or more identifier-continuation characters (or \uXXXX escapes).
_identifierChars = six.u("").join((
    six.u(r"(?:\\u[0-9a-fA-F]{4}|["),
    _baseASCIIidentifierChars,
    _nonASCIIidentifierStartChars,
    _nonASCIIidentifierChars,
    six.u("])*"),
))

# A start character followed by any number of continuation characters.
identifier = re.compile(_identifierStart + _identifierChars)

identifierStart = re.compile(_identifierStart)

# Same character set as _identifierChars, but at least one is required.
identifierMatch = re.compile(six.u("").join((
    six.u(r"(?:\\u[0-9a-fA-F]{4}|["),
    _baseASCIIidentifierChars,
    _nonASCIIidentifierStartChars,
    _nonASCIIidentifierChars,
    six.u("])+"),
)))
60 | 
# Matches a single whitespace character from the non-ASCII set listed in
# the character class below.
# IMPORTANT: This string must be run through six to handle \u chars
_nonASCIIwhitespace = re.compile(
    six.u(r"[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]"))

# Whether a single character denotes a newline: LF, CR, U+2028 or U+2029.
# IMPORTANT: This string must be run through six to handle \u chars
newline = re.compile(six.u(r"[\n\r\u2028\u2029]"))

# Matches a whole line break (where CRLF is considered a single
# line break). Used to count lines.

# In javascript, lineBreak and allLineBreaks are two different patterns.
# In python they are the same compiled pattern; different methods are
# called on it.
# IMPORTANT: This string must be run through six to handle \u chars
lineBreak = re.compile(six.u(r"\r\n|[\n\r\u2028\u2029]"))
allLineBreaks = lineBreak


--------------------------------------------------------------------------------
/BappModules/soupsieve/css_types.py:
--------------------------------------------------------------------------------
  1 | """CSS selector structure items."""
  2 | from __future__ import unicode_literals
  3 | from . import util
  4 | 
  5 | __all__ = (
  6 |     'Selector',
  7 |     'SelectorNull',
  8 |     'SelectorTag',
  9 |     'SelectorAttribute',
 10 |     'SelectorContains',
 11 |     'SelectorNth',
 12 |     'SelectorLang',
 13 |     'SelectorList',
 14 |     'Namespaces',
 15 |     'CustomSelectors'
 16 | )
 17 | 
 18 | 
 19 | SEL_EMPTY = 0x1
 20 | SEL_ROOT = 0x2
 21 | SEL_DEFAULT = 0x4
 22 | SEL_INDETERMINATE = 0x8
 23 | SEL_SCOPE = 0x10
 24 | SEL_DIR_LTR = 0x20
 25 | SEL_DIR_RTL = 0x40
 26 | SEL_IN_RANGE = 0x80
 27 | SEL_OUT_OF_RANGE = 0x100
 28 | SEL_DEFINED = 0x200
 29 | SEL_PLACEHOLDER_SHOWN = 0x400
 30 | 
 31 | 
 32 | class Immutable(object):
 33 |     """Immutable."""
 34 | 
 35 |     __slots__ = ('_hash',)
 36 | 
 37 |     def __init__(self, **kwargs):
 38 |         """Initialize."""
 39 | 
 40 |         temp = []
 41 |         for k, v in kwargs.items():
 42 |             temp.append(type(v))
 43 |             temp.append(v)
 44 |             super(Immutable, self).__setattr__(k, v)
 45 |         super(Immutable, self).__setattr__('_hash', hash(tuple(temp)))
 46 | 
 47 |     @classmethod
 48 |     def __base__(cls):
 49 |         """Get base class."""
 50 | 
 51 |         return cls
 52 | 
 53 |     def __eq__(self, other):
 54 |         """Equal."""
 55 | 
 56 |         return (
 57 |             isinstance(other, self.__base__()) and
 58 |             all([getattr(other, key) == getattr(self, key) for key in self.__slots__ if key != '_hash'])
 59 |         )
 60 | 
 61 |     def __ne__(self, other):
 62 |         """Equal."""
 63 | 
 64 |         return (
 65 |             not isinstance(other, self.__base__()) or
 66 |             any([getattr(other, key) != getattr(self, key) for key in self.__slots__ if key != '_hash'])
 67 |         )
 68 | 
 69 |     def __hash__(self):
 70 |         """Hash."""
 71 | 
 72 |         return self._hash
 73 | 
 74 |     def __setattr__(self, name, value):
 75 |         """Prevent mutability."""
 76 | 
 77 |         raise AttributeError("'{}' is immutable".format(self.__class__.__name__))
 78 | 
 79 |     def __repr__(self):  # pragma: no cover
 80 |         """Representation."""
 81 | 
 82 |         return "{}({})".format(
 83 |             self.__base__(), ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
 84 |         )
 85 | 
 86 |     __str__ = __repr__
 87 | 
 88 | 
 89 | class ImmutableDict(util.Mapping):
 90 |     """Hashable, immutable dictionary."""
 91 | 
 92 |     def __init__(self, *args, **kwargs):
 93 |         """Initialize."""
 94 | 
 95 |         arg = args[0] if args else kwargs
 96 |         is_dict = isinstance(arg, dict)
 97 |         if (
 98 |             is_dict and not all([isinstance(v, util.Hashable) for v in arg.values()]) or
 99 |             not is_dict and not all([isinstance(k, util.Hashable) and isinstance(v, util.Hashable) for k, v in arg])
100 |         ):
101 |             raise TypeError('All values must be hashable')
102 | 
103 |         self._d = dict(*args, **kwargs)
104 |         self._hash = hash(tuple([(type(x), x, type(y), y) for x, y in sorted(self._d.items())]))
105 | 
106 |     def __iter__(self):
107 |         """Iterator."""
108 | 
109 |         return iter(self._d)
110 | 
111 |     def __len__(self):
112 |         """Length."""
113 | 
114 |         return len(self._d)
115 | 
116 |     def __getitem__(self, key):
117 |         """Get item: `namespace['key']`."""
118 |         return self._d[key]
119 | 
120 |     def __hash__(self):
121 |         """Hash."""
122 | 
123 |         return self._hash
124 | 
125 |     def __repr__(self):  # pragma: no cover
126 |         """Representation."""
127 | 
128 |         return "{!r}".format(self._d)
129 | 
130 |     __str__ = __repr__
131 | 
132 | 
class Namespaces(ImmutableDict):
    """Namespaces."""

    def __init__(self, *args, **kwargs):
        """Require string keys and values, then defer to `ImmutableDict`."""

        # Only the first positional argument (or the keywords) is inspected;
        # `super` is left to reject any surplus positional arguments.
        source = args[0] if args else kwargs
        pairs = source.items() if isinstance(source, dict) else source
        if not all([isinstance(k, util.string) and isinstance(v, util.string) for k, v in pairs]):
            raise TypeError('Namespace keys and values must be Unicode strings')

        super(Namespaces, self).__init__(*args, **kwargs)
150 | 
151 | 
class CustomSelectors(ImmutableDict):
    """Immutable mapping of custom selectors; keys and values must be `util.string`."""

    def __init__(self, *args, **kwargs):
        """Check that every key and value is a `util.string`, then defer to `ImmutableDict`."""

        # Only the first positional argument (or the keyword arguments) is
        # inspected here; the parent constructor is expected to reject extra
        # positional arguments on its own.
        source = args[0] if args else kwargs
        pairs = source.items() if isinstance(source, dict) else source
        text = util.string
        if not all([isinstance(key, text) and isinstance(value, text) for key, value in pairs]):
            raise TypeError('CustomSelectors keys and values must be Unicode strings')

        super(CustomSelectors, self).__init__(*args, **kwargs)
169 | 
170 | 
class Selector(Immutable):
    """
    Selector.

    Every constructor argument is forwarded, under its own name, as a keyword
    argument to `Immutable.__init__`.
    """

    # `_hash` has to stay the last entry: `_pickle` and the inherited
    # `__repr__` both read `__slots__[:-1]` to enumerate the data fields.
    # `_pickle` also hands those values back positionally, so the order here
    # must match the parameter order of `__init__`.
    __slots__ = (
        'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors',
        'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
    )

    def __init__(
        self, tag, ids, classes, attributes, nth, selectors,
        relation, rel_type, contains, lang, flags
    ):
        """Initialize by passing every argument to the base class by keyword."""

        super(Selector, self).__init__(
            tag=tag,
            ids=ids,
            classes=classes,
            attributes=attributes,
            nth=nth,
            selectors=selectors,
            relation=relation,
            rel_type=rel_type,
            contains=contains,
            lang=lang,
            flags=flags
        )
198 | 
199 | 
class SelectorNull(Immutable):
    """
    Null Selector.

    Takes no constructor arguments and declares no `__slots__` of its own.
    """

    def __init__(self):
        """Initialize the base class with no fields."""

        super(SelectorNull, self).__init__()
207 | 
208 | 
class SelectorTag(Immutable):
    """
    Selector tag.

    Forwards `name` and `prefix` as keyword arguments to `Immutable.__init__`.
    """

    # `_hash` must stay last: `_pickle` and the inherited `__repr__` read
    # `__slots__[:-1]`, and `_pickle` returns the values in this order as
    # positional constructor arguments.
    __slots__ = ("name", "prefix", "_hash")

    def __init__(self, name, prefix):
        """Initialize by passing `name` and `prefix` to the base class by keyword."""

        super(SelectorTag, self).__init__(
            name=name,
            prefix=prefix
        )
221 | 
222 | 
class SelectorAttribute(Immutable):
    """
    Selector attribute rule.

    Forwards `attribute`, `prefix`, `pattern` and `xml_type_pattern` as
    keyword arguments to `Immutable.__init__`.
    """

    # `_hash` must stay last: `_pickle` and the inherited `__repr__` read
    # `__slots__[:-1]`, and `_pickle` returns the values in this order as
    # positional constructor arguments.
    __slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash")

    def __init__(self, attribute, prefix, pattern, xml_type_pattern):
        """Initialize by passing every argument to the base class by keyword."""

        super(SelectorAttribute, self).__init__(
            attribute=attribute,
            prefix=prefix,
            pattern=pattern,
            xml_type_pattern=xml_type_pattern
        )
237 | 
238 | 
class SelectorContains(Immutable):
    """
    Selector contains rule.

    Forwards `text` as a keyword argument to `Immutable.__init__`.
    """

    # `_hash` must stay last: `_pickle` and the inherited `__repr__` read
    # `__slots__[:-1]` to enumerate the data fields.
    __slots__ = ("text", "_hash")

    def __init__(self, text):
        """Initialize by passing `text` to the base class by keyword."""

        super(SelectorContains, self).__init__(
            text=text
        )
250 | 
251 | 
class SelectorNth(Immutable):
    """
    Selector nth type.

    Forwards `a`, `n`, `b`, `of_type`, `last` and `selectors` as keyword
    arguments to `Immutable.__init__`.
    """

    # `_hash` must stay last: `_pickle` and the inherited `__repr__` read
    # `__slots__[:-1]`, and `_pickle` returns the values in this order as
    # positional constructor arguments.
    __slots__ = ("a", "n", "b", "of_type", "last", "selectors", "_hash")

    def __init__(self, a, n, b, of_type, last, selectors):
        """Initialize by passing every argument to the base class by keyword."""

        super(SelectorNth, self).__init__(
            a=a,
            n=n,
            b=b,
            of_type=of_type,
            last=last,
            selectors=selectors
        )
268 | 
269 | 
class SelectorLang(Immutable):
    """Selector language rules, kept as a tuple and exposed as a sequence."""

    __slots__ = ("languages", "_hash",)

    def __init__(self, languages):
        """Freeze `languages` into a tuple and hand it to the base class."""

        frozen = tuple(languages)
        super(SelectorLang, self).__init__(languages=frozen)

    def __iter__(self):
        """Iterate over the stored languages."""

        return iter(self.languages)

    def __len__(self):  # pragma: no cover
        """Return how many languages are stored."""

        return len(self.languages)

    def __getitem__(self, index):  # pragma: no cover
        """Return the language(s) at `index`."""

        return self.languages[index]
296 | 
297 | 
class SelectorList(Immutable):
    """Selector list: a tuple of selectors plus the `is_not` and `is_html` flags."""

    __slots__ = ("selectors", "is_not", "is_html", "_hash")

    def __init__(self, selectors=tuple(), is_not=False, is_html=False):
        """Freeze `selectors` into a tuple and hand all three values to the base class."""

        frozen = tuple(selectors)
        super(SelectorList, self).__init__(selectors=frozen, is_not=is_not, is_html=is_html)

    def __iter__(self):
        """Iterate over the stored selectors."""

        return iter(self.selectors)

    def __len__(self):
        """Return how many selectors are stored."""

        return len(self.selectors)

    def __getitem__(self, index):
        """Return the selector(s) at `index`."""

        return self.selectors[index]
326 | 
327 | 
328 | def _pickle(p):
329 |     return p.__base__(), tuple([getattr(p, s) for s in p.__slots__[:-1]])
330 | 
331 | 
def pickle_register(obj):
    """Register `_pickle` as the `copyreg` reduction function for `obj`."""

    register = util.copyreg.pickle
    register(obj, _pickle)
336 | 
337 | 
338 | pickle_register(Selector)
339 | pickle_register(SelectorNull)
340 | pickle_register(SelectorTag)
341 | pickle_register(SelectorAttribute)
342 | pickle_register(SelectorContains)
343 | pickle_register(SelectorNth)
344 | pickle_register(SelectorLang)
345 | pickle_register(SelectorList)
346 | 


--------------------------------------------------------------------------------
/BappModules/bs4/builder/_lxml.py:
--------------------------------------------------------------------------------
  1 | # Use of this source code is governed by the MIT license.
  2 | __license__ = "MIT"
  3 | 
  4 | __all__ = [
  5 |     'LXMLTreeBuilderForXML',
  6 |     'LXMLTreeBuilder',
  7 |     ]
  8 | 
  9 | try:
 10 |     from collections.abc import Callable # Python 3.6
 11 | except ImportError , e:
 12 |     from collections import Callable
 13 | 
 14 | from io import BytesIO
 15 | from StringIO import StringIO
 16 | from lxml import etree
 17 | from bs4.element import (
 18 |     Comment,
 19 |     Doctype,
 20 |     NamespacedAttribute,
 21 |     ProcessingInstruction,
 22 |     XMLProcessingInstruction,
 23 | )
 24 | from bs4.builder import (
 25 |     FAST,
 26 |     HTML,
 27 |     HTMLTreeBuilder,
 28 |     PERMISSIVE,
 29 |     ParserRejectedMarkup,
 30 |     TreeBuilder,
 31 |     XML)
 32 | from bs4.dammit import EncodingDetector
 33 | 
 34 | LXML = 'lxml'
 35 | 
 36 | def _invert(d):
 37 |     "Invert a dictionary."
 38 |     return dict((v,k) for k, v in d.items())
 39 | 
 40 | class LXMLTreeBuilderForXML(TreeBuilder):
 41 |     DEFAULT_PARSER_CLASS = etree.XMLParser
 42 | 
 43 |     is_xml = True
 44 |     processing_instruction_class = XMLProcessingInstruction
 45 | 
 46 |     NAME = "lxml-xml"
 47 |     ALTERNATE_NAMES = ["xml"]
 48 | 
 49 |     # Well, it's permissive by XML parser standards.
 50 |     features = [NAME, LXML, XML, FAST, PERMISSIVE]
 51 | 
 52 |     CHUNK_SIZE = 512
 53 | 
 54 |     # This namespace mapping is specified in the XML Namespace
 55 |     # standard.
 56 |     DEFAULT_NSMAPS = dict(xml='http://www.w3.org/XML/1998/namespace')
 57 | 
 58 |     DEFAULT_NSMAPS_INVERTED = _invert(DEFAULT_NSMAPS)
 59 | 
 60 |     # NOTE: If we parsed Element objects and looked at .sourceline,
 61 |     # we'd be able to see the line numbers from the original document.
 62 |     # But instead we build an XMLParser or HTMLParser object to serve
 63 |     # as the target of parse messages, and those messages don't include
 64 |     # line numbers.
 65 |     
 66 |     def initialize_soup(self, soup):
 67 |         """Let the BeautifulSoup object know about the standard namespace
 68 |         mapping.
 69 |         """
 70 |         super(LXMLTreeBuilderForXML, self).initialize_soup(soup)
 71 |         self._register_namespaces(self.DEFAULT_NSMAPS)
 72 | 
 73 |     def _register_namespaces(self, mapping):
 74 |         """Let the BeautifulSoup object know about namespaces encountered
 75 |         while parsing the document.
 76 | 
 77 |         This might be useful later on when creating CSS selectors.
 78 |         """
 79 |         for key, value in mapping.items():
 80 |             if key and key not in self.soup._namespaces:
 81 |                 # Let the BeautifulSoup object know about a new namespace.
 82 |                 # If there are multiple namespaces defined with the same
 83 |                 # prefix, the first one in the document takes precedence.
 84 |                 self.soup._namespaces[key] = value
 85 | 
 86 |     def default_parser(self, encoding):
 87 |         # This can either return a parser object or a class, which
 88 |         # will be instantiated with default arguments.
 89 |         if self._default_parser is not None:
 90 |             return self._default_parser
 91 |         return etree.XMLParser(
 92 |             target=self, strip_cdata=False, recover=True, encoding=encoding)
 93 | 
 94 |     def parser_for(self, encoding):
 95 |         # Use the default parser.
 96 |         parser = self.default_parser(encoding)
 97 | 
 98 |         if isinstance(parser, Callable):
 99 |             # Instantiate the parser with default arguments
100 |             parser = parser(target=self, strip_cdata=False, encoding=encoding)
101 |         return parser
102 | 
103 |     def __init__(self, parser=None, empty_element_tags=None, **kwargs):
104 |         # TODO: Issue a warning if parser is present but not a
105 |         # callable, since that means there's no way to create new
106 |         # parsers for different encodings.
107 |         self._default_parser = parser
108 |         if empty_element_tags is not None:
109 |             self.empty_element_tags = set(empty_element_tags)
110 |         self.soup = None
111 |         self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
112 |         super(LXMLTreeBuilderForXML, self).__init__(**kwargs)
113 |         
114 |     def _getNsTag(self, tag):
115 |         # Split the namespace URL out of a fully-qualified lxml tag
116 |         # name. Copied from lxml's src/lxml/sax.py.
117 |         if tag[0] == '{':
118 |             return tuple(tag[1:].split('}', 1))
119 |         else:
120 |             return (None, tag)
121 | 
122 |     def prepare_markup(self, markup, user_specified_encoding=None,
123 |                        exclude_encodings=None,
124 |                        document_declared_encoding=None):
125 |         """
126 |         :yield: A series of 4-tuples.
127 |          (markup, encoding, declared encoding,
128 |           has undergone character replacement)
129 | 
130 |         Each 4-tuple represents a strategy for parsing the document.
131 |         """
132 |         # Instead of using UnicodeDammit to convert the bytestring to
133 |         # Unicode using different encodings, use EncodingDetector to
134 |         # iterate over the encodings, and tell lxml to try to parse
135 |         # the document as each one in turn.
136 |         is_html = not self.is_xml
137 |         if is_html:
138 |             self.processing_instruction_class = ProcessingInstruction
139 |         else:
140 |             self.processing_instruction_class = XMLProcessingInstruction
141 | 
142 |         if isinstance(markup, unicode):
143 |             # We were given Unicode. Maybe lxml can parse Unicode on
144 |             # this system?
145 |             yield markup, None, document_declared_encoding, False
146 | 
147 |         if isinstance(markup, unicode):
148 |             # No, apparently not. Convert the Unicode to UTF-8 and
149 |             # tell lxml to parse it as UTF-8.
150 |             yield (markup.encode("utf8"), "utf8",
151 |                    document_declared_encoding, False)
152 | 
153 |         try_encodings = [user_specified_encoding, document_declared_encoding]
154 |         detector = EncodingDetector(
155 |             markup, try_encodings, is_html, exclude_encodings)
156 |         for encoding in detector.encodings:
157 |             yield (detector.markup, encoding, document_declared_encoding, False)
158 | 
159 |     def feed(self, markup):
160 |         if isinstance(markup, bytes):
161 |             markup = BytesIO(markup)
162 |         elif isinstance(markup, unicode):
163 |             markup = StringIO(markup)
164 | 
165 |         # Call feed() at least once, even if the markup is empty,
166 |         # or the parser won't be initialized.
167 |         data = markup.read(self.CHUNK_SIZE)
168 |         try:
169 |             self.parser = self.parser_for(self.soup.original_encoding)
170 |             self.parser.feed(data)
171 |             while len(data) != 0:
172 |                 # Now call feed() on the rest of the data, chunk by chunk.
173 |                 data = markup.read(self.CHUNK_SIZE)
174 |                 if len(data) != 0:
175 |                     self.parser.feed(data)
176 |             self.parser.close()
177 |         except (UnicodeDecodeError, LookupError, etree.ParserError), e:
178 |             raise ParserRejectedMarkup(e)
179 | 
180 |     def close(self):
181 |         self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
182 | 
183 |     def start(self, name, attrs, nsmap={}):
184 |         # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
185 |         attrs = dict(attrs)
186 |         nsprefix = None
187 |         # Invert each namespace map as it comes in.
188 |         if len(nsmap) == 0 and len(self.nsmaps) > 1:
189 |                 # There are no new namespaces for this tag, but
190 |                 # non-default namespaces are in play, so we need a
191 |                 # separate tag stack to know when they end.
192 |                 self.nsmaps.append(None)
193 |         elif len(nsmap) > 0:
194 |             # A new namespace mapping has come into play.
195 | 
196 |             # First, Let the BeautifulSoup object know about it.
197 |             self._register_namespaces(nsmap)
198 | 
199 |             # Then, add it to our running list of inverted namespace
200 |             # mappings.
201 |             self.nsmaps.append(_invert(nsmap))
202 | 
203 |             # Also treat the namespace mapping as a set of attributes on the
204 |             # tag, so we can recreate it later.
205 |             attrs = attrs.copy()
206 |             for prefix, namespace in nsmap.items():
207 |                 attribute = NamespacedAttribute(
208 |                     "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
209 |                 attrs[attribute] = namespace
210 | 
211 |         # Namespaces are in play. Find any attributes that came in
212 |         # from lxml with namespaces attached to their names, and
213 |         # turn then into NamespacedAttribute objects.
214 |         new_attrs = {}
215 |         for attr, value in attrs.items():
216 |             namespace, attr = self._getNsTag(attr)
217 |             if namespace is None:
218 |                 new_attrs[attr] = value
219 |             else:
220 |                 nsprefix = self._prefix_for_namespace(namespace)
221 |                 attr = NamespacedAttribute(nsprefix, attr, namespace)
222 |                 new_attrs[attr] = value
223 |         attrs = new_attrs
224 | 
225 |         namespace, name = self._getNsTag(name)
226 |         nsprefix = self._prefix_for_namespace(namespace)
227 |         self.soup.handle_starttag(name, namespace, nsprefix, attrs)
228 | 
229 |     def _prefix_for_namespace(self, namespace):
230 |         """Find the currently active prefix for the given namespace."""
231 |         if namespace is None:
232 |             return None
233 |         for inverted_nsmap in reversed(self.nsmaps):
234 |             if inverted_nsmap is not None and namespace in inverted_nsmap:
235 |                 return inverted_nsmap[namespace]
236 |         return None
237 | 
238 |     def end(self, name):
239 |         self.soup.endData()
240 |         completed_tag = self.soup.tagStack[-1]
241 |         namespace, name = self._getNsTag(name)
242 |         nsprefix = None
243 |         if namespace is not None:
244 |             for inverted_nsmap in reversed(self.nsmaps):
245 |                 if inverted_nsmap is not None and namespace in inverted_nsmap:
246 |                     nsprefix = inverted_nsmap[namespace]
247 |                     break
248 |         self.soup.handle_endtag(name, nsprefix)
249 |         if len(self.nsmaps) > 1:
250 |             # This tag, or one of its parents, introduced a namespace
251 |             # mapping, so pop it off the stack.
252 |             self.nsmaps.pop()
253 | 
254 |     def pi(self, target, data):
255 |         self.soup.endData()
256 |         self.soup.handle_data(target + ' ' + data)
257 |         self.soup.endData(self.processing_instruction_class)
258 | 
259 |     def data(self, content):
260 |         self.soup.handle_data(content)
261 | 
262 |     def doctype(self, name, pubid, system):
263 |         self.soup.endData()
264 |         doctype = Doctype.for_name_and_ids(name, pubid, system)
265 |         self.soup.object_was_parsed(doctype)
266 | 
267 |     def comment(self, content):
268 |         "Handle comments as Comment objects."
269 |         self.soup.endData()
270 |         self.soup.handle_data(content)
271 |         self.soup.endData(Comment)
272 | 
273 |     def test_fragment_to_document(self, fragment):
274 |         """See `TreeBuilder`."""
275 |         return u'\n%s' % fragment
276 | 
277 | 
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
    """HTML flavour of the lxml tree builder; it parses with `etree.HTMLParser`."""

    NAME = LXML
    ALTERNATE_NAMES = ["lxml-html"]

    features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
    is_xml = False
    processing_instruction_class = ProcessingInstruction

    def default_parser(self, encoding):
        """Return the `etree.HTMLParser` class.

        `parser_for` instantiates it; `encoding` is not used here.
        """
        return etree.HTMLParser

    def feed(self, markup):
        """Feed all of `markup` to a fresh parser in a single call.

        :raise ParserRejectedMarkup: when the parser raises
            `UnicodeDecodeError`, `LookupError` or `etree.ParserError`.
        """
        encoding = self.soup.original_encoding
        try:
            self.parser = self.parser_for(encoding)
            self.parser.feed(markup)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(e)


    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'%s' % fragment
303 | 


--------------------------------------------------------------------------------
/BappModules/jsbeautifier/core/output.py:
--------------------------------------------------------------------------------
  1 | # The MIT License (MIT)
  2 | #
  3 | # Copyright (c) 2007-2018 Einar Lielmanis, Liam Newman, and contributors.
  4 | #
  5 | # Permission is hereby granted, free of charge, to any person
  6 | # obtaining a copy of this software and associated documentation files
  7 | # (the "Software"), to deal in the Software without restriction,
  8 | # including without limitation the rights to use, copy, modify, merge,
  9 | # publish, distribute, sublicense, and/or sell copies of the Software,
 10 | # and to permit persons to whom the Software is furnished to do so,
 11 | # subject to the following conditions:
 12 | #
 13 | # The above copyright notice and this permission notice shall be
 14 | # included in all copies or substantial portions of the Software.
 15 | #
 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 20 | # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 21 | # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 23 | # SOFTWARE.
 24 | 
 25 | import re
 26 | import math
 27 | 
 28 | # Using object instead of string to allow for later expansion of info
 29 | # about each line
 30 | 
 31 | __all__ = ["Output"]
 32 | 
 33 | 
 34 | class OutputLine:
 35 |     def __init__(self, parent):
 36 |         self.__parent = parent
 37 |         self.__character_count = 0
 38 |         self.__indent_count = -1
 39 |         self.__alignment_count = 0
 40 |         self.__wrap_point_index = 0
 41 |         self.__wrap_point_character_count = 0
 42 |         self.__wrap_point_indent_count = -1
 43 |         self.__wrap_point_alignment_count = 0
 44 | 
 45 |         self.__items = []
 46 | 
 47 |     def clone_empty(self):
 48 |         line = OutputLine(self.__parent)
 49 |         line.set_indent(self.__indent_count, self.__alignment_count)
 50 |         return line
 51 | 
 52 |     def item(self, index):
 53 |         return self.__items[index]
 54 | 
 55 |     def is_empty(self):
 56 |         return len(self.__items) == 0
 57 | 
 58 |     def set_indent(self, indent=0, alignment=0):
 59 |         if self.is_empty():
 60 |             self.__indent_count = indent
 61 |             self.__alignment_count = alignment
 62 |             self.__character_count = self.__parent.get_indent_size(
 63 |                 self.__indent_count, self.__alignment_count)
 64 | 
 65 |     def _set_wrap_point(self):
 66 |         if self.__parent.wrap_line_length:
 67 |             self.__wrap_point_index = len(self.__items)
 68 |             self.__wrap_point_character_count = self.__character_count
 69 |             self.__wrap_point_indent_count = \
 70 |                 self.__parent.next_line.__indent_count
 71 |             self.__wrap_point_alignment_count = \
 72 |                 self.__parent.next_line.__alignment_count
 73 | 
 74 |     def _should_wrap(self):
 75 |         return self.__wrap_point_index and \
 76 |                 self.__character_count > \
 77 |                     self.__parent.wrap_line_length and \
 78 |                 self.__wrap_point_character_count > \
 79 |                     self.__parent.next_line.__character_count
 80 | 
 81 | 
 82 |     def _allow_wrap(self):
 83 |         if self._should_wrap():
 84 |             self.__parent.add_new_line()
 85 |             next = self.__parent.current_line
 86 |             next.set_indent(self.__wrap_point_indent_count,
 87 |                 self.__wrap_point_alignment_count)
 88 |             next.__items = self.__items[self.__wrap_point_index:]
 89 |             self.__items = self.__items[:self.__wrap_point_index]
 90 | 
 91 |             next.__character_count += self.__character_count - \
 92 |                 self.__wrap_point_character_count
 93 |             self.__character_count = self.__wrap_point_character_count
 94 | 
 95 |             if next.__items[0] == " ":
 96 |                 next.__items.pop(0)
 97 |                 next.__character_count -= 1
 98 | 
 99 |             return True
100 | 
101 |         return False
102 | 
103 |     def last(self):
104 |         if not self.is_empty():
105 |             return self.__items[-1]
106 | 
107 |         return None
108 | 
109 |     def push(self, item):
110 |         self.__items.append(item)
111 |         last_newline_index = item.rfind('\n')
112 |         if last_newline_index != -1:
113 |             self.__character_count = len(item) - last_newline_index
114 |         else:
115 |             self.__character_count += len(item)
116 | 
117 |     def pop(self):
118 |         item = None
119 |         if not self.is_empty():
120 |             item = self.__items.pop()
121 |             self.__character_count -= len(item)
122 |         return item
123 | 
124 |     def _remove_indent(self):
125 |         if self.__indent_count > 0:
126 |             self.__indent_count -= 1
127 |             self.__character_count -= self.__parent.indent_size
128 | 
129 |     def _remove_wrap_indent(self):
130 |         if self.__wrap_point_indent_count > 0:
131 |             self.__wrap_point_indent_count -= 1
132 | 
133 |     def trim(self):
134 |         while self.last() == ' ':
135 |             self.__items.pop()
136 |             self.__character_count -= 1
137 | 
138 |     def toString(self):
139 |         result = ''
140 |         if self.is_empty():
141 |             if self.__parent.indent_empty_lines:
142 |                 result = self.__parent.get_indent_string(self.__indent_count)
143 |         else:
144 |             result = self.__parent.get_indent_string(
145 |                 self.__indent_count, self.__alignment_count)
146 |             result += ''.join(self.__items)
147 |         return result
148 | 
149 | 
class IndentStringCache:
    """Builds indentation strings on demand and memoises them by column width."""

    def __init__(self, options, base_string):
        # Index i of the cache holds the indentation for a width of i columns.
        self.__cache = ['']
        self.__indent_size = options.indent_size
        if options.indent_with_tabs:
            self.__indent_string = options.indent_char
        else:
            self.__indent_string = options.indent_char * options.indent_size

        # Set to null to continue support of auto detection of base indent
        prefix = base_string or ''
        if options.indent_level > 0:
            prefix = options.indent_level * self.__indent_string

        self.__base_string = prefix
        self.__base_string_length = len(prefix)

    def get_indent_size(self, indent, column=0):
        """Return the width for `indent` levels plus `column` extra columns.

        The base string's length is counted only when `indent` is not negative.
        """
        start = 0 if indent < 0 else self.__base_string_length
        return start + indent * self.__indent_size + column

    def get_indent_string(self, indent_level, column=0):
        """Return the indentation string for `indent_level` plus `column` columns.

        A negative `indent_level` is treated as zero and drops the base string.
        """
        if indent_level < 0:
            indent_level = 0
            prefix = ''
        else:
            prefix = self.__base_string
        width = column + indent_level * self.__indent_size
        self.__ensure_cache(width)
        return prefix + self.__cache[width]

    def __ensure_cache(self, column):
        """Grow the cache until index `column` exists."""
        missing = column - len(self.__cache) + 1
        for _ in range(missing):
            self.__add_column()

    def __add_column(self):
        """Append the indentation string for the next uncached width."""
        width = len(self.__cache)
        size = self.__indent_size
        pieces = ''
        if size and width >= size:
            # Whole indent units first...
            levels = int(math.floor(width / size))
            width -= levels * size
            pieces = levels * self.__indent_string
        if width:
            # ...then pad the remainder with plain spaces.
            pieces += width * ' '
        self.__cache.append(pieces)
199 | 
200 | 
201 | class Output:
202 |     def __init__(self, options, baseIndentString=''):
203 | 
204 |         self.__indent_cache = IndentStringCache(options, baseIndentString)
205 |         self.raw = False
206 |         self._end_with_newline = options.end_with_newline
207 |         self.indent_size = options.indent_size
208 |         self.wrap_line_length = options.wrap_line_length
209 |         self.indent_empty_lines = options.indent_empty_lines
210 |         self.__lines = []
211 |         self.previous_line = None
212 |         self.current_line = None
213 |         self.next_line = OutputLine(self)
214 |         self.space_before_token = False
215 |         self.non_breaking_space = False
216 |         self.previous_token_wrapped = False
217 |         # initialize
218 |         self.__add_outputline()
219 | 
220 |     def __add_outputline(self):
221 |         self.previous_line = self.current_line
222 |         self.current_line = self.next_line.clone_empty()
223 |         self.__lines.append(self.current_line)
224 | 
225 |     def get_line_number(self):
226 |         return len(self.__lines)
227 | 
228 |     def get_indent_string(self, indent, column=0):
229 |         return self.__indent_cache.get_indent_string(indent, column)
230 | 
231 |     def get_indent_size(self, indent, column=0):
232 |         return self.__indent_cache.get_indent_size(indent, column)
233 | 
234 |     def is_empty(self):
235 |         return self.previous_line is None and self.current_line.is_empty()
236 | 
237 |     def add_new_line(self, force_newline=False):
238 |         # never newline at the start of file
239 |         # otherwise, newline only if we didn't just add one or we're forced
240 |         if self.is_empty() or \
241 |                 (not force_newline and self.just_added_newline()):
242 |             return False
243 | 
244 |         # if raw output is enabled, don't print additional newlines,
245 |         # but still return True as though you had
246 |         if not self.raw:
247 |             self.__add_outputline()
248 |         return True
249 | 
250 |     def get_code(self, eol):
251 |         self.trim(True)
252 | 
253 |         # handle some edge cases where the last tokens
254 |         # has text that ends with newline(s)
255 |         last_item = self.current_line.pop()
256 |         if last_item:
257 |             if last_item[-1] == '\n':
258 |                 last_item = re.sub(r'[\n]+$', '', last_item)
259 |             self.current_line.push(last_item)
260 | 
261 |         if self._end_with_newline:
262 |             self.__add_outputline()
263 | 
264 |         sweet_code = "\n".join(line.toString() for line in self.__lines)
265 | 
266 |         if not eol == '\n':
267 |             sweet_code = sweet_code.replace('\n', eol)
268 | 
269 |         return sweet_code
270 | 
271 |     def set_wrap_point(self):
272 |         self.current_line._set_wrap_point()
273 | 
274 |     def set_indent(self, indent=0, alignment=0):
275 |         # Next line stores alignment values
276 |         self.next_line.set_indent(indent, alignment)
277 | 
278 |         # Never indent your first output indent at the start of the file
279 |         if len(self.__lines) > 1:
280 |             self.current_line.set_indent(indent, alignment)
281 |             return True
282 |         self.current_line.set_indent()
283 |         return False
284 | 
285 |     def add_raw_token(self, token):
286 |         for _ in range(token.newlines):
287 |             self.__add_outputline()
288 | 
289 |         self.current_line.set_indent(-1)
290 |         self.current_line.push(token.whitespace_before)
291 |         self.current_line.push(token.text)
292 |         self.space_before_token = False
293 |         self.non_breaking_space = False
294 |         self.previous_token_wrapped = False
295 | 
296 |     def add_token(self, printable_token):
297 |         self.__add_space_before_token()
298 |         self.current_line.push(printable_token)
299 |         self.space_before_token = False
300 |         self.non_breaking_space = False
301 |         self.previous_token_wrapped = self.current_line._allow_wrap()
302 | 
303 |     def __add_space_before_token(self):
304 |         if self.space_before_token and not self.just_added_newline():
305 |             if not self.non_breaking_space:
306 |                 self.set_wrap_point()
307 |             self.current_line.push(' ')
308 |         self.space_before_token = False
309 | 
310 |     def remove_indent(self, index):
311 |         while index < len(self.__lines):
312 |             self.__lines[index]._remove_indent()
313 |             index += 1
314 |         self.current_line._remove_wrap_indent()
315 | 
316 |     def trim(self, eat_newlines=False):
317 |         self.current_line.trim()
318 | 
319 |         while eat_newlines and len(
320 |                 self.__lines) > 1 and self.current_line.is_empty():
321 |             self.__lines.pop()
322 |             self.current_line = self.__lines[-1]
323 |             self.current_line.trim()
324 | 
325 |         if len(self.__lines) > 1:
326 |             self.previous_line = self.__lines[-2]
327 |         else:
328 |             self.previous_line = None
329 | 
    def just_added_newline(self):
        """Return whether the current line is empty."""
        return self.current_line.is_empty()
332 | 
333 |     def just_added_blankline(self):
334 |         return self.is_empty() or \
335 |             (self.current_line.is_empty() and self.previous_line.is_empty())
336 | 
337 |     def ensure_empty_line_above(self, starts_with, ends_with):
338 |         index = len(self.__lines) - 2
339 |         while index >= 0:
340 |             potentialEmptyLine = self.__lines[index]
341 |             if potentialEmptyLine.is_empty():
342 |                 break
343 |             elif not potentialEmptyLine.item(0).startswith(starts_with) and \
344 |                     potentialEmptyLine.item(-1) != ends_with:
345 |                 self.__lines.insert(index + 1, OutputLine(self))
346 |                 self.previous_line = self.__lines[-2]
347 |                 break
348 |             index -= 1
349 | 


--------------------------------------------------------------------------------
/BappModules/bs4/builder/_htmlparser.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | """Use the HTMLParser library to parse HTML files that aren't too bad."""
  3 | 
  4 | # Use of this source code is governed by the MIT license.
  5 | __license__ = "MIT"
  6 | 
  7 | __all__ = [
  8 |     'HTMLParserTreeBuilder',
  9 |     ]
 10 | 
 11 | from HTMLParser import HTMLParser
 12 | 
 13 | try:
 14 |     from HTMLParser import HTMLParseError
 15 | except ImportError, e:
 16 |     # HTMLParseError is removed in Python 3.5. Since it can never be
 17 |     # thrown in 3.5, we can just define our own class as a placeholder.
 18 |     class HTMLParseError(Exception):
 19 |         pass
 20 | 
 21 | import sys
 22 | import warnings
 23 | 
 24 | # Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
 25 | # argument, which we'd like to set to False. Unfortunately,
 26 | # http://bugs.python.org/issue13273 makes strict=True a better bet
 27 | # before Python 3.2.3.
 28 | #
 29 | # At the end of this file, we monkeypatch HTMLParser so that
 30 | # strict=True works well on Python 3.2.2.
 31 | major, minor, release = sys.version_info[:3]
 32 | CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
 33 | CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
 34 | CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
 35 | 
 36 | 
 37 | from bs4.element import (
 38 |     CData,
 39 |     Comment,
 40 |     Declaration,
 41 |     Doctype,
 42 |     ProcessingInstruction,
 43 |     )
 44 | from bs4.dammit import EntitySubstitution, UnicodeDammit
 45 | 
 46 | from bs4.builder import (
 47 |     HTML,
 48 |     HTMLTreeBuilder,
 49 |     STRICT,
 50 |     )
 51 | 
 52 | 
# Feature name of this builder: used as HTMLParserTreeBuilder.NAME and
# listed in its ``features``.
HTMLPARSER = 'html.parser'
 54 | 
 55 | class BeautifulSoupHTMLParser(HTMLParser):
 56 | 
 57 |     def __init__(self, *args, **kwargs):
 58 |         HTMLParser.__init__(self, *args, **kwargs)
 59 | 
 60 |         # Keep a list of empty-element tags that were encountered
 61 |         # without an explicit closing tag. If we encounter a closing tag
 62 |         # of this type, we'll associate it with one of those entries.
 63 |         #
 64 |         # This isn't a stack because we don't care about the
 65 |         # order. It's a list of closing tags we've already handled and
 66 |         # will ignore, assuming they ever show up.
 67 |         self.already_closed_empty_element = []
 68 | 
 69 |     def error(self, msg):
 70 |         """In Python 3, HTMLParser subclasses must implement error(), although this
 71 |         requirement doesn't appear to be documented.
 72 | 
 73 |         In Python 2, HTMLParser implements error() as raising an exception.
 74 | 
 75 |         In any event, this method is called only on very strange markup and our best strategy
 76 |         is to pretend it didn't happen and keep going.
 77 |         """
 78 |         warnings.warn(msg)
 79 |         
 80 |     def handle_startendtag(self, name, attrs):
 81 |         # This is only called when the markup looks like
 82 |         # .
 83 | 
 84 |         # is_startend() tells handle_starttag not to close the tag
 85 |         # just because its name matches a known empty-element tag. We
 86 |         # know that this is an empty-element tag and we want to call
 87 |         # handle_endtag ourselves.
 88 |         tag = self.handle_starttag(name, attrs, handle_empty_element=False)
 89 |         self.handle_endtag(name)
 90 |         
 91 |     def handle_starttag(self, name, attrs, handle_empty_element=True):
 92 |         # XXX namespace
 93 |         attr_dict = {}
 94 |         for key, value in attrs:
 95 |             # Change None attribute values to the empty string
 96 |             # for consistency with the other tree builders.
 97 |             if value is None:
 98 |                 value = ''
 99 |             attr_dict[key] = value
100 |             attrvalue = '""'
101 |         #print "START", name
102 |         sourceline, sourcepos = self.getpos()
103 |         tag = self.soup.handle_starttag(
104 |             name, None, None, attr_dict, sourceline=sourceline,
105 |             sourcepos=sourcepos
106 |         )
107 |         if tag and tag.is_empty_element and handle_empty_element:
108 |             # Unlike other parsers, html.parser doesn't send separate end tag
109 |             # events for empty-element tags. (It's handled in
110 |             # handle_startendtag, but only if the original markup looked like
111 |             # .)
112 |             #
113 |             # So we need to call handle_endtag() ourselves. Since we
114 |             # know the start event is identical to the end event, we
115 |             # don't want handle_endtag() to cross off any previous end
116 |             # events for tags of this name.
117 |             self.handle_endtag(name, check_already_closed=False)
118 | 
119 |             # But we might encounter an explicit closing tag for this tag
120 |             # later on. If so, we want to ignore it.
121 |             self.already_closed_empty_element.append(name)
122 |             
123 |     def handle_endtag(self, name, check_already_closed=True):
124 |         #print "END", name
125 |         if check_already_closed and name in self.already_closed_empty_element:
126 |             # This is a redundant end tag for an empty-element tag.
127 |             # We've already called handle_endtag() for it, so just
128 |             # check it off the list.
129 |             # print "ALREADY CLOSED", name
130 |             self.already_closed_empty_element.remove(name)
131 |         else:
132 |             self.soup.handle_endtag(name)
133 | 
134 |     def handle_data(self, data):
135 |         self.soup.handle_data(data)
136 | 
137 |     def handle_charref(self, name):
138 |         # XXX workaround for a bug in HTMLParser. Remove this once
139 |         # it's fixed in all supported versions.
140 |         # http://bugs.python.org/issue13633
141 |         if name.startswith('x'):
142 |             real_name = int(name.lstrip('x'), 16)
143 |         elif name.startswith('X'):
144 |             real_name = int(name.lstrip('X'), 16)
145 |         else:
146 |             real_name = int(name)
147 | 
148 |         data = None
149 |         if real_name < 256:
150 |             # HTML numeric entities are supposed to reference Unicode
151 |             # code points, but sometimes they reference code points in
152 |             # some other encoding (ahem, Windows-1252). E.g. “
153 |             # instead of É for LEFT DOUBLE QUOTATION MARK. This
154 |             # code tries to detect this situation and compensate.
155 |             for encoding in (self.soup.original_encoding, 'windows-1252'):
156 |                 if not encoding:
157 |                     continue
158 |                 try:
159 |                     data = bytearray([real_name]).decode(encoding)
160 |                 except UnicodeDecodeError, e:
161 |                     pass
162 |         if not data:
163 |             try:
164 |                 data = unichr(real_name)
165 |             except (ValueError, OverflowError), e:
166 |                 pass
167 |         data = data or u"\N{REPLACEMENT CHARACTER}"
168 |         self.handle_data(data)
169 | 
170 |     def handle_entityref(self, name):
171 |         character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
172 |         if character is not None:
173 |             data = character
174 |         else:
175 |             # If this were XML, it would be ambiguous whether "&foo"
176 |             # was an character entity reference with a missing
177 |             # semicolon or the literal string "&foo". Since this is
178 |             # HTML, we have a complete list of all character entity references,
179 |             # and this one wasn't found, so assume it's the literal string "&foo".
180 |             data = "&%s" % name
181 |         self.handle_data(data)
182 | 
183 |     def handle_comment(self, data):
184 |         self.soup.endData()
185 |         self.soup.handle_data(data)
186 |         self.soup.endData(Comment)
187 | 
188 |     def handle_decl(self, data):
189 |         self.soup.endData()
190 |         if data.startswith("DOCTYPE "):
191 |             data = data[len("DOCTYPE "):]
192 |         elif data == 'DOCTYPE':
193 |             # i.e. ""
194 |             data = ''
195 |         self.soup.handle_data(data)
196 |         self.soup.endData(Doctype)
197 | 
198 |     def unknown_decl(self, data):
199 |         if data.upper().startswith('CDATA['):
200 |             cls = CData
201 |             data = data[len('CDATA['):]
202 |         else:
203 |             cls = Declaration
204 |         self.soup.endData()
205 |         self.soup.handle_data(data)
206 |         self.soup.endData(cls)
207 | 
208 |     def handle_pi(self, data):
209 |         self.soup.endData()
210 |         self.soup.handle_data(data)
211 |         self.soup.endData(ProcessingInstruction)
212 | 
213 | 
class HTMLParserTreeBuilder(HTMLTreeBuilder):
    """Tree builder that parses markup with BeautifulSoupHTMLParser."""

    is_xml = False
    picklable = True
    NAME = HTMLPARSER
    features = [NAME, HTML, STRICT]

    # The html.parser knows which line number and position in the
    # original file is the source of an element.
    TRACKS_LINE_NUMBERS = True

    def __init__(self, parser_args=None, parser_kwargs=None, **kwargs):
        """Store the arguments later used to build each parser.

        :param parser_args: positional arguments for the parser
            constructor (default: none).
        :param parser_kwargs: keyword arguments for the parser
            constructor (default: none). The mapping is copied, so the
            version-dependent flags added here do not modify the
            caller's object.
        :param kwargs: passed on to the HTMLTreeBuilder constructor.
        """
        super(HTMLParserTreeBuilder, self).__init__(**kwargs)
        parser_args = parser_args or []
        parser_kwargs = dict(parser_kwargs or {})
        if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
            parser_kwargs['strict'] = False
        if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
            parser_kwargs['convert_charrefs'] = False
        self.parser_args = (parser_args, parser_kwargs)

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None, exclude_encodings=None):
        """Yield the markup converted to Unicode, with encoding details.

        This is a generator that yields exactly one item. Markup that is
        already ``unicode`` is yielded unchanged; anything else is
        converted by UnicodeDammit.

        :return: A 4-tuple (markup, original encoding, encoding
        declared within markup, whether any characters had to be
        replaced with REPLACEMENT CHARACTER).
        """
        if isinstance(markup, unicode):
            yield (markup, None, None, False)
            return

        try_encodings = [user_specified_encoding, document_declared_encoding]
        dammit = UnicodeDammit(markup, try_encodings, is_html=True,
                               exclude_encodings=exclude_encodings)
        yield (dammit.markup, dammit.original_encoding,
               dammit.declared_html_encoding,
               dammit.contains_replacement_characters)

    def feed(self, markup):
        """Parse ``markup`` with a fresh parser bound to ``self.soup``.

        :raises HTMLParseError: re-raised, after a RuntimeWarning has
            been issued, when the parser cannot handle the document.
        """
        args, kwargs = self.parser_args
        parser = BeautifulSoupHTMLParser(*args, **kwargs)
        parser.soup = self.soup
        try:
            parser.feed(markup)
            parser.close()
        except HTMLParseError:
            warnings.warn(RuntimeWarning(
                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
            # A bare raise keeps the original traceback.
            raise
        parser.already_closed_empty_element = []
265 | 
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
    import re
    attrfind_tolerant = re.compile(
        r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
        r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
    HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant

    locatestarttagend = re.compile(r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
""", re.VERBOSE)
    BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend

    from html.parser import tagfind, attrfind

    def parse_starttag(self, i):
        """Parse the start tag at ``rawdata[i]`` and dispatch it.

        Returns the index just past the tag, or the negative value
        reported by ``check_for_whole_start_tag`` when the tag is not
        complete yet.
        """
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i+1:k].lower()
        while k < endpos:
            if self.strict:
                m = attrfind.match(rawdata, k)
            else:
                m = attrfind_tolerant.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                attrvalue = attrvalue[1:-1]
            if attrvalue:
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            # NOTE(review): lineno/offset are computed but not used
            # below; kept unchanged from the original code.
            lineno, offset = self.getpos()
            if "\n" in self.__starttag_text:
                lineno = lineno + self.__starttag_text.count("\n")
                offset = len(self.__starttag_text) \
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
            if self.strict:
                self.error("junk characters in start tag: %r"
                           % (rawdata[k:endpos][:20],))
            self.handle_data(rawdata[i:endpos])
            return endpos
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode(tag)
        return endpos

    def set_cdata_mode(self, elem):
        """Switch to CDATA mode until the closing tag of ``elem`` is seen."""
        self.cdata_elem = elem.lower()
        # The pattern needs a %s placeholder for the element name; an
        # empty template would make the % formatting raise TypeError.
        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)

    BeautifulSoupHTMLParser.parse_starttag = parse_starttag
    BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode

    CONSTRUCTOR_TAKES_STRICT = True