├── .gitignore
├── BappDescription.html
├── BappManifest.bmf
├── LICENSE
├── README.md
├── directories.txt
├── issue.png
├── off-by-slash.py
└── scrape.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | # VSCode
107 | .vscode/settings.json
108 |
109 | results.txt
110 | domains.txt
111 |
--------------------------------------------------------------------------------
/BappDescription.html:
--------------------------------------------------------------------------------
1 | <p>This extension detects NGINX alias traversal due to misconfiguration.</p>
2 | 
3 | <p>The technique is based on Orange Tsai's BlackHat USA 2018
4 | <a href="https://i.blackhat.com/us-18/Wed-August-8/us-18-Orange-Tsai-Breaking-Parser-Logic-Take-Your-Path-Normalization-Off-And-Pop-0days-Out-2.pdf">presentation</a>.</p>
5 | 
6 | <p>A server is assumed to be vulnerable if a request to an existing path like <code>https://example.com/static../</code> returns the same response as <code>https://example.com/</code>. To eliminate false positives, the misconfiguration has to be confirmed by successfully requesting an existing resource via path traversal. This is done as follows:</p>
7 | 
8 | <p>For the URL <code>https://example.com/folder1/folder2/static/main.css</code> it generates the following links:</p>
9 | 
10 | <code>https://example.com/folder1../folder1/folder2/static/main.css</code><br>
11 | <code>https://example.com/folder1../%s/folder2/static/main.css</code><br>
12 | <code>https://example.com/folder1/folder2../folder2/static/main.css</code><br>
13 | <code>https://example.com/folder1/folder2../%s/static/main.css</code><br>
14 | <code>https://example.com/folder1/folder2/static../static/main.css</code><br>
15 | <code>https://example.com/folder1/folder2/static../%s/main.css</code><br>
16 | 
17 | <p>Where <code>%s</code> are common directories used in alias paths, based on around 9500 nginx configuration files from GH (thanks @TomNomNom); see directories.txt.</p>
18 | 
--------------------------------------------------------------------------------
/BappManifest.bmf:
--------------------------------------------------------------------------------
1 | Uuid: a5fdd2cdffa6410eb530de5a4c294d3a
2 | ExtensionType: 2
3 | Name: NGINX Alias Traversal
4 | RepoName: nginx-alias-traversal
5 | ScreenVersion: 1.1
6 | SerialVersion: 5
7 | MinPlatformVersion: 0
8 | ProOnly: True
9 | Author: Martin Bajanik (@_bayotop)
10 | ShortDescription: Detects NGINX alias traversal due to misconfiguration.
11 | EntryPoint: off-by-slash.py
12 | BuildCommand:
13 | SupportedProducts: Pro
14 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Martin Bajanik
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # off-by-slash
2 | Burp extension to detect alias traversal via NGINX misconfiguration at scale. Requires Burp Professional.
3 |
4 | 
5 |
6 | ## Usage
7 |
8 | 1. git clone https://github.com/bayotop/off-by-slash/
9 | 2. Burp -> Extender -> Add -> find and select `off-by-slash.py`
10 |
11 | The extension implements an active scanner check. Simply run a new scan, preferably with an "Audit checks - extensions only" configuration, on static resources identified via Burp's crawler. Alternatively, use `scrape.py` with a list of URLs to scrape static resources from (for example, `python3 scrape.py urls.txt`, which writes the discovered resources to `results.txt`). The results can be passed directly to a new Burp scan (Burp 2.0).
12 |
13 | ## Description
14 |
15 | *https://i.blackhat.com/us-18/Wed-August-8/us-18-Orange-Tsai-Breaking-Parser-Logic-Take-Your-Path-Normalization-Off-And-Pop-0days-Out-2.pdf*
16 |
17 | A server is assumed to be vulnerable if a request to an existing path like `https://example.com/static../` returns the same response as `https://example.com/`. To eliminate false positives, the misconfiguration has to be confirmed by successfully requesting an existing resource via path traversal. This is done as follows:
18 |
19 | For the URL `https://example.com/folder1/folder2/static/main.css`, it generates the following links:
20 |
21 | ```
22 | https://example.com/folder1../folder1/folder2/static/main.css
23 | https://example.com/folder1../%s/folder2/static/main.css
24 | https://example.com/folder1/folder2../folder2/static/main.css
25 | https://example.com/folder1/folder2../%s/static/main.css
26 | https://example.com/folder1/folder2/static../static/main.css
27 | https://example.com/folder1/folder2/static../%s/main.css
28 | ```
29 |
30 | Where `%s` are common directories used in alias paths, based on around 9500 nginx configuration files from GH (thanks [@TomNomNom](https://twitter.com/TomNomNom)); see directories.txt.
31 |
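32 | For reference, the following is a minimal standalone sketch of both detection steps outside Burp. It assumes the third-party `requests` library, and the function names (`quick_check`, `generate_candidates`) are illustrative only; they do not appear in the extension, which performs the same work through Burp's scanner APIs:
33 | 
34 | ```
35 | # Illustrative sketch, not the extension's code.
36 | import requests
37 | 
38 | def quick_check(base, folder):
39 |     # Step 1: does https://host/folder../ return the same response as https://host/ ?
40 |     probe = requests.get("%s/%s../" % (base, folder), allow_redirects=False)
41 |     verifier = requests.get("%s/" % base, allow_redirects=False)
42 |     return probe.status_code == verifier.status_code and probe.content == verifier.content
43 | 
44 | def generate_candidates(url, part, common_dirs):
45 |     # Step 2: rewrite /part/ to /part../part/, plus /part../<dir>/ for every
46 |     # directory guess (common_dirs stands in for the contents of directories.txt).
47 |     marker = "/%s/" % part
48 |     candidates = [url.replace(marker, "/%s../%s/" % (part, part))]
49 |     candidates += [url.replace(marker, "/%s../%s/" % (part, d)) for d in common_dirs]
50 |     return candidates
51 | 
52 | # quick_check("https://example.com", "static")
53 | # generate_candidates("https://example.com/folder1/folder2/static/main.css",
54 | #                     "static", ["assets", "files"])
55 | ```
56 | 
57 | A candidate URL confirms the misconfiguration when it returns the same status code and body as the original resource, which is how the extension decides to report an issue.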
--------------------------------------------------------------------------------
/directories.txt:
--------------------------------------------------------------------------------
1 | Archipel
2 | _static
3 | acme
4 | acme-challenge
5 | acme_challenges
6 | admin
7 | adminer
8 | alpha
9 | app1-static
10 | app2-static
11 | app_dir
12 | app_nginx_static_path
13 | asset_img
14 | assets
15 | audio
16 | awstats
17 | backend
18 | base_dir
19 | blankon
20 | blob
21 | blue
22 | bootstrap
23 | build
24 | cgi-bin
25 | challenge
26 | challenges
27 | chiminey
28 | clld_dir
29 | collected_static
30 | community
31 | content
32 | counterblockd
33 | counterwallet
34 | css
35 | custom
36 | d
37 | data
38 | dataset1
39 | dataset2
40 | default
41 | demo
42 | demo-app
43 | developerslv
44 | dist
45 | django-blog
46 | django_project_path
47 | doc
48 | docs
49 | download
50 | downloads
51 | error
52 | errors
53 | export
54 | favicons
55 | favs
56 | files
57 | films
58 | flask_test_uploads
59 | fm
60 | font-icons
61 | fonts
62 | frontend
63 | ftp
64 | ftpmaster
65 | hgs-static
66 | higlass-website
67 | home
68 | horde
69 | htdocs
70 | html
71 | httpboot
72 | icon
73 | icons
74 | ikiwiki
75 | image_data
76 | images
77 | img
78 | install
79 | items
80 | javascript
81 | js
82 | js-plugin
83 | khanlinks
84 | kibana
85 | kolab-syncroton
86 | latest
87 | layout
88 | legal
89 | lemonldap-ng-doc
90 | lemonldap-ng-fr-doc
91 | letsencrypt
92 | lib
93 | libs
94 | log
95 | logging
96 | mailinabox
97 | mailman
98 | main_user
99 | manual
100 | media
101 | memcached
102 | minified
103 | misc
104 | moodledata
105 | msks
106 | munki_repo
107 | music
108 | name
109 | new-js
110 | nginx
111 | noVNC
112 | node_modules
113 | oldsanta
114 | option
115 | outputs
116 | owncloud
117 | packed
118 | patchwork
119 | path
120 | pictures
121 | plugin_static
122 | postfixadmin
123 | prod
124 | project_root
125 | pub
126 | public
127 | public_html
128 | public_root
129 | qv-frontend
130 | repo
131 | repos
132 | repository
133 | resources
134 | resourcesync
135 | results
136 | robots
137 | root
138 | roundcube
139 | roundcubemail
140 | run
141 | script
142 | scripts
143 | shared
144 | shibboleth
145 | site-library
146 | sitestatic
147 | spearmint
148 | src
149 | stackato-pkg
150 | static
151 | static-collected
152 | static-html
153 | static-root
154 | static_prod
155 | static_root
156 | static_user
157 | staticfiles
158 | stats
159 | storage
160 | style
161 | styles
162 | stylesheets
163 | target
164 | temp
165 | templates
166 | test
167 | testfiles
168 | tests
169 | theme
170 | theme_static
171 | thumb
172 | thumbs
173 | tiedostot
174 | tmp
175 | ubuntu
176 | ui
177 | unsplash-downloader
178 | upfiles
179 | upload
180 | uploads
181 | videos
182 | web
183 | web-dist
184 | webroot_path
185 | websocket
186 | webstatic
187 | well-known
188 | whturk
189 | wp-content
190 | www
191 | www-data
192 | zmusic-frontend
--------------------------------------------------------------------------------
/issue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PortSwigger/nginx-alias-traversal/43f4a8f5258bfed0c7d91421f7b276332bc55ec0/issue.png
--------------------------------------------------------------------------------
/off-by-slash.py:
--------------------------------------------------------------------------------
1 | from burp import IBurpExtender, IScannerCheck, IScanIssue
2 | from java.io import PrintWriter
3 | from java.net import URL
4 |
5 | # https://i.blackhat.com/us-18/Wed-August-8/us-18-Orange-Tsai-Breaking-Parser-Logic-Take-Your-Path-Normalization-Off-And-Pop-0days-Out-2.pdf
6 |
7 | # Attempts to detect path traversal caused by a common NGINX misconfiguration.
8 | # Example:
9 | # For the URL: https://example.com/folder1/folder2/static/main.css it generates the following links (only if the folders seem vulnerable):
10 | #
11 | # https://example.com/folder1../folder1/folder2/static/main.css
12 | # https://example.com/folder1../%s/folder2/static/main.css
13 | # https://example.com/folder1/folder2../folder2/static/main.css
14 | # https://example.com/folder1/folder2../%s/static/main.css
15 | # https://example.com/folder1/folder2/static../static/main.css
16 | # https://example.com/folder1/folder2/static../%s/main.css
17 | #
18 | # where %s are common directories used in alias paths based on top 10k nginx configuration files from GH (thanks @TomNomNom), see directories.txt.
19 |
20 |
21 | class BurpExtender(IBurpExtender, IScannerCheck):
22 |     scanned_urls = set()
23 | 
24 |     def registerExtenderCallbacks(self, callbacks):
25 |         self._callbacks = callbacks
26 |         self._helpers = callbacks.getHelpers()
27 | 
28 |         callbacks.setExtensionName("NGINX Alias Traversal")
29 | 
30 |         self._stdout = PrintWriter(callbacks.getStdout(), True)
31 |         self._callbacks.registerScannerCheck(self)
32 | 
33 |         self.enableDirectoryGuessing = True
34 |         with open("directories.txt", "r") as f:
35 |             self.common_directories = [x.strip() for x in f.readlines()]
36 | 
37 |         self._stdout.println("GitHub: https://github.com/bayotop/off-by-slash/")
38 |         self._stdout.println("Contact: https://twitter.com/_bayotop")
39 |         self._stdout.println("")
40 |         self._stdout.println("Successfully initialized (v1.1)!")
42 |     def doActiveScan(self, baseRequestResponse, insertionPoint):
43 |         scan_issues = []
44 | 
45 |         if not self.isGet(baseRequestResponse.getRequest()):
46 |             return None
47 | 
48 |         if not self.isStaticResource(baseRequestResponse):
49 |             return None
50 | 
51 |         # Am I missing cases because of this?
52 |         if self._helpers.analyzeResponse(baseRequestResponse.getResponse()).getStatusCode() != 200:
53 |             return None
54 | 
55 |         # Prevent testing the same paths repeatedly
56 |         url = self._helpers.analyzeRequest(baseRequestResponse).getUrl().toString()
57 |         url = url[: url.rindex("/")]
58 | 
59 |         if url in self.scanned_urls:
60 |             return None
61 | 
62 |         self.scanned_urls.add(url)
63 |         vulnerable, verifyingRequestResponse = self.detectAliasTraversal(baseRequestResponse)
64 | 
65 |         if vulnerable:
66 |             scan_issues.append(self.generateIssue(baseRequestResponse, verifyingRequestResponse))
67 | 
68 |         return scan_issues
69 | 
70 |     def doPassiveScan(self, baseRequestResponse):
71 |         return []
72 | 
73 |     def consolidateDuplicateIssues(self, existingIssue, newIssue):
74 |         return existingIssue.getIssueName() == newIssue.getIssueName()
75 | 
76 |     def isGet(self, request):
77 |         requestInfo = self._helpers.analyzeRequest(request)
78 |         return requestInfo.getMethod() == "GET"
79 | 
80 |     def isStaticResource(self, requestResponse):
81 |         # This likely needs adjustment.
82 |         return "." in self._helpers.analyzeRequest(requestResponse).getUrl().getPath().split("/")[-1]
83 |
84 |     def detectAliasTraversal(self, requestResponse):
85 |         originalUrl = self._helpers.analyzeRequest(requestResponse).getUrl()
86 |         urls = self.generateUrls(originalUrl, requestResponse)
87 | 
88 |         for url in urls:
89 |             verifyingRequestResponse = self._callbacks.makeHttpRequest(
90 |                 requestResponse.getHttpService(), self._helpers.buildHttpRequest(url)
91 |             )
92 |             if self.compareResponses(requestResponse.getResponse(), verifyingRequestResponse.getResponse()):
93 |                 self._stdout.println("Vulnerable: %s" % url)
94 |                 return True, verifyingRequestResponse
95 | 
96 |         return False, None
97 | 
98 |     def generateUrls(self, url, requestResponse):
99 |         urls = []
100 |         path = url.getPath()
101 |         parts = filter(None, path.split("/"))
102 | 
103 |         for part in parts:
104 |             if "." in part and part[0] != ".":
105 |                 continue
106 | 
107 |             # Checks if /part../ returns the same as /
108 |             if not self.quickCheck(url, part, requestResponse):
109 |                 continue
110 | 
111 |             self._stdout.println("Potentially vulnerable: %s (folder /%s/)" % (url, part))
112 | 
113 |             replacement = "/%s../%s/" % (part, part)
114 |             urls.append(URL(url.toString().replace("/%s/" % part, replacement)))
115 |             if self.enableDirectoryGuessing:
116 |                 urls = urls + self.guessDirectories(url, part)
117 | 
118 |         return urls
119 | 
120 |     def quickCheck(self, url, part, requestResponse):
121 |         replacement = "/%s../" % part
122 | 
123 |         # https://host/some/part/other -> https://host/some/part../
124 |         probe = url.toString().replace("/%s/" % part, replacement)
125 |         probe = URL(probe[: probe.index("../") + 3])
126 | 
127 |         # https://host/some/part../ -> https://host/some/
128 |         verifier = URL(probe.toString().replace(replacement, "") + "/")
129 | 
130 |         expected = self._callbacks.makeHttpRequest(
131 |             requestResponse.getHttpService(), self._helpers.buildHttpRequest(verifier)
132 |         )
133 |         actual = self._callbacks.makeHttpRequest(
134 |             requestResponse.getHttpService(), self._helpers.buildHttpRequest(probe)
135 |         )
136 | 
137 |         return self.compareResponses(expected.getResponse(), actual.getResponse())
138 | 
139 |     def guessDirectories(self, url, part):
140 |         urls = []
141 | 
142 |         for directory in self.common_directories:
143 |             replacement = "/%s../%s/" % (part, directory)
144 |             urls.append(URL(url.toString().replace("/%s/" % part, replacement)))
145 | 
146 |         return urls
147 | 
148 |     def compareResponses(self, oResponse, vResponse):
149 |         vResponseInfo = self._helpers.analyzeResponse(vResponse)
150 |         oResponseInfo = self._helpers.analyzeResponse(oResponse)
151 | 
152 |         if vResponseInfo.getStatusCode() != oResponseInfo.getStatusCode():
153 |             return False
154 | 
155 |         vBodyOffset = vResponseInfo.getBodyOffset()
156 |         vBody = vResponse.tostring()[vBodyOffset:]
157 | 
158 |         oBodyOffset = oResponseInfo.getBodyOffset()
159 |         oBody = oResponse.tostring()[oBodyOffset:]
160 | 
161 |         return str(oBody) == str(vBody)
162 |
163 |     def generateIssue(self, baseRequestResponse, verifyingRequestResponse):
164 |         name = "Path traversal via misconfigured NGINX alias"
165 |         severity = "High"
166 |         confidence = "Firm"
167 |         detail = """
168 | Found path traversal at:
169 | 
170 | - Original URL: %s
171 | - Verification URL: %s
172 | 
173 | """ % (
174 |             self._helpers.analyzeRequest(baseRequestResponse).getUrl(),
175 |             self._helpers.analyzeRequest(verifyingRequestResponse).getUrl(),
176 |         )
177 |         # https://github.com/yandex/gixy/blob/master/docs/en/plugins/aliastraversal.md
178 |         background = """
179 | The alias directive is used to replace the path of the specified location. For example, with the following configuration:
180 | 
181 | location /i/ {
182 |     alias /data/w3/images/;
183 | }
184 | on a request for /i/top.gif, the file /data/w3/images/top.gif will be sent.
185 | 
186 | But if the location doesn't end with a directory separator (i.e. /):
187 | 
188 | location /i {
189 |     alias /data/w3/images/;
190 | }
191 | on a request for /i../app/config.py, the file /data/w3/app/config.py will be sent.
192 | 
193 | In other words, an incorrect alias configuration could allow an attacker to read files stored outside the target folder.
194 | """
195 |         remediation = "Find all 'alias' directives and make sure that the parent prefixed location ends with a directory separator."
196 | 
197 |         return ScanIssue(
198 |             baseRequestResponse.getHttpService(),
199 |             self._helpers.analyzeRequest(baseRequestResponse).getUrl(),
200 |             [baseRequestResponse, verifyingRequestResponse],
201 |             name,
202 |             detail,
203 |             background,
204 |             confidence,
205 |             severity,
206 |             remediation,
207 |         )
208 |
209 |
210 | class ScanIssue(IScanIssue):
211 |     def __init__(self, httpService, url, httpMessages, name, detail, background, confidence, severity, remediation):
212 |         self.HttpService = httpService
213 |         self.Url = url
214 |         self.HttpMessages = httpMessages
215 |         self.Name = name
216 |         self.Background = background
217 |         self.Detail = detail
218 |         self.Severity = severity
219 |         self.Confidence = confidence
220 |         self.Remediation = remediation
221 |         return
222 | 
223 |     def getUrl(self):
224 |         return self.Url
225 | 
226 |     def getIssueName(self):
227 |         return self.Name
228 | 
229 |     def getIssueType(self):
230 |         return 0
231 | 
232 |     def getSeverity(self):
233 |         return self.Severity
234 | 
235 |     def getConfidence(self):
236 |         return self.Confidence
237 | 
238 |     def getIssueBackground(self):
239 |         return self.Background
240 | 
241 |     def getRemediationBackground(self):
242 |         return self.Remediation
243 | 
244 |     def getIssueDetail(self):
245 |         return self.Detail
246 | 
247 |     def getRemediationDetail(self):
248 |         return None
249 | 
250 |     def getHttpMessages(self):
251 |         return self.HttpMessages
252 | 
253 |     def getHttpService(self):
254 |         return self.HttpService
255 |
--------------------------------------------------------------------------------
/scrape.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | import sys
3 | import re
4 | import requests
5 | import urllib3
6 | from urllib.parse import urlparse
7 |
8 | # 1. Use this to scrape static resources from a list of given URLs.
9 | # 2. In Burp, start a new scan and add them as "URLs to Scan".
10 | # 3. Selectively disable other extensions that add active scanner checks and run an "Audit checks - extensions only" scan.
11 |
12 | RESOURCES_PATTERN = r'(?:(?:href|src)=(?:["\']([^\'"]*)[\'"]|([^\s<>]+)))' # @d0nutptr
13 | EXCLUDED_EXTENSIONS = [r"html?", r"as.x?", r"php\d?"]
14 |
15 | RESULTS_FILE = "results.txt"
16 | PROCESSES_COUNT = 4
17 | DONE_FLAG = "__done__"
18 |
19 |
20 | def initiate(pool, results, urls):
21 |     jobs = []
22 |     for url in urls:
23 |         job = pool.apply_async(scrape, (url, results))
24 |         jobs.append(job)
25 | 
26 |     try:
27 |         for job in jobs:
28 |             job.get()
29 |     except KeyboardInterrupt:
30 |         print("Killed.")
31 |         try:
32 |             pool.terminate()
33 |             pool.close()
34 |         finally:
35 |             sys.exit(0)
36 |
37 |
38 | def scrape(url, queue):
39 |     urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
40 |     results = set()
41 | 
42 |     print("Scraping %s ..." % url)
43 |     try:
44 |         response = requests.get(url, verify=False, timeout=3)
45 |         if response.history:
46 |             url = "{uri.scheme}://{uri.netloc}".format(uri=urlparse(response.url))
47 |         content = response.content
48 |     except:
49 |         print("Failed on %s: %s" % (url, sys.exc_info()[1]))
50 |         return
51 | 
52 |     matches = re.findall(RESOURCES_PATTERN, content.decode("utf-8", "replace"))
53 | 
54 |     for match in matches:
55 |         for group in match:
56 |             results.add(group)
57 | 
58 |     results = [result for result in results if is_same_origin(url, result) or is_relative(result)]
59 |     results = [
60 |         result
61 |         for result in results
62 |         if ("." in result.split("/")[-1] and not is_excluded(result.split("/")[-1].split(".")[-1]))
63 |     ]
64 |     results = [get_full_url(url, result) for result in results]
65 | 
66 |     print("Found %s resources on %s" % (len(results), url))
67 | 
68 |     for result in results:
69 |         queue.put(result.replace(" ", "%20"))
70 |
71 |
72 | def writer(queue):
73 |     results = set()
74 |     while True:
75 |         try:
76 |             entry = queue.get()
77 |             if entry == DONE_FLAG:
78 |                 return results
79 | 
80 |             results.add(entry)
81 |         except:
82 |             # KeyboardInterrupt
83 |             break
84 |
85 |
86 | def is_same_origin(origin, url):
87 |     return url.startswith(origin + "/") or url.startswith("//%s/" % origin.split("/")[2])
88 | 
89 | 
90 | def is_relative(url):
91 |     return url.startswith("/") and not (url.startswith("//") or url.startswith("/\\"))
92 | 
93 | 
94 | def is_excluded(extension):
95 |     return any(re.match(ep, extension) for ep in EXCLUDED_EXTENSIONS)
96 | 
97 | 
98 | def get_full_url(origin, url):
99 |     if url.startswith(origin):
100 |         return url
101 |     if url.startswith("//"):
102 |         return origin.split("/")[0] + url
103 |     if url.startswith("/"):
104 |         return origin + url
105 |
106 |
107 | if __name__ == "__main__":
108 |     if len(sys.argv) != 2:
109 |         print("Usage: %s <urls_file>" % sys.argv[0])
110 |         sys.exit()
111 | 
112 |     with open(sys.argv[1]) as f:
113 |         urls = [line.strip().rstrip("/") for line in f.readlines()]
114 | 
115 |     results = multiprocessing.Manager().Queue()
116 |     p = multiprocessing.Pool(PROCESSES_COUNT)
117 | 
118 |     wjob = p.apply_async(writer, (results,))
119 |     initiate(p, results, urls)
120 | 
121 |     results.put(DONE_FLAG)
122 |     resources = wjob.get()
123 |     p.close()
124 | 
125 |     with open(RESULTS_FILE, "w", encoding="utf-8") as f:
126 |         for resource in resources:
127 |             f.write("%s\n" % resource)
128 |
--------------------------------------------------------------------------------