├── .gitignore ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── benchmarks └── urls.py ├── requirements.txt ├── setup.py ├── tests ├── test_urlparse.py ├── test_urlparse4.py └── urls │ ├── blink-performancetests.txt │ └── seeds_es_dmoz.txt ├── urlparse4 ├── __init__.py ├── cgurl.cpp ├── cgurl.pyx ├── chromium_gurl.pxd └── mozilla_url_parse.pxd └── vendor └── gurl ├── base ├── base_export.h ├── basictypes.h ├── compiler_specific.h ├── macros.h ├── memory │ └── scoped_ptr.h ├── move.h ├── strings │ ├── cscope.out │ ├── string16.cc │ ├── string16.h │ ├── string_piece.cc │ ├── string_piece.h │ ├── string_util.cc │ ├── string_util.h │ ├── utf_string_conversion_utils.cc │ ├── utf_string_conversion_utils.h │ ├── utf_string_conversions.cc │ └── utf_string_conversions.h ├── template_util.h └── third_party │ └── icu │ ├── LICENSE │ ├── README.chromium │ ├── icu_utf.cc │ └── icu_utf.h ├── build └── build_config.h └── url ├── gurl.cc ├── gurl.h ├── third_party └── mozilla │ ├── LICENSE.txt │ ├── README.chromium │ ├── a.out │ ├── url_parse.cc │ └── url_parse.h ├── url_canon.h ├── url_canon_etc.cc ├── url_canon_filesystemurl.cc ├── url_canon_fileurl.cc ├── url_canon_host.cc ├── url_canon_internal.cc ├── url_canon_internal.h ├── url_canon_ip.cc ├── url_canon_ip.h ├── url_canon_mailtourl.cc ├── url_canon_path.cc ├── url_canon_pathurl.cc ├── url_canon_query.cc ├── url_canon_relative.cc ├── url_canon_stdstring.cc ├── url_canon_stdstring.h ├── url_canon_stdurl.cc ├── url_constants.cc ├── url_constants.h ├── url_export.h ├── url_file.h ├── url_parse_file.cc ├── url_parse_internal.h ├── url_util.cc ├── url_util.h └── url_util_internal.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | /venv 3 | /src 4 | /build 5 | *.so 6 | /.cache 7 | /tmp 8 | /MANIFEST 9 | /urlparse4/*.html 10 | /dist -------------------------------------------------------------------------------- /Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM debian:jessie 2 | 3 | RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ 4 | curl \ 5 | automake \ 6 | gcc \ 7 | g++ \ 8 | make \ 9 | libtool \ 10 | ca-certificates \ 11 | python-pip \ 12 | python-dev \ 13 | python-numpy \ 14 | bzip2 \ 15 | git \ 16 | pkg-config \ 17 | liburiparser-dev \ 18 | vim 19 | 20 | RUN mkdir -p /cosr/urlparse4 21 | 22 | # Upgrade pip 23 | RUN pip install --upgrade --ignore-installed pip 24 | 25 | ADD requirements.txt /requirements.txt 26 | 27 | # Install Cython first to be able to install other dependencies from git 28 | RUN grep -i "^Cython\=" /requirements.txt | xargs -n1 pip install 29 | 30 | RUN pip install -r requirements.txt 31 | 32 | RUN cd /tmp && \ 33 | git clone --recursive https://github.com/mitghi/cyuri && \ 34 | cd ./cyuri/liburi && \ 35 | autoreconf -i && \ 36 | ./configure --prefix=/usr/local && \ 37 | make && \ 38 | make install && \ 39 | cd .. && \ 40 | CPPFLAGS=-I/usr/local/include/liburi make && \ 41 | cp cyuri.so /usr/lib/python2.7/ && \ 42 | ldconfig 43 | 44 | ADD Makefile /Makefile 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2016 Common Search contributors 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include vendor *.cpp *.h *.cc 2 | recursive-include urlparse4 *.pxd *.pyx *.cpp 3 | include README.md 4 | include LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | clean: 2 | rm -rf *.so urlparse4/*.so build urlparse4/*.c urlparse4/*.cpp urlparse4/*.html dist .cache tests/__pycache__ *.rst 3 | 4 | benchmark: 5 | python benchmarks/urls.py 6 | 7 | test: 8 | py.test tests/ -v 9 | 10 | docker_build: 11 | docker build -t commonsearch/urlparse4 . 
12 | 13 | docker_shell: 14 | docker run -v "$(PWD):/cosr/urlparse4:rw" -w /cosr/urlparse4 -i -t commonsearch/urlparse4 bash 15 | 16 | docker_test: 17 | docker run -v "$(PWD):/cosr/urlparse4:rw" -w /cosr/urlparse4 -i -t commonsearch/urlparse4 make test 18 | 19 | docker_benchmark: 20 | docker run -v "$(PWD):/cosr/urlparse4:rw" -w /cosr/urlparse4 -i -t commonsearch/urlparse4 make benchmark 21 | 22 | build_ext: 23 | python setup.py build_ext --inplace 24 | 25 | sdist: 26 | python setup.py sdist 27 | 28 | pypi: clean build_ext 29 | pip install pypandoc 30 | python setup.py sdist upload -r pypi-commonsearch -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # urlparse4 2 | 3 | `urlparse4` is a performance-focused replacement for Python's `urlparse` module, using C++ code from Chromium's own URL parser. 4 | 5 | It is not production-ready yet. 6 | 7 | Many credits go to [gurl-cython](https://github.com/Preetwinder/gurl-cython) for inspiration. 8 | 9 | ## Differences with Python's `urlparse` 10 | 11 | `urlparse4` should be a transparent, drop-in replacement in almost all cases. Still, there are a few differences to be aware of: 12 | 13 | - `urlparse4` is 2-7x faster for most operations (see benchmarks below) 14 | - `urlparse4` currently doesn't pass CPython's `test_urlparse.py` suite due to edge cases that Chromium's parser manages differently (usually in accordance to the RFCs, which `urlparse` doesn't follow entirely). 
15 | - `urlparse4` only supports Python 2.7 for now 16 | 17 | ## How to install 18 | 19 | ``` 20 | pip install urlparse4 21 | ``` 22 | 23 | ## How to use 24 | 25 | The most straightforward way to use `urlparse4` is to replace your imports of `urlparse` with this: 26 | 27 | ``` 28 | import urlparse4 as urlparse 29 | ``` 30 | 31 | You could also monkey-patch its methods in the global `urlparse` module like this: 32 | 33 | ``` 34 | import urlparse4 35 | import urlparse 36 | 37 | for attr in dir(urlparse4): 38 | setattr(urlparse, attr, getattr(urlparse4, attr)) 39 | 40 | ``` 41 | 42 | ## How to test 43 | 44 | You must have Docker installed and running. You can run CPython's test suite for `urlparse` like this: 45 | 46 | ``` 47 | make docker_build 48 | make docker_test 49 | ``` 50 | 51 | ## Benchmarks 52 | 53 | We are testing the following libraries on a sample of 100k URLs from Blink and DMOZ: 54 | 55 | - urlparse4 ;-) 56 | - [CPython's urlparse](https://github.com/python/cpython/blob/2.7/Lib/urlparse.py) 57 | - [urlparse2](https://github.com/mwhooker/urlparse2) 58 | - [YURL](http://github.com/homm/yurl/) 59 | - [uritools](https://github.com/tkem/uritools) 60 | - [pygurl / gurl-cython](https://github.com/Preetwinder/gurl-cython) 61 | - [cyuri](https://github.com/mitghi/cyuri) 62 | 63 | Each of them is being tested on a few different types of operations (basic urlsplit, relative link resolution, hostname extraction) 64 | 65 | Here is how to launch the tests: 66 | 67 | ``` 68 | make docker_build 69 | make docker_benchmark 70 | ``` 71 | 72 | Current results on a 2.2GHz Intel Core i7 MBP (in seconds): 73 | 74 | ``` 75 | Benchmark results on 104300 URLs x 10 times, in seconds: 76 | 77 | Name Sum Mean Median 90% 78 | ---------------- ------------- ----------------- ----------------- ----------------- 79 | 80 | urlsplit: 81 | ---- ---- ---- ---- ---- 82 | urlparse4 1.681858 1.61251965484e-06 1.99999999984e-06 2.00000000006e-06 83 | pygurl 2.031712 1.94795014382e-06
1.99999999984e-06 2.00000000028e-06 84 | uritools 2.638991 2.53019271333e-06 2.00000000028e-06 3.00000000042e-06 85 | yurl 3.910247 3.74903835091e-06 3.00000000131e-06 4.99999999981e-06 86 | urlparse2 3.756782 3.60190028763e-06 2.99999999953e-06 4.00000000056e-06 87 | urlparse 3.862006 3.70278619367e-06 3.00000000308e-06 4.99999999803e-06 88 | cyuri 9.912275 9.50361936721e-06 8.00000000112e-06 1.30000000027e-05 89 | 90 | urljoin_sibling: 91 | ---- ---- ---- ---- ---- 92 | urlparse4 2.008453 1.92565004794e-06 2.00000000206e-06 2.00000000206e-06 93 | pygurl 2.193427 2.10299808245e-06 2.00000000206e-06 2.99999999953e-06 94 | uritools 10.575344 1.01393518696e-05 9.99999999607e-06 1.20000000052e-05 95 | yurl 13.213052 1.26683144775e-05 1.19999999981e-05 1.60000000022e-05 96 | urlparse2 14.239327 1.36522790029e-05 1.19999999981e-05 1.69999999997e-05 97 | urlparse 9.25991500001 8.87815436242e-06 8.00000000822e-06 1.10000000006e-05 98 | cyuri 5.742724 5.50596740172e-06 5.00000000159e-06 7.00000001075e-06 99 | 100 | hostname: 101 | ---- ---- ---- ---- ---- 102 | urlparse4 1.883982 1.80631064237e-06 1.99999999495e-06 2.00000000916e-06 103 | pygurl 1.67332099999 1.60433461169e-06 1.99999999495e-06 2.00000000916e-06 104 | uritools 3.31632199999 3.17959923297e-06 3.00000000664e-06 4.00000000411e-06 105 | yurl 3.853319 3.69445733461e-06 3.00000000664e-06 4.00000000411e-06 106 | urlparse2 4.641513 4.45015627996e-06 4.00000000411e-06 5.99999999906e-06 107 | urlparse 5.122682 4.91148801534e-06 4.00000000411e-06 5.99999999906e-06 108 | cyuri 11.108649 1.06506701822e-05 9.0000000057e-06 1.5999999988e-05 109 | ``` 110 | 111 | Some libraries are included in the benchmark code but disabled for various reasons: 112 | 113 | - [urlparse3](https://pypi.python.org/pypi/urlparse3/) (Raises on valid URLs) 114 | - [slimurl](https://github.com/mosquito/slimurl) (Too slow) 115 | 116 | Feel free to submit pull requests to add new ones! 
def benchmark(name, func, debug=False):
    """Time ``func`` on every benchmark URL and record one summary row.

    Each line of the module-level ``URLS`` list is stripped and passed to
    ``func``; the whole list is walked ``REPEATS`` times.  Only the call to
    ``func`` is timed (with ``clock()``), so stripping and the optional
    debug print are not counted.

    The row ``[name, sum, mean, median, 90th percentile]`` is printed and
    appended to the module-level ``data`` list.

    :param name: label shown in the first column of the results table.
    :param func: callable taking a single URL string.
    :param debug: when true, print each URL just before it is timed.
    """
    timings = []
    for _ in range(REPEATS):
        for raw_url in URLS:
            stripped = raw_url.strip()
            if debug:
                print(stripped)
            started = clock()
            func(stripped)
            timings.append(clock() - started)

    summary = [
        name,
        sum(timings),
        mean(timings),
        median(timings),
        percentile(timings, 90),
    ]
    print(summary)
    data.append(summary)
65 | benchmark("urlparse2", lambda url: urlparse2.urlsplit(url)) 66 | benchmark("urlparse", lambda url: urlparse.urlsplit(url)) 67 | benchmark("cyuri", lambda url: cyuri_parser.components(url)) 68 | 69 | title("urljoin_sibling") 70 | benchmark("urlparse4", lambda url: urlparse4.urljoin(url, "sibling.html?q=1#e=b")) 71 | benchmark("pygurl", lambda url: pygurl.URL(url).Resolve("sibling.html?q=1#e=b")) 72 | benchmark("uritools", lambda url: uritools_urijoin(url, "sibling.html?q=1#e=b")) 73 | benchmark("yurl", lambda url: yurl_url(url) + yurl_url("sibling.html?q=1#e=b")) 74 | benchmark("urlparse2", lambda url: urlparse2.urljoin(url, "sibling.html?q=1#e=b")) 75 | benchmark("urlparse", lambda url: urlparse.urljoin(url, "sibling.html?q=1#e=b")) 76 | benchmark("cyuri", lambda url: cyuri_parser.join(url, "sibling.html?q=1#e=b")) 77 | 78 | # Not very representative because some libraries have functions to access the host directly without parsing the rest. 79 | # Might still be useful for some people! 80 | title("hostname") 81 | benchmark("urlparse4", lambda url: urlparse4.urlsplit(url).hostname) 82 | benchmark("pygurl", lambda url: pygurl.URL(url).host()) 83 | benchmark("uritools", lambda url: uritools_urisplit(url).host) 84 | benchmark("yurl", lambda url: yurl_url(url).host) 85 | benchmark("urlparse2", lambda url: urlparse2.urlsplit(url).hostname) 86 | benchmark("urlparse", lambda url: urlparse.urlsplit(url).hostname) 87 | benchmark("cyuri", lambda url: cyuri_parser.components(url)["host"]) 88 | 89 | # Very slow! 
# Build script for urlparse4: compiles the Cython wrapper (urlparse4/cgurl)
# together with the vendored Chromium GURL sources into one C++ extension.
from distutils.core import setup, Extension
import os

VERSION = "0.1.3"

extension = Extension(
    name="urlparse4/cgurl",
    sources=["urlparse4/cgurl.pyx",
             "vendor/gurl/base/third_party/icu/icu_utf.cc",
             "vendor/gurl/base/strings/string16.cc",
             "vendor/gurl/base/strings/string_piece.cc",
             "vendor/gurl/base/strings/string_util.cc",
             "vendor/gurl/base/strings/utf_string_conversions.cc",
             "vendor/gurl/base/strings/utf_string_conversion_utils.cc",
             "vendor/gurl/url/gurl.cc",
             "vendor/gurl/url/url_canon_etc.cc",
             "vendor/gurl/url/url_canon_filesystemurl.cc",
             "vendor/gurl/url/url_canon_fileurl.cc",
             "vendor/gurl/url/url_canon_host.cc",
             "vendor/gurl/url/url_canon_internal.cc",
             "vendor/gurl/url/url_canon_ip.cc",
             "vendor/gurl/url/url_canon_mailtourl.cc",
             "vendor/gurl/url/url_canon_path.cc",
             "vendor/gurl/url/url_canon_pathurl.cc",
             "vendor/gurl/url/url_canon_query.cc",
             "vendor/gurl/url/url_canon_relative.cc",
             "vendor/gurl/url/url_canon_stdstring.cc",
             "vendor/gurl/url/url_canon_stdurl.cc",
             "vendor/gurl/url/url_constants.cc",
             "vendor/gurl/url/url_parse_file.cc",
             "vendor/gurl/url/url_util.cc",
             "vendor/gurl/url/third_party/mozilla/url_parse.cc"
             ],
    language="c++",
    extra_compile_args=["-std=gnu++0x", "-I./vendor/gurl/",
                        "-fPIC", "-Ofast", "-pthread", "-w"],
    extra_link_args=["-std=gnu++0x", "-w"],
)


# When the pre-generated C++ file is present, build from it so that Cython is
# not needed at install time; otherwise regenerate it from the .pyx source.
if not os.path.isfile("urlparse4/cgurl.cpp"):
    try:
        from Cython.Build import cythonize
        ext_modules = cythonize(extension, annotate=True)
    except Exception:
        # The hint names the file that was actually looked for above.
        print("urlparse4/cgurl.cpp not found and Cython failed to run to recreate it. Please install/upgrade Cython and try again.")
        raise
else:
    ext_modules = [extension]
    ext_modules[0].sources[0] = "urlparse4/cgurl.cpp"

# Prefer an reST long description converted from the README. Fall back to the
# raw Markdown when pypandoc is not installed (ImportError) or when it cannot
# find the pandoc binary (OSError), so the install does not abort.
try:
    import pypandoc
    long_description = pypandoc.convert('README.md', 'rst')
except (ImportError, OSError):
    with open('README.md') as readme_file:
        long_description = readme_file.read()

setup(
    name="urlparse4",
    packages=['urlparse4'],
    version=VERSION,
    description="Performance-focused replacement for Python's urlparse module",
    author="Common Search contributors",
    author_email="contact@commonsearch.org",
    license="Apache License, Version 2.0",
    url="https://github.com/commonsearch/urlparse4",
    keywords=["urlparse", "urlsplit", "urljoin", "url", "parser", "urlparser", "parsing", "gurl", "cython", "faster", "speed", "performance"],
    platforms='any',
    classifiers=[
        "Programming Language :: Python",
        "Programming Language :: Python :: 2.7",
        # 'Development Status :: 1 - Planning',
        # 'Development Status :: 2 - Pre-Alpha',
        'Development Status :: 3 - Alpha',
        # 'Development Status :: 4 - Beta',
        # 'Development Status :: 5 - Production/Stable',
        # 'Development Status :: 6 - Mature',
        # 'Development Status :: 7 - Inactive',
        "Environment :: Other Environment",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
        "Topic :: Software Development :: Libraries"
    ],
    long_description=long_description,
    ext_modules=ext_modules,
    include_package_data=True
)
cdef bytes build_netloc(bytes url, Parsed parsed):
    """Return the netloc (``user:password@host:port``) as a slice of ``url``.

    The result runs from the first authority component that is present to the
    last one, so every separator in between is included exactly as written in
    ``url``.  A component counts as present when its ``len`` is positive.

    This replaces a ladder of username/password/port combinations that
    rebuilt the length by adding up component lengths and separators.  That
    ladder raised ``ValueError`` when a password was present without a
    username, and its separator count was wrong whenever a separator existed
    for an empty component.

    :param url: the exact byte string that was handed to the parser.
    :param parsed: component offsets produced by the parser for ``url``.
    :returns: the netloc bytes, or ``b""`` when there is no host.
    """
    cdef int begin
    cdef int end

    if parsed.host.len <= 0:
        return b""

    if parsed.username.len > 0:
        begin = parsed.username.begin
    elif parsed.password.len > 0:
        # No username: start on the ':' that introduces the password.
        # NOTE(review): assumes that ':' sits immediately before
        # password.begin -- confirm against the vendored url_parse.cc.
        begin = parsed.password.begin - 1
    else:
        begin = parsed.host.begin

    if parsed.port.len > 0:
        end = parsed.port.begin + parsed.port.len
    else:
        end = parsed.host.begin + parsed.host.len

    return url[begin:end]
#     property host:
#         def __get__(self):
#             return slice_component(self.pyurl, self.parsed.host)

#     property netloc:
#         def __get__(self):
#             return build_netloc(self.pyurl, self.parsed)


cdef void _parse_into(bytes url, Parsed * parsed):
    # Parse `url` into `parsed`, using the file-URL parser for "file:" URLs
    # and the standard-URL parser for everything else.
    if url[0:5] == b"file:":
        ParseFileURL(url, len(url), parsed)
    else:
        ParseStandardURL(url, len(url), parsed)


class SplitResultNamedTuple(tuple):
    """Result of urlsplit(): a (scheme, netloc, path, query, fragment) tuple.

    The five items are also readable as attributes of the same names, and
    ``port``, ``username``, ``password`` and ``hostname`` are derived on
    demand from the URL the instance was built from.

    Earlier versions installed a closure as ``cls.__getattr__`` on every
    construction, so the derived attributes of *every* live instance
    reflected the most recently parsed URL.  Each instance now keeps its own
    URL (which requires an instance dictionary, hence no ``__slots__``) and
    ``__getattr__`` is an ordinary method.
    """

    def __new__(cls, bytes url):

        cdef Parsed parsed

        _parse_into(url, &parsed)

        self = tuple.__new__(cls, (
            slice_component(url, parsed.scheme).lower(),
            build_netloc(url, parsed),
            slice_component(url, parsed.path),
            slice_component(url, parsed.query),
            slice_component(url, parsed.ref)
        ))
        # Kept so that the derived attributes describe this instance's URL.
        self._url = url
        return self

    def __getattr__(self, prop):
        """Resolve the named-tuple style and derived URL attributes.

        Only invoked when normal attribute lookup fails.  Raises
        AttributeError for names this class does not provide (they used to
        evaluate to None, which made hasattr() true for any name).
        """
        cdef Parsed parsed
        cdef bytes url

        if prop == "scheme":
            return self[0]
        elif prop == "netloc":
            return self[1]
        elif prop == "path":
            return self[2]
        elif prop == "query":
            return self[3]
        elif prop == "fragment":
            return self[4]
        elif prop not in ("port", "username", "password", "hostname"):
            raise AttributeError(prop)

        # Re-parsing is cheap and avoids storing a C struct per instance.
        url = self._url
        _parse_into(url, &parsed)

        if prop == "port":
            if parsed.port.len > 0:
                port = int(slice_component(url, parsed.port))
                if port <= 65535:
                    return port
            # No port, or a value outside the valid range.
            return None
        elif prop == "username":
            return slice_component(url, parsed.username) or None
        elif prop == "password":
            return slice_component(url, parsed.password) or None
        else:  # hostname
            return slice_component(url, parsed.host).lower()

    def geturl(self):
        """Reassemble the URL from the five tuple items."""
        return stdlib_urlparse.urlunsplit(self)


def urlsplit(url):
    """Split `url` (bytes) into a SplitResultNamedTuple."""
    return SplitResultNamedTuple.__new__(SplitResultNamedTuple, url)

def urljoin(bytes base, bytes url, allow_fragments=True):
    """Resolve `url` against `base`.

    GURL does the resolution when fragments are allowed and `base` is
    non-empty; every other case is delegated to the standard library.
    """
    if allow_fragments and base:
        return GURL(base).Resolve(url).spec()
    else:
        return stdlib_urlparse.urljoin(base, url, allow_fragments=allow_fragments)
-------------------------------------------------------------------------------- /urlparse4/chromium_gurl.pxd: -------------------------------------------------------------------------------- 1 | from libcpp.string cimport string 2 | from libcpp cimport bool 3 | from mozilla_url_parse cimport Component, Parsed 4 | 5 | 6 | cdef extern from "../vendor/gurl/url/gurl.h": 7 | cdef cppclass GURL: 8 | GURL() 9 | GURL(const string & url_string) 10 | GURL(const char * canonical_spec, 11 | size_t canonical_spec_len, 12 | const Parsed parsed, 13 | bool is_valid) 14 | 15 | bool is_valid() 16 | bool is_empty() 17 | bool IsStandard() 18 | string spec() 19 | GURL Resolve(const string & relative) 20 | string possibly_invalid_spec() 21 | 22 | bool has_scheme() 23 | bool has_username() 24 | bool has_password() 25 | bool has_host() 26 | bool has_port() 27 | bool has_path() 28 | bool has_query() 29 | bool has_ref() 30 | 31 | string scheme() 32 | string username() 33 | string password() 34 | string host() 35 | string port() 36 | string path() 37 | string query() 38 | string ref() 39 | 40 | Parsed parsed_for_possibly_invalid_spec() 41 | # GURL ReplaceComponents(const Replacements[char] replacements) 42 | -------------------------------------------------------------------------------- /urlparse4/mozilla_url_parse.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "../vendor/gurl/url/third_party/mozilla/url_parse.h" namespace "url": 2 | cdef struct Component: 3 | int begin 4 | int len 5 | 6 | cdef struct Parsed: 7 | int Length() 8 | Component scheme 9 | Component username 10 | Component password 11 | Component host 12 | Component port 13 | Component path 14 | Component query 15 | Component ref 16 | 17 | cdef void ParseStandardURL(const char* url, int url_len, Parsed* parsed) 18 | cdef void ParseFileURL(const char* url, int url_len, Parsed* parsed) 19 | -------------------------------------------------------------------------------- 
/vendor/gurl/base/base_export.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef BASE_BASE_EXPORT_H_ 6 | #define BASE_BASE_EXPORT_H_ 7 | 8 | #if defined(COMPONENT_BUILD) 9 | #if defined(WIN32) 10 | 11 | #if defined(BASE_IMPLEMENTATION) 12 | #define BASE_EXPORT __declspec(dllexport) 13 | #define BASE_EXPORT_PRIVATE __declspec(dllexport) 14 | #else 15 | #define BASE_EXPORT __declspec(dllimport) 16 | #define BASE_EXPORT_PRIVATE __declspec(dllimport) 17 | #endif // defined(BASE_IMPLEMENTATION) 18 | 19 | #else // defined(WIN32) 20 | #if defined(BASE_IMPLEMENTATION) 21 | #define BASE_EXPORT __attribute__((visibility("default"))) 22 | #define BASE_EXPORT_PRIVATE __attribute__((visibility("default"))) 23 | #else 24 | #define BASE_EXPORT 25 | #define BASE_EXPORT_PRIVATE 26 | #endif // defined(BASE_IMPLEMENTATION) 27 | #endif 28 | 29 | #else // defined(COMPONENT_BUILD) 30 | #define BASE_EXPORT 31 | #define BASE_EXPORT_PRIVATE 32 | #endif 33 | 34 | #endif // BASE_BASE_EXPORT_H_ 35 | -------------------------------------------------------------------------------- /vendor/gurl/base/basictypes.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | // This file contains definitions of our old basic integral types 6 | // ((u)int{8,16,32,64}) and further includes. I recommend that you use the C99 7 | // standard types instead, and include //etc. as needed. 8 | // Note that the macros and macro-like constructs that were formerly defined in 9 | // this file are now available separately in base/macros.h. 
10 | 11 | #ifndef BASE_BASICTYPES_H_ 12 | #define BASE_BASICTYPES_H_ 13 | 14 | #include // So we can set the bounds of our types. 15 | #include // For size_t. 16 | #include // For intptr_t. 17 | 18 | #include "base/macros.h" 19 | #include "build/build_config.h" 20 | 21 | // DEPRECATED: Please use (u)int{8,16,32,64}_t instead (and include ). 22 | typedef int8_t int8; 23 | typedef uint8_t uint8; 24 | typedef int16_t int16; 25 | typedef uint16_t uint16; 26 | typedef int32_t int32; 27 | typedef uint32_t uint32; 28 | typedef int64_t int64; 29 | typedef uint64_t uint64; 30 | 31 | // DEPRECATED: Please use std::numeric_limits (from ) or 32 | // (U)INT{8,16,32,64}_{MIN,MAX} in case of globals (and include ). 33 | const uint8 kuint8max = 0xFF; 34 | const uint16 kuint16max = 0xFFFF; 35 | const uint32 kuint32max = 0xFFFFFFFF; 36 | const uint64 kuint64max = 0xFFFFFFFFFFFFFFFFULL; 37 | const int8 kint8min = -0x7F - 1; 38 | const int8 kint8max = 0x7F; 39 | const int16 kint16min = -0x7FFF - 1; 40 | const int16 kint16max = 0x7FFF; 41 | const int32 kint32min = -0x7FFFFFFF - 1; 42 | const int32 kint32max = 0x7FFFFFFF; 43 | const int64 kint64min = -0x7FFFFFFFFFFFFFFFLL - 1; 44 | const int64 kint64max = 0x7FFFFFFFFFFFFFFFLL; 45 | 46 | #endif // BASE_BASICTYPES_H_ 47 | -------------------------------------------------------------------------------- /vendor/gurl/base/compiler_specific.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef BASE_COMPILER_SPECIFIC_H_ 6 | #define BASE_COMPILER_SPECIFIC_H_ 7 | 8 | #include "build/build_config.h" 9 | 10 | #if defined(COMPILER_MSVC) 11 | 12 | // Macros for suppressing and disabling warnings on MSVC. 
13 | // 14 | // Warning numbers are enumerated at: 15 | // http://msdn.microsoft.com/en-us/library/8x5x43k7(VS.80).aspx 16 | // 17 | // The warning pragma: 18 | // http://msdn.microsoft.com/en-us/library/2c8f766e(VS.80).aspx 19 | // 20 | // Using __pragma instead of #pragma inside macros: 21 | // http://msdn.microsoft.com/en-us/library/d9x1s805.aspx 22 | 23 | // MSVC_SUPPRESS_WARNING disables warning |n| for the remainder of the line and 24 | // for the next line of the source file. 25 | #define MSVC_SUPPRESS_WARNING(n) __pragma(warning(suppress:n)) 26 | 27 | // MSVC_PUSH_DISABLE_WARNING pushes |n| onto a stack of warnings to be disabled. 28 | // The warning remains disabled until popped by MSVC_POP_WARNING. 29 | #define MSVC_PUSH_DISABLE_WARNING(n) __pragma(warning(push)) \ 30 | __pragma(warning(disable:n)) 31 | 32 | // MSVC_PUSH_WARNING_LEVEL pushes |n| as the global warning level. The level 33 | // remains in effect until popped by MSVC_POP_WARNING(). Use 0 to disable all 34 | // warnings. 35 | #define MSVC_PUSH_WARNING_LEVEL(n) __pragma(warning(push, n)) 36 | 37 | // Pop effects of innermost MSVC_PUSH_* macro. 38 | #define MSVC_POP_WARNING() __pragma(warning(pop)) 39 | 40 | #define MSVC_DISABLE_OPTIMIZE() __pragma(optimize("", off)) 41 | #define MSVC_ENABLE_OPTIMIZE() __pragma(optimize("", on)) 42 | 43 | // Allows exporting a class that inherits from a non-exported base class. 44 | // This uses suppress instead of push/pop because the delimiter after the 45 | // declaration (either "," or "{") has to be placed before the pop macro. 46 | // 47 | // Example usage: 48 | // class EXPORT_API Foo : NON_EXPORTED_BASE(public Bar) { 49 | // 50 | // MSVC Compiler warning C4275: 51 | // non dll-interface class 'Bar' used as base for dll-interface class 'Foo'. 52 | // Note that this is intended to be used only when no access to the base class' 53 | // static data is done through derived classes or inline methods. 
For more info, 54 | // see http://msdn.microsoft.com/en-us/library/3tdb471s(VS.80).aspx 55 | #define NON_EXPORTED_BASE(code) MSVC_SUPPRESS_WARNING(4275) \ 56 | code 57 | 58 | #else // Not MSVC 59 | 60 | #define MSVC_SUPPRESS_WARNING(n) 61 | #define MSVC_PUSH_DISABLE_WARNING(n) 62 | #define MSVC_PUSH_WARNING_LEVEL(n) 63 | #define MSVC_POP_WARNING() 64 | #define MSVC_DISABLE_OPTIMIZE() 65 | #define MSVC_ENABLE_OPTIMIZE() 66 | #define NON_EXPORTED_BASE(code) code 67 | 68 | #endif // COMPILER_MSVC 69 | 70 | 71 | // Annotate a variable indicating it's ok if the variable is not used. 72 | // (Typically used to silence a compiler warning when the assignment 73 | // is important for some other reason.) 74 | // Use like: 75 | // int x = ...; 76 | // ALLOW_UNUSED_LOCAL(x); 77 | #define ALLOW_UNUSED_LOCAL(x) false ? (void)x : (void)0 78 | 79 | // Annotate a typedef or function indicating it's ok if it's not used. 80 | // Use like: 81 | // typedef Foo Bar ALLOW_UNUSED_TYPE; 82 | #if defined(COMPILER_GCC) || defined(__clang__) 83 | #define ALLOW_UNUSED_TYPE __attribute__((unused)) 84 | #else 85 | #define ALLOW_UNUSED_TYPE 86 | #endif 87 | 88 | // Annotate a function indicating it should not be inlined. 89 | // Use like: 90 | // NOINLINE void DoStuff() { ... } 91 | #if defined(COMPILER_GCC) 92 | #define NOINLINE __attribute__((noinline)) 93 | #elif defined(COMPILER_MSVC) 94 | #define NOINLINE __declspec(noinline) 95 | #else 96 | #define NOINLINE 97 | #endif 98 | 99 | // Specify memory alignment for structs, classes, etc. 100 | // Use like: 101 | // class ALIGNAS(16) MyClass { ... } 102 | // ALIGNAS(16) int array[4]; 103 | #if defined(COMPILER_MSVC) 104 | #define ALIGNAS(byte_alignment) __declspec(align(byte_alignment)) 105 | #elif defined(COMPILER_GCC) 106 | #define ALIGNAS(byte_alignment) __attribute__((aligned(byte_alignment))) 107 | #endif 108 | 109 | // Return the byte alignment of the given type (available at compile time). 
110 | // Use like: 111 | // ALIGNOF(int32) // this would be 4 112 | #if defined(COMPILER_MSVC) 113 | #define ALIGNOF(type) __alignof(type) 114 | #elif defined(COMPILER_GCC) 115 | #define ALIGNOF(type) __alignof__(type) 116 | #endif 117 | 118 | // Annotate a function indicating the caller must examine the return value. 119 | // Use like: 120 | // int foo() WARN_UNUSED_RESULT; 121 | // To explicitly ignore a result, see |ignore_result()| in base/macros.h. 122 | // TODO(dcheng): Update //third_party/webrtc's macro definition to match. 123 | #undef WARN_UNUSED_RESULT 124 | #if defined(COMPILER_GCC) || defined(__clang__) 125 | #define WARN_UNUSED_RESULT __attribute__((warn_unused_result)) 126 | #else 127 | #define WARN_UNUSED_RESULT 128 | #endif 129 | 130 | // Tell the compiler a function is using a printf-style format string. 131 | // |format_param| is the one-based index of the format string parameter; 132 | // |dots_param| is the one-based index of the "..." parameter. 133 | // For v*printf functions (which take a va_list), pass 0 for dots_param. 134 | // (This is undocumented but matches what the system C headers do.) 135 | #if defined(COMPILER_GCC) 136 | #define PRINTF_FORMAT(format_param, dots_param) \ 137 | __attribute__((format(printf, format_param, dots_param))) 138 | #else 139 | #define PRINTF_FORMAT(format_param, dots_param) 140 | #endif 141 | 142 | // WPRINTF_FORMAT is the same, but for wide format strings. 143 | // This doesn't appear to yet be implemented in any compiler. 144 | // See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38308 . 145 | #define WPRINTF_FORMAT(format_param, dots_param) 146 | // If available, it would look like: 147 | // __attribute__((format(wprintf, format_param, dots_param))) 148 | 149 | // MemorySanitizer annotations. 150 | #if defined(MEMORY_SANITIZER) && !defined(OS_NACL) 151 | #include 152 | 153 | // Mark a memory region fully initialized. 
154 | // Use this to annotate code that deliberately reads uninitialized data, for 155 | // example a GC scavenging root set pointers from the stack. 156 | #define MSAN_UNPOISON(p, size) __msan_unpoison(p, size) 157 | 158 | // Check a memory region for initializedness, as if it was being used here. 159 | // If any bits are uninitialized, crash with an MSan report. 160 | // Use this to sanitize data which MSan won't be able to track, e.g. before 161 | // passing data to another process via shared memory. 162 | #define MSAN_CHECK_MEM_IS_INITIALIZED(p, size) \ 163 | __msan_check_mem_is_initialized(p, size) 164 | #else // MEMORY_SANITIZER 165 | #define MSAN_UNPOISON(p, size) 166 | #define MSAN_CHECK_MEM_IS_INITIALIZED(p, size) 167 | #endif // MEMORY_SANITIZER 168 | 169 | // Macro useful for writing cross-platform function pointers. 170 | #if !defined(CDECL) 171 | #if defined(OS_WIN) 172 | #define CDECL __cdecl 173 | #else // defined(OS_WIN) 174 | #define CDECL 175 | #endif // defined(OS_WIN) 176 | #endif // !defined(CDECL) 177 | 178 | // Macro for hinting that an expression is likely to be false. 179 | #if !defined(UNLIKELY) 180 | #if defined(COMPILER_GCC) 181 | #define UNLIKELY(x) __builtin_expect(!!(x), 0) 182 | #else 183 | #define UNLIKELY(x) (x) 184 | #endif // defined(COMPILER_GCC) 185 | #endif // !defined(UNLIKELY) 186 | 187 | #endif // BASE_COMPILER_SPECIFIC_H_ 188 | -------------------------------------------------------------------------------- /vendor/gurl/base/macros.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | // This file contains macros and macro-like constructs (e.g., templates) that 6 | // are commonly used throughout Chromium source. 
(It may also contain things 7 | // that are closely related to things that are commonly used that belong in this 8 | // file.) 9 | 10 | #ifndef BASE_MACROS_H_ 11 | #define BASE_MACROS_H_ 12 | 13 | #include // For size_t. 14 | #include // For memcpy. 15 | 16 | // Put this in the declarations for a class to be uncopyable. 17 | #define DISALLOW_COPY(TypeName) \ 18 | TypeName(const TypeName&) = delete 19 | 20 | // Put this in the declarations for a class to be unassignable. 21 | #define DISALLOW_ASSIGN(TypeName) \ 22 | void operator=(const TypeName&) = delete 23 | 24 | // A macro to disallow the copy constructor and operator= functions 25 | // This should be used in the private: declarations for a class 26 | #define DISALLOW_COPY_AND_ASSIGN(TypeName) \ 27 | TypeName(const TypeName&); \ 28 | void operator=(const TypeName&) 29 | 30 | // An older, deprecated, politically incorrect name for the above. 31 | // NOTE: The usage of this macro was banned from our code base, but some 32 | // third_party libraries are yet using it. 33 | // TODO(tfarina): Figure out how to fix the usage of this macro in the 34 | // third_party libraries and get rid of it. 35 | #define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName) 36 | 37 | // A macro to disallow all the implicit constructors, namely the 38 | // default constructor, copy constructor and operator= functions. 39 | // 40 | // This should be used in the private: declarations for a class 41 | // that wants to prevent anyone from instantiating it. This is 42 | // especially useful for classes containing only static methods. 43 | #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ 44 | TypeName() = delete; \ 45 | DISALLOW_COPY_AND_ASSIGN(TypeName) 46 | 47 | // The arraysize(arr) macro returns the # of elements in an array arr. 48 | // The expression is a compile-time constant, and therefore can be 49 | // used in defining new arrays, for example. 
If you use arraysize on 50 | // a pointer by mistake, you will get a compile-time error. 51 | 52 | // This template function declaration is used in defining arraysize. 53 | // Note that the function doesn't need an implementation, as we only 54 | // use its type. 55 | template char (&ArraySizeHelper(T (&array)[N]))[N]; 56 | #define arraysize(array) (sizeof(ArraySizeHelper(array))) 57 | 58 | // The COMPILE_ASSERT macro can be used to verify that a compile time 59 | // expression is true. For example, you could use it to verify the 60 | // size of a static array: 61 | // 62 | // COMPILE_ASSERT(arraysize(content_type_names) == CONTENT_NUM_TYPES, 63 | // content_type_names_incorrect_size); 64 | // 65 | // or to make sure a struct is smaller than a certain size: 66 | // 67 | // COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large); 68 | // 69 | // The second argument to the macro is the name of the variable. If 70 | // the expression is false, most compilers will issue a warning/error 71 | // containing the name of the variable. 72 | 73 | #undef COMPILE_ASSERT 74 | #define COMPILE_ASSERT(expr, msg) static_assert(expr, #msg) 75 | 76 | // bit_cast is a template function that implements the 77 | // equivalent of "*reinterpret_cast(&source)". We need this in 78 | // very low-level functions like the protobuf library and fast math 79 | // support. 80 | // 81 | // float f = 3.14159265358979; 82 | // int i = bit_cast(f); 83 | // // i = 0x40490fdb 84 | // 85 | // The classical address-casting method is: 86 | // 87 | // // WRONG 88 | // float f = 3.14159265358979; // WRONG 89 | // int i = * reinterpret_cast(&f); // WRONG 90 | // 91 | // The address-casting method actually produces undefined behavior 92 | // according to ISO C++ specification section 3.10 -15 -. Roughly, this 93 | // section says: if an object in memory has one type, and a program 94 | // accesses it with a different type, then the result is undefined 95 | // behavior for most values of "different type". 
96 | // 97 | // This is true for any cast syntax, either *(int*)&f or 98 | // *reinterpret_cast(&f). And it is particularly true for 99 | // conversions between integral lvalues and floating-point lvalues. 100 | // 101 | // The purpose of 3.10 -15- is to allow optimizing compilers to assume 102 | // that expressions with different types refer to different memory. gcc 103 | // 4.0.1 has an optimizer that takes advantage of this. So a 104 | // non-conforming program quietly produces wildly incorrect output. 105 | // 106 | // The problem is not the use of reinterpret_cast. The problem is type 107 | // punning: holding an object in memory of one type and reading its bits 108 | // back using a different type. 109 | // 110 | // The C++ standard is more subtle and complex than this, but that 111 | // is the basic idea. 112 | // 113 | // Anyways ... 114 | // 115 | // bit_cast<> calls memcpy() which is blessed by the standard, 116 | // especially by the example in section 3.9 . Also, of course, 117 | // bit_cast<> wraps up the nasty logic in one place. 118 | // 119 | // Fortunately memcpy() is very fast. In optimized mode, with a 120 | // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline 121 | // code with the minimal amount of data movement. On a 32-bit system, 122 | // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8) 123 | // compiles to two loads and two stores. 124 | // 125 | // I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1. 126 | // 127 | // WARNING: if Dest or Source is a non-POD type, the result of the memcpy 128 | // is likely to surprise you. 129 | 130 | template 131 | inline Dest bit_cast(const Source& source) { 132 | COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), VerifySizesAreEqual); 133 | 134 | Dest dest; 135 | memcpy(&dest, &source, sizeof(dest)); 136 | return dest; 137 | } 138 | 139 | // Used to explicitly mark the return value of a function as unused. 
If you are 140 | // really sure you don't want to do anything with the return value of a function 141 | // that has been marked WARN_UNUSED_RESULT, wrap it with this. Example: 142 | // 143 | // scoped_ptr my_var = ...; 144 | // if (TakeOwnership(my_var.get()) == SUCCESS) 145 | // ignore_result(my_var.release()); 146 | // 147 | template 148 | inline void ignore_result(const T&) { 149 | } 150 | 151 | // The following enum should be used only as a constructor argument to indicate 152 | // that the variable has static storage class, and that the constructor should 153 | // do nothing to its state. It indicates to the reader that it is legal to 154 | // declare a static instance of the class, provided the constructor is given 155 | // the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a 156 | // static variable that has a constructor or a destructor because invocation 157 | // order is undefined. However, IF the type can be initialized by filling with 158 | // zeroes (which the loader does for static variables), AND the destructor also 159 | // does nothing to the storage, AND there are no virtual methods, then a 160 | // constructor declared as 161 | // explicit MyClass(base::LinkerInitialized x) {} 162 | // and invoked as 163 | // static MyClass my_variable_name(base::LINKER_INITIALIZED); 164 | namespace base { 165 | enum LinkerInitialized { LINKER_INITIALIZED }; 166 | 167 | // Use these to declare and define a static local variable (static T;) so that 168 | // it is leaked so that its destructors are not called at exit. If you need 169 | // thread-safe initialization, use base/lazy_instance.h instead. 
170 | #define CR_DEFINE_STATIC_LOCAL(type, name, arguments) \ 171 | static type& name = *new type arguments 172 | 173 | } // base 174 | 175 | #endif // BASE_MACROS_H_ 176 | -------------------------------------------------------------------------------- /vendor/gurl/base/move.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef BASE_MOVE_H_ 6 | #define BASE_MOVE_H_ 7 | 8 | #include "base/compiler_specific.h" 9 | 10 | // Macro with the boilerplate that makes a type move-only in C++03. 11 | // 12 | // USAGE 13 | // 14 | // This macro should be used instead of DISALLOW_COPY_AND_ASSIGN to create 15 | // a "move-only" type. Unlike DISALLOW_COPY_AND_ASSIGN, this macro should be 16 | // the first line in a class declaration. 17 | // 18 | // A class using this macro must call .Pass() (or somehow be an r-value already) 19 | // before it can be: 20 | // 21 | // * Passed as a function argument 22 | // * Used as the right-hand side of an assignment 23 | // * Returned from a function 24 | // 25 | // Each class will still need to define their own "move constructor" and "move 26 | // operator=" to make this useful. Here's an example of the macro, the move 27 | // constructor, and the move operator= from the scoped_ptr class: 28 | // 29 | // template 30 | // class scoped_ptr { 31 | // MOVE_ONLY_TYPE_FOR_CPP_03(scoped_ptr, RValue) 32 | // public: 33 | // scoped_ptr(RValue& other) : ptr_(other.release()) { } 34 | // scoped_ptr& operator=(RValue& other) { 35 | // swap(other); 36 | // return *this; 37 | // } 38 | // }; 39 | // 40 | // Note that the constructor must NOT be marked explicit. 41 | // 42 | // For consistency, the second parameter to the macro should always be RValue 43 | // unless you have a strong reason to do otherwise. 
It is only exposed as a 44 | // macro parameter so that the move constructor and move operator= don't look 45 | // like they're using a phantom type. 46 | // 47 | // 48 | // HOW THIS WORKS 49 | // 50 | // For a thorough explanation of this technique, see: 51 | // 52 | // http://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Move_Constructor 53 | // 54 | // The summary is that we take advantage of 2 properties: 55 | // 56 | // 1) non-const references will not bind to r-values. 57 | // 2) C++ can apply one user-defined conversion when initializing a 58 | // variable. 59 | // 60 | // The first lets us disable the copy constructor and assignment operator 61 | // by declaring private version of them with a non-const reference parameter. 62 | // 63 | // For l-values, direct initialization still fails like in 64 | // DISALLOW_COPY_AND_ASSIGN because the copy constructor and assignment 65 | // operators are private. 66 | // 67 | // For r-values, the situation is different. The copy constructor and 68 | // assignment operator are not viable due to (1), so we are trying to call 69 | // a non-existent constructor and non-existing operator= rather than a private 70 | // one. Since we have not committed an error quite yet, we can provide an 71 | // alternate conversion sequence and a constructor. We add 72 | // 73 | // * a private struct named "RValue" 74 | // * a user-defined conversion "operator RValue()" 75 | // * a "move constructor" and "move operator=" that take the RValue& as 76 | // their sole parameter. 77 | // 78 | // Only r-values will trigger this sequence and execute our "move constructor" 79 | // or "move operator=." L-values will match the private copy constructor and 80 | // operator= first giving a "private in this context" error. This combination 81 | // gives us a move-only type. 
82 | // 83 | // For signaling a destructive transfer of data from an l-value, we provide a 84 | // method named Pass() which creates an r-value for the current instance 85 | // triggering the move constructor or move operator=. 86 | // 87 | // Another way to get r-values is to use the result of an expression like a 88 | // function call. 89 | // 90 | // Here's an example with comments explaining what gets triggered where: 91 | // 92 | // class Foo { 93 | // MOVE_ONLY_TYPE_FOR_CPP_03(Foo, RValue); 94 | // 95 | // public: 96 | // ... API ... 97 | // Foo(RValue other); // Move constructor. 98 | // Foo& operator=(RValue rhs); // Move operator= 99 | // }; 100 | // 101 | // Foo MakeFoo(); // Function that returns a Foo. 102 | // 103 | // Foo f; 104 | // Foo f_copy(f); // ERROR: Foo(Foo&) is private in this context. 105 | // Foo f_assign; 106 | // f_assign = f; // ERROR: operator=(Foo&) is private in this context. 107 | // 108 | // 109 | // Foo f(MakeFoo()); // R-value so alternate conversion executed. 110 | // Foo f_copy(f.Pass()); // R-value so alternate conversion executed. 111 | // f = f_copy.Pass(); // R-value so alternate conversion executed. 112 | // 113 | // 114 | // IMPLEMENTATION SUBTLETIES WITH RValue 115 | // 116 | // The RValue struct is just a container for a pointer back to the original 117 | // object. It should only ever be created as a temporary, and no external 118 | // class should ever declare it or use it in a parameter. 119 | // 120 | // It is tempting to want to use the RValue type in function parameters, but 121 | // excluding the limited usage here for the move constructor and move 122 | // operator=, doing so would mean that the function could take both r-values 123 | // and l-values equally which is unexpected. See COMPARED TO Boost.Move for 124 | // more details. 125 | // 126 | // An alternate, and incorrect, implementation of the RValue class used by 127 | // Boost.Move makes RValue a fieldless child of the move-only type.
RValue& 128 | // is then used in place of RValue in the various operators. The RValue& is 129 | // "created" by doing *reinterpret_cast(this). This has the appeal 130 | // of never creating a temporary RValue struct even with optimizations 131 | // disabled. Also, by virtue of inheritance you can treat the RValue 132 | // reference as if it were the move-only type itself. Unfortunately, 133 | // using the result of this reinterpret_cast<> is actually undefined behavior 134 | // due to C++98 5.2.10.7. In certain compilers (e.g., NaCl) the optimizer 135 | // will generate non-working code. 136 | // 137 | // In optimized builds, both implementations generate the same assembly so we 138 | // choose the one that adheres to the standard. 139 | // 140 | // 141 | // WHY HAVE typedef void MoveOnlyTypeForCPP03 142 | // 143 | // Callback<>/Bind() needs to understand movable-but-not-copyable semantics 144 | // to call .Pass() appropriately when it is expected to transfer the value. 145 | // The cryptic typedef MoveOnlyTypeForCPP03 is added to make this check 146 | // easy and automatic in helper templates for Callback<>/Bind(). 147 | // See IsMoveOnlyType template and its usage in base/callback_internal.h 148 | // for more details. 149 | // 150 | // 151 | // COMPARED TO C++11 152 | // 153 | // In C++11, you would implement this functionality using an r-value reference 154 | // and our .Pass() method would be replaced with a call to std::move(). 155 | // 156 | // This emulation also has a deficiency where it uses up the single 157 | // user-defined conversion allowed by C++ during initialization. This can 158 | // cause problems in some API edge cases. For instance, in scoped_ptr, it is 159 | // impossible to make a function "void Foo(scoped_ptr p)" accept a 160 | // value of type scoped_ptr even if you add a constructor to 161 | // scoped_ptr<> that would make it look like it should work. C++11 does not 162 | // have this deficiency. 
163 | // 164 | // 165 | // COMPARED TO Boost.Move 166 | // 167 | // Our implementation is similar to Boost.Move, but we keep the RValue struct 168 | // private to the move-only type, and we don't use the reinterpret_cast<> hack. 169 | // 170 | // In Boost.Move, RValue is the boost::rv<> template. This type can be used 171 | // when writing APIs like: 172 | // 173 | // void MyFunc(boost::rv<Foo>& f) 174 | // 175 | // that can take advantage of rv<> to avoid extra copies of a type. However you 176 | // would still be able to call this version of MyFunc with an l-value: 177 | // 178 | // Foo f; 179 | // MyFunc(f); // Uh oh, we probably just destroyed |f| w/o calling Pass(). 180 | // 181 | // unless someone is very careful to also declare a parallel overload like: 182 | // 183 | // void MyFunc(const Foo& f) 184 | // 185 | // that would catch the l-values first. This was declared unsafe in C++11 and 186 | // a C++11 compiler will explicitly fail MyFunc(f). Unfortunately, we cannot 187 | // ensure this in C++03. 188 | // 189 | // Since we have no need for writing such APIs yet, our implementation keeps 190 | // RValue private and uses a .Pass() method to do the conversion instead of 191 | // trying to write a version of "std::move()." Writing an API like std::move() 192 | // would require the RValue struct to be public. 193 | // 194 | // 195 | // CAVEATS 196 | // 197 | // If you include a move-only type as a field inside a class that does not 198 | // explicitly declare a copy constructor, the containing class's implicit 199 | // copy constructor will change from Containing(const Containing&) to 200 | // Containing(Containing&). This can cause some unexpected errors. 201 | // 202 | // http://llvm.org/bugs/show_bug.cgi?id=11528 203 | // 204 | // The workaround is to explicitly declare your copy constructor.
205 | // 206 | #define MOVE_ONLY_TYPE_FOR_CPP_03(type, rvalue_type) \ 207 | private: \ 208 | struct rvalue_type { \ 209 | explicit rvalue_type(type* object) : object(object) {} \ 210 | type* object; \ 211 | }; \ 212 | type(type&); \ 213 | void operator=(type&); \ 214 | public: \ 215 | operator rvalue_type() { return rvalue_type(this); } \ 216 | type Pass() WARN_UNUSED_RESULT { return type(rvalue_type(this)); } \ 217 | typedef void MoveOnlyTypeForCPP03; \ 218 | private: 219 | 220 | #define MOVE_ONLY_TYPE_WITH_MOVE_CONSTRUCTOR_FOR_CPP_03(type) \ 221 | private: \ 222 | type(const type&); \ 223 | void operator=(const type&); \ 224 | public: \ 225 | type&& Pass() WARN_UNUSED_RESULT { return static_cast(*this); } \ 226 | typedef void MoveOnlyTypeForCPP03; \ 227 | private: 228 | 229 | #define TYPE_WITH_MOVE_CONSTRUCTOR_FOR_CPP_03(type) \ 230 | public: \ 231 | type&& Pass() WARN_UNUSED_RESULT { return static_cast(*this); } \ 232 | private: 233 | 234 | #endif // BASE_MOVE_H_ 235 | -------------------------------------------------------------------------------- /vendor/gurl/base/strings/cscope.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commonsearch/urlparse4/fda910309aa189d57473dbb12e2d2acde49c1736/vendor/gurl/base/strings/cscope.out -------------------------------------------------------------------------------- /vendor/gurl/base/strings/string16.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 
4 | 5 | #include "base/strings/string16.h" 6 | 7 | #if defined(WCHAR_T_IS_UTF16) 8 | 9 | #error This file should not be used on 2-byte wchar_t systems 10 | // If this winds up being needed on 2-byte wchar_t systems, either the 11 | // definitions below can be used, or the host system's wide character 12 | // functions like wmemcmp can be wrapped. 13 | 14 | #elif defined(WCHAR_T_IS_UTF32) 15 | 16 | #include 17 | 18 | //#include "base/strings/utf_string_conversions.h" 19 | 20 | namespace base { 21 | 22 | int c16memcmp(const char16* s1, const char16* s2, size_t n) { 23 | // We cannot call memcmp because that changes the semantics. 24 | while (n-- > 0) { 25 | if (*s1 != *s2) { 26 | // We cannot use (*s1 - *s2) because char16 is unsigned. 27 | return ((*s1 < *s2) ? -1 : 1); 28 | } 29 | ++s1; 30 | ++s2; 31 | } 32 | return 0; 33 | } 34 | 35 | size_t c16len(const char16* s) { 36 | const char16 *s_orig = s; 37 | while (*s) { 38 | ++s; 39 | } 40 | return s - s_orig; 41 | } 42 | 43 | const char16* c16memchr(const char16* s, char16 c, size_t n) { 44 | while (n-- > 0) { 45 | if (*s == c) { 46 | return s; 47 | } 48 | ++s; 49 | } 50 | return 0; 51 | } 52 | 53 | char16* c16memmove(char16* s1, const char16* s2, size_t n) { 54 | return static_cast(memmove(s1, s2, n * sizeof(char16))); 55 | } 56 | 57 | char16* c16memcpy(char16* s1, const char16* s2, size_t n) { 58 | return static_cast(memcpy(s1, s2, n * sizeof(char16))); 59 | } 60 | 61 | char16* c16memset(char16* s, char16 c, size_t n) { 62 | char16 *s_orig = s; 63 | while (n-- > 0) { 64 | *s = c; 65 | ++s; 66 | } 67 | return s_orig; 68 | } 69 | /* 70 | std::ostream& operator<<(std::ostream& out, const string16& str) { 71 | return out << UTF16ToUTF8(str); 72 | }*/ 73 | /* 74 | void PrintTo(const string16& str, std::ostream* out) { 75 | *out << str; 76 | }*/ 77 | 78 | } // namespace base 79 | 80 | template class std::basic_string; 81 | 82 | #endif // WCHAR_T_IS_UTF32 83 | 
-------------------------------------------------------------------------------- /vendor/gurl/base/strings/string16.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef BASE_STRINGS_STRING16_H_ 6 | #define BASE_STRINGS_STRING16_H_ 7 | 8 | // WHAT: 9 | // A version of std::basic_string that provides 2-byte characters even when 10 | // wchar_t is not implemented as a 2-byte type. You can access this class as 11 | // string16. We also define char16, which string16 is based upon. 12 | // 13 | // WHY: 14 | // On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2 15 | // data. Plenty of existing code operates on strings encoded as UTF-16. 16 | // 17 | // On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make 18 | // it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails 19 | // at run time, because it calls some functions (like wcslen) that come from 20 | // the system's native C library -- which was built with a 4-byte wchar_t! 21 | // It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's 22 | // entirely improper on those systems where the encoding of wchar_t is defined 23 | // as UTF-32. 24 | // 25 | // Here, we define string16, which is similar to std::wstring but replaces all 26 | // libc functions with custom, 2-byte-char compatible routines. It is capable 27 | // of carrying UTF-16-encoded data. 
28 | 29 | #include 30 | #include 31 | 32 | #include "base/base_export.h" 33 | #include "base/basictypes.h" 34 | 35 | #if defined(WCHAR_T_IS_UTF16) 36 | 37 | namespace base { 38 | 39 | typedef wchar_t char16; 40 | typedef std::wstring string16; 41 | typedef std::char_traits string16_char_traits; 42 | 43 | } // namespace base 44 | 45 | #elif defined(WCHAR_T_IS_UTF32) 46 | 47 | namespace base { 48 | 49 | typedef uint16 char16; 50 | 51 | // char16 versions of the functions required by string16_char_traits; these 52 | // are based on the wide character functions of similar names ("w" or "wcs" 53 | // instead of "c16"). 54 | BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n); 55 | BASE_EXPORT size_t c16len(const char16* s); 56 | BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n); 57 | BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n); 58 | BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n); 59 | BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n); 60 | 61 | struct string16_char_traits { 62 | typedef char16 char_type; 63 | typedef int int_type; 64 | 65 | // int_type needs to be able to hold each possible value of char_type, and in 66 | // addition, the distinct value of eof(). 
67 | COMPILE_ASSERT(sizeof(int_type) > sizeof(char_type), unexpected_type_width); 68 | 69 | typedef std::streamoff off_type; 70 | typedef mbstate_t state_type; 71 | typedef std::fpos pos_type; 72 | 73 | static void assign(char_type& c1, const char_type& c2) { 74 | c1 = c2; 75 | } 76 | 77 | static bool eq(const char_type& c1, const char_type& c2) { 78 | return c1 == c2; 79 | } 80 | static bool lt(const char_type& c1, const char_type& c2) { 81 | return c1 < c2; 82 | } 83 | 84 | static int compare(const char_type* s1, const char_type* s2, size_t n) { 85 | return c16memcmp(s1, s2, n); 86 | } 87 | 88 | static size_t length(const char_type* s) { 89 | return c16len(s); 90 | } 91 | 92 | static const char_type* find(const char_type* s, size_t n, 93 | const char_type& a) { 94 | return c16memchr(s, a, n); 95 | } 96 | 97 | static char_type* move(char_type* s1, const char_type* s2, size_t n) { 98 | return c16memmove(s1, s2, n); 99 | } 100 | 101 | static char_type* copy(char_type* s1, const char_type* s2, size_t n) { 102 | return c16memcpy(s1, s2, n); 103 | } 104 | 105 | static char_type* assign(char_type* s, size_t n, char_type a) { 106 | return c16memset(s, a, n); 107 | } 108 | 109 | static int_type not_eof(const int_type& c) { 110 | return eq_int_type(c, eof()) ? 0 : c; 111 | } 112 | 113 | static char_type to_char_type(const int_type& c) { 114 | return char_type(c); 115 | } 116 | 117 | static int_type to_int_type(const char_type& c) { 118 | return int_type(c); 119 | } 120 | 121 | static bool eq_int_type(const int_type& c1, const int_type& c2) { 122 | return c1 == c2; 123 | } 124 | 125 | static int_type eof() { 126 | return static_cast(EOF); 127 | } 128 | }; 129 | 130 | typedef std::basic_string string16; 131 | 132 | BASE_EXPORT extern std::ostream& operator<<(std::ostream& out, 133 | const string16& str); 134 | 135 | // This is required by googletest to print a readable output on test failures. 
136 | BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out); 137 | 138 | } // namespace base 139 | 140 | // The string class will be explicitly instantiated only once, in string16.cc. 141 | // 142 | // std::basic_string<> in GNU libstdc++ contains a static data member, 143 | // _S_empty_rep_storage, to represent empty strings. When an operation such 144 | // as assignment or destruction is performed on a string, causing its existing 145 | // data member to be invalidated, it must not be freed if this static data 146 | // member is being used. Otherwise, it counts as an attempt to free static 147 | // (and not allocated) data, which is a memory error. 148 | // 149 | // Generally, due to C++ template magic, _S_empty_rep_storage will be marked 150 | // as a coalesced symbol, meaning that the linker will combine multiple 151 | // instances into a single one when generating output. 152 | // 153 | // If a string class is used by multiple shared libraries, a problem occurs. 154 | // Each library will get its own copy of _S_empty_rep_storage. When strings 155 | // are passed across a library boundary for alteration or destruction, memory 156 | // errors will result. GNU libstdc++ contains a configuration option, 157 | // --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which 158 | // disables the static data member optimization, but it's a good optimization 159 | // and non-STL code is generally at the mercy of the system's STL 160 | // configuration. Fully-dynamic strings are not the default for GNU libstdc++ 161 | // libstdc++ itself or for the libstdc++ installations on the systems we care 162 | // about, such as Mac OS X and relevant flavors of Linux. 163 | // 164 | // See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 . 165 | // 166 | // To avoid problems, string classes need to be explicitly instantiated only 167 | // once, in exactly one library. All other string users see it via an "extern" 168 | // declaration. 
This is precisely how GNU libstdc++ handles 169 | // std::basic_string (string) and std::basic_string (wstring). 170 | // 171 | // This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2), 172 | // in which the linker does not fully coalesce symbols when dead code 173 | // stripping is enabled. This bug causes the memory errors described above 174 | // to occur even when a std::basic_string<> does not cross shared library 175 | // boundaries, such as in statically-linked executables. 176 | // 177 | // TODO(mark): File this bug with Apple and update this note with a bug number. 178 | 179 | extern template 180 | class BASE_EXPORT std::basic_string; 181 | 182 | #endif // WCHAR_T_IS_UTF32 183 | 184 | #endif // BASE_STRINGS_STRING16_H_ 185 | -------------------------------------------------------------------------------- /vendor/gurl/base/strings/string_util.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #include "base/strings/string_util.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | #include 20 | 21 | #include "base/basictypes.h" 22 | #include "build/build_config.h" 23 | 24 | namespace base { 25 | 26 | namespace { 27 | 28 | template 29 | static inline bool DoLowerCaseEqualsASCII(BasicStringPiece str, 30 | StringPiece lowercase_ascii) { 31 | if (str.size() != lowercase_ascii.size()) 32 | return false; 33 | for (size_t i = 0; i < str.size(); i++) { 34 | if (ToLowerASCII(str[i]) != lowercase_ascii[i]) 35 | return false; 36 | } 37 | return true; 38 | } 39 | 40 | } // nampspace 41 | 42 | // Assuming that a pointer is the size of a "machine word", then 43 | // uintptr_t is an integer type that is also a machine word. 
44 | typedef uintptr_t MachineWord; 45 | const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1; 46 | 47 | inline bool IsAlignedToMachineWord(const void* pointer) { 48 | return !(reinterpret_cast(pointer) & kMachineWordAlignmentMask); 49 | } 50 | 51 | template inline T* AlignToMachineWord(T* pointer) { 52 | return reinterpret_cast(reinterpret_cast(pointer) & 53 | ~kMachineWordAlignmentMask); 54 | } 55 | 56 | template struct NonASCIIMask; 57 | template<> struct NonASCIIMask<4, char16> { 58 | static inline uint32_t value() { return 0xFF80FF80U; } 59 | }; 60 | template<> struct NonASCIIMask<4, char> { 61 | static inline uint32_t value() { return 0x80808080U; } 62 | }; 63 | template<> struct NonASCIIMask<8, char16> { 64 | static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; } 65 | }; 66 | template<> struct NonASCIIMask<8, char> { 67 | static inline uint64_t value() { return 0x8080808080808080ULL; } 68 | }; 69 | #if defined(WCHAR_T_IS_UTF32) 70 | template<> struct NonASCIIMask<4, wchar_t> { 71 | static inline uint32_t value() { return 0xFFFFFF80U; } 72 | }; 73 | template<> struct NonASCIIMask<8, wchar_t> { 74 | static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; } 75 | }; 76 | #endif // WCHAR_T_IS_UTF32 77 | 78 | template 79 | inline bool DoIsStringASCII(const Char* characters, size_t length) { 80 | MachineWord all_char_bits = 0; 81 | const Char* end = characters + length; 82 | 83 | // Prologue: align the input. 84 | while (!IsAlignedToMachineWord(characters) && characters != end) { 85 | all_char_bits |= *characters; 86 | ++characters; 87 | } 88 | 89 | // Compare the values of CPU word size. 90 | const Char* word_end = AlignToMachineWord(end); 91 | const size_t loop_increment = sizeof(MachineWord) / sizeof(Char); 92 | while (characters < word_end) { 93 | all_char_bits |= *(reinterpret_cast(characters)); 94 | characters += loop_increment; 95 | } 96 | 97 | // Process the remaining bytes. 
98 | while (characters != end) { 99 | all_char_bits |= *characters; 100 | ++characters; 101 | } 102 | 103 | MachineWord non_ascii_bit_mask = 104 | NonASCIIMask::value(); 105 | return !(all_char_bits & non_ascii_bit_mask); 106 | } 107 | 108 | 109 | bool IsStringASCII(const StringPiece& str) { 110 | return DoIsStringASCII(str.data(), str.length()); 111 | } 112 | 113 | bool IsStringASCII(const StringPiece16& str) { 114 | return DoIsStringASCII(str.data(), str.length()); 115 | } 116 | 117 | bool IsStringASCII(const string16& str) { 118 | return DoIsStringASCII(str.data(), str.length()); 119 | } 120 | 121 | #if defined(WCHAR_T_IS_UTF32) 122 | bool IsStringASCII(const std::wstring& str) { 123 | return DoIsStringASCII(str.data(), str.length()); 124 | } 125 | #endif 126 | 127 | bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) { 128 | return DoLowerCaseEqualsASCII(str, lowercase_ascii); 129 | } 130 | 131 | bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) { 132 | return DoLowerCaseEqualsASCII(str, lowercase_ascii); 133 | } 134 | 135 | } // namespace base 136 | -------------------------------------------------------------------------------- /vendor/gurl/base/strings/string_util.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | // 5 | // This file defines utility functions for working with strings. 6 | 7 | #ifndef BASE_STRINGS_STRING_UTIL_H_ 8 | #define BASE_STRINGS_STRING_UTIL_H_ 9 | 10 | #include 11 | #include // va_list 12 | 13 | #include 14 | #include 15 | 16 | #include "base/base_export.h" 17 | #include "base/basictypes.h" 18 | //#include "base/compiler_specific.h" 19 | #include "base/strings/string16.h" 20 | #include "base/strings/string_piece.h" // For implicit conversions. 
21 | 22 | namespace base { 23 | 24 | // ASCII-specific tolower. The standard library's tolower is locale sensitive, 25 | // so we don't want to use it here. 26 | inline char ToLowerASCII(char c) { 27 | return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; 28 | } 29 | inline char16 ToLowerASCII(char16 c) { 30 | return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; 31 | } 32 | 33 | // Compare the lower-case form of the given string against the given 34 | // previously-lower-cased ASCII string (typically a constant). 35 | BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str, 36 | StringPiece lowecase_ascii); 37 | BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str, 38 | StringPiece lowecase_ascii); 39 | 40 | // Returns true if the specified string matches the criteria. How can a wide 41 | // string be 8-bit or UTF8? It contains only characters that are < 256 (in the 42 | // first case) or characters that use only 8-bits and whose 8-bit 43 | // representation looks like a UTF-8 string (the second case). 44 | // 45 | // Note that IsStringUTF8 checks not only if the input is structurally 46 | // valid but also if it doesn't contain any non-character codepoint 47 | // (e.g. U+FFFE). It's done on purpose because all the existing callers want 48 | // to have the maximum 'discriminating' power from other encodings. If 49 | // there's a use case for just checking the structural validity, we have to 50 | // add a new function for that. 51 | // 52 | // IsStringASCII assumes the input is likely all ASCII, and does not leave early 53 | // if it is not the case. 54 | BASE_EXPORT bool IsStringUTF8(const StringPiece& str); 55 | BASE_EXPORT bool IsStringASCII(const StringPiece& str); 56 | BASE_EXPORT bool IsStringASCII(const StringPiece16& str); 57 | // A convenience adaptor for WebStrings, as they don't convert into 58 | // StringPieces directly. 
59 | BASE_EXPORT bool IsStringASCII(const string16& str); 60 | #if defined(WCHAR_T_IS_UTF32) 61 | BASE_EXPORT bool IsStringASCII(const std::wstring& str); 62 | #endif 63 | 64 | } // namespace base 65 | 66 | #endif // BASE_STRINGS_STRING_UTIL_H_ 67 | -------------------------------------------------------------------------------- /vendor/gurl/base/strings/utf_string_conversion_utils.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2009 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #include "base/strings/utf_string_conversion_utils.h" 6 | 7 | #include "base/third_party/icu/icu_utf.h" 8 | 9 | namespace base { 10 | 11 | // ReadUnicodeCharacter -------------------------------------------------------- 12 | 13 | bool ReadUnicodeCharacter(const char* src, 14 | int32 src_len, 15 | int32* char_index, 16 | uint32* code_point_out) { 17 | // U8_NEXT expects to be able to use -1 to signal an error, so we must 18 | // use a signed type for code_point. But this function returns false 19 | // on error anyway, so code_point_out is unsigned. 20 | int32 code_point; 21 | CBU8_NEXT(src, *char_index, src_len, code_point); 22 | *code_point_out = static_cast(code_point); 23 | 24 | // The ICU macro above moves to the next char, we want to point to the last 25 | // char consumed. 26 | (*char_index)--; 27 | 28 | // Validate the decoded value. 29 | return IsValidCodepoint(code_point); 30 | } 31 | 32 | bool ReadUnicodeCharacter(const char16* src, 33 | int32 src_len, 34 | int32* char_index, 35 | uint32* code_point) { 36 | if (CBU16_IS_SURROGATE(src[*char_index])) { 37 | if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) || 38 | *char_index + 1 >= src_len || 39 | !CBU16_IS_TRAIL(src[*char_index + 1])) { 40 | // Invalid surrogate pair. 41 | return false; 42 | } 43 | 44 | // Valid surrogate pair. 
45 | *code_point = CBU16_GET_SUPPLEMENTARY(src[*char_index], 46 | src[*char_index + 1]); 47 | (*char_index)++; 48 | } else { 49 | // Not a surrogate, just one 16-bit word. 50 | *code_point = src[*char_index]; 51 | } 52 | 53 | return IsValidCodepoint(*code_point); 54 | } 55 | 56 | #if defined(WCHAR_T_IS_UTF32) 57 | bool ReadUnicodeCharacter(const wchar_t* src, 58 | int32 src_len, 59 | int32* char_index, 60 | uint32* code_point) { 61 | // Conversion is easy since the source is 32-bit. 62 | *code_point = src[*char_index]; 63 | 64 | // Validate the value. 65 | return IsValidCodepoint(*code_point); 66 | } 67 | #endif // defined(WCHAR_T_IS_UTF32) 68 | 69 | // WriteUnicodeCharacter ------------------------------------------------------- 70 | 71 | size_t WriteUnicodeCharacter(uint32 code_point, std::string* output) { 72 | if (code_point <= 0x7f) { 73 | // Fast path the common case of one byte. 74 | output->push_back(static_cast(code_point)); 75 | return 1; 76 | } 77 | 78 | 79 | // CBU8_APPEND_UNSAFE can append up to 4 bytes. 80 | size_t char_offset = output->length(); 81 | size_t original_char_offset = char_offset; 82 | output->resize(char_offset + CBU8_MAX_LENGTH); 83 | 84 | CBU8_APPEND_UNSAFE(&(*output)[0], char_offset, code_point); 85 | 86 | // CBU8_APPEND_UNSAFE will advance our pointer past the inserted character, so 87 | // it will represent the new length of the string. 88 | output->resize(char_offset); 89 | return char_offset - original_char_offset; 90 | } 91 | 92 | size_t WriteUnicodeCharacter(uint32 code_point, string16* output) { 93 | if (CBU16_LENGTH(code_point) == 1) { 94 | // Thie code point is in the Basic Multilingual Plane (BMP). 95 | output->push_back(static_cast(code_point)); 96 | return 1; 97 | } 98 | // Non-BMP characters use a double-character encoding. 
99 | size_t char_offset = output->length(); 100 | output->resize(char_offset + CBU16_MAX_LENGTH); 101 | CBU16_APPEND_UNSAFE(&(*output)[0], char_offset, code_point); 102 | return CBU16_MAX_LENGTH; 103 | } 104 | 105 | // Generalized Unicode converter ----------------------------------------------- 106 | 107 | template 108 | void PrepareForUTF8Output(const CHAR* src, 109 | size_t src_len, 110 | std::string* output) { 111 | output->clear(); 112 | if (src_len == 0) 113 | return; 114 | if (src[0] < 0x80) { 115 | // Assume that the entire input will be ASCII. 116 | output->reserve(src_len); 117 | } else { 118 | // Assume that the entire input is non-ASCII and will have 3 bytes per char. 119 | output->reserve(src_len * 3); 120 | } 121 | } 122 | 123 | // Instantiate versions we know callers will need. 124 | template void PrepareForUTF8Output(const wchar_t*, size_t, std::string*); 125 | template void PrepareForUTF8Output(const char16*, size_t, std::string*); 126 | 127 | template 128 | void PrepareForUTF16Or32Output(const char* src, 129 | size_t src_len, 130 | STRING* output) { 131 | output->clear(); 132 | if (src_len == 0) 133 | return; 134 | if (static_cast(src[0]) < 0x80) { 135 | // Assume the input is all ASCII, which means 1:1 correspondence. 136 | output->reserve(src_len); 137 | } else { 138 | // Otherwise assume that the UTF-8 sequences will have 2 bytes for each 139 | // character. 140 | output->reserve(src_len / 2); 141 | } 142 | } 143 | 144 | // Instantiate versions we know callers will need. 145 | template void PrepareForUTF16Or32Output(const char*, size_t, std::wstring*); 146 | template void PrepareForUTF16Or32Output(const char*, size_t, string16*); 147 | 148 | } // namespace base 149 | -------------------------------------------------------------------------------- /vendor/gurl/base/strings/utf_string_conversion_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The Chromium Authors. 
All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_ 6 | #define BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_ 7 | 8 | // This should only be used by the various UTF string conversion files. 9 | 10 | #include "base/base_export.h" 11 | #include "base/strings/string16.h" 12 | 13 | namespace base { 14 | 15 | inline bool IsValidCodepoint(uint32 code_point) { 16 | // Excludes the surrogate code points ([0xD800, 0xDFFF]) and 17 | // codepoints larger than 0x10FFFF (the highest codepoint allowed). 18 | // Non-characters and unassigned codepoints are allowed. 19 | return code_point < 0xD800u || 20 | (code_point >= 0xE000u && code_point <= 0x10FFFFu); 21 | } 22 | 23 | inline bool IsValidCharacter(uint32 code_point) { 24 | // Excludes non-characters (U+FDD0..U+FDEF, and all codepoints ending in 25 | // 0xFFFE or 0xFFFF) from the set of valid code points. 26 | return code_point < 0xD800u || (code_point >= 0xE000u && 27 | code_point < 0xFDD0u) || (code_point > 0xFDEFu && 28 | code_point <= 0x10FFFFu && (code_point & 0xFFFEu) != 0xFFFEu); 29 | } 30 | 31 | // ReadUnicodeCharacter -------------------------------------------------------- 32 | 33 | // Reads a UTF-8 stream, placing the next code point into the given output 34 | // |*code_point|. |src| represents the entire string to read, and |*char_index| 35 | // is the character offset within the string to start reading at. |*char_index| 36 | // will be updated to index the last character read, such that incrementing it 37 | // (as in a for loop) will take the reader to the next character. 38 | // 39 | // Returns true on success. On false, |*code_point| will be invalid. 40 | BASE_EXPORT bool ReadUnicodeCharacter(const char* src, 41 | int32 src_len, 42 | int32* char_index, 43 | uint32* code_point_out); 44 | 45 | // Reads a UTF-16 character. The usage is the same as the 8-bit version above. 
46 | BASE_EXPORT bool ReadUnicodeCharacter(const char16* src, 47 | int32 src_len, 48 | int32* char_index, 49 | uint32* code_point); 50 | 51 | #if defined(WCHAR_T_IS_UTF32) 52 | // Reads UTF-32 character. The usage is the same as the 8-bit version above. 53 | BASE_EXPORT bool ReadUnicodeCharacter(const wchar_t* src, 54 | int32 src_len, 55 | int32* char_index, 56 | uint32* code_point); 57 | #endif // defined(WCHAR_T_IS_UTF32) 58 | 59 | // WriteUnicodeCharacter ------------------------------------------------------- 60 | 61 | // Appends a UTF-8 character to the given 8-bit string. Returns the number of 62 | // bytes written. 63 | BASE_EXPORT size_t WriteUnicodeCharacter(uint32 code_point, 64 | std::string* output); 65 | 66 | // Appends the given code point as a UTF-16 character to the given 16-bit 67 | // string. Returns the number of 16-bit values written. 68 | BASE_EXPORT size_t WriteUnicodeCharacter(uint32 code_point, string16* output); 69 | 70 | #if defined(WCHAR_T_IS_UTF32) 71 | // Appends the given UTF-32 character to the given 32-bit string. Returns the 72 | // number of 32-bit values written. 73 | inline size_t WriteUnicodeCharacter(uint32 code_point, std::wstring* output) { 74 | // This is the easy case, just append the character. 75 | output->push_back(code_point); 76 | return 1; 77 | } 78 | #endif // defined(WCHAR_T_IS_UTF32) 79 | 80 | // Generalized Unicode converter ----------------------------------------------- 81 | 82 | // Guesses the length of the output in UTF-8 in bytes, clears that output 83 | // string, and reserves that amount of space. We assume that the input 84 | // character types are unsigned, which will be true for UTF-16 and -32 on our 85 | // systems. 86 | template 87 | void PrepareForUTF8Output(const CHAR* src, size_t src_len, std::string* output); 88 | 89 | // Prepares an output buffer (containing either UTF-16 or -32 data) given some 90 | // UTF-8 input that will be converted to it. See PrepareForUTF8Output(). 
91 | template 92 | void PrepareForUTF16Or32Output(const char* src, size_t src_len, STRING* output); 93 | 94 | } // namespace base 95 | 96 | #endif // BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_ 97 | -------------------------------------------------------------------------------- /vendor/gurl/base/strings/utf_string_conversions.cc: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #include "base/strings/utf_string_conversions.h" 6 | 7 | #include "base/strings/string_piece.h" 8 | #include "base/strings/string_util.h" 9 | #include "base/strings/utf_string_conversion_utils.h" 10 | 11 | namespace base { 12 | 13 | namespace { 14 | 15 | // Generalized Unicode converter ----------------------------------------------- 16 | 17 | // Converts the given source Unicode character type to the given destination 18 | // Unicode character type as a STL string. The given input buffer and size 19 | // determine the source, and the given output STL string will be replaced by 20 | // the result. 21 | template 22 | bool ConvertUnicode(const SRC_CHAR* src, 23 | size_t src_len, 24 | DEST_STRING* output) { 25 | // ICU requires 32-bit numbers. 
26 | bool success = true; 27 | int32 src_len32 = static_cast(src_len); 28 | for (int32 i = 0; i < src_len32; i++) { 29 | uint32 code_point; 30 | if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 31 | WriteUnicodeCharacter(code_point, output); 32 | } else { 33 | WriteUnicodeCharacter(0xFFFD, output); 34 | success = false; 35 | } 36 | } 37 | 38 | return success; 39 | } 40 | 41 | } // namespace 42 | 43 | // UTF-8 <-> Wide -------------------------------------------------------------- 44 | 45 | bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { 46 | if (IsStringASCII(std::wstring(src, src_len))) { 47 | output->assign(src, src + src_len); 48 | return true; 49 | } else { 50 | PrepareForUTF8Output(src, src_len, output); 51 | return ConvertUnicode(src, src_len, output); 52 | } 53 | } 54 | 55 | std::string WideToUTF8(const std::wstring& wide) { 56 | if (IsStringASCII(wide)) { 57 | return std::string(wide.data(), wide.data() + wide.length()); 58 | } 59 | 60 | std::string ret; 61 | PrepareForUTF8Output(wide.data(), wide.length(), &ret); 62 | ConvertUnicode(wide.data(), wide.length(), &ret); 63 | return ret; 64 | } 65 | 66 | bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { 67 | if (IsStringASCII(StringPiece(src, src_len))) { 68 | output->assign(src, src + src_len); 69 | return true; 70 | } else { 71 | PrepareForUTF16Or32Output(src, src_len, output); 72 | return ConvertUnicode(src, src_len, output); 73 | } 74 | } 75 | 76 | std::wstring UTF8ToWide(StringPiece utf8) { 77 | if (IsStringASCII(utf8)) { 78 | return std::wstring(utf8.begin(), utf8.end()); 79 | } 80 | 81 | std::wstring ret; 82 | PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret); 83 | ConvertUnicode(utf8.data(), utf8.length(), &ret); 84 | return ret; 85 | } 86 | 87 | // UTF-16 <-> Wide ------------------------------------------------------------- 88 | 89 | #if defined(WCHAR_T_IS_UTF16) 90 | 91 | // When wide == UTF-16, then conversions are a NOP. 
92 | bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 93 | output->assign(src, src_len); 94 | return true; 95 | } 96 | 97 | string16 WideToUTF16(const std::wstring& wide) { 98 | return wide; 99 | } 100 | 101 | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 102 | output->assign(src, src_len); 103 | return true; 104 | } 105 | 106 | std::wstring UTF16ToWide(const string16& utf16) { 107 | return utf16; 108 | } 109 | 110 | #elif defined(WCHAR_T_IS_UTF32) 111 | 112 | bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 113 | output->clear(); 114 | // Assume that normally we won't have any non-BMP characters so the counts 115 | // will be the same. 116 | output->reserve(src_len); 117 | return ConvertUnicode(src, src_len, output); 118 | } 119 | 120 | string16 WideToUTF16(const std::wstring& wide) { 121 | string16 ret; 122 | WideToUTF16(wide.data(), wide.length(), &ret); 123 | return ret; 124 | } 125 | 126 | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 127 | output->clear(); 128 | // Assume that normally we won't have any non-BMP characters so the counts 129 | // will be the same. 
130 | output->reserve(src_len); 131 | return ConvertUnicode(src, src_len, output); 132 | } 133 | 134 | std::wstring UTF16ToWide(const string16& utf16) { 135 | std::wstring ret; 136 | UTF16ToWide(utf16.data(), utf16.length(), &ret); 137 | return ret; 138 | } 139 | 140 | #endif // defined(WCHAR_T_IS_UTF32) 141 | 142 | // UTF16 <-> UTF8 -------------------------------------------------------------- 143 | 144 | #if defined(WCHAR_T_IS_UTF32) 145 | 146 | bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 147 | if (IsStringASCII(StringPiece(src, src_len))) { 148 | output->assign(src, src + src_len); 149 | return true; 150 | } else { 151 | PrepareForUTF16Or32Output(src, src_len, output); 152 | return ConvertUnicode(src, src_len, output); 153 | } 154 | } 155 | 156 | string16 UTF8ToUTF16(StringPiece utf8) { 157 | if (IsStringASCII(utf8)) { 158 | return string16(utf8.begin(), utf8.end()); 159 | } 160 | 161 | string16 ret; 162 | PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret); 163 | // Ignore the success flag of this call, it will do the best it can for 164 | // invalid input, which is what we want here. 165 | ConvertUnicode(utf8.data(), utf8.length(), &ret); 166 | return ret; 167 | } 168 | 169 | bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 170 | if (IsStringASCII(StringPiece16(src, src_len))) { 171 | output->assign(src, src + src_len); 172 | return true; 173 | } else { 174 | PrepareForUTF8Output(src, src_len, output); 175 | return ConvertUnicode(src, src_len, output); 176 | } 177 | } 178 | 179 | std::string UTF16ToUTF8(StringPiece16 utf16) { 180 | if (IsStringASCII(utf16)) { 181 | return std::string(utf16.begin(), utf16.end()); 182 | } 183 | 184 | std::string ret; 185 | // Ignore the success flag of this call, it will do the best it can for 186 | // invalid input, which is what we want here. 
187 | UTF16ToUTF8(utf16.data(), utf16.length(), &ret); 188 | return ret; 189 | } 190 | 191 | #elif defined(WCHAR_T_IS_UTF16) 192 | // Easy case since we can use the "wide" versions we already wrote above. 193 | 194 | bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 195 | return UTF8ToWide(src, src_len, output); 196 | } 197 | 198 | string16 UTF8ToUTF16(StringPiece utf8) { 199 | return UTF8ToWide(utf8); 200 | } 201 | 202 | bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 203 | return WideToUTF8(src, src_len, output); 204 | } 205 | 206 | std::string UTF16ToUTF8(StringPiece16 utf16) { 207 | if (IsStringASCII(utf16)) 208 | return std::string(utf16.data(), utf16.data() + utf16.length()); 209 | 210 | std::string ret; 211 | PrepareForUTF8Output(utf16.data(), utf16.length(), &ret); 212 | ConvertUnicode(utf16.data(), utf16.length(), &ret); 213 | return ret; 214 | } 215 | 216 | #endif 217 | 218 | string16 ASCIIToUTF16(StringPiece ascii) { 219 | //DCHECK(IsStringASCII(ascii)) << ascii; 220 | return string16(ascii.begin(), ascii.end()); 221 | } 222 | 223 | std::string UTF16ToASCII(StringPiece16 utf16) { 224 | //DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16); 225 | return std::string(utf16.begin(), utf16.end()); 226 | } 227 | 228 | } // namespace base 229 | -------------------------------------------------------------------------------- /vendor/gurl/base/strings/utf_string_conversions.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 
4 | 5 | #ifndef BASE_STRINGS_UTF_STRING_CONVERSIONS_H_ 6 | #define BASE_STRINGS_UTF_STRING_CONVERSIONS_H_ 7 | 8 | #include 9 | 10 | #include "base/base_export.h" 11 | #include "base/strings/string16.h" 12 | #include "base/strings/string_piece.h" 13 | 14 | namespace base { 15 | 16 | // These convert between UTF-8, -16, and -32 strings. They are potentially slow, 17 | // so avoid unnecessary conversions. The low-level versions return a boolean 18 | // indicating whether the conversion was 100% valid. In this case, it will still 19 | // do the best it can and put the result in the output buffer. The versions that 20 | // return strings ignore this error and just return the best conversion 21 | // possible. 22 | BASE_EXPORT bool WideToUTF8(const wchar_t* src, size_t src_len, 23 | std::string* output); 24 | BASE_EXPORT std::string WideToUTF8(const std::wstring& wide); 25 | BASE_EXPORT bool UTF8ToWide(const char* src, size_t src_len, 26 | std::wstring* output); 27 | BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8); 28 | 29 | BASE_EXPORT bool WideToUTF16(const wchar_t* src, size_t src_len, 30 | string16* output); 31 | BASE_EXPORT string16 WideToUTF16(const std::wstring& wide); 32 | BASE_EXPORT bool UTF16ToWide(const char16* src, size_t src_len, 33 | std::wstring* output); 34 | BASE_EXPORT std::wstring UTF16ToWide(const string16& utf16); 35 | 36 | BASE_EXPORT bool UTF8ToUTF16(const char* src, size_t src_len, string16* output); 37 | BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8); 38 | BASE_EXPORT bool UTF16ToUTF8(const char16* src, size_t src_len, 39 | std::string* output); 40 | BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16); 41 | 42 | // This converts an ASCII string, typically a hardcoded constant, to a UTF16 43 | // string. 44 | BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii); 45 | 46 | // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII 47 | // beforehand. 
48 | BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16); 49 | 50 | } // namespace base 51 | 52 | #endif // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_ 53 | -------------------------------------------------------------------------------- /vendor/gurl/base/template_util.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef BASE_TEMPLATE_UTIL_H_ 6 | #define BASE_TEMPLATE_UTIL_H_ 7 | 8 | #include // For size_t. 9 | 10 | #include "build/build_config.h" 11 | 12 | namespace base { 13 | 14 | // template definitions from tr1 15 | 16 | template 17 | struct integral_constant { 18 | static const T value = v; 19 | typedef T value_type; 20 | typedef integral_constant type; 21 | }; 22 | 23 | template const T integral_constant::value; 24 | 25 | typedef integral_constant true_type; 26 | typedef integral_constant false_type; 27 | 28 | template struct is_pointer : false_type {}; 29 | template struct is_pointer : true_type {}; 30 | 31 | // Member function pointer detection. This is built-in to C++ 11's stdlib, and 32 | // we can remove this when we switch to it. 
33 | template 34 | struct is_member_function_pointer : false_type {}; 35 | 36 | template 37 | struct is_member_function_pointer : true_type {}; 38 | template 39 | struct is_member_function_pointer : true_type {}; 40 | 41 | 42 | template struct is_same : public false_type {}; 43 | template struct is_same : true_type {}; 44 | 45 | template struct is_array : public false_type {}; 46 | template struct is_array : public true_type {}; 47 | template struct is_array : public true_type {}; 48 | 49 | template struct is_non_const_reference : false_type {}; 50 | template struct is_non_const_reference : true_type {}; 51 | template struct is_non_const_reference : false_type {}; 52 | 53 | template struct is_const : false_type {}; 54 | template struct is_const : true_type {}; 55 | 56 | template struct is_void : false_type {}; 57 | template <> struct is_void : true_type {}; 58 | 59 | namespace internal { 60 | 61 | // Types YesType and NoType are guaranteed such that sizeof(YesType) < 62 | // sizeof(NoType). 63 | typedef char YesType; 64 | 65 | struct NoType { 66 | YesType dummy[2]; 67 | }; 68 | 69 | // This class is an implementation detail for is_convertible, and you 70 | // don't need to know how it works to use is_convertible. For those 71 | // who care: we declare two different functions, one whose argument is 72 | // of type To and one with a variadic argument list. We give them 73 | // return types of different size, so we can use sizeof to trick the 74 | // compiler into telling us which function it would have chosen if we 75 | // had called it with an argument of type From. See Alexandrescu's 76 | // _Modern C++ Design_ for more details on this sort of trick. 77 | 78 | struct ConvertHelper { 79 | template 80 | static YesType Test(To); 81 | 82 | template 83 | static NoType Test(...); 84 | 85 | template 86 | static From& Create(); 87 | }; 88 | 89 | // Used to determine if a type is a struct/union/class. Inspired by Boost's 90 | // is_class type_trait implementation. 
91 | struct IsClassHelper { 92 | template 93 | static YesType Test(void(C::*)(void)); 94 | 95 | template 96 | static NoType Test(...); 97 | }; 98 | 99 | } // namespace internal 100 | 101 | // Inherits from true_type if From is convertible to To, false_type otherwise. 102 | // 103 | // Note that if the type is convertible, this will be a true_type REGARDLESS 104 | // of whether or not the conversion would emit a warning. 105 | template 106 | struct is_convertible 107 | : integral_constant( 109 | internal::ConvertHelper::Create())) == 110 | sizeof(internal::YesType)> { 111 | }; 112 | 113 | template 114 | struct is_class 115 | : integral_constant(0)) == 117 | sizeof(internal::YesType)> { 118 | }; 119 | 120 | template 121 | struct enable_if {}; 122 | 123 | template 124 | struct enable_if { typedef T type; }; 125 | 126 | } // namespace base 127 | 128 | #endif // BASE_TEMPLATE_UTIL_H_ 129 | -------------------------------------------------------------------------------- /vendor/gurl/base/third_party/icu/LICENSE: -------------------------------------------------------------------------------- 1 | ICU License - ICU 1.8.1 and later 2 | 3 | COPYRIGHT AND PERMISSION NOTICE 4 | 5 | Copyright (c) 1995-2009 International Business Machines Corporation and others 6 | 7 | All rights reserved. 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining 10 | a copy of this software and associated documentation files (the 11 | "Software"), to deal in the Software without restriction, including 12 | without limitation the rights to use, copy, modify, merge, publish, 13 | distribute, and/or sell copies of the Software, and to permit persons 14 | to whom the Software is furnished to do so, provided that the above 15 | copyright notice(s) and this permission notice appear in all copies of 16 | the Software and that both the above copyright notice(s) and this 17 | permission notice appear in supporting documentation. 
18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT 22 | OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 23 | HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY 24 | SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER 25 | RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF 26 | CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 27 | CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 28 | 29 | Except as contained in this notice, the name of a copyright holder 30 | shall not be used in advertising or otherwise to promote the sale, use 31 | or other dealings in this Software without prior written authorization 32 | of the copyright holder. 33 | -------------------------------------------------------------------------------- /vendor/gurl/base/third_party/icu/README.chromium: -------------------------------------------------------------------------------- 1 | Name: ICU 2 | URL: http://site.icu-project.org/ 3 | License: MIT 4 | License File: NOT_SHIPPED 5 | 6 | This file has the relevant components from ICU copied to handle basic 7 | UTF8/16/32 conversions. Components are copied from utf.h utf8.h utf16.h and 8 | utf_impl.c 9 | 10 | The same module appears in third_party/icu, so we don't repeat the license 11 | file here. 12 | 13 | The main change is that U_/U8_/U16_ prefixes have been replaced with 14 | CBU_/CBU8_/CBU16_ (for "Chrome Base") to avoid confusion with the "real" ICU 15 | macros should ICU be in use on the system. For the same reason, the functions 16 | and types have been put in the "base_icu" namespace. 
17 | -------------------------------------------------------------------------------- /vendor/gurl/base/third_party/icu/icu_utf.cc: -------------------------------------------------------------------------------- 1 | /* 2 | ****************************************************************************** 3 | * 4 | * Copyright (C) 1999-2006, International Business Machines 5 | * Corporation and others. All Rights Reserved. 6 | * 7 | ****************************************************************************** 8 | * file name: utf_impl.c 9 | * encoding: US-ASCII 10 | * tab size: 8 (not used) 11 | * indentation:4 12 | * 13 | * created on: 1999sep13 14 | * created by: Markus W. Scherer 15 | * 16 | * This file provides implementation functions for macros in the utfXX.h 17 | * that would otherwise be too long as macros. 18 | */ 19 | 20 | #include "base/third_party/icu/icu_utf.h" 21 | 22 | namespace base_icu { 23 | 24 | /** 25 | * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8, 26 | * which need 1 or 2 bytes in UTF-8: 27 | * \code 28 | * U+0015 = NAK = Negative Acknowledge, C0 control character 29 | * U+009f = highest C1 control character 30 | * \endcode 31 | * 32 | * These are used by UTF8_..._SAFE macros so that they can return an error value 33 | * that needs the same number of code units (bytes) as were seen by 34 | * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID(). 35 | * 36 | * @deprecated ICU 2.4. Obsolete, see utf_old.h. 37 | */ 38 | #define CBUTF8_ERROR_VALUE_1 0x15 39 | 40 | /** 41 | * See documentation on UTF8_ERROR_VALUE_1 for details. 42 | * 43 | * @deprecated ICU 2.4. Obsolete, see utf_old.h. 44 | */ 45 | #define CBUTF8_ERROR_VALUE_2 0x9f 46 | 47 | 48 | /** 49 | * Error value for all UTFs. This code point value will be set by macros with error 50 | * checking if an error is detected. 51 | * 52 | * @deprecated ICU 2.4. Obsolete, see utf_old.h.
53 | */ 54 | #define CBUTF_ERROR_VALUE 0xffff 55 | 56 | /* 57 | * This table could be replaced on many machines by 58 | * a few lines of assembler code using an 59 | * "index of first 0-bit from msb" instruction and 60 | * one or two more integer instructions. 61 | * 62 | * For example, on an i386, do something like 63 | * - MOV AL, leadByte 64 | * - NOT AL (8-bit, leave b15..b8==0..0, reverse only b7..b0) 65 | * - MOV AH, 0 66 | * - BSR BX, AX (16-bit) 67 | * - MOV AX, 6 (result) 68 | * - JZ finish (ZF==1 if leadByte==0xff) 69 | * - SUB AX, BX (result) 70 | * -finish: 71 | * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB) 72 | * 73 | * In Unicode, all UTF-8 byte sequences with more than 4 bytes are illegal; 74 | * lead bytes above 0xf4 are illegal. 75 | * We keep them in this table for skipping long ISO 10646-UTF-8 sequences. 76 | */ 77 | const uint8 78 | utf8_countTrailBytes[256]={ 79 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83 | 84 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88 | 89 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 93 | 94 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 95 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 96 | 97 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 98 | 3, 3, 3, 3, 3, 99 | 3, 3, 3, /* illegal in Unicode */ 100 | 4, 4, 4, 4, /* illegal in Unicode */ 101 | 5, 5, /* illegal in Unicode */ 102 | 0, 0 /* illegal bytes 0xfe and 0xff */ 103 | }; 104 | 105 | static const UChar32 106 | utf8_minLegal[4]={ 0, 0x80, 0x800, 
0x10000 }; 107 | 108 | static const UChar32 109 | utf8_errorValue[6]={ 110 | CBUTF8_ERROR_VALUE_1, CBUTF8_ERROR_VALUE_2, CBUTF_ERROR_VALUE, 0x10ffff, 111 | 0x3ffffff, 0x7fffffff 112 | }; 113 | 114 | /* 115 | * Handle the non-inline part of the U8_NEXT() macro and its obsolete sibling 116 | * UTF8_NEXT_CHAR_SAFE(). 117 | * 118 | * The "strict" parameter controls the error behavior: 119 | * <0 "Safe" behavior of U8_NEXT(): All illegal byte sequences yield a negative 120 | * code point result. 121 | * 0 Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE): 122 | * All illegal byte sequences yield a positive code point such that this 123 | * result code point would be encoded with the same number of bytes as 124 | * the illegal sequence. 125 | * >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE): 126 | * Same as the obsolete "safe" behavior, but non-characters are also treated 127 | * like illegal sequences. 128 | * 129 | * The special negative (<0) value -2 is used for lenient treatment of surrogate 130 | * code points as legal. Some implementations use this for roundtripping of 131 | * Unicode 16-bit strings that are not well-formed UTF-16, that is, they 132 | * contain unpaired surrogates. 133 | * 134 | * Note that a UBool is the same as an int8_t. 
135 | */ 136 | UChar32 137 | utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, UBool strict) { 138 | int32 i=*pi; 139 | uint8 count=CBU8_COUNT_TRAIL_BYTES(c); 140 | if((i)+count<=(length)) { 141 | uint8 trail, illegal=0; 142 | 143 | CBU8_MASK_LEAD_BYTE((c), count); 144 | /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ 145 | switch(count) { 146 | /* each branch falls through to the next one */ 147 | case 5: 148 | case 4: 149 | /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ 150 | illegal=1; 151 | break; 152 | case 3: 153 | trail=s[(i)++]; 154 | (c)=((c)<<6)|(trail&0x3f); 155 | if(c<0x110) { 156 | illegal|=(trail&0xc0)^0x80; 157 | } else { 158 | /* code point>0x10ffff, outside Unicode */ 159 | illegal=1; 160 | break; 161 | } 162 | case 2: 163 | trail=s[(i)++]; 164 | (c)=((c)<<6)|(trail&0x3f); 165 | illegal|=(trail&0xc0)^0x80; 166 | case 1: 167 | trail=s[(i)++]; 168 | (c)=((c)<<6)|(trail&0x3f); 169 | illegal|=(trail&0xc0)^0x80; 170 | break; 171 | case 0: 172 | if(strict>=0) { 173 | return CBUTF8_ERROR_VALUE_1; 174 | } else { 175 | return CBU_SENTINEL; 176 | } 177 | /* no default branch to optimize switch() - all values are covered */ 178 | } 179 | 180 | /* 181 | * All the error handling should return a value 182 | * that needs count bytes so that UTF8_GET_CHAR_SAFE() works right. 183 | * 184 | * Starting with Unicode 3.0.1, non-shortest forms are illegal. 185 | * Starting with Unicode 3.2, surrogate code points must not be 186 | * encoded in UTF-8, and there are no irregular sequences any more. 187 | * 188 | * U8_ macros (new in ICU 2.4) return negative values for error conditions. 189 | */ 190 | 191 | /* correct sequence - all trail bytes have (b7..b6)==(10)? 
*/ 192 | /* illegal is also set if count>=4 */ 193 | if(illegal || (c)0 && CBU8_IS_TRAIL(s[i])) { 199 | ++(i); 200 | --count; 201 | } 202 | if(strict>=0) { 203 | c=utf8_errorValue[errorCount-count]; 204 | } else { 205 | c=CBU_SENTINEL; 206 | } 207 | } else if((strict)>0 && CBU_IS_UNICODE_NONCHAR(c)) { 208 | /* strict: forbid non-characters like U+fffe */ 209 | c=utf8_errorValue[count]; 210 | } 211 | } else /* too few bytes left */ { 212 | /* error handling */ 213 | int32 i0=i; 214 | /* don't just set (i)=(length) in case there is an illegal sequence */ 215 | while((i)<(length) && CBU8_IS_TRAIL(s[i])) { 216 | ++(i); 217 | } 218 | if(strict>=0) { 219 | c=utf8_errorValue[i-i0]; 220 | } else { 221 | c=CBU_SENTINEL; 222 | } 223 | } 224 | *pi=i; 225 | return c; 226 | } 227 | 228 | } // namespace base_icu 229 | -------------------------------------------------------------------------------- /vendor/gurl/base/third_party/icu/icu_utf.h: -------------------------------------------------------------------------------- 1 | /* 2 | ******************************************************************************* 3 | * 4 | * Copyright (C) 1999-2004, International Business Machines 5 | * Corporation and others. All Rights Reserved. 6 | * 7 | ******************************************************************************* 8 | * file name: utf.h 9 | * encoding: US-ASCII 10 | * tab size: 8 (not used) 11 | * indentation:4 12 | * 13 | * created on: 1999sep09 14 | * created by: Markus W. Scherer 15 | */ 16 | 17 | #ifndef BASE_THIRD_PARTY_ICU_ICU_UTF_H_ 18 | #define BASE_THIRD_PARTY_ICU_ICU_UTF_H_ 19 | 20 | #include "base/basictypes.h" 21 | 22 | namespace base_icu { 23 | 24 | typedef int32 UChar32; 25 | typedef uint16 UChar; 26 | typedef int8 UBool; 27 | 28 | // General --------------------------------------------------------------------- 29 | // from utf.h 30 | 31 | /** 32 | * This value is intended for sentinel values for APIs that 33 | * (take or) return single code points (UChar32). 
34 | * It is outside of the Unicode code point range 0..0x10ffff. 35 | * 36 | * For example, a "done" or "error" value in a new API 37 | * could be indicated with CBU_SENTINEL. 38 | * 39 | * ICU APIs designed before ICU 2.4 usually define service-specific "done" 40 | * values, mostly 0xffff. 41 | * Those may need to be distinguished from 42 | * actual U+ffff text contents by calling functions like 43 | * CharacterIterator::hasNext() or UnicodeString::length(). 44 | * 45 | * @return -1 46 | * @see UChar32 47 | * @stable ICU 2.4 48 | */ 49 | #define CBU_SENTINEL (-1) 50 | 51 | /** 52 | * Is this code point a Unicode noncharacter? 53 | * @param c 32-bit code point 54 | * @return TRUE or FALSE 55 | * @stable ICU 2.4 56 | */ 57 | #define CBU_IS_UNICODE_NONCHAR(c) \ 58 | ((c)>=0xfdd0 && \ 59 | ((uint32)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ 60 | (uint32)(c)<=0x10ffff) 61 | 62 | /** 63 | * Is c a Unicode code point value (0..U+10ffff) 64 | * that can be assigned a character? 65 | * 66 | * Code points that are not characters include: 67 | * - single surrogate code points (U+d800..U+dfff, 2048 code points) 68 | * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) 69 | * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) 70 | * - the highest Unicode code point value is U+10ffff 71 | * 72 | * This means that all code points below U+d800 are character code points, 73 | * and that boundary is tested first for performance. 74 | * 75 | * @param c 32-bit code point 76 | * @return TRUE or FALSE 77 | * @stable ICU 2.4 78 | */ 79 | #define CBU_IS_UNICODE_CHAR(c) \ 80 | ((uint32)(c)<0xd800 || \ 81 | ((uint32)(c)>0xdfff && \ 82 | (uint32)(c)<=0x10ffff && \ 83 | !CBU_IS_UNICODE_NONCHAR(c))) 84 | 85 | /** 86 | * Is this code point a surrogate (U+d800..U+dfff)? 
87 | * @param c 32-bit code point 88 | * @return TRUE or FALSE 89 | * @stable ICU 2.4 90 | */ 91 | #define CBU_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) 92 | 93 | /** 94 | * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), 95 | * is it a lead surrogate? 96 | * @param c 32-bit code point 97 | * @return TRUE or FALSE 98 | * @stable ICU 2.4 99 | */ 100 | #define CBU_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) 101 | 102 | 103 | // UTF-8 macros ---------------------------------------------------------------- 104 | // from utf8.h 105 | 106 | extern const uint8 utf8_countTrailBytes[256]; 107 | 108 | /** 109 | * Count the trail bytes for a UTF-8 lead byte. 110 | * @internal 111 | */ 112 | #define CBU8_COUNT_TRAIL_BYTES(leadByte) (base_icu::utf8_countTrailBytes[(uint8)leadByte]) 113 | 114 | /** 115 | * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. 116 | * @internal 117 | */ 118 | #define CBU8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) 119 | 120 | /** 121 | * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? 122 | * @param c 8-bit code unit (byte) 123 | * @return TRUE or FALSE 124 | * @stable ICU 2.4 125 | */ 126 | #define CBU8_IS_SINGLE(c) (((c)&0x80)==0) 127 | 128 | /** 129 | * Is this code unit (byte) a UTF-8 lead byte? 130 | * @param c 8-bit code unit (byte) 131 | * @return TRUE or FALSE 132 | * @stable ICU 2.4 133 | */ 134 | #define CBU8_IS_LEAD(c) ((uint8)((c)-0xc0)<0x3e) 135 | 136 | /** 137 | * Is this code unit (byte) a UTF-8 trail byte? 138 | * @param c 8-bit code unit (byte) 139 | * @return TRUE or FALSE 140 | * @stable ICU 2.4 141 | */ 142 | #define CBU8_IS_TRAIL(c) (((c)&0xc0)==0x80) 143 | 144 | /** 145 | * How many code units (bytes) are used for the UTF-8 encoding 146 | * of this Unicode code point? 
147 | * @param c 32-bit code point 148 | * @return 1..4, or 0 if c is a surrogate or not a Unicode code point 149 | * @stable ICU 2.4 150 | */ 151 | #define CBU8_LENGTH(c) \ 152 | ((uint32)(c)<=0x7f ? 1 : \ 153 | ((uint32)(c)<=0x7ff ? 2 : \ 154 | ((uint32)(c)<=0xd7ff ? 3 : \ 155 | ((uint32)(c)<=0xdfff || (uint32)(c)>0x10ffff ? 0 : \ 156 | ((uint32)(c)<=0xffff ? 3 : 4)\ 157 | ) \ 158 | ) \ 159 | ) \ 160 | ) 161 | 162 | /** 163 | * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff). 164 | * @return 4 165 | * @stable ICU 2.4 166 | */ 167 | #define CBU8_MAX_LENGTH 4 168 | 169 | /** 170 | * Function for handling "next code point" with error-checking. 171 | * @internal 172 | */ 173 | UChar32 utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, UBool strict); 174 | 175 | /** 176 | * Get a code point from a string at a code point boundary offset, 177 | * and advance the offset to the next code point boundary. 178 | * (Post-incrementing forward iteration.) 179 | * "Safe" macro, checks for illegal sequences and for string boundaries. 180 | * 181 | * The offset may point to the lead byte of a multi-byte sequence, 182 | * in which case the macro will read the whole sequence. 183 | * If the offset points to a trail byte or an illegal UTF-8 sequence, then 184 | * c is set to a negative value. 185 | * 186 | * @param s const uint8 * string 187 | * @param i string offset, i=0x80) { \ 196 | if(CBU8_IS_LEAD(c)) { \ 197 | (c)=base_icu::utf8_nextCharSafeBody((const uint8 *)s, &(i), (int32)(length), c, -1); \ 198 | } else { \ 199 | (c)=CBU_SENTINEL; \ 200 | } \ 201 | } \ 202 | } 203 | 204 | /** 205 | * Append a code point to a string, overwriting 1 to 4 bytes. 206 | * The offset points to the current end of the string contents 207 | * and is advanced (post-increment). 208 | * "Unsafe" macro, assumes a valid code point and sufficient space in the string. 209 | * Otherwise, the result is undefined. 
210 | * 211 | * @param s const uint8 * string buffer 212 | * @param i string offset 213 | * @param c code point to append 214 | * @see CBU8_APPEND 215 | * @stable ICU 2.4 216 | */ 217 | #define CBU8_APPEND_UNSAFE(s, i, c) { \ 218 | if((uint32)(c)<=0x7f) { \ 219 | (s)[(i)++]=(uint8)(c); \ 220 | } else { \ 221 | if((uint32)(c)<=0x7ff) { \ 222 | (s)[(i)++]=(uint8)(((c)>>6)|0xc0); \ 223 | } else { \ 224 | if((uint32)(c)<=0xffff) { \ 225 | (s)[(i)++]=(uint8)(((c)>>12)|0xe0); \ 226 | } else { \ 227 | (s)[(i)++]=(uint8)(((c)>>18)|0xf0); \ 228 | (s)[(i)++]=(uint8)((((c)>>12)&0x3f)|0x80); \ 229 | } \ 230 | (s)[(i)++]=(uint8)((((c)>>6)&0x3f)|0x80); \ 231 | } \ 232 | (s)[(i)++]=(uint8)(((c)&0x3f)|0x80); \ 233 | } \ 234 | } 235 | 236 | // UTF-16 macros --------------------------------------------------------------- 237 | // from utf16.h 238 | 239 | /** 240 | * Does this code unit alone encode a code point (BMP, not a surrogate)? 241 | * @param c 16-bit code unit 242 | * @return TRUE or FALSE 243 | * @stable ICU 2.4 244 | */ 245 | #define CBU16_IS_SINGLE(c) !CBU_IS_SURROGATE(c) 246 | 247 | /** 248 | * Is this code unit a lead surrogate (U+d800..U+dbff)? 249 | * @param c 16-bit code unit 250 | * @return TRUE or FALSE 251 | * @stable ICU 2.4 252 | */ 253 | #define CBU16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) 254 | 255 | /** 256 | * Is this code unit a trail surrogate (U+dc00..U+dfff)? 257 | * @param c 16-bit code unit 258 | * @return TRUE or FALSE 259 | * @stable ICU 2.4 260 | */ 261 | #define CBU16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) 262 | 263 | /** 264 | * Is this code unit a surrogate (U+d800..U+dfff)? 265 | * @param c 16-bit code unit 266 | * @return TRUE or FALSE 267 | * @stable ICU 2.4 268 | */ 269 | #define CBU16_IS_SURROGATE(c) CBU_IS_SURROGATE(c) 270 | 271 | /** 272 | * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), 273 | * is it a lead surrogate? 
274 | * @param c 16-bit code unit 275 | * @return TRUE or FALSE 276 | * @stable ICU 2.4 277 | */ 278 | #define CBU16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) 279 | 280 | /** 281 | * Helper constant for CBU16_GET_SUPPLEMENTARY. 282 | * @internal 283 | */ 284 | #define CBU16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 285 | 286 | /** 287 | * Get a supplementary code point value (U+10000..U+10ffff) 288 | * from its lead and trail surrogates. 289 | * The result is undefined if the input values are not 290 | * lead and trail surrogates. 291 | * 292 | * @param lead lead surrogate (U+d800..U+dbff) 293 | * @param trail trail surrogate (U+dc00..U+dfff) 294 | * @return supplementary code point (U+10000..U+10ffff) 295 | * @stable ICU 2.4 296 | */ 297 | #define CBU16_GET_SUPPLEMENTARY(lead, trail) \ 298 | (((base_icu::UChar32)(lead)<<10UL)+(base_icu::UChar32)(trail)-CBU16_SURROGATE_OFFSET) 299 | 300 | 301 | /** 302 | * Get the lead surrogate (0xd800..0xdbff) for a 303 | * supplementary code point (0x10000..0x10ffff). 304 | * @param supplementary 32-bit code point (U+10000..U+10ffff) 305 | * @return lead surrogate (U+d800..U+dbff) for supplementary 306 | * @stable ICU 2.4 307 | */ 308 | #define CBU16_LEAD(supplementary) \ 309 | (base_icu::UChar)(((supplementary)>>10)+0xd7c0) 310 | 311 | /** 312 | * Get the trail surrogate (0xdc00..0xdfff) for a 313 | * supplementary code point (0x10000..0x10ffff). 314 | * @param supplementary 32-bit code point (U+10000..U+10ffff) 315 | * @return trail surrogate (U+dc00..U+dfff) for supplementary 316 | * @stable ICU 2.4 317 | */ 318 | #define CBU16_TRAIL(supplementary) \ 319 | (base_icu::UChar)(((supplementary)&0x3ff)|0xdc00) 320 | 321 | /** 322 | * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) 323 | * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). 
324 | * @param c 32-bit code point 325 | * @return 1 or 2 326 | * @stable ICU 2.4 327 | */ 328 | #define CBU16_LENGTH(c) ((uint32)(c)<=0xffff ? 1 : 2) 329 | 330 | /** 331 | * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). 332 | * @return 2 333 | * @stable ICU 2.4 334 | */ 335 | #define CBU16_MAX_LENGTH 2 336 | 337 | /** 338 | * Get a code point from a string at a code point boundary offset, 339 | * and advance the offset to the next code point boundary. 340 | * (Post-incrementing forward iteration.) 341 | * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 342 | * 343 | * The offset may point to the lead surrogate unit 344 | * for a supplementary code point, in which case the macro will read 345 | * the following trail surrogate as well. 346 | * If the offset points to a trail surrogate or 347 | * to a single, unpaired lead surrogate, then that itself 348 | * will be returned as the code point. 349 | * 350 | * @param s const UChar * string 351 | * @param i string offset, i>10)+0xd7c0); \ 385 | (s)[(i)++]=(uint16)(((c)&0x3ff)|0xdc00); \ 386 | } \ 387 | } 388 | 389 | } // namespace base_icu 390 | 391 | #endif // BASE_THIRD_PARTY_ICU_ICU_UTF_H_ 392 | -------------------------------------------------------------------------------- /vendor/gurl/build/build_config.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | // This file adds defines about the platform we're currently building on.
6 | // Operating System: 7 | // OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX or LINUX) / 8 | // OS_NACL (NACL_SFI or NACL_NONSFI) / OS_NACL_SFI / OS_NACL_NONSFI 9 | // Compiler: 10 | // COMPILER_MSVC / COMPILER_GCC 11 | // Processor: 12 | // ARCH_CPU_X86 / ARCH_CPU_X86_64 / ARCH_CPU_X86_FAMILY (X86 or X86_64) 13 | // ARCH_CPU_32_BITS / ARCH_CPU_64_BITS 14 | 15 | #ifndef BUILD_BUILD_CONFIG_H_ 16 | #define BUILD_BUILD_CONFIG_H_ 17 | 18 | // A set of macros to use for platform detection. 19 | #if defined(__native_client__) 20 | // __native_client__ must be first, so that other OS_ defines are not set. 21 | #define OS_NACL 1 22 | // OS_NACL comes in two sandboxing technology flavors, SFI or Non-SFI. 23 | // PNaCl toolchain defines __native_client_nonsfi__ macro in Non-SFI build 24 | // mode, while it does not in SFI build mode. 25 | #if defined(__native_client_nonsfi__) 26 | #define OS_NACL_NONSFI 27 | #else 28 | #define OS_NACL_SFI 29 | #endif 30 | #elif defined(ANDROID) 31 | #define OS_ANDROID 1 32 | #elif defined(__APPLE__) 33 | // only include TargetConditions after testing ANDROID as some android builds 34 | // on mac don't have this header available and it's not needed unless the target 35 | // is really mac/ios. 36 | #include 37 | #define OS_MACOSX 1 38 | #if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE 39 | #define OS_IOS 1 40 | #endif // defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE 41 | #elif defined(__linux__) 42 | #define OS_LINUX 1 43 | // include a system header to pull in features.h for glibc/uclibc macros. 
44 | #include 45 | #if defined(__GLIBC__) && !defined(__UCLIBC__) 46 | // we really are using glibc, not uClibc pretending to be glibc 47 | #define LIBC_GLIBC 1 48 | #endif 49 | #elif defined(_WIN32) 50 | #define OS_WIN 1 51 | #define TOOLKIT_VIEWS 1 52 | #elif defined(__FreeBSD__) 53 | #define OS_FREEBSD 1 54 | #elif defined(__OpenBSD__) 55 | #define OS_OPENBSD 1 56 | #elif defined(__sun) 57 | #define OS_SOLARIS 1 58 | #elif defined(__QNXNTO__) 59 | #define OS_QNX 1 60 | #else 61 | #error Please add support for your platform in build/build_config.h 62 | #endif 63 | 64 | #if defined(USE_OPENSSL_CERTS) && defined(USE_NSS_CERTS) 65 | #error Cannot use both OpenSSL and NSS for certificates 66 | #endif 67 | 68 | // For access to standard BSD features, use OS_BSD instead of a 69 | // more specific macro. 70 | #if defined(OS_FREEBSD) || defined(OS_OPENBSD) 71 | #define OS_BSD 1 72 | #endif 73 | 74 | // For access to standard POSIXish features, use OS_POSIX instead of a 75 | // more specific macro. 76 | #if defined(OS_MACOSX) || defined(OS_LINUX) || defined(OS_FREEBSD) || \ 77 | defined(OS_OPENBSD) || defined(OS_SOLARIS) || defined(OS_ANDROID) || \ 78 | defined(OS_NACL) || defined(OS_QNX) 79 | #define OS_POSIX 1 80 | #endif 81 | 82 | // Use tcmalloc 83 | #if (defined(OS_WIN) || defined(OS_LINUX) || defined(OS_ANDROID)) && \ 84 | !defined(NO_TCMALLOC) 85 | #define USE_TCMALLOC 1 86 | #endif 87 | 88 | // Compiler detection. 89 | #if defined(__GNUC__) 90 | #define COMPILER_GCC 1 91 | #elif defined(_MSC_VER) 92 | #define COMPILER_MSVC 1 93 | #else 94 | #error Please add support for your compiler in build/build_config.h 95 | #endif 96 | 97 | // Processor architecture detection. 
For more info on what's defined, see: 98 | // http://msdn.microsoft.com/en-us/library/b0084kay.aspx 99 | // http://www.agner.org/optimize/calling_conventions.pdf 100 | // or with gcc, run: "echo | gcc -E -dM -" 101 | #if defined(_M_X64) || defined(__x86_64__) 102 | #define ARCH_CPU_X86_FAMILY 1 103 | #define ARCH_CPU_X86_64 1 104 | #define ARCH_CPU_64_BITS 1 105 | #define ARCH_CPU_LITTLE_ENDIAN 1 106 | #elif defined(_M_IX86) || defined(__i386__) 107 | #define ARCH_CPU_X86_FAMILY 1 108 | #define ARCH_CPU_X86 1 109 | #define ARCH_CPU_32_BITS 1 110 | #define ARCH_CPU_LITTLE_ENDIAN 1 111 | #elif defined(__ARMEL__) 112 | #define ARCH_CPU_ARM_FAMILY 1 113 | #define ARCH_CPU_ARMEL 1 114 | #define ARCH_CPU_32_BITS 1 115 | #define ARCH_CPU_LITTLE_ENDIAN 1 116 | #elif defined(__aarch64__) 117 | #define ARCH_CPU_ARM_FAMILY 1 118 | #define ARCH_CPU_ARM64 1 119 | #define ARCH_CPU_64_BITS 1 120 | #define ARCH_CPU_LITTLE_ENDIAN 1 121 | #elif defined(__pnacl__) 122 | #define ARCH_CPU_32_BITS 1 123 | #define ARCH_CPU_LITTLE_ENDIAN 1 124 | #elif defined(__MIPSEL__) 125 | #if defined(__LP64__) 126 | #define ARCH_CPU_MIPS64_FAMILY 1 127 | #define ARCH_CPU_MIPS64EL 1 128 | #define ARCH_CPU_64_BITS 1 129 | #define ARCH_CPU_LITTLE_ENDIAN 1 130 | #else 131 | #define ARCH_CPU_MIPS_FAMILY 1 132 | #define ARCH_CPU_MIPSEL 1 133 | #define ARCH_CPU_32_BITS 1 134 | #define ARCH_CPU_LITTLE_ENDIAN 1 135 | #endif 136 | #else 137 | #error Please add support for your architecture in build/build_config.h 138 | #endif 139 | 140 | // Type detection for wchar_t. 
141 | #if defined(OS_WIN) 142 | #define WCHAR_T_IS_UTF16 143 | #elif defined(OS_POSIX) && defined(COMPILER_GCC) && \ 144 | defined(__WCHAR_MAX__) && \ 145 | (__WCHAR_MAX__ == 0x7fffffff || __WCHAR_MAX__ == 0xffffffff) 146 | #define WCHAR_T_IS_UTF32 147 | #elif defined(OS_POSIX) && defined(COMPILER_GCC) && \ 148 | defined(__WCHAR_MAX__) && \ 149 | (__WCHAR_MAX__ == 0x7fff || __WCHAR_MAX__ == 0xffff) 150 | // On Posix, we'll detect short wchar_t, but projects aren't guaranteed to 151 | // compile in this mode (in particular, Chrome doesn't). This is intended for 152 | // other projects using base who manage their own dependencies and make sure 153 | // short wchar works for them. 154 | #define WCHAR_T_IS_UTF16 155 | #else 156 | #error Please add support for your compiler in build/build_config.h 157 | #endif 158 | 159 | #if defined(OS_ANDROID) 160 | // The compiler thinks std::string::const_iterator and "const char*" are 161 | // equivalent types. 162 | #define STD_STRING_ITERATOR_IS_CHAR_POINTER 163 | // The compiler thinks base::string16::const_iterator and "char16*" are 164 | // equivalent types. 165 | #define BASE_STRING16_ITERATOR_IS_CHAR16_POINTER 166 | #endif 167 | 168 | #endif // BUILD_BUILD_CONFIG_H_ -------------------------------------------------------------------------------- /vendor/gurl/url/third_party/mozilla/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2007, Google Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 
10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | * Neither the name of Google Inc. nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | ------------------------------------------------------------------------------- 31 | 32 | The file url_parse.cc is based on nsURLParsers.cc from Mozilla. This file is 33 | licensed separately as follows: 34 | 35 | The contents of this file are subject to the Mozilla Public License Version 36 | 1.1 (the "License"); you may not use this file except in compliance with 37 | the License. You may obtain a copy of the License at 38 | http://www.mozilla.org/MPL/ 39 | 40 | Software distributed under the License is distributed on an "AS IS" basis, 41 | WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 42 | for the specific language governing rights and limitations under the 43 | License. 
44 | 45 | The Original Code is mozilla.org code. 46 | 47 | The Initial Developer of the Original Code is 48 | Netscape Communications Corporation. 49 | Portions created by the Initial Developer are Copyright (C) 1998 50 | the Initial Developer. All Rights Reserved. 51 | 52 | Contributor(s): 53 | Darin Fisher (original author) 54 | 55 | Alternatively, the contents of this file may be used under the terms of 56 | either the GNU General Public License Version 2 or later (the "GPL"), or 57 | the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 58 | in which case the provisions of the GPL or the LGPL are applicable instead 59 | of those above. If you wish to allow use of your version of this file only 60 | under the terms of either the GPL or the LGPL, and not to allow others to 61 | use your version of this file under the terms of the MPL, indicate your 62 | decision by deleting the provisions above and replace them with the notice 63 | and other provisions required by the GPL or the LGPL. If you do not delete 64 | the provisions above, a recipient may use your version of this file under 65 | the terms of any one of the MPL, the GPL or the LGPL. 66 | -------------------------------------------------------------------------------- /vendor/gurl/url/third_party/mozilla/README.chromium: -------------------------------------------------------------------------------- 1 | Name: url_parse 2 | URL: http://mxr.mozilla.org/comm-central/source/mozilla/netwerk/base/src/nsURLParsers.cpp 3 | License: BSD and MPL 1.1/GPL 2.0/LGPL 2.1 4 | License File: LICENSE.txt 5 | 6 | Description: 7 | 8 | The file url_parse.cc is based on nsURLParsers.cc from Mozilla. 
9 | -------------------------------------------------------------------------------- /vendor/gurl/url/third_party/mozilla/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/commonsearch/urlparse4/fda910309aa189d57473dbb12e2d2acde49c1736/vendor/gurl/url/third_party/mozilla/a.out -------------------------------------------------------------------------------- /vendor/gurl/url/url_canon_filesystemurl.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | // Functions for canonicalizing "filesystem:file:" URLs. 6 | 7 | #include "url/url_canon.h" 8 | #include "url/url_canon_internal.h" 9 | #include "url/url_file.h" 10 | #include "url/url_parse_internal.h" 11 | #include "url/url_util.h" 12 | #include "url/url_util_internal.h" 13 | 14 | namespace url { 15 | 16 | namespace { 17 | 18 | // We use the URLComponentSource for the outer URL, as it can have replacements, 19 | // whereas the inner_url can't, so it uses spec. 20 | template 21 | bool DoCanonicalizeFileSystemURL(const CHAR* spec, 22 | const URLComponentSource& source, 23 | const Parsed& parsed, 24 | CharsetConverter* charset_converter, 25 | CanonOutput* output, 26 | Parsed* new_parsed) { 27 | // filesystem only uses {scheme, path, query, ref} -- clear the rest. 28 | new_parsed->username.reset(); 29 | new_parsed->password.reset(); 30 | new_parsed->host.reset(); 31 | new_parsed->port.reset(); 32 | 33 | const Parsed* inner_parsed = parsed.inner_parsed(); 34 | Parsed new_inner_parsed; 35 | 36 | // Scheme (known, so we don't bother running it through the more 37 | // complicated scheme canonicalizer). 
38 | new_parsed->scheme.begin = output->length(); 39 | output->Append("filesystem:", 11); 40 | new_parsed->scheme.len = 10; 41 | 42 | if (!parsed.inner_parsed() || !parsed.inner_parsed()->scheme.is_valid()) 43 | return false; 44 | 45 | bool success = true; 46 | if (CompareSchemeComponent(spec, inner_parsed->scheme, url::kFileScheme)) { 47 | new_inner_parsed.scheme.begin = output->length(); 48 | output->Append("file://", 7); 49 | new_inner_parsed.scheme.len = 4; 50 | success &= CanonicalizePath(spec, inner_parsed->path, output, 51 | &new_inner_parsed.path); 52 | } else if (IsStandard(spec, inner_parsed->scheme)) { 53 | success = CanonicalizeStandardURL(spec, parsed.inner_parsed()->Length(), 54 | *parsed.inner_parsed(), charset_converter, 55 | output, &new_inner_parsed); 56 | } else { 57 | // TODO(ericu): The URL is wrong, but should we try to output more of what 58 | // we were given? Echoing back filesystem:mailto etc. doesn't seem all that 59 | // useful. 60 | return false; 61 | } 62 | // The filesystem type must be more than just a leading slash for validity. 63 | success &= parsed.inner_parsed()->path.len > 1; 64 | 65 | success &= CanonicalizePath(source.path, parsed.path, output, 66 | &new_parsed->path); 67 | 68 | // Ignore failures for query/ref since the URL can probably still be loaded. 
69 | CanonicalizeQuery(source.query, parsed.query, charset_converter, 70 | output, &new_parsed->query); 71 | CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref); 72 | if (success) 73 | new_parsed->set_inner_parsed(new_inner_parsed); 74 | 75 | return success; 76 | } 77 | 78 | } // namespace 79 | 80 | bool CanonicalizeFileSystemURL(const char* spec, 81 | int spec_len, 82 | const Parsed& parsed, 83 | CharsetConverter* charset_converter, 84 | CanonOutput* output, 85 | Parsed* new_parsed) { 86 | return DoCanonicalizeFileSystemURL( 87 | spec, URLComponentSource(spec), parsed, charset_converter, output, 88 | new_parsed); 89 | } 90 | 91 | bool CanonicalizeFileSystemURL(const base::char16* spec, 92 | int spec_len, 93 | const Parsed& parsed, 94 | CharsetConverter* charset_converter, 95 | CanonOutput* output, 96 | Parsed* new_parsed) { 97 | return DoCanonicalizeFileSystemURL( 98 | spec, URLComponentSource(spec), parsed, charset_converter, 99 | output, new_parsed); 100 | } 101 | 102 | bool ReplaceFileSystemURL(const char* base, 103 | const Parsed& base_parsed, 104 | const Replacements& replacements, 105 | CharsetConverter* charset_converter, 106 | CanonOutput* output, 107 | Parsed* new_parsed) { 108 | URLComponentSource source(base); 109 | Parsed parsed(base_parsed); 110 | SetupOverrideComponents(base, replacements, &source, &parsed); 111 | return DoCanonicalizeFileSystemURL( 112 | base, source, parsed, charset_converter, output, new_parsed); 113 | } 114 | 115 | bool ReplaceFileSystemURL(const char* base, 116 | const Parsed& base_parsed, 117 | const Replacements& replacements, 118 | CharsetConverter* charset_converter, 119 | CanonOutput* output, 120 | Parsed* new_parsed) { 121 | RawCanonOutput<1024> utf8; 122 | URLComponentSource source(base); 123 | Parsed parsed(base_parsed); 124 | SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 125 | return DoCanonicalizeFileSystemURL( 126 | base, source, parsed, charset_converter, output, 
new_parsed); 127 | } 128 | 129 | } // namespace url 130 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_canon_fileurl.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | // Functions for canonicalizing "file:" URLs. 6 | 7 | #include "url/url_canon.h" 8 | #include "url/url_canon_internal.h" 9 | #include "url/url_file.h" 10 | #include "url/url_parse_internal.h" 11 | 12 | namespace url { 13 | 14 | namespace { 15 | 16 | #ifdef WIN32 17 | 18 | // Given a pointer into the spec, this copies and canonicalizes the drive 19 | // letter and colon to the output, if one is found. If there is not a drive 20 | // spec, it won't do anything. The index of the next character in the input 21 | // spec is returned (after the colon when a drive spec is found, the begin 22 | // offset if one is not). 23 | template 24 | int FileDoDriveSpec(const CHAR* spec, int begin, int end, 25 | CanonOutput* output) { 26 | // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo, 27 | // (with backslashes instead of slashes as well). 28 | int num_slashes = CountConsecutiveSlashes(spec, begin, end); 29 | int after_slashes = begin + num_slashes; 30 | 31 | if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end)) 32 | return begin; // Haven't consumed any characters 33 | 34 | // A drive spec is the start of a path, so we need to add a slash for the 35 | // authority terminator (typically the third slash). 36 | output->push_back('/'); 37 | 38 | // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid 39 | // and that it is followed by a colon/pipe. 
40 | 41 | // Normalize Windows drive letters to uppercase 42 | if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z') 43 | output->push_back(static_cast(spec[after_slashes] - 'a' + 'A')); 44 | else 45 | output->push_back(static_cast(spec[after_slashes])); 46 | 47 | // Normalize the character following it to a colon rather than pipe. 48 | output->push_back(':'); 49 | return after_slashes + 2; 50 | } 51 | 52 | #endif // WIN32 53 | 54 | template 55 | bool DoFileCanonicalizePath(const CHAR* spec, 56 | const Component& path, 57 | CanonOutput* output, 58 | Component* out_path) { 59 | // Copies and normalizes the "c:" at the beginning, if present. 60 | out_path->begin = output->length(); 61 | int after_drive; 62 | #ifdef WIN32 63 | after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output); 64 | #else 65 | after_drive = path.begin; 66 | #endif 67 | 68 | // Copies the rest of the path, starting from the slash following the 69 | // drive colon (if any, Windows only), or the first slash of the path. 70 | bool success = true; 71 | if (after_drive < path.end()) { 72 | // Use the regular path canonicalizer to canonicalize the rest of the 73 | // path. Give it a fake output component to write into. DoCanonicalizeFile 74 | // will compute the full path component. 75 | Component sub_path = MakeRange(after_drive, path.end()); 76 | Component fake_output_path; 77 | success = CanonicalizePath(spec, sub_path, output, &fake_output_path); 78 | } else { 79 | // No input path, canonicalize to a slash. 80 | output->push_back('/'); 81 | } 82 | 83 | out_path->len = output->length() - out_path->begin; 84 | return success; 85 | } 86 | 87 | template 88 | bool DoCanonicalizeFileURL(const URLComponentSource& source, 89 | const Parsed& parsed, 90 | CharsetConverter* query_converter, 91 | CanonOutput* output, 92 | Parsed* new_parsed) { 93 | // Things we don't set in file: URLs. 
94 | new_parsed->username = Component(); 95 | new_parsed->password = Component(); 96 | new_parsed->port = Component(); 97 | 98 | // Scheme (known, so we don't bother running it through the more 99 | // complicated scheme canonicalizer). 100 | new_parsed->scheme.begin = output->length(); 101 | output->Append("file://", 7); 102 | new_parsed->scheme.len = 4; 103 | 104 | // Append the host. For many file URLs, this will be empty. For UNC, this 105 | // will be present. 106 | // TODO(brettw) This doesn't do any checking for host name validity. We 107 | // should probably handle validity checking of UNC hosts differently than 108 | // for regular IP hosts. 109 | bool success = CanonicalizeHost(source.host, parsed.host, 110 | output, &new_parsed->host); 111 | success &= DoFileCanonicalizePath(source.path, parsed.path, 112 | output, &new_parsed->path); 113 | CanonicalizeQuery(source.query, parsed.query, query_converter, 114 | output, &new_parsed->query); 115 | 116 | // Ignore failure for refs since the URL can probably still be loaded. 
117 | CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref); 118 | 119 | return success; 120 | } 121 | 122 | } // namespace 123 | 124 | bool CanonicalizeFileURL(const char* spec, 125 | int spec_len, 126 | const Parsed& parsed, 127 | CharsetConverter* query_converter, 128 | CanonOutput* output, 129 | Parsed* new_parsed) { 130 | return DoCanonicalizeFileURL( 131 | URLComponentSource(spec), parsed, query_converter, 132 | output, new_parsed); 133 | } 134 | 135 | bool CanonicalizeFileURL(const base::char16* spec, 136 | int spec_len, 137 | const Parsed& parsed, 138 | CharsetConverter* query_converter, 139 | CanonOutput* output, 140 | Parsed* new_parsed) { 141 | return DoCanonicalizeFileURL( 142 | URLComponentSource(spec), parsed, query_converter, 143 | output, new_parsed); 144 | } 145 | 146 | bool FileCanonicalizePath(const char* spec, 147 | const Component& path, 148 | CanonOutput* output, 149 | Component* out_path) { 150 | return DoFileCanonicalizePath(spec, path, 151 | output, out_path); 152 | } 153 | 154 | bool FileCanonicalizePath(const base::char16* spec, 155 | const Component& path, 156 | CanonOutput* output, 157 | Component* out_path) { 158 | return DoFileCanonicalizePath(spec, path, 159 | output, out_path); 160 | } 161 | 162 | bool ReplaceFileURL(const char* base, 163 | const Parsed& base_parsed, 164 | const Replacements& replacements, 165 | CharsetConverter* query_converter, 166 | CanonOutput* output, 167 | Parsed* new_parsed) { 168 | URLComponentSource source(base); 169 | Parsed parsed(base_parsed); 170 | SetupOverrideComponents(base, replacements, &source, &parsed); 171 | return DoCanonicalizeFileURL( 172 | source, parsed, query_converter, output, new_parsed); 173 | } 174 | 175 | bool ReplaceFileURL(const char* base, 176 | const Parsed& base_parsed, 177 | const Replacements& replacements, 178 | CharsetConverter* query_converter, 179 | CanonOutput* output, 180 | Parsed* new_parsed) { 181 | RawCanonOutput<1024> utf8; 182 | URLComponentSource 
source(base); 183 | Parsed parsed(base_parsed); 184 | SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 185 | return DoCanonicalizeFileURL( 186 | source, parsed, query_converter, output, new_parsed); 187 | } 188 | 189 | } // namespace url 190 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_canon_ip.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef URL_URL_CANON_IP_H_ 6 | #define URL_URL_CANON_IP_H_ 7 | 8 | #include "base/strings/string16.h" 9 | #include "url/third_party/mozilla/url_parse.h" 10 | #include "url/url_canon.h" 11 | #include "url/url_export.h" 12 | 13 | namespace url { 14 | 15 | // Writes the given IPv4 address to |output|. 16 | URL_EXPORT void AppendIPv4Address(const unsigned char address[4], 17 | CanonOutput* output); 18 | 19 | // Writes the given IPv6 address to |output|. 20 | URL_EXPORT void AppendIPv6Address(const unsigned char address[16], 21 | CanonOutput* output); 22 | 23 | // Searches the host name for the portions of the IPv4 address. On success, 24 | // each component will be placed into |components| and it will return true. 25 | // It will return false if the host can not be separated as an IPv4 address 26 | // or if there are any non-7-bit characters or other characters that can not 27 | // be in an IP address. (This is important so we fail as early as possible for 28 | // common non-IP hostnames.) 29 | // 30 | // Not all components may exist. If there are only 3 components, for example, 31 | // the last one will have a length of -1 or 0 to indicate it does not exist. 
32 | // 33 | // Note that many platforms' inet_addr will ignore everything after a space 34 | // in certain circumstances if the stuff before the space looks like an IP 35 | // address. IE6 is included in this. We do NOT handle this case. In many cases, 36 | // the browser's canonicalization will get run before this which converts 37 | // spaces to %20 (in the case of IE7) or rejects them (in the case of Mozilla), 38 | // so this code path never gets hit. Our host canonicalization will notice 39 | // these spaces and escape them, which will make IP address finding fail. This 40 | // seems like better behavior than stripping after a space. 41 | URL_EXPORT bool FindIPv4Components(const char* spec, 42 | const Component& host, 43 | Component components[4]); 44 | URL_EXPORT bool FindIPv4Components(const base::char16* spec, 45 | const Component& host, 46 | Component components[4]); 47 | 48 | // Converts an IPv4 address to a 32-bit number (network byte order). 49 | // 50 | // Possible return values: 51 | // IPV4 - IPv4 address was successfully parsed. 52 | // BROKEN - Input was formatted like an IPv4 address, but overflow occurred 53 | // during parsing. 54 | // NEUTRAL - Input couldn't possibly be interpreted as an IPv4 address. 55 | // It might be an IPv6 address, or a hostname. 56 | // 57 | // On success, |num_ipv4_components| will be populated with the number of 58 | // components in the IPv4 address. 59 | URL_EXPORT CanonHostInfo::Family IPv4AddressToNumber(const char* spec, 60 | const Component& host, 61 | unsigned char address[4], 62 | int* num_ipv4_components); 63 | URL_EXPORT CanonHostInfo::Family IPv4AddressToNumber(const base::char16* spec, 64 | const Component& host, 65 | unsigned char address[4], 66 | int* num_ipv4_components); 67 | 68 | // Converts an IPv6 address to a 128-bit number (network byte order), returning 69 | // true on success. False means that the input was not a valid IPv6 address. 
70 | // 71 | // NOTE that |host| is expected to be surrounded by square brackets. 72 | // i.e. "[::1]" rather than "::1". 73 | URL_EXPORT bool IPv6AddressToNumber(const char* spec, 74 | const Component& host, 75 | unsigned char address[16]); 76 | URL_EXPORT bool IPv6AddressToNumber(const base::char16* spec, 77 | const Component& host, 78 | unsigned char address[16]); 79 | 80 | } // namespace url 81 | 82 | #endif // URL_URL_CANON_IP_H_ 83 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_canon_mailtourl.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | // Functions for canonicalizing "mailto:" URLs. 6 | 7 | #include "url/url_canon.h" 8 | #include "url/url_canon_internal.h" 9 | #include "url/url_file.h" 10 | #include "url/url_parse_internal.h" 11 | 12 | namespace url { 13 | 14 | namespace { 15 | 16 | template 17 | bool DoCanonicalizeMailtoURL(const URLComponentSource& source, 18 | const Parsed& parsed, 19 | CanonOutput* output, 20 | Parsed* new_parsed) { 21 | // mailto: only uses {scheme, path, query} -- clear the rest. 22 | new_parsed->username = Component(); 23 | new_parsed->password = Component(); 24 | new_parsed->host = Component(); 25 | new_parsed->port = Component(); 26 | new_parsed->ref = Component(); 27 | 28 | // Scheme (known, so we don't bother running it through the more 29 | // complicated scheme canonicalizer). 30 | new_parsed->scheme.begin = output->length(); 31 | output->Append("mailto:", 7); 32 | new_parsed->scheme.len = 6; 33 | 34 | bool success = true; 35 | 36 | // Path 37 | if (parsed.path.is_valid()) { 38 | new_parsed->path.begin = output->length(); 39 | 40 | // Copy the path using path URL's more lax escaping rules. 
41 | // We convert to UTF-8 and escape non-ASCII, but leave all 42 | // ASCII characters alone. 43 | int end = parsed.path.end(); 44 | for (int i = parsed.path.begin; i < end; ++i) { 45 | UCHAR uch = static_cast(source.path[i]); 46 | if (uch < 0x20 || uch >= 0x80) 47 | success &= AppendUTF8EscapedChar(source.path, &i, end, output); 48 | else 49 | output->push_back(static_cast(uch)); 50 | } 51 | 52 | new_parsed->path.len = output->length() - new_parsed->path.begin; 53 | } else { 54 | // No path at all 55 | new_parsed->path.reset(); 56 | } 57 | 58 | // Query -- always use the default UTF8 charset converter. 59 | CanonicalizeQuery(source.query, parsed.query, NULL, 60 | output, &new_parsed->query); 61 | 62 | return success; 63 | } 64 | 65 | } // namespace 66 | 67 | bool CanonicalizeMailtoURL(const char* spec, 68 | int spec_len, 69 | const Parsed& parsed, 70 | CanonOutput* output, 71 | Parsed* new_parsed) { 72 | return DoCanonicalizeMailtoURL( 73 | URLComponentSource(spec), parsed, output, new_parsed); 74 | } 75 | 76 | bool CanonicalizeMailtoURL(const base::char16* spec, 77 | int spec_len, 78 | const Parsed& parsed, 79 | CanonOutput* output, 80 | Parsed* new_parsed) { 81 | return DoCanonicalizeMailtoURL( 82 | URLComponentSource(spec), parsed, output, new_parsed); 83 | } 84 | 85 | bool ReplaceMailtoURL(const char* base, 86 | const Parsed& base_parsed, 87 | const Replacements& replacements, 88 | CanonOutput* output, 89 | Parsed* new_parsed) { 90 | URLComponentSource source(base); 91 | Parsed parsed(base_parsed); 92 | SetupOverrideComponents(base, replacements, &source, &parsed); 93 | return DoCanonicalizeMailtoURL( 94 | source, parsed, output, new_parsed); 95 | } 96 | 97 | bool ReplaceMailtoURL(const char* base, 98 | const Parsed& base_parsed, 99 | const Replacements& replacements, 100 | CanonOutput* output, 101 | Parsed* new_parsed) { 102 | RawCanonOutput<1024> utf8; 103 | URLComponentSource source(base); 104 | Parsed parsed(base_parsed); 105 | 
SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 106 | return DoCanonicalizeMailtoURL( 107 | source, parsed, output, new_parsed); 108 | } 109 | 110 | } // namespace url 111 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_canon_pathurl.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | // Functions for canonicalizing "path" URLs. Not to be confused with the path 6 | // of a URL, these are URLs that have no authority section, only a path. For 7 | // example, "javascript:" and "data:". 8 | 9 | #include "url/url_canon.h" 10 | #include "url/url_canon_internal.h" 11 | 12 | namespace url { 13 | 14 | namespace { 15 | 16 | // Canonicalize the given |component| from |source| into |output| and 17 | // |new_component|. If |separator| is non-zero, it is pre-pended to |output| 18 | // prior to the canonicalized component; i.e. for the '?' or '#' characters. 19 | template 20 | bool DoCanonicalizePathComponent(const CHAR* source, 21 | const Component& component, 22 | char separator, 23 | CanonOutput* output, 24 | Component* new_component) { 25 | bool success = true; 26 | if (component.is_valid()) { 27 | if (separator) 28 | output->push_back(separator); 29 | // Copy the path using path URL's more lax escaping rules (think for 30 | // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all 31 | // ASCII characters alone. This helps readability of JavaScript.
32 | new_component->begin = output->length(); 33 | int end = component.end(); 34 | for (int i = component.begin; i < end; i++) { 35 | UCHAR uch = static_cast(source[i]); 36 | if (uch < 0x20 || uch >= 0x80) 37 | success &= AppendUTF8EscapedChar(source, &i, end, output); 38 | else 39 | output->push_back(static_cast(uch)); 40 | } 41 | new_component->len = output->length() - new_component->begin; 42 | } else { 43 | // Empty part. 44 | new_component->reset(); 45 | } 46 | return success; 47 | } 48 | 49 | template 50 | bool DoCanonicalizePathURL(const URLComponentSource& source, 51 | const Parsed& parsed, 52 | CanonOutput* output, 53 | Parsed* new_parsed) { 54 | // Scheme: this will append the colon. 55 | bool success = CanonicalizeScheme(source.scheme, parsed.scheme, 56 | output, &new_parsed->scheme); 57 | 58 | // We assume there's no authority for path URLs. Note that hosts should never 59 | // have -1 length. 60 | new_parsed->username.reset(); 61 | new_parsed->password.reset(); 62 | new_parsed->host.reset(); 63 | new_parsed->port.reset(); 64 | // We allow path URLs to have the path, query and fragment components, but we 65 | // will canonicalize each of them via the weaker path URL rules.
66 | success &= DoCanonicalizePathComponent( 67 | source.path, parsed.path, '\0', output, &new_parsed->path); 68 | success &= DoCanonicalizePathComponent( 69 | source.query, parsed.query, '?', output, &new_parsed->query); 70 | success &= DoCanonicalizePathComponent( 71 | source.ref, parsed.ref, '#', output, &new_parsed->ref); 72 | 73 | return success; 74 | } 75 | 76 | } // namespace 77 | 78 | bool CanonicalizePathURL(const char* spec, 79 | int spec_len, 80 | const Parsed& parsed, 81 | CanonOutput* output, 82 | Parsed* new_parsed) { 83 | return DoCanonicalizePathURL( 84 | URLComponentSource(spec), parsed, output, new_parsed); 85 | } 86 | 87 | bool CanonicalizePathURL(const base::char16* spec, 88 | int spec_len, 89 | const Parsed& parsed, 90 | CanonOutput* output, 91 | Parsed* new_parsed) { 92 | return DoCanonicalizePathURL( 93 | URLComponentSource(spec), parsed, output, new_parsed); 94 | } 95 | 96 | bool ReplacePathURL(const char* base, 97 | const Parsed& base_parsed, 98 | const Replacements& replacements, 99 | CanonOutput* output, 100 | Parsed* new_parsed) { 101 | URLComponentSource source(base); 102 | Parsed parsed(base_parsed); 103 | SetupOverrideComponents(base, replacements, &source, &parsed); 104 | return DoCanonicalizePathURL( 105 | source, parsed, output, new_parsed); 106 | } 107 | 108 | bool ReplacePathURL(const char* base, 109 | const Parsed& base_parsed, 110 | const Replacements& replacements, 111 | CanonOutput* output, 112 | Parsed* new_parsed) { 113 | RawCanonOutput<1024> utf8; 114 | URLComponentSource source(base); 115 | Parsed parsed(base_parsed); 116 | SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 117 | return DoCanonicalizePathURL( 118 | source, parsed, output, new_parsed); 119 | } 120 | 121 | } // namespace url 122 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_canon_query.cc: -------------------------------------------------------------------------------- 1 | // 
Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #include "url/url_canon.h" 6 | #include "url/url_canon_internal.h" 7 | 8 | // Query canonicalization in IE 9 | // ---------------------------- 10 | // IE is very permissive for query parameters specified in links on the page 11 | // (in contrast to links that it constructs itself based on form data). It does 12 | // not unescape any character. It does not reject any escape sequence (be they 13 | // invalid like "%2y" or freaky like %00). 14 | // 15 | // IE only escapes spaces and nothing else. Embedded NULLs, tabs (0x09), 16 | // LF (0x0a), and CR (0x0d) are removed (this probably happens at an earlier 17 | // layer since they are removed from all portions of the URL). All other 18 | // characters are passed unmodified. Invalid UTF-16 sequences are preserved as 19 | // well, with each character in the input being converted to UTF-8. It is the 20 | // server's job to make sense of this invalid query. 21 | // 22 | // Invalid multibyte sequences (for example, invalid UTF-8 on a UTF-8 page) 23 | // are converted to the invalid character and sent as unescaped UTF-8 (0xef, 24 | // 0xbf, 0xbd). This may not be canonicalization, the parser may generate these 25 | // strings before the URL handler ever sees them. 26 | // 27 | // Our query canonicalization 28 | // -------------------------- 29 | // We escape all non-ASCII characters and control characters, like Firefox. 30 | // This is more conformant to the URL spec, and there do not seem to be many 31 | // problems relating to Firefox's behavior. 32 | // 33 | // Like IE, we will never unescape (although the application may want to try 34 | // unescaping to present the user with a more understandable URL). 
We will 35 | // replace all invalid sequences (including invalid UTF-16 sequences, which IE 36 | // doesn't) with the "invalid character," and we will escape it. 37 | 38 | namespace url { 39 | 40 | namespace { 41 | 42 | // Returns true if the characters of |spec| covered by |query| (from 43 | // |query.begin| up to, but not including, |query.end()|) are all 7-bit ASCII. 44 | template 45 | bool IsAllASCII(const CHAR* spec, const Component& query) { 46 | int end = query.end(); 47 | for (int i = query.begin; i < end; i++) { 48 | if (static_cast(spec[i]) >= 0x80) 49 | return false; 50 | } 51 | return true; 52 | } 53 | 54 | // Appends the given string to the output, escaping characters for which 55 | // IsQueryChar() returns false. This version will accept 8 or 16 56 | // bit characters, but assumes that they have only 7-bit values. It also assumes 57 | // that all UTF-8 values are correct, so it doesn't bother checking them. 58 | template 59 | void AppendRaw8BitQueryString(const CHAR* source, int length, 60 | CanonOutput* output) { 61 | for (int i = 0; i < length; i++) { 62 | if (!IsQueryChar(static_cast(source[i]))) 63 | AppendEscapedChar(static_cast(source[i]), output); 64 | else // Doesn't need escaping. 65 | output->push_back(static_cast(source[i])); 66 | } 67 | } 68 | 69 | // Runs the converter on the given UTF-8 input. Since the converter expects 70 | // UTF-16, we have to convert first. The converter must be non-NULL. 71 | void RunConverter(const char* spec, 72 | const Component& query, 73 | CharsetConverter* converter, 74 | CanonOutput* output) { 75 | // This function will replace any misencoded values with the invalid 76 | // character. This is what we want so we don't have to check for error. 77 | RawCanonOutputW<1024> utf16; 78 | ConvertUTF8ToUTF16(&spec[query.begin], query.len, &utf16); 79 | converter->ConvertFromUTF16(utf16.data(), utf16.length(), output); 80 | } 81 | 82 | // Runs the converter with the given UTF-16 input.
We don't have to do 83 | // anything, but this overloaded function allows us to use the same code 84 | // for both UTF-8 and UTF-16 input. 85 | void RunConverter(const base::char16* spec, 86 | const Component& query, 87 | CharsetConverter* converter, 88 | CanonOutput* output) { 89 | converter->ConvertFromUTF16(&spec[query.begin], query.len, output); 90 | } 91 | 92 | template 93 | void DoConvertToQueryEncoding(const CHAR* spec, 94 | const Component& query, 95 | CharsetConverter* converter, 96 | CanonOutput* output) { 97 | if (IsAllASCII(spec, query)) { 98 | // Easy: the input can just be appended with no character set conversions. 99 | AppendRaw8BitQueryString(&spec[query.begin], query.len, output); 100 | 101 | } else { 102 | // Harder: convert to the proper encoding first. 103 | if (converter) { 104 | // Run the converter to get an 8-bit string, then append it, escaping 105 | // necessary values. 106 | RawCanonOutput<1024> eight_bit; 107 | RunConverter(spec, query, converter, &eight_bit); 108 | AppendRaw8BitQueryString(eight_bit.data(), eight_bit.length(), output); 109 | 110 | } else { 111 | // No converter, do our own UTF-8 conversion.
112 | AppendStringOfType(&spec[query.begin], query.len, CHAR_QUERY, output); 113 | } 114 | } 115 | } 116 | 117 | template 118 | void DoCanonicalizeQuery(const CHAR* spec, 119 | const Component& query, 120 | CharsetConverter* converter, 121 | CanonOutput* output, 122 | Component* out_query) { 123 | if (query.len < 0) { 124 | *out_query = Component(); 125 | return; 126 | } 127 | 128 | output->push_back('?'); 129 | out_query->begin = output->length(); 130 | 131 | DoConvertToQueryEncoding(spec, query, converter, output); 132 | 133 | out_query->len = output->length() - out_query->begin; 134 | } 135 | 136 | } // namespace 137 | 138 | void CanonicalizeQuery(const char* spec, 139 | const Component& query, 140 | CharsetConverter* converter, 141 | CanonOutput* output, 142 | Component* out_query) { 143 | DoCanonicalizeQuery(spec, query, converter, 144 | output, out_query); 145 | } 146 | 147 | void CanonicalizeQuery(const base::char16* spec, 148 | const Component& query, 149 | CharsetConverter* converter, 150 | CanonOutput* output, 151 | Component* out_query) { 152 | DoCanonicalizeQuery(spec, query, converter, 153 | output, out_query); 154 | } 155 | 156 | void ConvertUTF16ToQueryEncoding(const base::char16* input, 157 | const Component& query, 158 | CharsetConverter* converter, 159 | CanonOutput* output) { 160 | DoConvertToQueryEncoding(input, query, 161 | converter, output); 162 | } 163 | 164 | } // namespace url 165 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_canon_stdstring.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 
4 | 5 | #include "url/url_canon_stdstring.h" 6 | 7 | namespace url { 8 | 9 | StdStringCanonOutput::StdStringCanonOutput(std::string* str) 10 | : CanonOutput(), str_(str) { 11 | cur_len_ = static_cast(str_->size()); // Append to existing data. 12 | str_->resize(str_->capacity()); 13 | buffer_ = str_->empty() ? NULL : &(*str_)[0]; 14 | buffer_len_ = static_cast(str_->size()); 15 | } 16 | 17 | StdStringCanonOutput::~StdStringCanonOutput() { 18 | // Nothing to do, we don't own the string. 19 | } 20 | 21 | void StdStringCanonOutput::Complete() { 22 | str_->resize(cur_len_); 23 | buffer_len_ = cur_len_; 24 | } 25 | 26 | void StdStringCanonOutput::Resize(int sz) { 27 | str_->resize(sz); 28 | buffer_ = str_->empty() ? NULL : &(*str_)[0]; 29 | buffer_len_ = sz; 30 | } 31 | 32 | } // namespace url 33 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_canon_stdstring.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef URL_URL_CANON_STDSTRING_H_ 6 | #define URL_URL_CANON_STDSTRING_H_ 7 | 8 | // This header file defines a canonicalizer output method class for STL 9 | // strings. Because the canonicalizer tries not to be dependent on the STL, 10 | // we have segregated it here. 11 | 12 | #include 13 | 14 | #include "base/compiler_specific.h" 15 | #include "base/strings/string_piece.h" 16 | #include "url/url_canon.h" 17 | #include "url/url_export.h" 18 | 19 | namespace url { 20 | 21 | // Write into a std::string given in the constructor. This object does not own 22 | // the string itself, and the user must ensure that the string stays alive 23 | // throughout the lifetime of this object. 24 | // 25 | // The given string will be appended to; any existing data in the string will 26 | // be preserved. 
The caller should reserve() the amount of data in the string 27 | // they expect to be written. We will resize if necessary, but that's slow. 28 | // 29 | // Note that when canonicalization is complete, the string will likely have 30 | // unused space at the end because we make the string very big to start out 31 | // with (by |initial_size|). This ends up being important because resize 32 | // operations are slow, and because the base class needs to write directly 33 | // into the buffer. 34 | // 35 | // Therefore, the user should call Complete() before using the string that 36 | // this class wrote into. 37 | class URL_EXPORT StdStringCanonOutput : public CanonOutput { 38 | public: 39 | StdStringCanonOutput(std::string* str); 40 | ~StdStringCanonOutput() override; 41 | 42 | // Must be called after writing has completed but before the string is used. 43 | void Complete(); 44 | 45 | void Resize(int sz) override; 46 | 47 | protected: 48 | std::string* str_; 49 | }; 50 | 51 | // An extension of the Replacements class that allows the setters to use 52 | // StringPieces (implicitly allowing strings or char*s). 53 | // 54 | // The contents of the StringPieces are not copied and must remain valid until 55 | // the StringPieceReplacements object goes out of scope. 
56 | template 57 | class StringPieceReplacements : public Replacements { 58 | public: 59 | void SetSchemeStr(const base::BasicStringPiece& s) { 60 | this->SetScheme(s.data(), Component(0, static_cast(s.length()))); 61 | } 62 | void SetUsernameStr(const base::BasicStringPiece& s) { 63 | this->SetUsername(s.data(), Component(0, static_cast(s.length()))); 64 | } 65 | void SetPasswordStr(const base::BasicStringPiece& s) { 66 | this->SetPassword(s.data(), Component(0, static_cast(s.length()))); 67 | } 68 | void SetHostStr(const base::BasicStringPiece& s) { 69 | this->SetHost(s.data(), Component(0, static_cast(s.length()))); 70 | } 71 | void SetPortStr(const base::BasicStringPiece& s) { 72 | this->SetPort(s.data(), Component(0, static_cast(s.length()))); 73 | } 74 | void SetPathStr(const base::BasicStringPiece& s) { 75 | this->SetPath(s.data(), Component(0, static_cast(s.length()))); 76 | } 77 | void SetQueryStr(const base::BasicStringPiece& s) { 78 | this->SetQuery(s.data(), Component(0, static_cast(s.length()))); 79 | } 80 | void SetRefStr(const base::BasicStringPiece& s) { 81 | this->SetRef(s.data(), Component(0, static_cast(s.length()))); 82 | } 83 | }; 84 | 85 | } // namespace url 86 | 87 | #endif // URL_URL_CANON_STDSTRING_H_ 88 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_canon_stdurl.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | // Functions to canonicalize "standard" URLs, which are ones that have an 6 | // authority section including a host name. 
7 | 8 | #include "url/url_canon.h" 9 | #include "url/url_canon_internal.h" 10 | #include "url/url_constants.h" 11 | 12 | namespace url { 13 | 14 | namespace { 15 | 16 | template 17 | bool DoCanonicalizeStandardURL(const URLComponentSource& source, 18 | const Parsed& parsed, 19 | CharsetConverter* query_converter, 20 | CanonOutput* output, 21 | Parsed* new_parsed) { 22 | // Scheme: this will append the colon. 23 | bool success = CanonicalizeScheme(source.scheme, parsed.scheme, 24 | output, &new_parsed->scheme); 25 | 26 | // Authority (username, password, host, port) 27 | bool have_authority; 28 | if (parsed.username.is_valid() || parsed.password.is_valid() || 29 | parsed.host.is_nonempty() || parsed.port.is_valid()) { 30 | have_authority = true; 31 | 32 | // Only write the authority separators when we have a scheme. 33 | if (parsed.scheme.is_valid()) { 34 | output->push_back('/'); 35 | output->push_back('/'); 36 | } 37 | 38 | // User info: the canonicalizer will handle the : and @. 39 | success &= CanonicalizeUserInfo(source.username, parsed.username, 40 | source.password, parsed.password, 41 | output, 42 | &new_parsed->username, 43 | &new_parsed->password); 44 | 45 | success &= CanonicalizeHost(source.host, parsed.host, 46 | output, &new_parsed->host); 47 | 48 | // Host must not be empty for standard URLs. 49 | if (!parsed.host.is_nonempty()) 50 | success = false; 51 | 52 | // Port: the port canonicalizer will handle the colon. 53 | int default_port = DefaultPortForScheme( 54 | &output->data()[new_parsed->scheme.begin], new_parsed->scheme.len); 55 | success &= CanonicalizePort(source.port, parsed.port, default_port, 56 | output, &new_parsed->port); 57 | } else { 58 | // No authority, clear the components. 59 | have_authority = false; 60 | new_parsed->host.reset(); 61 | new_parsed->username.reset(); 62 | new_parsed->password.reset(); 63 | new_parsed->port.reset(); 64 | success = false; // Standard URLs must have an authority. 
65 | } 66 | 67 | // Path 68 | if (parsed.path.is_valid()) { 69 | success &= CanonicalizePath(source.path, parsed.path, 70 | output, &new_parsed->path); 71 | } else if (have_authority || 72 | parsed.query.is_valid() || parsed.ref.is_valid()) { 73 | // When we have an empty path, make up a path when we have an authority 74 | // or something following the path. The only time we allow an empty 75 | // output path is when there is nothing else. 76 | new_parsed->path = Component(output->length(), 1); 77 | output->push_back('/'); 78 | } else { 79 | // No path at all 80 | new_parsed->path.reset(); 81 | } 82 | 83 | // Query 84 | CanonicalizeQuery(source.query, parsed.query, query_converter, 85 | output, &new_parsed->query); 86 | 87 | // Ref: ignore failure for this, since the page can probably still be loaded. 88 | CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref); 89 | 90 | return success; 91 | } 92 | 93 | } // namespace 94 | 95 | 96 | // Returns the default port for the given canonical scheme, or PORT_UNSPECIFIED 97 | // if the scheme is unknown. 
98 | int DefaultPortForScheme(const char* scheme, int scheme_len) { 99 | int default_port = PORT_UNSPECIFIED; 100 | switch (scheme_len) { 101 | case 4: 102 | if (!strncmp(scheme, kHttpScheme, scheme_len)) 103 | default_port = 80; 104 | break; 105 | case 5: 106 | if (!strncmp(scheme, kHttpsScheme, scheme_len)) 107 | default_port = 443; 108 | break; 109 | case 3: 110 | if (!strncmp(scheme, kFtpScheme, scheme_len)) 111 | default_port = 21; 112 | else if (!strncmp(scheme, kWssScheme, scheme_len)) 113 | default_port = 443; 114 | break; 115 | case 6: 116 | if (!strncmp(scheme, kGopherScheme, scheme_len)) 117 | default_port = 70; 118 | break; 119 | case 2: 120 | if (!strncmp(scheme, kWsScheme, scheme_len)) 121 | default_port = 80; 122 | break; 123 | } 124 | return default_port; 125 | } 126 | 127 | bool CanonicalizeStandardURL(const char* spec, 128 | int spec_len, 129 | const Parsed& parsed, 130 | CharsetConverter* query_converter, 131 | CanonOutput* output, 132 | Parsed* new_parsed) { 133 | return DoCanonicalizeStandardURL( 134 | URLComponentSource(spec), parsed, query_converter, 135 | output, new_parsed); 136 | } 137 | 138 | bool CanonicalizeStandardURL(const base::char16* spec, 139 | int spec_len, 140 | const Parsed& parsed, 141 | CharsetConverter* query_converter, 142 | CanonOutput* output, 143 | Parsed* new_parsed) { 144 | return DoCanonicalizeStandardURL( 145 | URLComponentSource(spec), parsed, query_converter, 146 | output, new_parsed); 147 | } 148 | 149 | // It might be nice in the future to optimize this so unchanged components don't 150 | // need to be recanonicalized. This is especially true since the common case for 151 | // ReplaceComponents is removing things we don't want, like reference fragments 152 | // and usernames. These cases can become more efficient if we can assume the 153 | // rest of the URL is OK with these removed (or only the modified parts 154 | // recanonicalized). This would be much more complex to implement, however. 
155 | // 156 | // You would also need to update DoReplaceComponents in url_util.cc which 157 | // relies on this re-checking everything (see the comment there for why). 158 | bool ReplaceStandardURL(const char* base, 159 | const Parsed& base_parsed, 160 | const Replacements& replacements, 161 | CharsetConverter* query_converter, 162 | CanonOutput* output, 163 | Parsed* new_parsed) { 164 | URLComponentSource source(base); 165 | Parsed parsed(base_parsed); 166 | SetupOverrideComponents(base, replacements, &source, &parsed); 167 | return DoCanonicalizeStandardURL( 168 | source, parsed, query_converter, output, new_parsed); 169 | } 170 | 171 | // For 16-bit replacements, we turn all the replacements into UTF-8 so the 172 | // regular code path can be used. 173 | bool ReplaceStandardURL(const char* base, 174 | const Parsed& base_parsed, 175 | const Replacements& replacements, 176 | CharsetConverter* query_converter, 177 | CanonOutput* output, 178 | Parsed* new_parsed) { 179 | RawCanonOutput<1024> utf8; 180 | URLComponentSource source(base); 181 | Parsed parsed(base_parsed); 182 | SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 183 | return DoCanonicalizeStandardURL( 184 | source, parsed, query_converter, output, new_parsed); 185 | } 186 | 187 | } // namespace url 188 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_constants.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 
4 | 5 | #include "url/url_constants.h" 6 | 7 | namespace url { 8 | 9 | const char kAboutBlankURL[] = "about:blank"; 10 | 11 | const char kAboutScheme[] = "about"; 12 | const char kBlobScheme[] = "blob"; 13 | const char kContentScheme[] = "content"; 14 | const char kDataScheme[] = "data"; 15 | const char kFileScheme[] = "file"; 16 | const char kFileSystemScheme[] = "filesystem"; 17 | const char kFtpScheme[] = "ftp"; 18 | const char kGopherScheme[] = "gopher"; 19 | const char kHttpScheme[] = "http"; 20 | const char kHttpsScheme[] = "https"; 21 | const char kJavaScriptScheme[] = "javascript"; 22 | const char kMailToScheme[] = "mailto"; 23 | const char kWsScheme[] = "ws"; 24 | const char kWssScheme[] = "wss"; 25 | 26 | const char kStandardSchemeSeparator[] = "://"; 27 | 28 | } // namespace url 29 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_constants.h: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef URL_URL_CONSTANTS_H_ 6 | #define URL_URL_CONSTANTS_H_ 7 | 8 | #include "url/url_export.h" 9 | 10 | namespace url { 11 | 12 | URL_EXPORT extern const char kAboutBlankURL[]; 13 | 14 | URL_EXPORT extern const char kAboutScheme[]; 15 | URL_EXPORT extern const char kBlobScheme[]; 16 | // The content scheme is specific to Android for identifying a stored file. 
17 | URL_EXPORT extern const char kContentScheme[]; 18 | URL_EXPORT extern const char kDataScheme[]; 19 | URL_EXPORT extern const char kFileScheme[]; 20 | URL_EXPORT extern const char kFileSystemScheme[]; 21 | URL_EXPORT extern const char kFtpScheme[]; 22 | URL_EXPORT extern const char kGopherScheme[]; 23 | URL_EXPORT extern const char kHttpScheme[]; 24 | URL_EXPORT extern const char kHttpsScheme[]; 25 | URL_EXPORT extern const char kJavaScriptScheme[]; 26 | URL_EXPORT extern const char kMailToScheme[]; 27 | URL_EXPORT extern const char kWsScheme[]; 28 | URL_EXPORT extern const char kWssScheme[]; 29 | 30 | // Used to separate a standard scheme and the hostname: "://". 31 | URL_EXPORT extern const char kStandardSchemeSeparator[]; 32 | 33 | } // namespace url 34 | 35 | #endif // URL_URL_CONSTANTS_H_ 36 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_export.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 
4 | 5 | #ifndef URL_URL_EXPORT_H_ 6 | #define URL_URL_EXPORT_H_ 7 | 8 | #if defined(COMPONENT_BUILD) 9 | #if defined(WIN32) 10 | 11 | #if defined(URL_IMPLEMENTATION) 12 | #define URL_EXPORT __declspec(dllexport) 13 | #else 14 | #define URL_EXPORT __declspec(dllimport) 15 | #endif // defined(URL_IMPLEMENTATION) 16 | 17 | #else // !defined(WIN32) 18 | 19 | #if defined(URL_IMPLEMENTATION) 20 | #define URL_EXPORT __attribute__((visibility("default"))) 21 | #else 22 | #define URL_EXPORT 23 | #endif // defined(URL_IMPLEMENTATION) 24 | 25 | #endif // defined(WIN32) 26 | 27 | #else // !defined(COMPONENT_BUILD) 28 | 29 | #define URL_EXPORT 30 | 31 | #endif // defined(COMPONENT_BUILD) 32 | 33 | #endif // URL_URL_EXPORT_H_ 34 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_file.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef URL_URL_FILE_H_ 6 | #define URL_URL_FILE_H_ 7 | 8 | // Provides shared functions used by the internals of the parser and 9 | // canonicalizer for file URLs. Do not use outside of these modules. 10 | 11 | #include "url/url_parse_internal.h" 12 | 13 | namespace url { 14 | 15 | #ifdef WIN32 16 | 17 | // We allow both "c:" and "c|" as drive identifiers. 18 | inline bool IsWindowsDriveSeparator(base::char16 ch) { 19 | return ch == ':' || ch == '|'; 20 | } 21 | inline bool IsWindowsDriveLetter(base::char16 ch) { 22 | return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); 23 | } 24 | 25 | #endif // WIN32 26 | 27 | // Returns the index of the next slash in the input at or after the given index, or 28 | // spec_len if the end of the input is reached.
29 | template 30 | inline int FindNextSlash(const CHAR* spec, int begin_index, int spec_len) { 31 | int idx = begin_index; 32 | while (idx < spec_len && !IsURLSlash(spec[idx])) 33 | idx++; 34 | return idx; 35 | } 36 | 37 | #ifdef WIN32 38 | 39 | // Returns true if the start_offset in the given spec looks like it begins a 40 | // drive spec, for example "c:". This function explicitly handles start_offset 41 | // values that are equal to or larger than the spec_len to simplify callers. 42 | // 43 | // If this returns true, the spec is guaranteed to have a valid drive letter 44 | // plus a colon starting at |start_offset|. 45 | template 46 | inline bool DoesBeginWindowsDriveSpec(const CHAR* spec, int start_offset, 47 | int spec_len) { 48 | int remaining_len = spec_len - start_offset; 49 | if (remaining_len < 2) 50 | return false; // Not enough room. 51 | if (!IsWindowsDriveLetter(spec[start_offset])) 52 | return false; // Doesn't start with a valid drive letter. 53 | if (!IsWindowsDriveSeparator(spec[start_offset + 1])) 54 | return false; // Isn't followed with a drive separator. 55 | return true; 56 | } 57 | 58 | // Returns true if the start_offset in the given text looks like it begins a 59 | // UNC path, for example "\\". This function explicitly handles start_offset 60 | // values that are equal to or larger than the spec_len to simplify callers. 61 | // 62 | // When strict_slashes is set, this function will only accept backslashes as is 63 | // standard for Windows. Otherwise, it will accept forward slashes as well 64 | // which we use for a lot of URL handling. 
65 | template 66 | inline bool DoesBeginUNCPath(const CHAR* text, 67 | int start_offset, 68 | int len, 69 | bool strict_slashes) { 70 | int remaining_len = len - start_offset; 71 | if (remaining_len < 2) 72 | return false; 73 | 74 | if (strict_slashes) 75 | return text[start_offset] == '\\' && text[start_offset + 1] == '\\'; 76 | return IsURLSlash(text[start_offset]) && IsURLSlash(text[start_offset + 1]); 77 | } 78 | 79 | #endif // WIN32 80 | 81 | } // namespace url 82 | 83 | #endif // URL_URL_FILE_H_ 84 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_parse_file.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | //#include "base/logging.h" 6 | #include "url/third_party/mozilla/url_parse.h" 7 | #include "url/url_file.h" 8 | #include "url/url_parse_internal.h" 9 | 10 | // Interesting IE file:isms... 11 | // 12 | // INPUT OUTPUT 13 | // ========================= ============================== 14 | // file:/foo/bar file:///foo/bar 15 | // The result here seems totally invalid!?!? This isn't UNC. 16 | // 17 | // file:/ 18 | // file:// or any other number of slashes 19 | // IE6 doesn't do anything at all if you click on this link. No error: 20 | // nothing. IE6's history system seems to always color this link, so I'm 21 | // guessing that it maps internally to the empty URL. 22 | // 23 | // C:\ file:///C:/ 24 | // When on a file: URL source page, this link will work. When over HTTP, 25 | // the file: URL will appear in the status bar but the link will not work 26 | // (security restriction for all file URLs). 27 | // 28 | // file:foo/ file:foo/ (invalid?!?!?) 29 | // file:/foo/ file:///foo/ (invalid?!?!?) 
30 | // file://foo/ file://foo/ (UNC to server "foo") 31 | // file:///foo/ file:///foo/ (invalid, seems to be a file) 32 | // file:////foo/ file://foo/ (UNC to server "foo") 33 | // Any more than four slashes is also treated as UNC. 34 | // 35 | // file:C:/ file://C:/ 36 | // file:/C:/ file://C:/ 37 | // The number of slashes after "file:" doesn't matter if the thing following 38 | // it looks like an absolute drive path. Also, slashes and backslashes are 39 | // equally valid here. 40 | 41 | namespace url { 42 | 43 | namespace { 44 | 45 | // A subcomponent of DoParseFileURL, the input of this function should be a UNC 46 | // path name, with the index of the first character after the slashes following 47 | // the scheme given in |after_slashes|. This will initialize the host, path, 48 | // query, and ref, and leave the other output components untouched 49 | // (DoParseFileURL handles these for us). 50 | template 51 | void DoParseUNC(const CHAR* spec, 52 | int after_slashes, 53 | int spec_len, 54 | Parsed* parsed) { 55 | int next_slash = FindNextSlash(spec, after_slashes, spec_len); 56 | if (next_slash == spec_len) { 57 | // No additional slash found, as in "file://foo", treat the text as the 58 | // host with no path (this will end up being UNC to server "foo"). 59 | int host_len = spec_len - after_slashes; 60 | if (host_len) 61 | parsed->host = Component(after_slashes, host_len); 62 | else 63 | parsed->host.reset(); 64 | parsed->path.reset(); 65 | return; 66 | } 67 | 68 | #ifdef WIN32 69 | // See if we have something that looks like a path following the first 70 | // component. As in "file://localhost/c:/", we get "c:/" out. We want to 71 | // treat this as having no host but the path given. Works on Windows only.
72 | if (DoesBeginWindowsDriveSpec(spec, next_slash + 1, spec_len)) { 73 | parsed->host.reset(); 74 | ParsePathInternal(spec, MakeRange(next_slash, spec_len), 75 | &parsed->path, &parsed->query, &parsed->ref); 76 | return; 77 | } 78 | #endif 79 | 80 | // Otherwise, everything up until that first slash we found is the host name, 81 | // which will end up being the UNC host. For example "file://foo/bar.txt" 82 | // will get a server name of "foo" and a path of "/bar.txt". Later, on Windows, 83 | // this should be treated as the filename "\\foo\bar.txt" in proper UNC 84 | // notation. 85 | int host_len = next_slash - after_slashes; 86 | if (host_len) 87 | parsed->host = MakeRange(after_slashes, next_slash); 88 | else 89 | parsed->host.reset(); 90 | if (next_slash < spec_len) { 91 | ParsePathInternal(spec, MakeRange(next_slash, spec_len), 92 | &parsed->path, &parsed->query, &parsed->ref); 93 | } else { 94 | parsed->path.reset(); 95 | } 96 | } 97 | 98 | // A subcomponent of DoParseFileURL, the input should be a local file, with the 99 | // beginning of the path indicated by the index in |path_begin|. This will 100 | // initialize the host, path, query, and ref, and leave the other output 101 | // components untouched (DoParseFileURL handles these for us). 102 | template 103 | void DoParseLocalFile(const CHAR* spec, 104 | int path_begin, 105 | int spec_len, 106 | Parsed* parsed) { 107 | parsed->host.reset(); 108 | ParsePathInternal(spec, MakeRange(path_begin, spec_len), 109 | &parsed->path, &parsed->query, &parsed->ref); 110 | } 111 | 112 | // Backend for the external functions that operates on either char type. 113 | // Handles cases where there is a scheme, but also when handed the first 114 | // character following the "file:" at the beginning of the spec. If so, 115 | // this is usually a slash, but needn't be; we allow paths like "file:c:\foo".
116 | template 117 | void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) { 118 | //DCHECK(spec_len >= 0); 119 | 120 | // Get the parts we never use for file URLs out of the way. 121 | parsed->username.reset(); 122 | parsed->password.reset(); 123 | parsed->port.reset(); 124 | 125 | // Many of the code paths don't set these, so it's convenient to just clear 126 | // them. We'll write them in those cases we need them. 127 | parsed->query.reset(); 128 | parsed->ref.reset(); 129 | 130 | // Strip leading & trailing spaces and control characters. 131 | int begin = 0; 132 | TrimURL(spec, &begin, &spec_len); 133 | 134 | // Find the scheme, if any. 135 | int num_slashes = CountConsecutiveSlashes(spec, begin, spec_len); 136 | int after_scheme; 137 | int after_slashes; 138 | #ifdef WIN32 139 | // See how many slashes there are. We want to handle cases like UNC but also 140 | // "/c:/foo". This is when there is no scheme, so we can allow pages to do 141 | // links like "c:/foo/bar" or "//foo/bar". This is also called by the 142 | // relative URL resolver when it determines there is an absolute URL, which 143 | // may give us input like "/c:/foo". 144 | after_slashes = begin + num_slashes; 145 | if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) { 146 | // Windows path, don't try to extract the scheme (for example, "c:\foo"). 147 | parsed->scheme.reset(); 148 | after_scheme = after_slashes; 149 | } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) { 150 | // Windows UNC path: don't try to extract the scheme, but keep the slashes. 151 | parsed->scheme.reset(); 152 | after_scheme = begin; 153 | } else 154 | #endif 155 | { 156 | // ExtractScheme doesn't understand the possibility of filenames with 157 | // colons in them, in which case it returns the entire spec up to the 158 | // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as 159 | // the foo.c: scheme. 
160 | if (!num_slashes && 161 | ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) { 162 | // Offset the results since we gave ExtractScheme a substring. 163 | parsed->scheme.begin += begin; 164 | after_scheme = parsed->scheme.end() + 1; 165 | } else { 166 | // No scheme found, remember that. 167 | parsed->scheme.reset(); 168 | after_scheme = begin; 169 | } 170 | } 171 | 172 | // Handle empty specs ones that contain only whitespace or control chars, 173 | // or that are just the scheme (for example "file:"). 174 | if (after_scheme == spec_len) { 175 | parsed->host.reset(); 176 | parsed->path.reset(); 177 | return; 178 | } 179 | 180 | num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len); 181 | after_slashes = after_scheme + num_slashes; 182 | #ifdef WIN32 183 | // Check whether the input is a drive again. We checked above for windows 184 | // drive specs, but that's only at the very beginning to see if we have a 185 | // scheme at all. This test will be duplicated in that case, but will 186 | // additionally handle all cases with a real scheme such as "file:///C:/". 187 | if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) && 188 | num_slashes != 3) { 189 | // Anything not beginning with a drive spec ("c:\") on Windows is treated 190 | // as UNC, with the exception of three slashes which always means a file. 191 | // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails. 192 | DoParseUNC(spec, after_slashes, spec_len, parsed); 193 | return; 194 | } 195 | #else 196 | // file: URL with exactly 2 slashes is considered to have a host component. 197 | if (num_slashes == 2) { 198 | DoParseUNC(spec, after_slashes, spec_len, parsed); 199 | return; 200 | } 201 | #endif // WIN32 202 | 203 | // Easy and common case, the full path immediately follows the scheme 204 | // (modulo slashes), as in "file://c:/foo". Just treat everything from 205 | // there to the end as the path. Empty hosts have 0 length instead of -1. 
206 | // We include the last slash as part of the path if there is one. 207 | DoParseLocalFile(spec, 208 | num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme, 209 | spec_len, parsed); 210 | } 211 | 212 | } // namespace 213 | 214 | void ParseFileURL(const char* url, int url_len, Parsed* parsed) { 215 | DoParseFileURL(url, url_len, parsed); 216 | } 217 | 218 | void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed) { 219 | DoParseFileURL(url, url_len, parsed); 220 | } 221 | 222 | } // namespace url 223 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_parse_internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef URL_URL_PARSE_INTERNAL_H_ 6 | #define URL_URL_PARSE_INTERNAL_H_ 7 | 8 | // Contains common inline helper functions used by the URL parsing routines. 9 | 10 | #include "url/third_party/mozilla/url_parse.h" 11 | 12 | namespace url { 13 | 14 | // We treat slashes and backslashes the same for IE compatibility. 15 | inline bool IsURLSlash(base::char16 ch) { 16 | return ch == '/' || ch == '\\'; 17 | } 18 | 19 | // Returns true if we should trim this character from the URL because it is a 20 | // space or a control character. 21 | inline bool ShouldTrimFromURL(base::char16 ch) { 22 | return ch <= ' '; 23 | } 24 | 25 | // Given an already-initialized begin index and length, this shrinks the range 26 | // to eliminate "should-be-trimmed" characters. Note that the length does *not* 27 | // indicate the length of untrimmed data from |*begin|, but rather the position 28 | // in the input string (so the string starts at character |*begin| in the spec, 29 | // and goes until |*len|). 
30 | template 31 | inline void TrimURL(const CHAR* spec, int* begin, int* len, 32 | bool trim_path_end = true) { 33 | // Strip leading whitespace and control characters. 34 | while (*begin < *len && ShouldTrimFromURL(spec[*begin])) 35 | (*begin)++; 36 | 37 | if (trim_path_end) { 38 | // Strip trailing whitespace and control characters. We need the >i test 39 | // for when the input string is all blanks; we don't want to back past the 40 | // input. 41 | while (*len > *begin && ShouldTrimFromURL(spec[*len - 1])) 42 | (*len)--; 43 | } 44 | } 45 | 46 | // Counts the number of consecutive slashes starting at the given offset 47 | // in the given string of the given length. 48 | template 49 | inline int CountConsecutiveSlashes(const CHAR *str, 50 | int begin_offset, int str_len) { 51 | int count = 0; 52 | while (begin_offset + count < str_len && 53 | IsURLSlash(str[begin_offset + count])) 54 | ++count; 55 | return count; 56 | } 57 | 58 | // Internal functions in url_parse.cc that parse the path, that is, everything 59 | // following the authority section. The input is the range of everything 60 | // following the authority section, and the output is the identified ranges. 61 | // 62 | // This is designed for the file URL parser or other consumers who may do 63 | // special stuff at the beginning, but want regular path parsing, it just 64 | // maps to the internal parsing function for paths. 65 | void ParsePathInternal(const char* spec, 66 | const Component& path, 67 | Component* filepath, 68 | Component* query, 69 | Component* ref); 70 | void ParsePathInternal(const base::char16* spec, 71 | const Component& path, 72 | Component* filepath, 73 | Component* query, 74 | Component* ref); 75 | 76 | 77 | // Given a spec and a pointer to the character after the colon following the 78 | // scheme, this parses it and fills in the structure, Every item in the parsed 79 | // structure is filled EXCEPT for the scheme, which is untouched. 
80 | void ParseAfterScheme(const char* spec, 81 | int spec_len, 82 | int after_scheme, 83 | Parsed* parsed); 84 | void ParseAfterScheme(const base::char16* spec, 85 | int spec_len, 86 | int after_scheme, 87 | Parsed* parsed); 88 | 89 | } // namespace url 90 | 91 | #endif // URL_URL_PARSE_INTERNAL_H_ 92 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_util.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef URL_URL_UTIL_H_ 6 | #define URL_URL_UTIL_H_ 7 | 8 | #include 9 | 10 | #include "base/strings/string16.h" 11 | #include "url/third_party/mozilla/url_parse.h" 12 | #include "url/url_canon.h" 13 | #include "url/url_constants.h" 14 | #include "url/url_export.h" 15 | 16 | namespace url { 17 | 18 | // Init ------------------------------------------------------------------------ 19 | 20 | // Initialization is NOT required, it will be implicitly initialized when first 21 | // used. However, this implicit initialization is NOT threadsafe. If you are 22 | // using this library in a threaded environment and don't have a consistent 23 | // "first call" (an example might be calling AddStandardScheme with your special 24 | // application-specific schemes) then you will want to call initialize before 25 | // spawning any threads. 26 | // 27 | // It is OK to call this function more than once, subsequent calls will be 28 | // no-ops, unless Shutdown was called in the mean time. This will also be a 29 | // no-op if other calls to the library have forced an initialization beforehand. 30 | URL_EXPORT void Initialize(); 31 | 32 | // Cleanup is not required, except some strings may leak. For most user 33 | // applications, this is fine. 
If you're using it in a library that may get 34 | // loaded and unloaded, you'll want to unload to properly clean up your 35 | // library. 36 | URL_EXPORT void Shutdown(); 37 | 38 | // Schemes -------------------------------------------------------------------- 39 | 40 | // Types of a scheme representing the requirements on the data represented by 41 | // the authority component of a URL with the scheme. 42 | enum SchemeType { 43 | // The authority component of a URL with the scheme, if any, has the port 44 | // (the default values may be omitted in a serialization). 45 | SCHEME_WITH_PORT, 46 | // The authority component of a URL with the scheme, if any, doesn't have a 47 | // port. 48 | SCHEME_WITHOUT_PORT, 49 | // A URL with the scheme doesn't have the authority component. 50 | SCHEME_WITHOUT_AUTHORITY, 51 | }; 52 | 53 | // A pair for representing a standard scheme name and the SchemeType for it. 54 | struct URL_EXPORT SchemeWithType { 55 | const char* scheme; 56 | SchemeType type; 57 | }; 58 | 59 | // Adds an application-defined scheme to the internal list of "standard-format" 60 | // URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic 61 | // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3). 62 | // 63 | // This function is not threadsafe and can not be called concurrently with any 64 | // other url_util function. It will assert if the list of standard schemes has 65 | // been locked (see LockStandardSchemes). 66 | URL_EXPORT void AddStandardScheme(const char* new_scheme, 67 | SchemeType scheme_type); 68 | 69 | // Sets a flag to prevent future calls to AddStandardScheme from succeeding. 70 | // 71 | // This is designed to help prevent errors for multithreaded applications. 72 | // Normal usage would be to call AddStandardScheme for your custom schemes at 73 | // the beginning of program initialization, and then LockStandardSchemes. 
This 74 | // prevents future callers from mistakenly calling AddStandardScheme when the 75 | // program is running with multiple threads, where such usage would be 76 | // dangerous. 77 | // 78 | // We could have had AddStandardScheme use a lock instead, but that would add 79 | // some platform-specific dependencies we don't otherwise have now, and is 80 | // overkill considering the normal usage is so simple. 81 | URL_EXPORT void LockStandardSchemes(); 82 | 83 | // Locates the scheme in the given string and places it into |found_scheme|, 84 | // which may be NULL to indicate the caller does not care about the range. 85 | // 86 | // Returns whether the given |compare| scheme matches the scheme found in the 87 | // input (if any). The |compare| scheme must be a valid canonical scheme or 88 | // the result of the comparison is undefined. 89 | URL_EXPORT bool FindAndCompareScheme(const char* str, 90 | int str_len, 91 | const char* compare, 92 | Component* found_scheme); 93 | URL_EXPORT bool FindAndCompareScheme(const base::char16* str, 94 | int str_len, 95 | const char* compare, 96 | Component* found_scheme); 97 | inline bool FindAndCompareScheme(const std::string& str, 98 | const char* compare, 99 | Component* found_scheme) { 100 | return FindAndCompareScheme(str.data(), static_cast(str.size()), 101 | compare, found_scheme); 102 | } 103 | inline bool FindAndCompareScheme(const base::string16& str, 104 | const char* compare, 105 | Component* found_scheme) { 106 | return FindAndCompareScheme(str.data(), static_cast(str.size()), 107 | compare, found_scheme); 108 | } 109 | 110 | // Returns true if the given scheme identified by |scheme| within |spec| is in 111 | // the list of known standard-format schemes (see AddStandardScheme). 
112 | URL_EXPORT bool IsStandard(const char* spec, const Component& scheme); 113 | URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme); 114 | 115 | // Returns true and sets |type| to the SchemeType of the given scheme 116 | // identified by |scheme| within |spec| if the scheme is in the list of known 117 | // standard-format schemes (see AddStandardScheme). 118 | URL_EXPORT bool GetStandardSchemeType(const char* spec, 119 | const Component& scheme, 120 | SchemeType* type); 121 | 122 | // URL library wrappers ------------------------------------------------------- 123 | 124 | // Parses the given spec according to the extracted scheme type. Normal users 125 | // should use the URL object, although this may be useful if performance is 126 | // critical and you don't want to do the heap allocation for the std::string. 127 | // 128 | // As with the Canonicalize* functions, the charset converter can 129 | // be NULL to use UTF-8 (it will be faster in this case). 130 | // 131 | // Returns true if a valid URL was produced, false if not. On failure, the 132 | // output and parsed structures will still be filled and will be consistent, 133 | // but they will not represent a loadable URL. 134 | URL_EXPORT bool Canonicalize(const char* spec, 135 | int spec_len, 136 | bool trim_path_end, 137 | CharsetConverter* charset_converter, 138 | CanonOutput* output, 139 | Parsed* output_parsed); 140 | URL_EXPORT bool Canonicalize(const base::char16* spec, 141 | int spec_len, 142 | bool trim_path_end, 143 | CharsetConverter* charset_converter, 144 | CanonOutput* output, 145 | Parsed* output_parsed); 146 | 147 | // Resolves a potentially relative URL relative to the given parsed base URL. 148 | // The base MUST be valid. The resulting canonical URL and parsed information 149 | // will be placed in to the given out variables. 150 | // 151 | // The relative need not be relative. 
If we discover that it's absolute, this 152 | // will produce a canonical version of that URL. See Canonicalize() for more 153 | // about the charset_converter. 154 | // 155 | // Returns true if the output is valid, false if the input could not produce 156 | // a valid URL. 157 | URL_EXPORT bool ResolveRelative(const char* base_spec, 158 | int base_spec_len, 159 | const Parsed& base_parsed, 160 | const char* relative, 161 | int relative_length, 162 | CharsetConverter* charset_converter, 163 | CanonOutput* output, 164 | Parsed* output_parsed); 165 | URL_EXPORT bool ResolveRelative(const char* base_spec, 166 | int base_spec_len, 167 | const Parsed& base_parsed, 168 | const base::char16* relative, 169 | int relative_length, 170 | CharsetConverter* charset_converter, 171 | CanonOutput* output, 172 | Parsed* output_parsed); 173 | 174 | // Replaces components in the given VALID input URL. The new canonical URL info 175 | // is written to output and out_parsed. 176 | // 177 | // Returns true if the resulting URL is valid. 178 | URL_EXPORT bool ReplaceComponents(const char* spec, 179 | int spec_len, 180 | const Parsed& parsed, 181 | const Replacements& replacements, 182 | CharsetConverter* charset_converter, 183 | CanonOutput* output, 184 | Parsed* out_parsed); 185 | URL_EXPORT bool ReplaceComponents( 186 | const char* spec, 187 | int spec_len, 188 | const Parsed& parsed, 189 | const Replacements& replacements, 190 | CharsetConverter* charset_converter, 191 | CanonOutput* output, 192 | Parsed* out_parsed); 193 | 194 | // String helper functions ---------------------------------------------------- 195 | 196 | // Unescapes the given string using URL escaping rules. 197 | URL_EXPORT void DecodeURLEscapeSequences(const char* input, 198 | int length, 199 | CanonOutputW* output); 200 | 201 | // Escapes the given string as defined by the JS method encodeURIComponent. 
See 202 | // https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent 203 | URL_EXPORT void EncodeURIComponent(const char* input, 204 | int length, 205 | CanonOutput* output); 206 | 207 | } // namespace url 208 | 209 | #endif // URL_URL_UTIL_H_ 210 | -------------------------------------------------------------------------------- /vendor/gurl/url/url_util_internal.h: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The Chromium Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style license that can be 3 | // found in the LICENSE file. 4 | 5 | #ifndef URL_URL_UTIL_INTERNAL_H_ 6 | #define URL_URL_UTIL_INTERNAL_H_ 7 | 8 | #include 9 | 10 | #include "base/strings/string16.h" 11 | #include "url/third_party/mozilla/url_parse.h" 12 | 13 | namespace url { 14 | 15 | // Given a string and a range inside the string, compares it to the given 16 | // lower-case |compare_to| buffer. 17 | bool CompareSchemeComponent(const char* spec, 18 | const Component& component, 19 | const char* compare_to); 20 | bool CompareSchemeComponent(const base::char16* spec, 21 | const Component& component, 22 | const char* compare_to); 23 | 24 | } // namespace url 25 | 26 | #endif // URL_URL_UTIL_INTERNAL_H_ 27 | --------------------------------------------------------------------------------