├── .gitignore
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── benchmarks
    └── urls.py
├── requirements.txt
├── setup.py
├── tests
    ├── test_urlparse.py
    ├── test_urlparse4.py
    └── urls
    │   ├── blink-performancetests.txt
    │   └── seeds_es_dmoz.txt
├── urlparse4
    ├── __init__.py
    ├── cgurl.cpp
    ├── cgurl.pyx
    ├── chromium_gurl.pxd
    └── mozilla_url_parse.pxd
└── vendor
    └── gurl
        ├── base
            ├── base_export.h
            ├── basictypes.h
            ├── compiler_specific.h
            ├── macros.h
            ├── memory
            │   └── scoped_ptr.h
            ├── move.h
            ├── strings
            │   ├── cscope.out
            │   ├── string16.cc
            │   ├── string16.h
            │   ├── string_piece.cc
            │   ├── string_piece.h
            │   ├── string_util.cc
            │   ├── string_util.h
            │   ├── utf_string_conversion_utils.cc
            │   ├── utf_string_conversion_utils.h
            │   ├── utf_string_conversions.cc
            │   └── utf_string_conversions.h
            ├── template_util.h
            └── third_party
            │   └── icu
            │       ├── LICENSE
            │       ├── README.chromium
            │       ├── icu_utf.cc
            │       └── icu_utf.h
        ├── build
            └── build_config.h
        └── url
            ├── gurl.cc
            ├── gurl.h
            ├── third_party
                └── mozilla
                │   ├── LICENSE.txt
                │   ├── README.chromium
                │   ├── a.out
                │   ├── url_parse.cc
                │   └── url_parse.h
            ├── url_canon.h
            ├── url_canon_etc.cc
            ├── url_canon_filesystemurl.cc
            ├── url_canon_fileurl.cc
            ├── url_canon_host.cc
            ├── url_canon_internal.cc
            ├── url_canon_internal.h
            ├── url_canon_ip.cc
            ├── url_canon_ip.h
            ├── url_canon_mailtourl.cc
            ├── url_canon_path.cc
            ├── url_canon_pathurl.cc
            ├── url_canon_query.cc
            ├── url_canon_relative.cc
            ├── url_canon_stdstring.cc
            ├── url_canon_stdstring.h
            ├── url_canon_stdurl.cc
            ├── url_constants.cc
            ├── url_constants.h
            ├── url_export.h
            ├── url_file.h
            ├── url_parse_file.cc
            ├── url_parse_internal.h
            ├── url_util.cc
            ├── url_util.h
            └── url_util_internal.h


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | /venv
 3 | /src
 4 | /build
 5 | *.so
 6 | /.cache
 7 | /tmp
 8 | /MANIFEST
 9 | /urlparse4/*.html
10 | /dist


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM debian:jessie
 2 | 
 3 | RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
 4 | 	curl \
 5 | 	automake \
 6 | 	gcc \
 7 | 	g++ \
 8 | 	make \
 9 | 	libtool \
10 | 	ca-certificates \
11 | 	python-pip \
12 | 	python-dev \
13 | 	python-numpy \
14 | 	bzip2 \
15 | 	git \
16 | 	pkg-config \
17 | 	liburiparser-dev \
18 | 	vim
19 | 
20 | RUN mkdir -p /cosr/urlparse4
21 | 
22 | # Upgrade pip
23 | RUN pip install --upgrade --ignore-installed pip
24 | 
25 | COPY requirements.txt /requirements.txt
26 | 
27 | # Install Cython first to be able to install other dependencies from git
28 | RUN grep -i "^Cython\=" /requirements.txt | xargs -n1 pip install
29 | 
30 | RUN pip install -r /requirements.txt
31 | 
32 | RUN cd /tmp && \
33 | 	git clone --recursive https://github.com/mitghi/cyuri && \
34 | 	cd ./cyuri/liburi && \
35 | 	autoreconf -i && \
36 | 	./configure --prefix=/usr/local && \
37 | 	make && \
38 | 	make install && \
39 | 	cd .. && \
40 | 	CPPFLAGS=-I/usr/local/include/liburi make && \
41 | 	cp cyuri.so /usr/lib/python2.7/ && \
42 | 	ldconfig
43 | 
44 | ADD Makefile /Makefile
45 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "{}"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2016 Common Search contributors
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include vendor *.cpp *.h *.cc
2 | recursive-include urlparse4 *.pxd *.pyx *.cpp
3 | include README.md
4 | include LICENSE


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | clean:
 2 | 	rm -rf *.so urlparse4/*.so build urlparse4/*.c urlparse4/*.cpp urlparse4/*.html dist .cache tests/__pycache__ *.rst
 3 | 
 4 | benchmark:
 5 | 	python benchmarks/urls.py
 6 | 
 7 | test:
 8 | 	py.test tests/ -v
 9 | 
10 | docker_build:
11 | 	docker build -t commonsearch/urlparse4 .
12 | 
13 | docker_shell:
14 | 	docker run -v "$(PWD):/cosr/urlparse4:rw" -w /cosr/urlparse4 -i -t commonsearch/urlparse4 bash
15 | 
16 | docker_test:
17 | 	docker run -v "$(PWD):/cosr/urlparse4:rw" -w /cosr/urlparse4 -i -t commonsearch/urlparse4 make test
18 | 
19 | docker_benchmark:
20 | 	docker run -v "$(PWD):/cosr/urlparse4:rw" -w /cosr/urlparse4 -i -t commonsearch/urlparse4 make benchmark
21 | 
22 | build_ext:
23 | 	python setup.py build_ext --inplace
24 | 
25 | sdist:
26 | 	python setup.py sdist
27 | 
28 | pypi: clean build_ext
29 | 	pip install pypandoc
30 | 	python setup.py sdist upload -r pypi-commonsearch


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # urlparse4
  2 | 
  3 | `urlparse4` is a performance-focused replacement for Python's `urlparse` module, using C++ code from Chromium's own URL parser.
  4 | 
  5 | It is not production-ready yet.
  6 | 
  7 | Many credits go to [gurl-cython](https://github.com/Preetwinder/gurl-cython) for inspiration.
  8 | 
  9 | ## Differences with Python's `urlparse`
 10 | 
 11 | `urlparse4` should be a transparent, drop-in replacement in almost all cases. Still, there are a few differences to be aware of:
 12 | 
 13 |  - `urlparse4` is 2-7x faster for most operations (see benchmarks below)
 14 |  - `urlparse4` currently doesn't pass CPython's `test_urlparse.py` suite due to edge cases that Chromium's parser handles differently (usually in accordance with the RFCs, which `urlparse` doesn't follow entirely).
 15 |  - `urlparse4` only supports Python 2.7 for now
 16 | 
 17 | ## How to install
 18 | 
 19 | ```
 20 | pip install urlparse4
 21 | ```
 22 | 
 23 | ## How to use
 24 | 
 25 | The most straightforward way to use `urlparse4` is to replace your imports of `urlparse` with this:
 26 | 
 27 | ```
 28 | import urlparse4 as urlparse
 29 | ```
 30 | 
 31 | You could also monkey-patch its methods in the global `urlparse` module like this:
 32 | 
 33 | ```
 34 | import urlparse4
 35 | import urlparse
 36 | 
 37 | for attr in dir(urlparse4):
 38 | 	setattr(urlparse, attr, getattr(urlparse4, attr))
 39 | 
 40 | ```
 41 | 
 42 | ## How to test
 43 | 
 44 | You must have Docker installed and running. You can run CPython's test suite for `urlparse` like this:
 45 | 
 46 | ```
 47 | make docker_build
 48 | make docker_test
 49 | ```
 50 | 
 51 | ## Benchmarks
 52 | 
 53 | We are testing the following libraries on a sample of 100k URLs from Blink and DMOZ:
 54 | 
 55 |  - urlparse4 ;-)
 56 |  - [CPython's urlparse](https://github.com/python/cpython/blob/2.7/Lib/urlparse.py)
 57 |  - [urlparse2](https://github.com/mwhooker/urlparse2)
 58 |  - [YURL](http://github.com/homm/yurl/)
 59 |  - [uritools](https://github.com/tkem/uritools)
 60 |  - [pygurl / gurl-cython](https://github.com/Preetwinder/gurl-cython)
 61 |  - [cyuri](https://github.com/mitghi/cyuri)
 62 | 
 63 | Each of them is tested on a few different types of operations (basic urlsplit, relative link resolution, hostname extraction).
 64 | 
 65 | Here is how to launch the tests:
 66 | 
 67 | ```
 68 | make docker_build
 69 | make docker_benchmark
 70 | ```
 71 | 
 72 | Current results on a 2.2GHz Intel Core i7 MBP (in seconds):
 73 | 
 74 | ```
 75 | Benchmark results on 104300 URLs x 10 times, in seconds:
 76 | 
 77 | Name              Sum            Mean               Median             90%
 78 | ----------------  -------------  -----------------  -----------------  -----------------
 79 | 
 80 | urlsplit:
 81 | ----              ----           ----               ----               ----
 82 | urlparse4         1.681858       1.61251965484e-06  1.99999999984e-06  2.00000000006e-06
 83 | pygurl            2.031712       1.94795014382e-06  1.99999999984e-06  2.00000000028e-06
 84 | uritools          2.638991       2.53019271333e-06  2.00000000028e-06  3.00000000042e-06
 85 | yurl              3.910247       3.74903835091e-06  3.00000000131e-06  4.99999999981e-06
 86 | urlparse2         3.756782       3.60190028763e-06  2.99999999953e-06  4.00000000056e-06
 87 | urlparse          3.862006       3.70278619367e-06  3.00000000308e-06  4.99999999803e-06
 88 | cyuri             9.912275       9.50361936721e-06  8.00000000112e-06  1.30000000027e-05
 89 | 
 90 | urljoin_sibling:
 91 | ----              ----           ----               ----               ----
 92 | urlparse4         2.008453       1.92565004794e-06  2.00000000206e-06  2.00000000206e-06
 93 | pygurl            2.193427       2.10299808245e-06  2.00000000206e-06  2.99999999953e-06
 94 | uritools          10.575344      1.01393518696e-05  9.99999999607e-06  1.20000000052e-05
 95 | yurl              13.213052      1.26683144775e-05  1.19999999981e-05  1.60000000022e-05
 96 | urlparse2         14.239327      1.36522790029e-05  1.19999999981e-05  1.69999999997e-05
 97 | urlparse          9.25991500001  8.87815436242e-06  8.00000000822e-06  1.10000000006e-05
 98 | cyuri             5.742724       5.50596740172e-06  5.00000000159e-06  7.00000001075e-06
 99 | 
100 | hostname:
101 | ----              ----           ----               ----               ----
102 | urlparse4         1.883982       1.80631064237e-06  1.99999999495e-06  2.00000000916e-06
103 | pygurl            1.67332099999  1.60433461169e-06  1.99999999495e-06  2.00000000916e-06
104 | uritools          3.31632199999  3.17959923297e-06  3.00000000664e-06  4.00000000411e-06
105 | yurl              3.853319       3.69445733461e-06  3.00000000664e-06  4.00000000411e-06
106 | urlparse2         4.641513       4.45015627996e-06  4.00000000411e-06  5.99999999906e-06
107 | urlparse          5.122682       4.91148801534e-06  4.00000000411e-06  5.99999999906e-06
108 | cyuri             11.108649      1.06506701822e-05  9.0000000057e-06   1.5999999988e-05
109 | ```
110 | 
111 | Some libraries are included in the benchmark code but disabled for various reasons:
112 | 
113 |  - [urlparse3](https://pypi.python.org/pypi/urlparse3/) (Raises on valid URLs)
114 |  - [slimurl](https://github.com/mosquito/slimurl) (Too slow)
115 | 
116 | Feel free to submit pull requests to add new ones!
117 | 
118 | ## Feedback
119 | 
120 | We'd love to hear your feedback! Feel free to look at the issues on GitHub and open new ones if needed :)
121 | 


--------------------------------------------------------------------------------
/benchmarks/urls.py:
--------------------------------------------------------------------------------
  1 | from numpy import median, percentile, mean
  2 | from time import clock
  3 | import os
  4 | import gc
  5 | import tabulate
  6 | import sys
  7 | 
  8 | import urlparse
  9 | import urlparse2
 10 | from uritools import urisplit as uritools_urisplit
 11 | from uritools import urijoin as uritools_urijoin
 12 | 
 13 | from yurl import URL as yurl_url
 14 | import pygurl
 15 | 
 16 | # Disabled benchmarks
 17 | # import slimurl
 18 | # import urlparse3
 19 | import cyuri
 20 | 
 21 | sys.path.insert(-1, os.path.dirname(os.path.dirname(__file__)))
 22 | import urlparse4
 23 | 
 24 | gc.disable()
 25 | 
 26 | REPEATS = 10
 27 | 
 28 | URLS = []
 29 | for fp in os.listdir("tests/urls/"):
 30 |     with open("tests/urls/%s" % fp) as f:
 31 |         URLS += f.readlines()
 32 | 
 33 | data = []
 34 | 
 35 | 
 36 | def benchmark(name, func, debug=False):
 37 |     times = []
 38 |     for n in range(0, REPEATS):
 39 |         for i, url in enumerate(URLS):
 40 |             u = url.strip()
 41 |             if debug:
 42 |                 print u
 43 |             t = clock()
 44 |             func(u)
 45 |             times.append(clock() - t)
 46 | 
 47 |     row = [name, sum(times), mean(times), median(times), percentile(times, 90)]
 48 |     print row
 49 |     data.append(row)
 50 | 
 51 | 
 52 | def title(name):
 53 |     data.append(["", "", "", "", ""])
 54 |     data.append(["%s:" % name, "", "", "", ""])
 55 |     data.append(["----", "----", "----", "----", "----"])
 56 | 
 57 | # Segfault: https://github.com/mitghi/cyuri/issues/1
 58 | cyuri_parser = cyuri.uriparser()
 59 | 
 60 | title("urlsplit")
 61 | benchmark("urlparse4", lambda url: urlparse4.urlsplit(url))
 62 | benchmark("pygurl", lambda url: pygurl.ParseStandard(url))
 63 | benchmark("uritools", lambda url: uritools_urisplit(url))
 64 | benchmark("yurl", lambda url: yurl_url(url))
 65 | benchmark("urlparse2", lambda url: urlparse2.urlsplit(url))
 66 | benchmark("urlparse", lambda url: urlparse.urlsplit(url))
 67 | benchmark("cyuri", lambda url: cyuri_parser.components(url))
 68 | 
 69 | title("urljoin_sibling")
 70 | benchmark("urlparse4", lambda url: urlparse4.urljoin(url, "sibling.html?q=1#e=b"))
 71 | benchmark("pygurl", lambda url: pygurl.URL(url).Resolve("sibling.html?q=1#e=b"))
 72 | benchmark("uritools", lambda url: uritools_urijoin(url, "sibling.html?q=1#e=b"))
 73 | benchmark("yurl", lambda url: yurl_url(url) + yurl_url("sibling.html?q=1#e=b"))
 74 | benchmark("urlparse2", lambda url: urlparse2.urljoin(url, "sibling.html?q=1#e=b"))
 75 | benchmark("urlparse", lambda url: urlparse.urljoin(url, "sibling.html?q=1#e=b"))
 76 | benchmark("cyuri", lambda url: cyuri_parser.join(url, "sibling.html?q=1#e=b"))
 77 | 
 78 | # Not very representative because some libraries have functions to access the host directly without parsing the rest.
 79 | # Might still be useful for some people!
 80 | title("hostname")
 81 | benchmark("urlparse4", lambda url: urlparse4.urlsplit(url).hostname)
 82 | benchmark("pygurl", lambda url: pygurl.URL(url).host())
 83 | benchmark("uritools", lambda url: uritools_urisplit(url).host)
 84 | benchmark("yurl", lambda url: yurl_url(url).host)
 85 | benchmark("urlparse2", lambda url: urlparse2.urlsplit(url).hostname)
 86 | benchmark("urlparse", lambda url: urlparse.urlsplit(url).hostname)
 87 | benchmark("cyuri", lambda url: cyuri_parser.components(url)["host"])
 88 | 
 89 | # Very slow!
 90 | # benchmark("slimurl", lambda url: slimurl.URL(url))
 91 | 
 92 | # Breaks on simple URLs like http://1-14th.com/timeline-4-66T.htm
 93 | # benchmark("urlparse3_urlsplit", lambda url: urlparse3.parse_url(url))
 94 | 
 95 | 
 96 | print
 97 | print "Benchmark results on %s URLs x %s times, in seconds:" % (len(URLS), REPEATS)
 98 | print
 99 | print tabulate.tabulate(data, headers=["Name", "Sum", "Mean", "Median", "90%"])
100 | print
101 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | tabulate==0.7.5
 2 | Cython==0.24
 3 | pytest==2.9.2
 4 | 
 5 | # For benchmarks (urlparse3 and slimurl are only used by benchmarks that are currently commented out)
 6 | uritools==1.0.2
 7 | YURL==0.13
 8 | urlparse2==1.1.1
 9 | urlparse3==1.0.9
10 | slimurl==0.7.2
11 | -e git+https://github.com/Preetwinder/gurl-cython.git@0b973257d9a3b8a38c209ed65e793953e21c6bf9#egg=gurl-cython
12 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup, Extension
 2 | import os
 3 | 
 4 | VERSION = "0.1.3"
 5 | 
 6 | extension = Extension(  # the single extension module built by this script
 7 |     name="urlparse4/cgurl",
 8 |     sources=["urlparse4/cgurl.pyx",  # first entry is replaced by cgurl.cpp further down when that file exists
 9 |              "vendor/gurl/base/third_party/icu/icu_utf.cc",
10 |              "vendor/gurl/base/strings/string16.cc",
11 |              "vendor/gurl/base/strings/string_piece.cc",
12 |              "vendor/gurl/base/strings/string_util.cc",
13 |              "vendor/gurl/base/strings/utf_string_conversions.cc",
14 |              "vendor/gurl/base/strings/utf_string_conversion_utils.cc",
15 |              "vendor/gurl/url/gurl.cc",
16 |              "vendor/gurl/url/url_canon_etc.cc",
17 |              "vendor/gurl/url/url_canon_filesystemurl.cc",
18 |              "vendor/gurl/url/url_canon_fileurl.cc",
19 |              "vendor/gurl/url/url_canon_host.cc",
20 |              "vendor/gurl/url/url_canon_internal.cc",
21 |              "vendor/gurl/url/url_canon_ip.cc",
22 |              "vendor/gurl/url/url_canon_mailtourl.cc",
23 |              "vendor/gurl/url/url_canon_path.cc",
24 |              "vendor/gurl/url/url_canon_pathurl.cc",
25 |              "vendor/gurl/url/url_canon_query.cc",
26 |              "vendor/gurl/url/url_canon_relative.cc",
27 |              "vendor/gurl/url/url_canon_stdstring.cc",
28 |              "vendor/gurl/url/url_canon_stdurl.cc",
29 |              "vendor/gurl/url/url_constants.cc",
30 |              "vendor/gurl/url/url_parse_file.cc",
31 |              "vendor/gurl/url/url_util.cc",
32 |              "vendor/gurl/url/third_party/mozilla/url_parse.cc"
33 |              ],
34 |     language="c++",
35 |     extra_compile_args=["-std=gnu++0x", "-I./vendor/gurl/",  # vendor/gurl/ is added to the include search path
36 |                         "-fPIC", "-Ofast", "-pthread", "-w"],  # -w turns off all compiler warnings
37 |     extra_link_args=["-std=gnu++0x", "-w"],
38 | )
39 | 
40 | 
41 | if not os.path.isfile("urlparse4/cgurl.cpp"):
42 |     try:
43 |         from Cython.Build import cythonize
44 |         ext_modules = cythonize(extension, annotate=True)
45 |     except:
46 |         print "urlparse4/gurl.cpp not found and Cython failed to run to recreate it. Please install/upgrade Cython and try again."
47 |         raise
48 | else:
49 |     ext_modules = [extension]
50 |     ext_modules[0].sources[0] = "urlparse4/cgurl.cpp"
51 | 
52 | try:
53 |     import pypandoc
54 |     long_description = pypandoc.convert('README.md', 'rst')
55 | except ImportError:
56 |     long_description = open('README.md').read()
57 | 
58 | setup(  # package metadata and build inputs for the urlparse4 distribution
59 |     name="urlparse4",
60 |     packages=['urlparse4'],
61 |     version=VERSION,
62 |     description="Performance-focused replacement for Python's urlparse module",
63 |     author="Common Search contributors",
64 |     author_email="contact@commonsearch.org",
65 |     license="Apache License, Version 2.0",
66 |     url="https://github.com/commonsearch/urlparse4",
67 |     keywords=["urlparse", "urlsplit", "urljoin", "url", "parser", "urlparser", "parsing", "gurl", "cython", "faster", "speed", "performance"],
68 |     platforms='any',
69 |     classifiers=[
70 |         "Programming Language :: Python",
71 |         "Programming Language :: Python :: 2.7",
72 |         # 'Development Status :: 1 - Planning',
73 |         # 'Development Status :: 2 - Pre-Alpha',
74 |         'Development Status :: 3 - Alpha',  # the one active status; the alternatives stay commented out around it
75 |         # 'Development Status :: 4 - Beta',
76 |         # 'Development Status :: 5 - Production/Stable',
77 |         # 'Development Status :: 6 - Mature',
78 |         # 'Development Status :: 7 - Inactive',
79 |         "Environment :: Other Environment",
80 |         "Intended Audience :: Developers",
81 |         "License :: OSI Approved :: Apache Software License",
82 |         "Operating System :: OS Independent",
83 |         "Topic :: Software Development :: Libraries"
84 |     ],
85 |     long_description=long_description,  # README.md, converted to reST when pypandoc is importable
86 |     ext_modules=ext_modules,  # set above: cythonize() output, or the Extension pointed at the pre-generated cgurl.cpp
87 |     include_package_data=True  # NOTE(review): presumably relies on MANIFEST.in to ship the vendor/ sources - confirm
88 | )
89 | 


--------------------------------------------------------------------------------
/tests/test_urlparse4.py:
--------------------------------------------------------------------------------
 1 | # https://github.com/python/cpython/blob/40dac3272231773af0015fc35df5353783d77c4e/Lib/test/test_urlparse.py
 2 | import sys
 3 | import os
 4 | sys.path.insert(-1, os.path.dirname(os.path.dirname(__file__)))
 5 | 
 6 | from test import test_support
 7 | import unittest
 8 | import urlparse4 as urlparse
 9 | 
10 | 
11 | urlsplit_testcases = [  # each entry: [url, expected (scheme, netloc, path, query, fragment)]
12 |     ["mailto:webtechs@oltn.odl.state.ok.us", ("mailto", "webtechs@oltn.odl.state.ok.us", "", "", "")],
13 |     ["mailto:mailto:webtechs@oltn.odl.state.ok.us", ("mailto", "mailto:webtechs@oltn.odl.state.ok.us", "", "", "")],
14 |     ["http://a@example.com:80", ("http", "a@example.com:80", "", "", "")],
15 | 
16 | ]
17 | 
18 | urljoin_testcases = [  # each entry: [(base, url) arguments, expected joined URL]
19 |     [("", "http://example.com"), "http://example.com"]
20 | ]
21 | 
22 | 
23 | class UrlParse4TestCase(unittest.TestCase):
24 | 
25 |     def test_urlsplit(self):
26 |         for case in urlsplit_testcases:
27 |             self.assertEqual(urlparse.urlsplit(case[0]), case[1])
28 | 
29 |     def test_urljoin(self):
30 |         for case in urljoin_testcases:
31 |             self.assertEqual(urlparse.urljoin(*case[0]), case[1])
32 | 


--------------------------------------------------------------------------------
/urlparse4/__init__.py:
--------------------------------------------------------------------------------
1 | # https://github.com/python/cpython/blob/2.7/Lib/urlparse.py
2 | 
3 | from urlparse import *  # start from the whole urlparse API so this package can stand in for it
4 | 
5 | _original_urlsplit = urlsplit  # keep the star-imported implementations reachable under private names
6 | _original_urljoin = urljoin
7 | 
8 | from cgurl import urlsplit, urljoin  # must stay last: rebinds both names to the Cython implementations
9 | 


--------------------------------------------------------------------------------
/urlparse4/cgurl.pyx:
--------------------------------------------------------------------------------
  1 | from urlparse4.mozilla_url_parse cimport Component, Parsed, ParseStandardURL, ParseFileURL
  2 | from chromium_gurl cimport GURL
  3 | import urlparse as stdlib_urlparse
  4 | cimport cython
  5 | 
  6 | cdef bytes slice_component(bytes pyurl, Component comp):
  7 |     # Return the bytes of `pyurl` spanned by `comp` (comp.begin, comp.len),
  8 |     # or b"" when the component length is zero or negative.
  9 |     cdef int first = comp.begin
 10 |     return pyurl[first:first + comp.len] if comp.len > 0 else b""
 11 | 
 12 | 
 13 | cdef bytes cslice_component(char * url, Component comp):
 14 |     # C-string variant: return the comp.len bytes of `url` that start at
 15 |     # comp.begin, or b"" when the component length is zero or negative.
 16 |     # TODO: check if std::string brings any speedups
 17 |     cdef int first = comp.begin
 18 |     return url[first:first + comp.len] if comp.len > 0 else b""
 19 | 
 20 | 
 21 | cdef bytes build_netloc(bytes url, Parsed parsed):
 22 |     # Return the "[user[:password]@]host[:port]" slice of `url`, or b"" when
 23 |     # the parsed host is empty.
 24 |     #
 25 |     # The slice runs from the first non-empty userinfo component to the end of
 26 |     # the port (or of the host when there is no port). The earlier version
 27 |     # added component lengths plus a separator count instead, which cut the
 28 |     # last byte off a netloc such as "user:@host" (zero-length password, so
 29 |     # its ":" was not counted) and raised ValueError for ":password@host".
 30 |     #
 31 |     # Components with len <= 0 are still treated as absent, so a zero-length
 32 |     # username or port is left out of the result exactly as before.
 33 |     cdef int start
 34 |     cdef int stop
 35 | 
 36 |     if parsed.host.len <= 0:
 37 |         return b""
 38 | 
 39 |     if parsed.username.len > 0:
 40 |         start = parsed.username.begin
 41 |     elif parsed.password.len > 0:
 42 |         # No username: start on the ":" that precedes the password.
 43 |         start = parsed.password.begin - 1
 44 |     else:
 45 |         start = parsed.host.begin
 46 | 
 47 |     if parsed.port.len > 0:
 48 |         stop = parsed.port.begin + parsed.port.len
 49 |     else:
 50 |         stop = parsed.host.begin + parsed.host.len
 51 |     return url[start:stop]
 52 | 
 53 | 
 54 | # @cython.freelist(100)
 55 | # cdef class SplitResult:
 56 | 
 57 | #     cdef Parsed parsed
 58 | #     # cdef char * url
 59 | #     cdef bytes pyurl
 60 | 
 61 | #     def __cinit__(self, char* url):
 62 | #         # self.url = url
 63 | #         self.pyurl = url
 64 | #         if url[0:5] == b"file:":
 65 | #             ParseFileURL(url, len(url), &self.parsed)
 66 | #         else:
 67 | #             ParseStandardURL(url, len(url), &self.parsed)
 68 | 
 69 | #     property scheme:
 70 | #         def __get__(self):
 71 | #             return slice_component(self.pyurl, self.parsed.scheme)
 72 | 
 73 | #     property path:
 74 | #         def __get__(self):
 75 | #             return slice_component(self.pyurl, self.parsed.path)
 76 | 
 77 | #     property query:
 78 | #         def __get__(self):
 79 | #             return slice_component(self.pyurl, self.parsed.query)
 80 | 
 81 | #     property fragment:
 82 | #         def __get__(self):
 83 | #             return slice_component(self.pyurl, self.parsed.ref)
 84 | 
 85 | #     property username:
 86 | #         def __get__(self):
 87 | #             return slice_component(self.pyurl, self.parsed.username)
 88 | 
 89 | #     property password:
 90 | #         def __get__(self):
 91 | #             return slice_component(self.pyurl, self.parsed.password)
 92 | 
 93 | #     property port:
 94 | #         def __get__(self):
 95 | #             return slice_component(self.pyurl, self.parsed.port)
 96 | 
 97 | #     # Not in regular urlsplit() !
 98 | #     property host:
 99 | #         def __get__(self):
100 | #             return slice_component(self.pyurl, self.parsed.host)
101 | 
102 | #     property netloc:
103 | #         def __get__(self):
104 | #             return build_netloc(self.pyurl, self.parsed)
105 | 
106 | 
107 | class SplitResultNamedTuple(tuple):
108 |     """(scheme, netloc, path, query, fragment) tuple for one parsed bytes URL.
109 | 
110 |     username, password and hostname are per-instance attributes (hence no
111 |     __slots__ = ()) and port is a property over the raw port bytes. They
112 |     used to come from a closure assigned to cls.__getattr__ on every
113 |     construction, so every existing result answered with the values of the
114 |     most recently parsed URL and unknown attributes evaluated to None.
115 |     """
116 | 
117 |     def __new__(cls, bytes url):
118 |         cdef Parsed parsed
119 | 
120 |         if url[0:5] == b"file:":
121 |             ParseFileURL(url, len(url), &parsed)
122 |         else:
123 |             ParseStandardURL(url, len(url), &parsed)
124 | 
125 |         self = tuple.__new__(cls, (
126 |             slice_component(url, parsed.scheme).lower(),
127 |             build_netloc(url, parsed),
128 |             slice_component(url, parsed.path),
129 |             slice_component(url, parsed.query),
130 |             slice_component(url, parsed.ref)
131 |         ))
132 |         self.username = slice_component(url, parsed.username) or None
133 |         self.password = slice_component(url, parsed.password) or None
134 |         self.hostname = slice_component(url, parsed.host).lower()
135 |         # Raw bytes: int() is deferred to `port`, so a non-numeric port still
136 |         # raises only when .port is read.
137 |         self._port = slice_component(url, parsed.port)
138 |         return self
139 | 
140 |     scheme = property(lambda self: self[0])
141 |     netloc = property(lambda self: self[1])
142 |     path = property(lambda self: self[2])
143 |     query = property(lambda self: self[3])
144 |     fragment = property(lambda self: self[4])
145 | 
146 |     @property
147 |     def port(self):
148 |         """Port as an int; None when absent or greater than 65535."""
149 |         if self._port:
150 |             port = int(self._port)
151 |             if port <= 65535:
152 |                 return port
153 |         return None
154 | 
155 |     def geturl(self):
156 |         return stdlib_urlparse.urlunsplit(self)
157 | 
158 | 
159 | def urlsplit(url):  # `url` is handed to SplitResultNamedTuple.__new__, which declares it as bytes
160 |     return SplitResultNamedTuple.__new__(SplitResultNamedTuple, url)
161 | 
162 | def urljoin(bytes base, bytes url, allow_fragments=True):
163 |     """Join `url` onto `base` and return the resulting URL as bytes.
164 | 
165 |     GURL performs the join when allow_fragments is true and base is
166 |     non-empty. Per gurl.h, spec() is only meaningful for a valid GURL, so a
167 |     join that does not produce a valid URL (for example a base that is not
168 |     an absolute URL) now falls through to the stdlib urljoin, together with
169 |     the empty-base and allow_fragments=False cases, instead of returning an
170 |     empty string.
171 |     """
172 |     cdef GURL joined
173 |     if allow_fragments and base:
174 |         joined = GURL(base).Resolve(url)
175 |         if joined.is_valid():
176 |             return joined.spec()
177 |     return stdlib_urlparse.urljoin(base, url, allow_fragments=allow_fragments)


--------------------------------------------------------------------------------
/urlparse4/chromium_gurl.pxd:
--------------------------------------------------------------------------------
 1 | from libcpp.string cimport string
 2 | from libcpp cimport bool
 3 | from mozilla_url_parse cimport Component, Parsed
 4 | 
 5 | 
 6 | cdef extern from "../vendor/gurl/url/gurl.h":  # Cython-visible declarations for the vendored GURL class
 7 |     cdef cppclass GURL:
 8 |         GURL()
 9 |         GURL(const string & url_string)
10 |         GURL(const char * canonical_spec,
11 |              size_t canonical_spec_len,
12 |              const Parsed parsed,  # NOTE(review): declared by value here - confirm against the C++ signature
13 |              bool is_valid)
14 | 
15 |         bool is_valid()
16 |         bool is_empty()
17 |         bool IsStandard()
18 |         string spec()  # read by cgurl.urljoin to obtain the joined URL
19 |         GURL Resolve(const string & relative)  # called by cgurl.urljoin on GURL(base)
20 |         string possibly_invalid_spec()
21 | 
22 |         bool has_scheme()
23 |         bool has_username()
24 |         bool has_password()
25 |         bool has_host()
26 |         bool has_port()
27 |         bool has_path()
28 |         bool has_query()
29 |         bool has_ref()
30 | 
31 |         string scheme()
32 |         string username()
33 |         string password()
34 |         string host()
35 |         string port()
36 |         string path()
37 |         string query()
38 |         string ref()
39 | 
40 |         Parsed parsed_for_possibly_invalid_spec()
41 |         # GURL ReplaceComponents(const Replacements[char] replacements)
42 | 


--------------------------------------------------------------------------------
/urlparse4/mozilla_url_parse.pxd:
--------------------------------------------------------------------------------
 1 | cdef extern from "../vendor/gurl/url/third_party/mozilla/url_parse.h" namespace "url":
 2 |     cdef struct Component:  # a (begin, len) span; cgurl.pyx slices url[begin:begin + len] with it
 3 |         int begin
 4 |         int len  # cgurl.pyx treats len <= 0 as "nothing to slice"
 5 | 
 6 |     cdef struct Parsed:  # one Component per URL part, filled in by the Parse* functions below
 7 |         int Length()  # NOTE(review): member function declared inside a cdef struct - confirm Cython accepts this
 8 |         Component scheme
 9 |         Component username
10 |         Component password
11 |         Component host
12 |         Component port
13 |         Component path
14 |         Component query
15 |         Component ref
16 | 
17 |     cdef void ParseStandardURL(const char* url, int url_len, Parsed* parsed)  # used by cgurl.pyx unless the URL starts with b"file:"
18 |     cdef void ParseFileURL(const char* url, int url_len, Parsed* parsed)  # used by cgurl.pyx for URLs starting with b"file:"
19 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/base_export.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #ifndef BASE_BASE_EXPORT_H_
 6 | #define BASE_BASE_EXPORT_H_
 7 | 
 8 | #if defined(COMPONENT_BUILD)
 9 | #if defined(WIN32)
10 | 
11 | #if defined(BASE_IMPLEMENTATION)
12 | #define BASE_EXPORT __declspec(dllexport)
13 | #define BASE_EXPORT_PRIVATE __declspec(dllexport)
14 | #else
15 | #define BASE_EXPORT __declspec(dllimport)
16 | #define BASE_EXPORT_PRIVATE __declspec(dllimport)
17 | #endif  // defined(BASE_IMPLEMENTATION)
18 | 
19 | #else  // defined(WIN32)
20 | #if defined(BASE_IMPLEMENTATION)
21 | #define BASE_EXPORT __attribute__((visibility("default")))
22 | #define BASE_EXPORT_PRIVATE __attribute__((visibility("default")))
23 | #else
24 | #define BASE_EXPORT
25 | #define BASE_EXPORT_PRIVATE
26 | #endif  // defined(BASE_IMPLEMENTATION)
27 | #endif  // defined(WIN32)
28 | 
29 | #else  // defined(COMPONENT_BUILD)
30 | #define BASE_EXPORT  // not a component build: both macros expand to nothing
31 | #define BASE_EXPORT_PRIVATE
32 | #endif  // defined(COMPONENT_BUILD)
33 | 
34 | #endif  // BASE_BASE_EXPORT_H_
35 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/basictypes.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | // This file contains definitions of our old basic integral types
 6 | // ((u)int{8,16,32,64}) and further includes. I recommend that you use the C99
 7 | // standard types instead, and include <stdint.h>/<stddef.h>/etc. as needed.
 8 | // Note that the macros and macro-like constructs that were formerly defined in
 9 | // this file are now available separately in base/macros.h.
10 | 
11 | #ifndef BASE_BASICTYPES_H_
12 | #define BASE_BASICTYPES_H_
13 | 
14 | #include <limits.h>  // So we can set the bounds of our types.
15 | #include <stddef.h>  // For size_t.
16 | #include <stdint.h>  // For intptr_t.
17 | 
18 | #include "base/macros.h"
19 | #include "build/build_config.h"
20 | 
21 | // DEPRECATED: Please use (u)int{8,16,32,64}_t instead (and include <stdint.h>).
22 | typedef int8_t int8;
23 | typedef uint8_t uint8;
24 | typedef int16_t int16;
25 | typedef uint16_t uint16;
26 | typedef int32_t int32;
27 | typedef uint32_t uint32;
28 | typedef int64_t int64;
29 | typedef uint64_t uint64;
30 | 
31 | // DEPRECATED: Please use std::numeric_limits (from <limits>) or
32 | // (U)INT{8,16,32,64}_{MIN,MAX} in case of globals (and include <stdint.h>).
33 | const uint8  kuint8max  =  0xFF;
34 | const uint16 kuint16max =  0xFFFF;
35 | const uint32 kuint32max =  0xFFFFFFFF;
36 | const uint64 kuint64max =  0xFFFFFFFFFFFFFFFFULL;
37 | const  int8  kint8min   = -0x7F - 1;  // every minimum is spelled -(max) - 1, matching the wider types below
38 | const  int8  kint8max   =  0x7F;
39 | const  int16 kint16min  = -0x7FFF - 1;
40 | const  int16 kint16max  =  0x7FFF;
41 | const  int32 kint32min  = -0x7FFFFFFF - 1;  // -(max) - 1: the literal 0x80000000 does not fit a 32-bit signed int
42 | const  int32 kint32max  =  0x7FFFFFFF;
43 | const  int64 kint64min  = -0x7FFFFFFFFFFFFFFFLL - 1;  // same idiom for the 64-bit minimum
44 | const  int64 kint64max  =  0x7FFFFFFFFFFFFFFFLL;
45 | 
46 | #endif  // BASE_BASICTYPES_H_
47 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/compiler_specific.h:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | #ifndef BASE_COMPILER_SPECIFIC_H_
  6 | #define BASE_COMPILER_SPECIFIC_H_
  7 | 
  8 | #include "build/build_config.h"
  9 | 
 10 | #if defined(COMPILER_MSVC)
 11 | 
 12 | // Macros for suppressing and disabling warnings on MSVC.
 13 | //
 14 | // Warning numbers are enumerated at:
 15 | // http://msdn.microsoft.com/en-us/library/8x5x43k7(VS.80).aspx
 16 | //
 17 | // The warning pragma:
 18 | // http://msdn.microsoft.com/en-us/library/2c8f766e(VS.80).aspx
 19 | //
 20 | // Using __pragma instead of #pragma inside macros:
 21 | // http://msdn.microsoft.com/en-us/library/d9x1s805.aspx
 22 | 
 23 | // MSVC_SUPPRESS_WARNING disables warning |n| for the remainder of the line and
 24 | // for the next line of the source file.
 25 | #define MSVC_SUPPRESS_WARNING(n) __pragma(warning(suppress:n))
 26 | 
 27 | // MSVC_PUSH_DISABLE_WARNING pushes |n| onto a stack of warnings to be disabled.
 28 | // The warning remains disabled until popped by MSVC_POP_WARNING.
 29 | #define MSVC_PUSH_DISABLE_WARNING(n) __pragma(warning(push)) \
 30 |                                      __pragma(warning(disable:n))
 31 | 
 32 | // MSVC_PUSH_WARNING_LEVEL pushes |n| as the global warning level.  The level
 33 | // remains in effect until popped by MSVC_POP_WARNING().  Use 0 to disable all
 34 | // warnings.
 35 | #define MSVC_PUSH_WARNING_LEVEL(n) __pragma(warning(push, n))
 36 | 
 37 | // Pop effects of innermost MSVC_PUSH_* macro.
 38 | #define MSVC_POP_WARNING() __pragma(warning(pop))
 39 | 
 40 | #define MSVC_DISABLE_OPTIMIZE() __pragma(optimize("", off))
 41 | #define MSVC_ENABLE_OPTIMIZE() __pragma(optimize("", on))
 42 | 
 43 | // Allows exporting a class that inherits from a non-exported base class.
 44 | // This uses suppress instead of push/pop because the delimiter after the
 45 | // declaration (either "," or "{") has to be placed before the pop macro.
 46 | //
 47 | // Example usage:
 48 | // class EXPORT_API Foo : NON_EXPORTED_BASE(public Bar) {
 49 | //
 50 | // MSVC Compiler warning C4275:
 51 | // non dll-interface class 'Bar' used as base for dll-interface class 'Foo'.
 52 | // Note that this is intended to be used only when no access to the base class'
 53 | // static data is done through derived classes or inline methods. For more info,
 54 | // see http://msdn.microsoft.com/en-us/library/3tdb471s(VS.80).aspx
 55 | #define NON_EXPORTED_BASE(code) MSVC_SUPPRESS_WARNING(4275) \
 56 |                                 code
 57 | 
 58 | #else  // Not MSVC
 59 | 
 60 | #define MSVC_SUPPRESS_WARNING(n)
 61 | #define MSVC_PUSH_DISABLE_WARNING(n)
 62 | #define MSVC_PUSH_WARNING_LEVEL(n)
 63 | #define MSVC_POP_WARNING()
 64 | #define MSVC_DISABLE_OPTIMIZE()
 65 | #define MSVC_ENABLE_OPTIMIZE()
 66 | #define NON_EXPORTED_BASE(code) code
 67 | 
 68 | #endif  // COMPILER_MSVC
 69 | 
 70 | 
 71 | // Annotate a variable indicating it's ok if the variable is not used.
 72 | // (Typically used to silence a compiler warning when the assignment
 73 | // is important for some other reason.)
 74 | // Use like:
 75 | //   int x = ...;
 76 | //   ALLOW_UNUSED_LOCAL(x);
 77 | #define ALLOW_UNUSED_LOCAL(x) false ? (void)x : (void)0
 78 | 
 79 | // Annotate a typedef or function indicating it's ok if it's not used.
 80 | // Use like:
 81 | //   typedef Foo Bar ALLOW_UNUSED_TYPE;
 82 | #if defined(COMPILER_GCC) || defined(__clang__)
 83 | #define ALLOW_UNUSED_TYPE __attribute__((unused))
 84 | #else
 85 | #define ALLOW_UNUSED_TYPE
 86 | #endif
 87 | 
 88 | // Annotate a function indicating it should not be inlined.
 89 | // Use like:
 90 | //   NOINLINE void DoStuff() { ... }
 91 | #if defined(COMPILER_GCC)
 92 | #define NOINLINE __attribute__((noinline))
 93 | #elif defined(COMPILER_MSVC)
 94 | #define NOINLINE __declspec(noinline)
 95 | #else
 96 | #define NOINLINE
 97 | #endif
 98 | 
 99 | // Specify memory alignment for structs, classes, etc.
100 | // Use like:
101 | //   class ALIGNAS(16) MyClass { ... }
102 | //   ALIGNAS(16) int array[4];
103 | #if defined(COMPILER_MSVC)
104 | #define ALIGNAS(byte_alignment) __declspec(align(byte_alignment))
105 | #elif defined(COMPILER_GCC)
106 | #define ALIGNAS(byte_alignment) __attribute__((aligned(byte_alignment)))
107 | #endif  // no #else branch: ALIGNAS stays undefined unless COMPILER_MSVC or COMPILER_GCC is set
108 | 
109 | // Return the byte alignment of the given type (available at compile time).
110 | // Use like:
111 | //   ALIGNOF(int32)  // this would be 4
112 | #if defined(COMPILER_MSVC)
113 | #define ALIGNOF(type) __alignof(type)
114 | #elif defined(COMPILER_GCC)
115 | #define ALIGNOF(type) __alignof__(type)
116 | #endif  // no #else branch: ALIGNOF stays undefined unless COMPILER_MSVC or COMPILER_GCC is set
117 | 
118 | // Annotate a function indicating the caller must examine the return value.
119 | // Use like:
120 | //   int foo() WARN_UNUSED_RESULT;
121 | // To explicitly ignore a result, see |ignore_result()| in base/macros.h.
122 | // TODO(dcheng): Update //third_party/webrtc's macro definition to match.
123 | #undef WARN_UNUSED_RESULT
124 | #if defined(COMPILER_GCC) || defined(__clang__)
125 | #define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
126 | #else
127 | #define WARN_UNUSED_RESULT
128 | #endif
129 | 
130 | // Tell the compiler a function is using a printf-style format string.
131 | // |format_param| is the one-based index of the format string parameter;
132 | // |dots_param| is the one-based index of the "..." parameter.
133 | // For v*printf functions (which take a va_list), pass 0 for dots_param.
134 | // (This is undocumented but matches what the system C headers do.)
135 | #if defined(COMPILER_GCC)
136 | #define PRINTF_FORMAT(format_param, dots_param) \
137 |     __attribute__((format(printf, format_param, dots_param)))
138 | #else
139 | #define PRINTF_FORMAT(format_param, dots_param)
140 | #endif
141 | 
142 | // WPRINTF_FORMAT is the same, but for wide format strings.
143 | // This doesn't appear to yet be implemented in any compiler.
144 | // See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=38308 .
145 | #define WPRINTF_FORMAT(format_param, dots_param)
146 | // If available, it would look like:
147 | //   __attribute__((format(wprintf, format_param, dots_param)))
148 | 
149 | // MemorySanitizer annotations.
150 | #if defined(MEMORY_SANITIZER) && !defined(OS_NACL)
151 | #include <sanitizer/msan_interface.h>
152 | 
153 | // Mark a memory region fully initialized.
154 | // Use this to annotate code that deliberately reads uninitialized data, for
155 | // example a GC scavenging root set pointers from the stack.
156 | #define MSAN_UNPOISON(p, size)  __msan_unpoison(p, size)
157 | 
158 | // Check a memory region for initializedness, as if it was being used here.
159 | // If any bits are uninitialized, crash with an MSan report.
160 | // Use this to sanitize data which MSan won't be able to track, e.g. before
161 | // passing data to another process via shared memory.
162 | #define MSAN_CHECK_MEM_IS_INITIALIZED(p, size) \
163 |     __msan_check_mem_is_initialized(p, size)
164 | #else  // MEMORY_SANITIZER
165 | #define MSAN_UNPOISON(p, size)
166 | #define MSAN_CHECK_MEM_IS_INITIALIZED(p, size)
167 | #endif  // MEMORY_SANITIZER
168 | 
169 | // Macro useful for writing cross-platform function pointers.
170 | #if !defined(CDECL)
171 | #if defined(OS_WIN)
172 | #define CDECL __cdecl
173 | #else  // defined(OS_WIN)
174 | #define CDECL
175 | #endif  // defined(OS_WIN)
176 | #endif  // !defined(CDECL)
177 | 
178 | // Macro for hinting that an expression is likely to be false.
179 | #if !defined(UNLIKELY)
180 | #if defined(COMPILER_GCC)
181 | #define UNLIKELY(x) __builtin_expect(!!(x), 0)
182 | #else
183 | #define UNLIKELY(x) (x)
184 | #endif  // defined(COMPILER_GCC)
185 | #endif  // !defined(UNLIKELY)
186 | 
187 | #endif  // BASE_COMPILER_SPECIFIC_H_
188 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/macros.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | // This file contains macros and macro-like constructs (e.g., templates) that
  6 | // are commonly used throughout Chromium source. (It may also contain things
  7 | // that are closely related to things that are commonly used that belong in this
  8 | // file.)
  9 | 
 10 | #ifndef BASE_MACROS_H_
 11 | #define BASE_MACROS_H_
 12 | 
 13 | #include <stddef.h>  // For size_t.
 14 | #include <string.h>  // For memcpy.
 15 | 
 16 | // Put this in the declarations for a class to be uncopyable.
 17 | #define DISALLOW_COPY(TypeName) \
 18 |   TypeName(const TypeName&) = delete
 19 | 
 20 | // Put this in the declarations for a class to be unassignable.
 21 | #define DISALLOW_ASSIGN(TypeName) \
 22 |   void operator=(const TypeName&) = delete
 23 | 
 24 | // A macro to disallow the copy constructor and operator= functions
 25 | // This should be used in the private: declarations for a class
 26 | #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
 27 |   TypeName(const TypeName&);               \
 28 |   void operator=(const TypeName&)
 29 | 
 30 | // An older, deprecated, politically incorrect name for the above.
 31 | // NOTE: The usage of this macro was banned from our code base, but some
 32 | // third_party libraries are yet using it.
 33 | // TODO(tfarina): Figure out how to fix the usage of this macro in the
 34 | // third_party libraries and get rid of it.
 35 | #define DISALLOW_EVIL_CONSTRUCTORS(TypeName) DISALLOW_COPY_AND_ASSIGN(TypeName)
 36 | 
 37 | // A macro to disallow all the implicit constructors, namely the
 38 | // default constructor, copy constructor and operator= functions.
 39 | //
 40 | // This should be used in the private: declarations for a class
 41 | // that wants to prevent anyone from instantiating it. This is
 42 | // especially useful for classes containing only static methods.
 43 | #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
 44 |   TypeName() = delete;                           \
 45 |   DISALLOW_COPY_AND_ASSIGN(TypeName)
 46 | 
 47 | // The arraysize(arr) macro returns the # of elements in an array arr.
 48 | // The expression is a compile-time constant, and therefore can be
 49 | // used in defining new arrays, for example.  If you use arraysize on
 50 | // a pointer by mistake, you will get a compile-time error.
 51 | 
 52 | // This template function declaration is used in defining arraysize.
 53 | // Note that the function doesn't need an implementation, as we only
 54 | // use its type.
 55 | template <typename T, size_t N> char (&ArraySizeHelper(T (&array)[N]))[N];
 56 | #define arraysize(array) (sizeof(ArraySizeHelper(array)))
 57 | 
 58 | // The COMPILE_ASSERT macro can be used to verify that a compile time
 59 | // expression is true. For example, you could use it to verify the
 60 | // size of a static array:
 61 | //
 62 | //   COMPILE_ASSERT(arraysize(content_type_names) == CONTENT_NUM_TYPES,
 63 | //                  content_type_names_incorrect_size);
 64 | //
 65 | // or to make sure a struct is smaller than a certain size:
 66 | //
 67 | //   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);
 68 | //
 69 | // The second argument to the macro is the name of the variable. If
 70 | // the expression is false, most compilers will issue a warning/error
 71 | // containing the name of the variable.
 72 | 
 73 | #undef COMPILE_ASSERT
 74 | #define COMPILE_ASSERT(expr, msg) static_assert(expr, #msg)
 75 | 
 76 | // bit_cast<Dest,Source> is a template function that implements the
 77 | // equivalent of "*reinterpret_cast<Dest*>(&source)".  We need this in
 78 | // very low-level functions like the protobuf library and fast math
 79 | // support.
 80 | //
 81 | //   float f = 3.14159265358979;
 82 | //   int i = bit_cast<int32>(f);
 83 | //   // i = 0x40490fdb
 84 | //
 85 | // The classical address-casting method is:
 86 | //
 87 | //   // WRONG
 88 | //   float f = 3.14159265358979;            // WRONG
 89 | //   int i = * reinterpret_cast<int*>(&f);  // WRONG
 90 | //
 91 | // The address-casting method actually produces undefined behavior
 92 | // according to ISO C++ specification section 3.10 -15 -.  Roughly, this
 93 | // section says: if an object in memory has one type, and a program
 94 | // accesses it with a different type, then the result is undefined
 95 | // behavior for most values of "different type".
 96 | //
 97 | // This is true for any cast syntax, either *(int*)&f or
 98 | // *reinterpret_cast<int*>(&f).  And it is particularly true for
 99 | // conversions between integral lvalues and floating-point lvalues.
100 | //
101 | // The purpose of 3.10 -15- is to allow optimizing compilers to assume
102 | // that expressions with different types refer to different memory.  gcc
103 | // 4.0.1 has an optimizer that takes advantage of this.  So a
104 | // non-conforming program quietly produces wildly incorrect output.
105 | //
106 | // The problem is not the use of reinterpret_cast.  The problem is type
107 | // punning: holding an object in memory of one type and reading its bits
108 | // back using a different type.
109 | //
110 | // The C++ standard is more subtle and complex than this, but that
111 | // is the basic idea.
112 | //
113 | // Anyways ...
114 | //
115 | // bit_cast<> calls memcpy() which is blessed by the standard,
116 | // especially by the example in section 3.9 .  Also, of course,
117 | // bit_cast<> wraps up the nasty logic in one place.
118 | //
119 | // Fortunately memcpy() is very fast.  In optimized mode, with a
120 | // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
121 | // code with the minimal amount of data movement.  On a 32-bit system,
122 | // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
123 | // compiles to two loads and two stores.
124 | //
125 | // I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
126 | //
127 | // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
128 | // is likely to surprise you.
129 | 
130 | template <class Dest, class Source>
131 | inline Dest bit_cast(const Source& source) {
132 |   // Only same-sized types may be reinterpreted; enforced at compile time.
133 |   COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), VerifySizesAreEqual);
134 |   Dest result;
135 |   memcpy(&result, &source, sizeof(Dest));
136 |   return result;
137 | }
138 | 
139 | // Used to explicitly mark the return value of a function as unused. If you are
140 | // really sure you don't want to do anything with the return value of a function
141 | // that has been marked WARN_UNUSED_RESULT, wrap it with this. Example:
142 | //
143 | //   scoped_ptr<MyType> my_var = ...;
144 | //   if (TakeOwnership(my_var.get()) == SUCCESS)
145 | //     ignore_result(my_var.release());
146 | //
147 | template<typename T>
148 | inline void ignore_result(const T&) {  // intentionally empty: the argument is accepted and discarded
149 | }
150 | 
151 | // The following enum should be used only as a constructor argument to indicate
152 | // that the variable has static storage class, and that the constructor should
153 | // do nothing to its state.  It indicates to the reader that it is legal to
154 | // declare a static instance of the class, provided the constructor is given
155 | // the base::LINKER_INITIALIZED argument.  Normally, it is unsafe to declare a
156 | // static variable that has a constructor or a destructor because invocation
157 | // order is undefined.  However, IF the type can be initialized by filling with
158 | // zeroes (which the loader does for static variables), AND the destructor also
159 | // does nothing to the storage, AND there are no virtual methods, then a
160 | // constructor declared as
161 | //       explicit MyClass(base::LinkerInitialized x) {}
162 | // and invoked as
163 | //       static MyClass my_variable_name(base::LINKER_INITIALIZED);
164 | namespace base {
165 | enum LinkerInitialized { LINKER_INITIALIZED };
166 | 
167 | // Use these to declare and define a static local variable (static T;) so that
168 | // it is leaked so that its destructors are not called at exit. If you need
169 | // thread-safe initialization, use base/lazy_instance.h instead.
170 | #define CR_DEFINE_STATIC_LOCAL(type, name, arguments) \
171 |   static type& name = *new type arguments
172 | 
173 | }  // base
174 | 
175 | #endif  // BASE_MACROS_H_
176 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/move.h:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | #ifndef BASE_MOVE_H_
  6 | #define BASE_MOVE_H_
  7 | 
  8 | #include "base/compiler_specific.h"
  9 | 
 10 | // Macro with the boilerplate that makes a type move-only in C++03.
 11 | //
 12 | // USAGE
 13 | //
 14 | // This macro should be used instead of DISALLOW_COPY_AND_ASSIGN to create
 15 | // a "move-only" type.  Unlike DISALLOW_COPY_AND_ASSIGN, this macro should be
 16 | // the first line in a class declaration.
 17 | //
 18 | // A class using this macro must call .Pass() (or somehow be an r-value already)
 19 | // before it can be:
 20 | //
 21 | //   * Passed as a function argument
 22 | //   * Used as the right-hand side of an assignment
 23 | //   * Returned from a function
 24 | //
 25 | // Each class will still need to define their own "move constructor" and "move
 26 | // operator=" to make this useful.  Here's an example of the macro, the move
 27 | // constructor, and the move operator= from the scoped_ptr class:
 28 | //
 29 | //  template <typename T>
 30 | //  class scoped_ptr {
 31 | //     MOVE_ONLY_TYPE_FOR_CPP_03(scoped_ptr, RValue)
 32 | //   public:
 33 | //    scoped_ptr(RValue& other) : ptr_(other.release()) { }
 34 | //    scoped_ptr& operator=(RValue& other) {
 35 | //      swap(other);
 36 | //      return *this;
 37 | //    }
 38 | //  };
 39 | //
 40 | // Note that the constructor must NOT be marked explicit.
 41 | //
 42 | // For consistency, the second parameter to the macro should always be RValue
 43 | // unless you have a strong reason to do otherwise.  It is only exposed as a
 44 | // macro parameter so that the move constructor and move operator= don't look
 45 | // like they're using a phantom type.
 46 | //
 47 | //
 48 | // HOW THIS WORKS
 49 | //
 50 | // For a thorough explanation of this technique, see:
 51 | //
 52 | //   http://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Move_Constructor
 53 | //
 54 | // The summary is that we take advantage of 2 properties:
 55 | //
 56 | //   1) non-const references will not bind to r-values.
 57 | //   2) C++ can apply one user-defined conversion when initializing a
 58 | //      variable.
 59 | //
 60 | // The first lets us disable the copy constructor and assignment operator
 61 | // by declaring private version of them with a non-const reference parameter.
 62 | //
 63 | // For l-values, direct initialization still fails like in
 64 | // DISALLOW_COPY_AND_ASSIGN because the copy constructor and assignment
 65 | // operators are private.
 66 | //
 67 | // For r-values, the situation is different. The copy constructor and
 68 | // assignment operator are not viable due to (1), so we are trying to call
 69 | // a non-existent constructor and non-existing operator= rather than a private
 70 | // one.  Since we have not committed an error quite yet, we can provide an
 71 | // alternate conversion sequence and a constructor.  We add
 72 | //
 73 | //   * a private struct named "RValue"
 74 | //   * a user-defined conversion "operator RValue()"
 75 | //   * a "move constructor" and "move operator=" that take the RValue& as
 76 | //     their sole parameter.
 77 | //
 78 | // Only r-values will trigger this sequence and execute our "move constructor"
 79 | // or "move operator=."  L-values will match the private copy constructor and
 80 | // operator= first giving a "private in this context" error.  This combination
 81 | // gives us a move-only type.
 82 | //
 83 | // For signaling a destructive transfer of data from an l-value, we provide a
 84 | // method named Pass() which creates an r-value for the current instance
 85 | // triggering the move constructor or move operator=.
 86 | //
 87 | // Other ways to get r-values is to use the result of an expression like a
 88 | // function call.
 89 | //
 90 | // Here's an example with comments explaining what gets triggered where:
 91 | //
 92 | //    class Foo {
 93 | //      MOVE_ONLY_TYPE_FOR_CPP_03(Foo, RValue);
 94 | //
 95 | //     public:
 96 | //       ... API ...
 97 | //       Foo(RValue other);           // Move constructor.
 98 | //       Foo& operator=(RValue rhs);  // Move operator=
 99 | //    };
100 | //
101 | //    Foo MakeFoo();  // Function that returns a Foo.
102 | //
103 | //    Foo f;
104 | //    Foo f_copy(f);  // ERROR: Foo(Foo&) is private in this context.
105 | //    Foo f_assign;
106 | //    f_assign = f;   // ERROR: operator=(Foo&) is private in this context.
107 | //
108 | //
109 | //    Foo f(MakeFoo());      // R-value so alternate conversion executed.
110 | //    Foo f_copy(f.Pass());  // R-value so alternate conversion executed.
111 | //    f = f_copy.Pass();     // R-value so alternate conversion executed.
112 | //
113 | //
114 | // IMPLEMENTATION SUBTLETIES WITH RValue
115 | //
116 | // The RValue struct is just a container for a pointer back to the original
117 | // object. It should only ever be created as a temporary, and no external
118 | // class should ever declare it or use it in a parameter.
119 | //
120 | // It is tempting to want to use the RValue type in function parameters, but
121 | // excluding the limited usage here for the move constructor and move
122 | // operator=, doing so would mean that the function could take both r-values
123 | // and l-values equally which is unexpected.  See COMPARED TO Boost.Move for
124 | // more details.
125 | //
126 | // An alternate, and incorrect, implementation of the RValue class used by
127 | // Boost.Move makes RValue a fieldless child of the move-only type. RValue&
128 | // is then used in place of RValue in the various operators.  The RValue& is
129 | // "created" by doing *reinterpret_cast<RValue*>(this).  This has the appeal
130 | // of never creating a temporary RValue struct even with optimizations
131 | // disabled.  Also, by virtue of inheritance you can treat the RValue
132 | // reference as if it were the move-only type itself.  Unfortunately,
133 | // using the result of this reinterpret_cast<> is actually undefined behavior
134 | // due to C++98 5.2.10.7. In certain compilers (e.g., NaCl) the optimizer
135 | // will generate non-working code.
136 | //
137 | // In optimized builds, both implementations generate the same assembly so we
138 | // choose the one that adheres to the standard.
139 | //
140 | //
141 | // WHY HAVE typedef void MoveOnlyTypeForCPP03
142 | //
143 | // Callback<>/Bind() needs to understand movable-but-not-copyable semantics
144 | // to call .Pass() appropriately when it is expected to transfer the value.
145 | // The cryptic typedef MoveOnlyTypeForCPP03 is added to make this check
146 | // easy and automatic in helper templates for Callback<>/Bind().
147 | // See IsMoveOnlyType template and its usage in base/callback_internal.h
148 | // for more details.
149 | //
150 | //
151 | // COMPARED TO C++11
152 | //
153 | // In C++11, you would implement this functionality using an r-value reference
154 | // and our .Pass() method would be replaced with a call to std::move().
155 | //
156 | // This emulation also has a deficiency where it uses up the single
157 | // user-defined conversion allowed by C++ during initialization.  This can
158 | // cause problems in some API edge cases.  For instance, in scoped_ptr, it is
159 | // impossible to make a function "void Foo(scoped_ptr<Parent> p)" accept a
160 | // value of type scoped_ptr<Child> even if you add a constructor to
161 | // scoped_ptr<> that would make it look like it should work.  C++11 does not
162 | // have this deficiency.
163 | //
164 | //
165 | // COMPARED TO Boost.Move
166 | //
167 | // Our implementation is similar to Boost.Move, but we keep the RValue struct
168 | // private to the move-only type, and we don't use the reinterpret_cast<> hack.
169 | //
170 | // In Boost.Move, RValue is the boost::rv<> template.  This type can be used
171 | // when writing APIs like:
172 | //
173 | //   void MyFunc(boost::rv<Foo>& f)
174 | //
175 | // that can take advantage of rv<> to avoid extra copies of a type.  However you
176 | // would still be able to call this version of MyFunc with an l-value:
177 | //
178 | //   Foo f;
179 | //   MyFunc(f);  // Uh oh, we probably just destroyed |f| w/o calling Pass().
180 | //
181 | // unless someone is very careful to also declare a parallel override like:
182 | //
183 | //   void MyFunc(const Foo& f)
184 | //
185 | // that would catch the l-values first.  This was declared unsafe in C++11 and
186 | // a C++11 compiler will explicitly fail MyFunc(f).  Unfortunately, we cannot
187 | // ensure this in C++03.
188 | //
189 | // Since we have no need for writing such APIs yet, our implementation keeps
190 | // RValue private and uses a .Pass() method to do the conversion instead of
191 | // trying to write a version of "std::move()." Writing an API like std::move()
192 | // would require the RValue struct to be public.
193 | //
194 | //
195 | // CAVEATS
196 | //
197 | // If you include a move-only type as a field inside a class that does not
198 | // explicitly declare a copy constructor, the containing class's implicit
199 | // copy constructor will change from Containing(const Containing&) to
200 | // Containing(Containing&).  This can cause some unexpected errors.
201 | //
202 | //   http://llvm.org/bugs/show_bug.cgi?id=11528
203 | //
204 | // The workaround is to explicitly declare your copy constructor.
205 | //
206 | #define MOVE_ONLY_TYPE_FOR_CPP_03(type, rvalue_type) \
207 |  private: \
208 |   struct rvalue_type { \
209 |     explicit rvalue_type(type* object) : object(object) {} \
210 |     type* object; \
211 |   }; \
212 |   type(type&); \
213 |   void operator=(type&); \
214 |  public: \
215 |   operator rvalue_type() { return rvalue_type(this); } \
216 |   type Pass() WARN_UNUSED_RESULT { return type(rvalue_type(this)); } \
217 |   typedef void MoveOnlyTypeForCPP03; \
218 |  private:
219 | 
220 | #define MOVE_ONLY_TYPE_WITH_MOVE_CONSTRUCTOR_FOR_CPP_03(type) \
221 |  private: \
222 |   type(const type&); \
223 |   void operator=(const type&); \
224 |  public: \
225 |   type&& Pass() WARN_UNUSED_RESULT { return static_cast<type&&>(*this); } \
226 |   typedef void MoveOnlyTypeForCPP03; \
227 |  private:
228 | 
229 | #define TYPE_WITH_MOVE_CONSTRUCTOR_FOR_CPP_03(type) \
230 |  public: \
231 |   type&& Pass() WARN_UNUSED_RESULT { return static_cast<type&&>(*this); } \
232 |  private:
233 | 
234 | #endif  // BASE_MOVE_H_
235 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/strings/cscope.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/commonsearch/urlparse4/fda910309aa189d57473dbb12e2d2acde49c1736/vendor/gurl/base/strings/cscope.out


--------------------------------------------------------------------------------
/vendor/gurl/base/strings/string16.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #include "base/strings/string16.h"
 6 | 
 7 | #if defined(WCHAR_T_IS_UTF16)
 8 | 
 9 | #error This file should not be used on 2-byte wchar_t systems
10 | // If this winds up being needed on 2-byte wchar_t systems, either the
11 | // definitions below can be used, or the host system's wide character
12 | // functions like wmemcmp can be wrapped.
13 | 
14 | #elif defined(WCHAR_T_IS_UTF32)
15 | 
16 | #include <ostream>
17 | 
18 | //#include "base/strings/utf_string_conversions.h"
19 | 
20 | namespace base {
21 | 
22 | int c16memcmp(const char16* s1, const char16* s2, size_t n) {
23 |   // Compares the first n char16 units.  memcmp would compare bytes instead.
24 |   while (n-- > 0) {
25 |     if (*s1 != *s2) {
26 |       // We cannot use (*s1 - *s2) because char16 is unsigned.
27 |       return ((*s1 < *s2) ? -1 : 1);
28 |     }
29 |     ++s1;
30 |     ++s2;
31 |   }
32 |   return 0;  // All n units are equal.
33 | }
34 | 
35 | size_t c16len(const char16* s) {
36 |   const char16 *s_orig = s;
37 |   while (*s) {  // Advance to the terminating zero unit.
38 |     ++s;
39 |   }
40 |   return s - s_orig;  // Length in char16 units, terminator excluded.
41 | }
42 | 
43 | const char16* c16memchr(const char16* s, char16 c, size_t n) {
44 |   while (n-- > 0) {
45 |     if (*s == c) {
46 |       return s;  // First occurrence of c within the first n units.
47 |     }
48 |     ++s;
49 |   }
50 |   return 0;  // c does not occur in the first n units.
51 | }
52 | 
53 | char16* c16memmove(char16* s1, const char16* s2, size_t n) {  // n is in char16 units.
54 |   return static_cast<char16*>(memmove(s1, s2, n * sizeof(char16)));
55 | }
56 | 
57 | char16* c16memcpy(char16* s1, const char16* s2, size_t n) {  // n is in char16 units.
58 |   return static_cast<char16*>(memcpy(s1, s2, n * sizeof(char16)));
59 | }
60 | 
61 | char16* c16memset(char16* s, char16 c, size_t n) {
62 |   char16 *s_orig = s;
63 |   while (n-- > 0) {  // Store c into each of the first n units.
64 |     *s = c;
65 |     ++s;
66 |   }
67 |   return s_orig;  // Start of the filled range.
68 | }
69 | /*
70 | std::ostream& operator<<(std::ostream& out, const string16& str) {
71 |   return out << UTF16ToUTF8(str);
72 | }*/
73 | /*
74 | void PrintTo(const string16& str, std::ostream* out) {
75 |   *out << str;
76 | }*/
77 | 
78 | }  // namespace base
79 | 
80 | template class std::basic_string<base::char16, base::string16_char_traits>;
81 | 
82 | #endif  // WCHAR_T_IS_UTF32
83 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/strings/string16.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | #ifndef BASE_STRINGS_STRING16_H_
  6 | #define BASE_STRINGS_STRING16_H_
  7 | 
  8 | // WHAT:
  9 | // A version of std::basic_string that provides 2-byte characters even when
 10 | // wchar_t is not implemented as a 2-byte type. You can access this class as
 11 | // string16. We also define char16, which string16 is based upon.
 12 | //
 13 | // WHY:
 14 | // On Windows, wchar_t is 2 bytes, and it can conveniently handle UTF-16/UCS-2
 15 | // data. Plenty of existing code operates on strings encoded as UTF-16.
 16 | //
 17 | // On many other platforms, sizeof(wchar_t) is 4 bytes by default. We can make
 18 | // it 2 bytes by using the GCC flag -fshort-wchar. But then std::wstring fails
 19 | // at run time, because it calls some functions (like wcslen) that come from
 20 | // the system's native C library -- which was built with a 4-byte wchar_t!
 21 | // It's wasteful to use 4-byte wchar_t strings to carry UTF-16 data, and it's
 22 | // entirely improper on those systems where the encoding of wchar_t is defined
 23 | // as UTF-32.
 24 | //
 25 | // Here, we define string16, which is similar to std::wstring but replaces all
 26 | // libc functions with custom, 2-byte-char compatible routines. It is capable
 27 | // of carrying UTF-16-encoded data.
 28 | 
 29 | #include <stdio.h>
 30 | #include <string>
 31 | 
 32 | #include "base/base_export.h"
 33 | #include "base/basictypes.h"
 34 | 
 35 | #if defined(WCHAR_T_IS_UTF16)
 36 | 
 37 | namespace base {
 38 | 
 39 | typedef wchar_t char16;
 40 | typedef std::wstring string16;
 41 | typedef std::char_traits<wchar_t> string16_char_traits;
 42 | 
 43 | }  // namespace base
 44 | 
 45 | #elif defined(WCHAR_T_IS_UTF32)
 46 | 
 47 | namespace base {
 48 | 
 49 | typedef uint16 char16;
 50 | 
 51 | // char16 versions of the functions required by string16_char_traits; these
 52 | // are based on the wide character functions of similar names ("w" or "wcs"
 53 | // instead of "c16").
 54 | BASE_EXPORT int c16memcmp(const char16* s1, const char16* s2, size_t n);
 55 | BASE_EXPORT size_t c16len(const char16* s);
 56 | BASE_EXPORT const char16* c16memchr(const char16* s, char16 c, size_t n);
 57 | BASE_EXPORT char16* c16memmove(char16* s1, const char16* s2, size_t n);
 58 | BASE_EXPORT char16* c16memcpy(char16* s1, const char16* s2, size_t n);
 59 | BASE_EXPORT char16* c16memset(char16* s, char16 c, size_t n);
 60 | 
 61 | struct string16_char_traits {  // Character traits for a char16 basic_string.
 62 |   typedef char16 char_type;
 63 |   typedef int int_type;
 64 | 
 65 |   // int_type needs to be able to hold each possible value of char_type, and in
 66 |   // addition, the distinct value of eof().
 67 |   COMPILE_ASSERT(sizeof(int_type) > sizeof(char_type), unexpected_type_width);
 68 | 
 69 |   typedef std::streamoff off_type;
 70 |   typedef mbstate_t state_type;
 71 |   typedef std::fpos<state_type> pos_type;
 72 | 
 73 |   static void assign(char_type& c1, const char_type& c2) {
 74 |     c1 = c2;
 75 |   }
 76 | 
 77 |   static bool eq(const char_type& c1, const char_type& c2) {
 78 |     return c1 == c2;
 79 |   }
 80 |   static bool lt(const char_type& c1, const char_type& c2) {
 81 |     return c1 < c2;
 82 |   }
 83 | 
 84 |   static int compare(const char_type* s1, const char_type* s2, size_t n) {
 85 |     return c16memcmp(s1, s2, n);
 86 |   }
 87 | 
 88 |   static size_t length(const char_type* s) {
 89 |     return c16len(s);
 90 |   }
 91 | 
 92 |   static const char_type* find(const char_type* s, size_t n,
 93 |                                const char_type& a) {
 94 |     return c16memchr(s, a, n);  // Note: c16memchr takes (s, c, n).
 95 |   }
 96 | 
 97 |   static char_type* move(char_type* s1, const char_type* s2, size_t n) {
 98 |     return c16memmove(s1, s2, n);
 99 |   }
100 | 
101 |   static char_type* copy(char_type* s1, const char_type* s2, size_t n) {
102 |     return c16memcpy(s1, s2, n);
103 |   }
104 | 
105 |   static char_type* assign(char_type* s, size_t n, char_type a) {
106 |     return c16memset(s, a, n);  // Note: c16memset takes (s, c, n).
107 |   }
108 | 
109 |   static int_type not_eof(const int_type& c) {
110 |     return eq_int_type(c, eof()) ? 0 : c;  // eof() maps to 0; others pass through.
111 |   }
112 | 
113 |   static char_type to_char_type(const int_type& c) {
114 |     return char_type(c);
115 |   }
116 | 
117 |   static int_type to_int_type(const char_type& c) {
118 |     return int_type(c);
119 |   }
120 | 
121 |   static bool eq_int_type(const int_type& c1, const int_type& c2) {
122 |     return c1 == c2;
123 |   }
124 | 
125 |   static int_type eof() {
126 |     return static_cast<int_type>(EOF);  // EOF comes from <stdio.h>.
127 |   }
128 | };
129 | 
130 | typedef std::basic_string<char16, base::string16_char_traits> string16;
131 | 
132 | BASE_EXPORT extern std::ostream& operator<<(std::ostream& out,
133 |                                             const string16& str);
134 | 
135 | // This is required by googletest to print a readable output on test failures.
136 | BASE_EXPORT extern void PrintTo(const string16& str, std::ostream* out);
137 | 
138 | }  // namespace base
139 | 
140 | // The string class will be explicitly instantiated only once, in string16.cc.
141 | //
142 | // std::basic_string<> in GNU libstdc++ contains a static data member,
143 | // _S_empty_rep_storage, to represent empty strings.  When an operation such
144 | // as assignment or destruction is performed on a string, causing its existing
145 | // data member to be invalidated, it must not be freed if this static data
146 | // member is being used.  Otherwise, it counts as an attempt to free static
147 | // (and not allocated) data, which is a memory error.
148 | //
149 | // Generally, due to C++ template magic, _S_empty_rep_storage will be marked
150 | // as a coalesced symbol, meaning that the linker will combine multiple
151 | // instances into a single one when generating output.
152 | //
153 | // If a string class is used by multiple shared libraries, a problem occurs.
154 | // Each library will get its own copy of _S_empty_rep_storage.  When strings
155 | // are passed across a library boundary for alteration or destruction, memory
156 | // errors will result.  GNU libstdc++ contains a configuration option,
157 | // --enable-fully-dynamic-string (_GLIBCXX_FULLY_DYNAMIC_STRING), which
158 | // disables the static data member optimization, but it's a good optimization
159 | // and non-STL code is generally at the mercy of the system's STL
160 | // configuration.  Fully-dynamic strings are not the default for GNU libstdc++
161 | // itself or for the libstdc++ installations on the systems we care
162 | // about, such as Mac OS X and relevant flavors of Linux.
163 | //
164 | // See also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24196 .
165 | //
166 | // To avoid problems, string classes need to be explicitly instantiated only
167 | // once, in exactly one library.  All other string users see it via an "extern"
168 | // declaration.  This is precisely how GNU libstdc++ handles
169 | // std::basic_string<char> (string) and std::basic_string<wchar_t> (wstring).
170 | //
171 | // This also works around a Mac OS X linker bug in ld64-85.2.1 (Xcode 3.1.2),
172 | // in which the linker does not fully coalesce symbols when dead code
173 | // stripping is enabled.  This bug causes the memory errors described above
174 | // to occur even when a std::basic_string<> does not cross shared library
175 | // boundaries, such as in statically-linked executables.
176 | //
177 | // TODO(mark): File this bug with Apple and update this note with a bug number.
178 | 
179 | extern template
180 | class BASE_EXPORT std::basic_string<base::char16, base::string16_char_traits>;
181 | 
182 | #endif  // WCHAR_T_IS_UTF32
183 | 
184 | #endif  // BASE_STRINGS_STRING16_H_
185 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/strings/string_util.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | #include "base/strings/string_util.h"
  6 | 
  7 | #include <ctype.h>
  8 | #include <errno.h>
  9 | #include <math.h>
 10 | #include <stdarg.h>
 11 | #include <stdio.h>
 12 | #include <stdlib.h>
 13 | #include <string.h>
 14 | #include <time.h>
 15 | #include <wchar.h>
 16 | #include <wctype.h>
 17 | 
 18 | #include <algorithm>
 19 | #include <vector>
 20 | 
 21 | #include "base/basictypes.h"
 22 | #include "build/build_config.h"
 23 | 
 24 | namespace base {
 25 | 
 26 | namespace {
 27 | 
 28 | template<typename Str>
 29 | static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
 30 |                                           StringPiece lowercase_ascii) {
 31 |   if (str.size() != lowercase_ascii.size())  // Different lengths never match.
 32 |     return false;
 33 |   for (size_t i = 0; i < str.size(); i++) {
 34 |     if (ToLowerASCII(str[i]) != lowercase_ascii[i])  // Only str is lower-cased.
 35 |       return false;
 36 |   }
 37 |   return true;
 38 | }
 39 | 
 40 | }  // namespace
 41 | 
 42 | // Assuming that a pointer is the size of a "machine word", then
 43 | // uintptr_t is an integer type that is also a machine word.
 44 | typedef uintptr_t MachineWord;
 45 | const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
 46 | 
 47 | inline bool IsAlignedToMachineWord(const void* pointer) {  // Low address bits all 0?
 48 |   return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
 49 | }
 50 | 
 51 | template<typename T> inline T* AlignToMachineWord(T* pointer) {  // Rounds down.
 52 |   return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
 53 |                               ~kMachineWordAlignmentMask);
 54 | }
 55 | 
 56 | template<size_t size, typename CharacterType> struct NonASCIIMask;
 57 | template<> struct NonASCIIMask<4, char16> {  // 4-byte word: two 0xFF80 lanes.
 58 |     static inline uint32_t value() { return 0xFF80FF80U; }
 59 | };
 60 | template<> struct NonASCIIMask<4, char> {  // 4-byte word: four 0x80 lanes.
 61 |     static inline uint32_t value() { return 0x80808080U; }
 62 | };
 63 | template<> struct NonASCIIMask<8, char16> {  // 8-byte word: four 0xFF80 lanes.
 64 |     static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
 65 | };
 66 | template<> struct NonASCIIMask<8, char> {  // 8-byte word: eight 0x80 lanes.
 67 |     static inline uint64_t value() { return 0x8080808080808080ULL; }
 68 | };
 69 | #if defined(WCHAR_T_IS_UTF32)
 70 | template<> struct NonASCIIMask<4, wchar_t> {  // 4-byte word: one 0xFFFFFF80 lane.
 71 |     static inline uint32_t value() { return 0xFFFFFF80U; }
 72 | };
 73 | template<> struct NonASCIIMask<8, wchar_t> {  // 8-byte word: two 0xFFFFFF80 lanes.
 74 |     static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
 75 | };
 76 | #endif  // WCHAR_T_IS_UTF32
 77 | 
 78 | template <class Char>
 79 | inline bool DoIsStringASCII(const Char* characters, size_t length) {
 80 |   MachineWord all_char_bits = 0;  // OR of every character seen so far.
 81 |   const Char* end = characters + length;
 82 | 
 83 |   // Prologue: consume single characters until the pointer is word-aligned.
 84 |   while (!IsAlignedToMachineWord(characters) && characters != end) {
 85 |     all_char_bits |= *characters;
 86 |     ++characters;
 87 |   }
 88 | 
 89 |   // Main loop: OR in whole machine words up to the last aligned boundary.
 90 |   const Char* word_end = AlignToMachineWord(end);
 91 |   const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
 92 |   while (characters < word_end) {
 93 |     all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
 94 |     characters += loop_increment;
 95 |   }
 96 | 
 97 |   // Epilogue: consume the trailing characters that do not fill a word.
 98 |   while (characters != end) {
 99 |     all_char_bits |= *characters;
100 |     ++characters;
101 |   }
102 | 
103 |   MachineWord non_ascii_bit_mask =
104 |       NonASCIIMask<sizeof(MachineWord), Char>::value();
105 |   return !(all_char_bits & non_ascii_bit_mask);  // True iff no mask bit was set.
106 | }
107 | 
108 | 
109 | bool IsStringASCII(const StringPiece& str) {
110 |   return DoIsStringASCII(str.data(), str.length());
111 | }
112 | 
113 | bool IsStringASCII(const StringPiece16& str) {
114 |   return DoIsStringASCII(str.data(), str.length());
115 | }
116 | 
117 | bool IsStringASCII(const string16& str) {
118 |   return DoIsStringASCII(str.data(), str.length());
119 | }
120 | 
121 | #if defined(WCHAR_T_IS_UTF32)
122 | bool IsStringASCII(const std::wstring& str) {
123 |   return DoIsStringASCII(str.data(), str.length());
124 | }
125 | #endif
126 | 
127 | bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
128 |   return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii);
129 | }
130 | 
131 | bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
132 |   return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii);
133 | }
134 | 
135 | }  // namespace base
136 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/strings/string_util.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | //
 5 | // This file defines utility functions for working with strings.
 6 | 
 7 | #ifndef BASE_STRINGS_STRING_UTIL_H_
 8 | #define BASE_STRINGS_STRING_UTIL_H_
 9 | 
10 | #include <ctype.h>
11 | #include <stdarg.h>   // va_list
12 | 
13 | #include <string>
14 | #include <vector>
15 | 
16 | #include "base/base_export.h"
17 | #include "base/basictypes.h"
18 | //#include "base/compiler_specific.h"
19 | #include "base/strings/string16.h"
20 | #include "base/strings/string_piece.h"  // For implicit conversions.
21 | 
22 | namespace base {
23 | 
24 | // ASCII-specific tolower.  The standard library's tolower is locale sensitive,
25 | // so we don't want to use it here.
26 | inline char ToLowerASCII(char c) {
27 |   return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
28 | }
29 | inline char16 ToLowerASCII(char16 c) {
30 |   return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
31 | }
32 | 
33 | // Returns true if |str|, lower-cased with ToLowerASCII, equals |lowercase_ascii|,
34 | // which must already be a lower-cased ASCII string (typically a constant).
35 | BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece str,
36 |                                       StringPiece lowercase_ascii);
37 | BASE_EXPORT bool LowerCaseEqualsASCII(StringPiece16 str,
38 |                                       StringPiece lowercase_ascii);
39 | 
40 | // Returns true if the specified string matches the criteria. How can a wide
41 | // string be 8-bit or UTF8? It contains only characters that are < 256 (in the
42 | // first case) or characters that use only 8-bits and whose 8-bit
43 | // representation looks like a UTF-8 string (the second case).
44 | //
45 | // Note that IsStringUTF8 checks not only if the input is structurally
46 | // valid but also if it doesn't contain any non-character codepoint
47 | // (e.g. U+FFFE). It's done on purpose because all the existing callers want
48 | // to have the maximum 'discriminating' power from other encodings. If
49 | // there's a use case for just checking the structural validity, we have to
50 | // add a new function for that.
51 | //
52 | // IsStringASCII assumes the input is likely all ASCII, and does not leave early
53 | // if it is not the case.
54 | BASE_EXPORT bool IsStringUTF8(const StringPiece& str);
55 | BASE_EXPORT bool IsStringASCII(const StringPiece& str);
56 | BASE_EXPORT bool IsStringASCII(const StringPiece16& str);
57 | // A convenience adaptor for WebStrings, as they don't convert into
58 | // StringPieces directly.
59 | BASE_EXPORT bool IsStringASCII(const string16& str);
60 | #if defined(WCHAR_T_IS_UTF32)
61 | BASE_EXPORT bool IsStringASCII(const std::wstring& str);
62 | #endif
63 | 
64 | }  // namespace base
65 | 
66 | #endif  // BASE_STRINGS_STRING_UTIL_H_
67 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/strings/utf_string_conversion_utils.cc:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2009 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | #include "base/strings/utf_string_conversion_utils.h"
  6 | 
  7 | #include "base/third_party/icu/icu_utf.h"
  8 | 
  9 | namespace base {
 10 | 
 11 | // ReadUnicodeCharacter --------------------------------------------------------
 12 | 
 13 | bool ReadUnicodeCharacter(const char* src,
 14 |                           int32 src_len,
 15 |                           int32* char_index,
 16 |                           uint32* code_point_out) {
 17 |   // CBU8_NEXT expects to be able to use -1 to signal an error, so we must
 18 |   // use a signed type for code_point.  But this function returns false
 19 |   // on error anyway, so code_point_out is unsigned.
 20 |   int32 code_point;
 21 |   CBU8_NEXT(src, *char_index, src_len, code_point);
 22 |   *code_point_out = static_cast<uint32>(code_point);
 23 | 
 24 |   // The ICU macro above moves past the char it read; we want to point to the
 25 |   // last char consumed.
 26 |   (*char_index)--;
 27 | 
 28 |   // Validate the decoded value.
 29 |   return IsValidCodepoint(code_point);
 30 | }
 31 | 
 32 | bool ReadUnicodeCharacter(const char16* src,
 33 |                           int32 src_len,
 34 |                           int32* char_index,
 35 |                           uint32* code_point) {
 36 |   if (CBU16_IS_SURROGATE(src[*char_index])) {
 37 |     if (!CBU16_IS_SURROGATE_LEAD(src[*char_index]) ||
 38 |         *char_index + 1 >= src_len ||
 39 |         !CBU16_IS_TRAIL(src[*char_index + 1])) {
 40 |       // Invalid surrogate pair: nothing is written and the index is unchanged.
 41 |       return false;
 42 |     }
 43 | 
 44 |     // Valid surrogate pair: combine lead and trail into one code point.
 45 |     *code_point = CBU16_GET_SUPPLEMENTARY(src[*char_index],
 46 |                                           src[*char_index + 1]);
 47 |     (*char_index)++;  // Leave the index on the trail surrogate.
 48 |   } else {
 49 |     // Not a surrogate, just one 16-bit word.
 50 |     *code_point = src[*char_index];
 51 |   }
 52 | 
 53 |   return IsValidCodepoint(*code_point);
 54 | }
 55 | 
 56 | #if defined(WCHAR_T_IS_UTF32)
 57 | bool ReadUnicodeCharacter(const wchar_t* src,
 58 |                           int32 src_len,
 59 |                           int32* char_index,
 60 |                           uint32* code_point) {
 61 |   // Conversion is easy since the source is 32-bit.
 62 |   *code_point = src[*char_index];
 63 | 
 64 |   // Validate the value.
 65 |   return IsValidCodepoint(*code_point);
 66 | }
 67 | #endif  // defined(WCHAR_T_IS_UTF32)
 68 | 
 69 | // WriteUnicodeCharacter -------------------------------------------------------
 70 | 
 71 | size_t WriteUnicodeCharacter(uint32 code_point, std::string* output) {
 72 |   // Appends |code_point| to |output| as UTF-8; returns the bytes appended.
 73 |   if (code_point > 0x7f) {
 74 |     // Multi-byte case: grow by the maximum encoded size so that
 75 |     // CBU8_APPEND_UNSAFE has room to write, then trim the unused tail.
 76 |     const size_t start = output->length();
 77 |     size_t cursor = start;
 78 |     output->resize(start + CBU8_MAX_LENGTH);
 79 |     char* const buffer = &(*output)[0];
 80 |     CBU8_APPEND_UNSAFE(buffer, cursor, code_point);
 81 | 
 82 |     // The macro advanced |cursor| just past the bytes it wrote, which is
 83 |     // exactly the new string length.
 84 |     output->resize(cursor);
 85 |     return cursor - start;
 86 |   }
 87 |   // Single-byte (ASCII) case.
 88 |   output->push_back(static_cast<char>(code_point));
 89 |   return 1;
 90 | }
 91 | 
 92 | size_t WriteUnicodeCharacter(uint32 code_point, string16* output) {
 93 |   // Appends |code_point| as UTF-16; returns the number of units appended.
 94 |   if (CBU16_LENGTH(code_point) != 1) {
 95 |     // Outside the Basic Multilingual Plane: emit a two-unit encoding.
 96 |     size_t write_pos = output->length();
 97 |     output->resize(write_pos + CBU16_MAX_LENGTH);
 98 |     CBU16_APPEND_UNSAFE(&(*output)[0], write_pos, code_point);
 99 |     return CBU16_MAX_LENGTH;
100 |   }
101 |   output->push_back(static_cast<char16>(code_point));  // BMP: one unit.
102 |   return 1;
103 | }
104 | 
105 | // Generalized Unicode converter -----------------------------------------------
106 | 
107 | template<typename CHAR>
108 | void PrepareForUTF8Output(const CHAR* src,
109 |                           size_t src_len,
110 |                           std::string* output) {
111 |   // Empties |output|, then reserves capacity based on a guess made from the
112 |   // first source unit.
113 |   output->clear();
114 |   if (src_len != 0) {
115 |     // ASCII first unit: guess one byte per unit. Otherwise guess that every
116 |     // unit is non-ASCII and expands to three bytes.
117 |     const bool starts_ascii = src[0] < 0x80;
118 |     const size_t estimate = starts_ascii ? src_len : src_len * 3;
119 |     output->reserve(estimate);
120 |   }
121 | }
122 | 
123 | // Explicit instantiations for the character types callers are known to need.
124 | template void PrepareForUTF8Output(const wchar_t*, size_t, std::string*);
125 | template void PrepareForUTF8Output(const char16*, size_t, std::string*);
126 | 
127 | template<typename STRING>
128 | void PrepareForUTF16Or32Output(const char* src,
129 |                                size_t src_len,
130 |                                STRING* output) {
131 |   // Empties |output|, then reserves capacity guessed from the first byte.
132 |   output->clear();
133 |   if (src_len != 0) {
134 |     const unsigned char first_byte = static_cast<unsigned char>(src[0]);
135 |     // ASCII first byte: guess a 1:1 byte-to-unit mapping. Otherwise guess
136 |     // that each output character came from a two-byte UTF-8 sequence.
137 |     if (first_byte >= 0x80)
138 |       output->reserve(src_len / 2);
139 |     else
140 |       output->reserve(src_len);
141 |   }
142 | }
143 | 
144 | // Explicit instantiations for the string types callers are known to need.
145 | template void PrepareForUTF16Or32Output(const char*, size_t, std::wstring*);
146 | template void PrepareForUTF16Or32Output(const char*, size_t, string16*);
147 | 
148 | }  // namespace base
149 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/strings/utf_string_conversion_utils.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #ifndef BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_
 6 | #define BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_
 7 | 
 8 | // This should only be used by the various UTF string conversion files.
 9 | 
10 | #include "base/base_export.h"
11 | #include "base/strings/string16.h"
12 | 
13 | namespace base {
14 | 
15 | inline bool IsValidCodepoint(uint32 code_point) {
16 |   // Valid means: no larger than 0x10FFFF and not a surrogate
17 |   // ([0xD800, 0xDFFF]). Non-characters and unassigned values pass.
18 |   if (code_point > 0x10FFFFu)
19 |     return false;
20 |   return code_point < 0xD800u || code_point > 0xDFFFu;
21 | }
22 | 
23 | inline bool IsValidCharacter(uint32 code_point) {
24 |   // Valid code points minus non-characters (U+FDD0..U+FDEF, U+nFFFE/nFFFF).
25 |   if (code_point < 0xD800u || (code_point >= 0xE000u && code_point < 0xFDD0u))
26 |     return true;
27 |   return code_point > 0xFDEFu && code_point <= 0x10FFFFu &&
28 |          (code_point & 0xFFFEu) != 0xFFFEu;
29 | }
30 | 
31 | // ReadUnicodeCharacter --------------------------------------------------------
32 | 
33 | // Reads a UTF-8 stream, placing the next code point into the given output
34 | // |*code_point_out|. |src| represents the entire string to read, and
35 | // |*char_index| is the character offset within the string to start reading at.
36 | // |*char_index| will be updated to index the last character read, such that
37 | // incrementing it (as in a for loop) advances the reader to the next character.
38 | //
39 | // Returns true on success. On false, |*code_point_out| will be invalid.
40 | BASE_EXPORT bool ReadUnicodeCharacter(const char* src,
41 |                                       int32 src_len,
42 |                                       int32* char_index,
43 |                                       uint32* code_point_out);
44 | 
45 | // Reads a UTF-16 character. The usage is the same as the 8-bit version above.
46 | BASE_EXPORT bool ReadUnicodeCharacter(const char16* src,
47 |                                       int32 src_len,
48 |                                       int32* char_index,
49 |                                       uint32* code_point);
50 | 
51 | #if defined(WCHAR_T_IS_UTF32)
52 | // Reads UTF-32 character. The usage is the same as the 8-bit version above.
53 | BASE_EXPORT bool ReadUnicodeCharacter(const wchar_t* src,
54 |                                       int32 src_len,
55 |                                       int32* char_index,
56 |                                       uint32* code_point);
57 | #endif  // defined(WCHAR_T_IS_UTF32)
58 | 
59 | // WriteUnicodeCharacter -------------------------------------------------------
60 | 
61 | // Appends a UTF-8 character to the given 8-bit string.  Returns the number of
62 | // bytes written.
63 | BASE_EXPORT size_t WriteUnicodeCharacter(uint32 code_point,
64 |                                          std::string* output);
65 | 
66 | // Appends the given code point as a UTF-16 character to the given 16-bit
67 | // string.  Returns the number of 16-bit values written.
68 | BASE_EXPORT size_t WriteUnicodeCharacter(uint32 code_point, string16* output);
69 | 
70 | #if defined(WCHAR_T_IS_UTF32)
71 | // Appends |code_point| to a 32-bit wide string as a single unit. Always
72 | // returns 1, the number of 32-bit values written.
73 | inline size_t WriteUnicodeCharacter(uint32 code_point, std::wstring* output) {
74 |   // No encoding step: one code point is exactly one wchar_t here.
75 |   output->append(1, static_cast<wchar_t>(code_point));
76 |   return 1;
77 | }
78 | #endif  // defined(WCHAR_T_IS_UTF32)
79 | 
80 | // Generalized Unicode converter -----------------------------------------------
81 | 
82 | // Guesses the length of the output in UTF-8 in bytes, clears that output
83 | // string, and reserves that amount of space.  We assume that the input
84 | // character types are unsigned, which will be true for UTF-16 and -32 on our
85 | // systems.
86 | template<typename CHAR>
87 | void PrepareForUTF8Output(const CHAR* src, size_t src_len, std::string* output);
88 | 
89 | // Prepares an output buffer (containing either UTF-16 or -32 data) given some
90 | // UTF-8 input that will be converted to it.  See PrepareForUTF8Output().
91 | template<typename STRING>
92 | void PrepareForUTF16Or32Output(const char* src, size_t src_len, STRING* output);
93 | 
94 | }  // namespace base
95 | 
96 | #endif  // BASE_STRINGS_UTF_STRING_CONVERSION_UTILS_H_
97 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/strings/utf_string_conversions.cc:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2010 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | #include "base/strings/utf_string_conversions.h"
  6 | 
  7 | #include "base/strings/string_piece.h"
  8 | #include "base/strings/string_util.h"
  9 | #include "base/strings/utf_string_conversion_utils.h"
 10 | 
 11 | namespace base {
 12 | 
 13 | namespace {
 14 | 
 15 | // Generalized Unicode converter -----------------------------------------------
 16 | 
 17 | // Converts the given source Unicode character type to the given destination
 18 | // Unicode character type as a STL string. The given input buffer and size
 19 | // determine the source, and the given output STL string will be replaced by
 20 | // the result.
 21 | template<typename SRC_CHAR, typename DEST_STRING>
 22 | bool ConvertUnicode(const SRC_CHAR* src,
 23 |                     size_t src_len,
 24 |                     DEST_STRING* output) {
 25 |   // The ICU-derived readers index with 32-bit integers, so narrow the length.
 26 |   const int32 length = static_cast<int32>(src_len);
 27 |   bool all_valid = true;
 28 |   int32 index = 0;
 29 |   while (index < length) {
 30 |     // The reader leaves |index| on the last unit it consumed.
 31 |     uint32 decoded;
 32 |     const bool ok = ReadUnicodeCharacter(src, length, &index, &decoded);
 33 |     // Undecodable input is replaced by U+FFFD and reported to the caller.
 34 |     WriteUnicodeCharacter(ok ? decoded : 0xFFFD, output);
 35 |     all_valid = all_valid && ok;
 36 |     ++index;
 37 |   }
 38 |   return all_valid;
 39 | }
 40 | 
 41 | }  // namespace
 42 | 
 43 | // UTF-8 <-> Wide --------------------------------------------------------------
 44 | 
 45 | bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
 46 |   if (!IsStringASCII(std::wstring(src, src_len))) {
 47 |     PrepareForUTF8Output(src, src_len, output);
 48 |     return ConvertUnicode(src, src_len, output);
 49 |   }
 50 |   // Pure ASCII: every wide unit maps to the identical single byte.
 51 |   output->assign(src, src + src_len);
 52 |   return true;
 53 | }
 54 | 
 55 | std::string WideToUTF8(const std::wstring& wide) {
 56 |   // Best-effort variant: ConvertUnicode's validity flag is not reported.
 57 |   const wchar_t* const units = wide.data();
 58 |   if (IsStringASCII(wide))
 59 |     return std::string(units, units + wide.length());
 60 |   std::string utf8;
 61 |   PrepareForUTF8Output(units, wide.length(), &utf8);
 62 |   ConvertUnicode(units, wide.length(), &utf8);
 63 |   return utf8;
 64 | }
 65 | 
 66 | bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
 67 |   if (!IsStringASCII(StringPiece(src, src_len))) {
 68 |     PrepareForUTF16Or32Output(src, src_len, output);
 69 |     return ConvertUnicode(src, src_len, output);
 70 |   }
 71 |   // Pure ASCII: each byte widens to the identical wide unit.
 72 |   output->assign(src, src + src_len);
 73 |   return true;
 74 | }
 75 | 
 76 | std::wstring UTF8ToWide(StringPiece utf8) {
 77 |   // Best-effort variant: ConvertUnicode's validity flag is not reported.
 78 |   if (IsStringASCII(utf8))
 79 |     return std::wstring(utf8.begin(), utf8.end());
 80 |   const char* const bytes = utf8.data();
 81 |   std::wstring wide;
 82 |   PrepareForUTF16Or32Output(bytes, utf8.length(), &wide);
 83 |   ConvertUnicode(bytes, utf8.length(), &wide);
 84 |   return wide;
 85 | }
 86 | 
 87 | // UTF-16 <-> Wide -------------------------------------------------------------
 88 | 
 89 | #if defined(WCHAR_T_IS_UTF16)
 90 | 
 91 | // When wide == UTF-16 the conversions are a no-op: units are copied as-is.
 92 | bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
 93 |   output->assign(src, src_len);  // Replace |output| with the source units.
 94 |   return true;  // Nothing is validated, so failure is never reported.
 95 | }
 96 | 
 97 | string16 WideToUTF16(const std::wstring& wide) {
 98 |   return wide;  // Returned unchanged.
 99 | }
100 | 
101 | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
102 |   output->assign(src, src_len);  // Replace |output| with the source units.
103 |   return true;  // Nothing is validated, so failure is never reported.
104 | }
105 | 
106 | std::wstring UTF16ToWide(const string16& utf16) {
107 |   return utf16;  // Returned unchanged.
108 | }
109 | 
110 | #elif defined(WCHAR_T_IS_UTF32)
111 | 
112 | bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
113 |   // Start from an empty string sized for the common all-BMP case, where
114 |   // each 32-bit unit becomes exactly one 16-bit unit.
115 |   output->clear();
116 |   output->reserve(src_len);
117 |   return ConvertUnicode(src, src_len, output);
118 | }
119 | 
120 | string16 WideToUTF16(const std::wstring& wide) {
121 |   string16 utf16;  // Best effort: the validity flag is not reported.
122 |   WideToUTF16(wide.data(), wide.length(), &utf16);
123 |   return utf16;
124 | }
125 | 
126 | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
127 |   // Start from an empty string sized for the common all-BMP case, where
128 |   // each 16-bit unit becomes exactly one 32-bit unit.
129 |   output->clear();
130 |   output->reserve(src_len);
131 |   return ConvertUnicode(src, src_len, output);
132 | }
133 | 
134 | std::wstring UTF16ToWide(const string16& utf16) {
135 |   std::wstring wide;  // Best effort: the validity flag is not reported.
136 |   UTF16ToWide(utf16.data(), utf16.length(), &wide);
137 |   return wide;
138 | }
139 | 
140 | #endif  // defined(WCHAR_T_IS_UTF32)
141 | 
142 | // UTF16 <-> UTF8 --------------------------------------------------------------
143 | 
144 | #if defined(WCHAR_T_IS_UTF32)
145 | 
146 | bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
147 |   if (!IsStringASCII(StringPiece(src, src_len))) {
148 |     PrepareForUTF16Or32Output(src, src_len, output);
149 |     return ConvertUnicode(src, src_len, output);
150 |   }
151 |   // Pure ASCII: each byte widens to the identical 16-bit unit.
152 |   output->assign(src, src + src_len);
153 |   return true;
154 | }
155 | 
156 | string16 UTF8ToUTF16(StringPiece utf8) {
157 |   if (IsStringASCII(utf8))
158 |     return string16(utf8.begin(), utf8.end());
159 | 
160 |   // General path. ConvertUnicode's validity flag is deliberately unused: a
161 |   // best-effort result for malformed input is what this overload returns.
162 |   const char* const bytes = utf8.data();
163 |   string16 utf16;
164 |   PrepareForUTF16Or32Output(bytes, utf8.length(), &utf16);
165 |   ConvertUnicode(bytes, utf8.length(), &utf16);
166 |   return utf16;
167 | }
168 | 
169 | bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
170 |   if (!IsStringASCII(StringPiece16(src, src_len))) {
171 |     PrepareForUTF8Output(src, src_len, output);
172 |     return ConvertUnicode(src, src_len, output);
173 |   }
174 |   // Pure ASCII: each 16-bit unit narrows to the identical single byte.
175 |   output->assign(src, src + src_len);
176 |   return true;
177 | }
178 | 
179 | std::string UTF16ToUTF8(StringPiece16 utf16) {
180 |   // ASCII fast path: narrow the units directly.
181 |   if (IsStringASCII(utf16))
182 |     return std::string(utf16.begin(), utf16.end());
183 | 
184 |   // General path. The pointer overload's validity flag is deliberately
185 |   // unused: a best-effort result for malformed input is what is returned.
186 |   std::string utf8;
187 |   UTF16ToUTF8(utf16.data(), utf16.length(), &utf8);
188 |   return utf8;
189 | }
190 | 
191 | #elif defined(WCHAR_T_IS_UTF16)
192 | // Easy case since we can use the "wide" versions we already wrote above.
193 | // NOTE(review): presumably string16 is std::wstring in this branch - confirm in string16.h.
194 | bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
195 |   return UTF8ToWide(src, src_len, output);  // Forwards UTF8ToWide's flag.
196 | }
197 | 
198 | string16 UTF8ToUTF16(StringPiece utf8) {
199 |   return UTF8ToWide(utf8);  // Best-effort overload; no validity flag.
200 | }
201 | 
202 | bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
203 |   return WideToUTF8(src, src_len, output);  // Reuses the wide converter.
204 | }
205 | 
206 | std::string UTF16ToUTF8(StringPiece16 utf16) {
207 |   const char16* const units = utf16.data();
208 |   if (IsStringASCII(utf16))
209 |     return std::string(units, units + utf16.length());
210 |   std::string utf8;  // Best effort: the validity flag is not reported.
211 |   PrepareForUTF8Output(units, utf16.length(), &utf8);
212 |   ConvertUnicode(units, utf16.length(), &utf8);
213 |   return utf8;
214 | }
215 | 
216 | #endif
217 | 
218 | string16 ASCIIToUTF16(StringPiece ascii) {
219 |   // Disabled precondition check: DCHECK(IsStringASCII(ascii)) << ascii;
220 |   return string16(ascii.begin(), ascii.end());  // Element-wise copy of the range.
221 | }
222 | 
223 | std::string UTF16ToASCII(StringPiece16 utf16) {
224 |   // Disabled precondition check: DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
225 |   return std::string(utf16.begin(), utf16.end());  // Element-wise copy of the range.
226 | }
227 | 
228 | }  // namespace base
229 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/strings/utf_string_conversions.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) 2011 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #ifndef BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
 6 | #define BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
 7 | 
 8 | #include <string>
 9 | 
10 | #include "base/base_export.h"
11 | #include "base/strings/string16.h"
12 | #include "base/strings/string_piece.h"
13 | 
14 | namespace base {
15 | 
16 | // These convert between UTF-8, -16, and -32 strings. They are potentially slow,
17 | // so avoid unnecessary conversions. The low-level versions return a boolean
18 | // indicating whether the conversion was 100% valid. In this case, it will still
19 | // do the best it can and put the result in the output buffer. The versions that
20 | // return strings ignore this error and just return the best conversion
21 | // possible.
22 | BASE_EXPORT bool WideToUTF8(const wchar_t* src, size_t src_len,
23 |                             std::string* output);
24 | BASE_EXPORT std::string WideToUTF8(const std::wstring& wide);
25 | BASE_EXPORT bool UTF8ToWide(const char* src, size_t src_len,
26 |                             std::wstring* output);
27 | BASE_EXPORT std::wstring UTF8ToWide(StringPiece utf8);
28 | 
29 | BASE_EXPORT bool WideToUTF16(const wchar_t* src, size_t src_len,
30 |                              string16* output);
31 | BASE_EXPORT string16 WideToUTF16(const std::wstring& wide);
32 | BASE_EXPORT bool UTF16ToWide(const char16* src, size_t src_len,
33 |                              std::wstring* output);
34 | BASE_EXPORT std::wstring UTF16ToWide(const string16& utf16);
35 | 
36 | BASE_EXPORT bool UTF8ToUTF16(const char* src, size_t src_len, string16* output);
37 | BASE_EXPORT string16 UTF8ToUTF16(StringPiece utf8);
38 | BASE_EXPORT bool UTF16ToUTF8(const char16* src, size_t src_len,
39 |                              std::string* output);
40 | BASE_EXPORT std::string UTF16ToUTF8(StringPiece16 utf16);
41 | 
42 | // This converts an ASCII string, typically a hardcoded constant, to a UTF16
43 | // string.
44 | BASE_EXPORT string16 ASCIIToUTF16(StringPiece ascii);
45 | 
46 | // Converts to 7-bit ASCII by truncating. The result must be known to be ASCII
47 | // beforehand.
48 | BASE_EXPORT std::string UTF16ToASCII(StringPiece16 utf16);
49 | 
50 | }  // namespace base
51 | 
52 | #endif  // BASE_STRINGS_UTF_STRING_CONVERSIONS_H_
53 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/template_util.h:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2011 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | #ifndef BASE_TEMPLATE_UTIL_H_
  6 | #define BASE_TEMPLATE_UTIL_H_
  7 | 
  8 | #include <cstddef>  // For size_t.
  9 | 
 10 | #include "build/build_config.h"
 11 | 
 12 | namespace base {
 13 | 
 14 | // template definitions from tr1
 15 | 
 16 | template<class T, T v>  // Wraps the compile-time constant |v| of type |T|.
 17 | struct integral_constant {
 18 |   static const T value = v;  // The wrapped constant.
 19 |   typedef T value_type;  // Type of the wrapped constant.
 20 |   typedef integral_constant<T, v> type;  // This instantiation itself.
 21 | };
 22 | // Out-of-class definition of the static data member declared above.
 23 | template <class T, T v> const T integral_constant<T, v>::value;
 24 | // Boolean constants that the traits below derive from.
 25 | typedef integral_constant<bool, true> true_type;
 26 | typedef integral_constant<bool, false> false_type;
 27 | 
 28 | template <typename T> struct is_pointer : false_type {};
 29 | template <typename T> struct is_pointer<T*> : true_type {};
 30 | 
 31 | // True for pointers to non-const and const member functions. C++11's standard
 32 | // library has this built in, so it can be removed once we switch to it.
 33 | template <typename T>
 34 | struct is_member_function_pointer : false_type {};
 35 | 
 36 | template <typename Ret, typename C, typename... Args>
 37 | struct is_member_function_pointer<Ret (C::*)(Args...)> : true_type {};
 38 | template <typename Ret, typename C, typename... Args>
 39 | struct is_member_function_pointer<Ret (C::*)(Args...) const> : true_type {};
 40 | 
 41 | // True only when both arguments name exactly the same type.
 42 | template <typename T, typename U> struct is_same : false_type {};
 43 | template <typename T> struct is_same<T, T> : true_type {};
 44 | // Matches arrays of both known and unknown bound.
 45 | template <typename> struct is_array : false_type {};
 46 | template <typename T, size_t N> struct is_array<T[N]> : true_type {};
 47 | template <typename T> struct is_array<T[]> : true_type {};
 48 | // True for T& only; references to const are explicitly excluded.
 49 | template <typename T> struct is_non_const_reference : false_type {};
 50 | template <typename T> struct is_non_const_reference<T&> : true_type {};
 51 | template <typename T> struct is_non_const_reference<const T&> : false_type {};
 52 | // True when the type itself is const-qualified.
 53 | template <typename T> struct is_const : false_type {};
 54 | template <typename T> struct is_const<const T> : true_type {};
 55 | // True for plain void; cv-qualified void is not specialized here.
 56 | template <typename T> struct is_void : false_type {};
 57 | template <> struct is_void<void> : true_type {};
 58 | 
 59 | namespace internal {
 60 | 
 61 | // Types YesType and NoType are guaranteed such that sizeof(YesType) <
 62 | // sizeof(NoType), so sizeof on a call expression reveals the chosen overload.
 63 | typedef char YesType;  // sizeof(YesType) == 1.
 64 | 
 65 | struct NoType {
 66 |   YesType dummy[2];  // Two chars, so sizeof(NoType) is at least 2.
 67 | };
 68 | 
 69 | // This class is an implementation detail for is_convertible, and you
 70 | // don't need to know how it works to use is_convertible. For those
 71 | // who care: we declare two different functions, one whose argument is
 72 | // of type To and one with a variadic argument list. We give them
 73 | // return types of different size, so we can use sizeof to trick the
 74 | // compiler into telling us which function it would have chosen if we
 75 | // had called it with an argument of type From.  See Alexandrescu's
 76 | // _Modern C++ Design_ for more details on this sort of trick.
 77 | 
 78 | struct ConvertHelper {
 79 |   template <typename To>
 80 |   static YesType Test(To);  // Chosen when the argument converts to |To|.
 81 | 
 82 |   template <typename To>
 83 |   static NoType Test(...);  // Variadic fallback for everything else.
 84 | 
 85 |   template <typename From>
 86 |   static From& Create();  // Declared only; supplies a |From| inside sizeof.
 87 | };
 88 | 
 89 | // Used to determine if a type is a struct/union/class. Inspired by Boost's
 90 | // is_class type_trait implementation.
 91 | struct IsClassHelper {
 92 |   template <typename C>
 93 |   static YesType Test(void(C::*)(void));  // Needs a valid pointer-to-member of |C|.
 94 | 
 95 |   template <typename C>
 96 |   static NoType Test(...);  // Variadic fallback for everything else.
 97 | };
 98 | 
 99 | }  // namespace internal
100 | 
101 | // Inherits from true_type if From is convertible to To, false_type otherwise.
102 | // The answer is the size of the overload picked for Test<To>(Create<From>()).
103 | // Note that if the type is convertible, this will be a true_type REGARDLESS
104 | // of whether or not the conversion would emit a warning.
105 | template <typename From, typename To>
106 | struct is_convertible
107 |     : integral_constant<bool,
108 |                         sizeof(internal::ConvertHelper::Test<To>(
109 |                                    internal::ConvertHelper::Create<From>())) ==
110 |                         sizeof(internal::YesType)> {  // YesType-sized result means convertible.
111 | };
112 | 
113 | template <typename T>  // True when Test<T>(0) picks the member-pointer overload.
114 | struct is_class
115 |     : integral_constant<bool,
116 |                         sizeof(internal::IsClassHelper::Test<T>(0)) ==
117 |                             sizeof(internal::YesType)> {  // YesType-sized result means class type.
118 | };
119 | 
120 | template<bool B, class T = void>
121 | struct enable_if {};  // Primary template: no |type| member.
122 | 
123 | template<class T>
124 | struct enable_if<true, T> { typedef T type; };  // |type| exists only when B is true.
125 | 
126 | }  // namespace base
127 | 
128 | #endif  // BASE_TEMPLATE_UTIL_H_
129 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/third_party/icu/LICENSE:
--------------------------------------------------------------------------------
 1 | ICU License - ICU 1.8.1 and later
 2 | 
 3 | COPYRIGHT AND PERMISSION NOTICE
 4 | 
 5 | Copyright (c) 1995-2009 International Business Machines Corporation and others
 6 | 
 7 | All rights reserved.
 8 | 
 9 | Permission is hereby granted, free of charge, to any person obtaining
10 | a copy of this software and associated documentation files (the
11 | "Software"), to deal in the Software without restriction, including
12 | without limitation the rights to use, copy, modify, merge, publish,
13 | distribute, and/or sell copies of the Software, and to permit persons
14 | to whom the Software is furnished to do so, provided that the above
15 | copyright notice(s) and this permission notice appear in all copies of
16 | the Software and that both the above copyright notice(s) and this
17 | permission notice appear in supporting documentation.
18 | 
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
22 | OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
23 | HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
24 | SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
25 | RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
26 | CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
27 | CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
28 | 
29 | Except as contained in this notice, the name of a copyright holder
30 | shall not be used in advertising or otherwise to promote the sale, use
31 | or other dealings in this Software without prior written authorization
32 | of the copyright holder.
33 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/third_party/icu/README.chromium:
--------------------------------------------------------------------------------
 1 | Name: ICU
 2 | URL: http://site.icu-project.org/
 3 | License: MIT
 4 | License File: NOT_SHIPPED
 5 | 
 6 | This file has the relevant components from ICU copied to handle basic
 7 | UTF8/16/32 conversions. Components are copied from utf.h utf8.h utf16.h and
 8 | utf_impl.c
 9 | 
10 | The same module appears in third_party/icu, so we don't repeat the license
11 | file here.
12 | 
13 | The main change is that U_/U8_/U16_ prefixes have been replaced with
14 | CBU_/CBU8_/CBU16_ (for "Chrome Base") to avoid confusion with the "real" ICU
15 | macros should ICU be in use on the system. For the same reason, the functions
16 | and types have been put in the "base_icu" namespace.
17 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/third_party/icu/icu_utf.cc:
--------------------------------------------------------------------------------
  1 | /*
  2 | ******************************************************************************
  3 | *
  4 | *   Copyright (C) 1999-2006, International Business Machines
  5 | *   Corporation and others.  All Rights Reserved.
  6 | *
  7 | ******************************************************************************
  8 | *   file name:  utf_impl.c
  9 | *   encoding:   US-ASCII
 10 | *   tab size:   8 (not used)
 11 | *   indentation:4
 12 | *
 13 | *   created on: 1999sep13
 14 | *   created by: Markus W. Scherer
 15 | *
 16 | *   This file provides implementation functions for macros in the utfXX.h
 17 | *   that would otherwise be too long as macros.
 18 | */
 19 | 
 20 | #include "base/third_party/icu/icu_utf.h"
 21 | 
 22 | namespace base_icu {
 23 | 
 24 | /**
 25 |  * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
 26 |  * which need 1 or 2 bytes in UTF-8:
 27 |  * \code
 28 |  * U+0015 = NAK = Negative Acknowledge, C0 control character
 29 |  * U+009f = highest C1 control character
 30 |  * \endcode
 31 |  *
 32 |  * These are used by UTF8_..._SAFE macros so that they can return an error value
 33 |  * that needs the same number of code units (bytes) as were seen by
 34 |  * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().
 35 |  *
 36 |  * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 37 |  */
 38 | #define CBUTF8_ERROR_VALUE_1 0x15
 39 | 
 40 | /**
 41 |  * See documentation on UTF8_ERROR_VALUE_1 for details.
 42 |  *
 43 |  * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 44 |  */
 45 | #define CBUTF8_ERROR_VALUE_2 0x9f
 46 | 
 47 | 
 48 | /**
 49 |  * Error value for all UTFs. This code point value will be set by macros with error
 50 |  * checking if an error is detected.
 51 |  *
 52 |  * @deprecated ICU 2.4. Obsolete, see utf_old.h.
 53 |  */
 54 | #define CBUTF_ERROR_VALUE 0xffff
 55 | 
 56 | /*
 57 |  * This table could be replaced on many machines by
 58 |  * a few lines of assembler code using an
 59 |  * "index of first 0-bit from msb" instruction and
 60 |  * one or two more integer instructions.
 61 |  *
 62 |  * For example, on an i386, do something like
 63 |  * - MOV AL, leadByte
 64 |  * - NOT AL         (8-bit, leave b15..b8==0..0, reverse only b7..b0)
 65 |  * - MOV AH, 0
 66 |  * - BSR BX, AX     (16-bit)
 67 |  * - MOV AX, 6      (result)
 68 |  * - JZ finish      (ZF==1 if leadByte==0xff)
 69 |  * - SUB AX, BX (result)
 70 |  * -finish:
 71 |  * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB)
 72 |  *
 73 |  * In Unicode, all UTF-8 byte sequences with more than 4 bytes are illegal;
 74 |  * lead bytes above 0xf4 are illegal.
 75 |  * We keep them in this table for skipping long ISO 10646-UTF-8 sequences.
 76 |  */
 77 | const uint8  /* entry [b] = trail-byte count recorded for byte value b */
 78 | utf8_countTrailBytes[256]={
 79 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x00..0x0f */
 80 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x10..0x1f */
 81 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x20..0x2f */
 82 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x30..0x3f */
 83 | 
 84 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x40..0x4f */
 85 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x50..0x5f */
 86 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x60..0x6f */
 87 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x70..0x7f */
 88 | 
 89 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x80..0x8f: trail bytes */
 90 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x90..0x9f: trail bytes */
 91 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0xa0..0xaf: trail bytes */
 92 |     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0xb0..0xbf: trail bytes */
 93 | 
 94 |     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xc0..0xcf */
 95 |     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 0xd0..0xdf */
 96 | 
 97 |     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* 0xe0..0xef */
 98 |     3, 3, 3, 3, 3,  /* 0xf0..0xf4 */
 99 |     3, 3, 3,    /* 0xf5..0xf7: illegal in Unicode */
100 |     4, 4, 4, 4, /* 0xf8..0xfb: illegal in Unicode */
101 |     5, 5,       /* 0xfc..0xfd: illegal in Unicode */
102 |     0, 0        /* illegal bytes 0xfe and 0xff */
103 | };
104 | 
105 | static const UChar32  /* NOTE(review): presumably indexed by trail-byte count; use is outside this excerpt */
106 | utf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
107 | /* NOTE(review): presumably indexed by trail-byte count; first three entries are the error macros above - confirm at use site */
108 | static const UChar32
109 | utf8_errorValue[6]={
110 |     CBUTF8_ERROR_VALUE_1, CBUTF8_ERROR_VALUE_2, CBUTF_ERROR_VALUE, 0x10ffff,
111 |     0x3ffffff, 0x7fffffff
112 | };
113 | 
114 | /*
115 |  * Handle the non-inline part of the U8_NEXT() macro and its obsolete sibling
116 |  * UTF8_NEXT_CHAR_SAFE().
117 |  *
118 |  * The "strict" parameter controls the error behavior:
119 |  * <0  "Safe" behavior of U8_NEXT(): All illegal byte sequences yield a negative
120 |  *     code point result.
121 |  *  0  Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE):
122 |  *     All illegal byte sequences yield a positive code point such that this
123 |  *     result code point would be encoded with the same number of bytes as
124 |  *     the illegal sequence.
125 |  * >0  Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE):
126 |  *     Same as the obsolete "safe" behavior, but non-characters are also treated
127 |  *     like illegal sequences.
128 |  *
129 |  * The special negative (<0) value -2 is used for lenient treatment of surrogate
130 |  * code points as legal. Some implementations use this for roundtripping of
131 |  * Unicode 16-bit strings that are not well-formed UTF-16, that is, they
132 |  * contain unpaired surrogates.
133 |  *
134 |  * Note that a UBool is the same as an int8_t.
135 |  */
136 | UChar32
137 | utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, UBool strict) {
138 |     int32 i=*pi; /* working offset; CBU8_NEXT passes the offset just past the lead byte c */
139 |     uint8 count=CBU8_COUNT_TRAIL_BYTES(c); /* trail bytes expected after lead byte c */
140 |     if((i)+count<=(length)) { /* all expected trail bytes lie before length */
141 |         uint8 trail, illegal=0; /* illegal becomes non-zero once any check fails */
142 | 
143 |         CBU8_MASK_LEAD_BYTE((c), count); /* keep only the code point bits of the lead byte */
144 |         /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
145 |         switch(count) {
146 |         /* cases 3 and 2 fall through (case 3 only if c<0x110); 5/4 and 1 break, 0 returns */
147 |         case 5:
148 |         case 4:
149 |             /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
150 |             illegal=1;
151 |             break;
152 |         case 3:
153 |             trail=s[(i)++];
154 |             (c)=((c)<<6)|(trail&0x3f);
155 |             if(c<0x110) { /* two more 6-bit shifts follow, so the result stays <=0x10ffff */
156 |                 illegal|=(trail&0xc0)^0x80; /* non-zero unless trail is 10xxxxxx */
157 |             } else {
158 |                 /* code point>0x10ffff, outside Unicode */
159 |                 illegal=1;
160 |                 break;
161 |             }
162 |         case 2:
163 |             trail=s[(i)++];
164 |             (c)=((c)<<6)|(trail&0x3f);
165 |             illegal|=(trail&0xc0)^0x80; /* non-zero unless trail is 10xxxxxx */
166 |         case 1:
167 |             trail=s[(i)++];
168 |             (c)=((c)<<6)|(trail&0x3f);
169 |             illegal|=(trail&0xc0)^0x80; /* non-zero unless trail is 10xxxxxx */
170 |             break;
171 |         case 0:
172 |             if(strict>=0) { /* note: both returns below leave *pi unchanged */
173 |                 return CBUTF8_ERROR_VALUE_1;
174 |             } else {
175 |                 return CBU_SENTINEL;
176 |             }
177 |         /* no default branch to optimize switch()  - all values are covered */
178 |         }
179 | 
180 |         /*
181 |          * All the error handling should return a value
182 |          * that needs count bytes so that UTF8_GET_CHAR_SAFE() works right.
183 |          *
184 |          * Starting with Unicode 3.0.1, non-shortest forms are illegal.
185 |          * Starting with Unicode 3.2, surrogate code points must not be
186 |          * encoded in UTF-8, and there are no irregular sequences any more.
187 |          *
188 |          * U8_ macros (new in ICU 2.4) return negative values for error conditions.
189 |          */
190 | 
191 |         /* correct sequence - all trail bytes have (b7..b6)==(10)? */
192 |         /* illegal is also set if count>=4, so utf8_minLegal[] is only indexed with count<=3 */
193 |         if(illegal || (c)<utf8_minLegal[count] || (CBU_IS_SURROGATE(c) && strict!=-2)) {
194 |             /* error handling */
195 |             uint8 errorCount=count; /* remember the expected count; count is decremented below */
196 |             /* don't go beyond this sequence */
197 |             i=*pi; /* rewind to the offset this call started at */
198 |             while(count>0 && CBU8_IS_TRAIL(s[i])) { /* skip at most count real trail bytes */
199 |                 ++(i);
200 |                 --count;
201 |             }
202 |             if(strict>=0) {
203 |                 c=utf8_errorValue[errorCount-count]; /* index = number of trail bytes skipped */
204 |             } else {
205 |                 c=CBU_SENTINEL;
206 |             }
207 |         } else if((strict)>0 && CBU_IS_UNICODE_NONCHAR(c)) {
208 |             /* strict: forbid non-characters like U+fffe */
209 |             c=utf8_errorValue[count]; /* the whole sequence was consumed; i is not rewound */
210 |         }
211 |     } else /* too few bytes left */ {
212 |         /* error handling */
213 |         int32 i0=i; /* i still equals *pi here */
214 |         /* don't just set (i)=(length) in case there is an illegal sequence */
215 |         while((i)<(length) && CBU8_IS_TRAIL(s[i])) {
216 |             ++(i);
217 |         }
218 |         if(strict>=0) {
219 |             c=utf8_errorValue[i-i0]; /* index = number of trail bytes skipped */
220 |         } else {
221 |             c=CBU_SENTINEL;
222 |         }
223 |     }
224 |     *pi=i; /* report the new offset back to the caller */
225 |     return c;
226 | }
227 | 
228 | }  // namespace base_icu
229 | 


--------------------------------------------------------------------------------
/vendor/gurl/base/third_party/icu/icu_utf.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | *******************************************************************************
  3 | *
  4 | *   Copyright (C) 1999-2004, International Business Machines
  5 | *   Corporation and others.  All Rights Reserved.
  6 | *
  7 | *******************************************************************************
  8 | *   file name:  utf.h
  9 | *   encoding:   US-ASCII
 10 | *   tab size:   8 (not used)
 11 | *   indentation:4
 12 | *
 13 | *   created on: 1999sep09
 14 | *   created by: Markus W. Scherer
 15 | */
 16 | 
 17 | #ifndef BASE_THIRD_PARTY_ICU_ICU_UTF_H_
 18 | #define BASE_THIRD_PARTY_ICU_ICU_UTF_H_
 19 | 
 20 | #include "base/basictypes.h"
 21 | 
 22 | namespace base_icu {
 23 | 
 24 | typedef int32 UChar32;
 25 | typedef uint16 UChar;
 26 | typedef int8 UBool;
 27 | 
 28 | // General ---------------------------------------------------------------------
 29 | // from utf.h
 30 | 
 31 | /**
 32 |  * This value is intended for sentinel values for APIs that
 33 |  * (take or) return single code points (UChar32).
 34 |  * It is outside of the Unicode code point range 0..0x10ffff.
 35 |  *
 36 |  * For example, a "done" or "error" value in a new API
 37 |  * could be indicated with CBU_SENTINEL.
 38 |  *
 39 |  * ICU APIs designed before ICU 2.4 usually define service-specific "done"
 40 |  * values, mostly 0xffff.
 41 |  * Those may need to be distinguished from
 42 |  * actual U+ffff text contents by calling functions like
 43 |  * CharacterIterator::hasNext() or UnicodeString::length().
 44 |  *
 45 |  * @return -1
 46 |  * @see UChar32
 47 |  * @stable ICU 2.4
 48 |  */
 49 | #define CBU_SENTINEL (-1)
 50 | 
 51 | /**
 52 |  * Is this code point a Unicode noncharacter?
 53 |  * @param c 32-bit code point
 54 |  * @return TRUE or FALSE
 55 |  * @stable ICU 2.4
 56 |  */
 57 | #define CBU_IS_UNICODE_NONCHAR(c) \
 58 |     ((c)>=0xfdd0 && \
 59 |      ((uint32)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
 60 |      (uint32)(c)<=0x10ffff)
 61 | 
 62 | /**
 63 |  * Is c a Unicode code point value (0..U+10ffff)
 64 |  * that can be assigned a character?
 65 |  *
 66 |  * Code points that are not characters include:
 67 |  * - single surrogate code points (U+d800..U+dfff, 2048 code points)
 68 |  * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
 69 |  * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
 70 |  * - the highest Unicode code point value is U+10ffff
 71 |  *
 72 |  * This means that all code points below U+d800 are character code points,
 73 |  * and that boundary is tested first for performance.
 74 |  *
 75 |  * @param c 32-bit code point
 76 |  * @return TRUE or FALSE
 77 |  * @stable ICU 2.4
 78 |  */
 79 | #define CBU_IS_UNICODE_CHAR(c) \
 80 |     ((uint32)(c)<0xd800 || \
 81 |         ((uint32)(c)>0xdfff && \
 82 |          (uint32)(c)<=0x10ffff && \
 83 |          !CBU_IS_UNICODE_NONCHAR(c)))
 84 | 
 85 | /**
 86 |  * Is this code point a surrogate (U+d800..U+dfff)?
 87 |  * @param c 32-bit code point
 88 |  * @return TRUE or FALSE
 89 |  * @stable ICU 2.4
 90 |  */
 91 | #define CBU_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
 92 | 
 93 | /**
 94 |  * Assuming c is a surrogate code point (CBU_IS_SURROGATE(c)),
 95 |  * is it a lead surrogate?
 96 |  * @param c 32-bit code point
 97 |  * @return TRUE or FALSE
 98 |  * @stable ICU 2.4
 99 |  */
100 | #define CBU_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
101 | 
102 | 
103 | // UTF-8 macros ----------------------------------------------------------------
104 | // from utf8.h
105 | 
106 | extern const uint8 utf8_countTrailBytes[256];
107 | 
108 | /**
109 |  * Count the trail bytes for a UTF-8 lead byte (lookup in utf8_countTrailBytes[],
110 |  * indexed by the whole argument expression truncated to uint8). @internal
111 |  */
112 | #define CBU8_COUNT_TRAIL_BYTES(leadByte) (base_icu::utf8_countTrailBytes[(uint8)(leadByte)])
113 | 
114 | /**
115 |  * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
116 |  * @internal
117 |  */
118 | #define CBU8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
119 | 
120 | /**
121 |  * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
122 |  * @param c 8-bit code unit (byte)
123 |  * @return TRUE or FALSE
124 |  * @stable ICU 2.4
125 |  */
126 | #define CBU8_IS_SINGLE(c) (((c)&0x80)==0)
127 | 
128 | /**
129 |  * Is this code unit (byte) a UTF-8 lead byte?
130 |  * @param c 8-bit code unit (byte)
131 |  * @return TRUE or FALSE
132 |  * @stable ICU 2.4
133 |  */
134 | #define CBU8_IS_LEAD(c) ((uint8)((c)-0xc0)<0x3e)
135 | 
136 | /**
137 |  * Is this code unit (byte) a UTF-8 trail byte?
138 |  * @param c 8-bit code unit (byte)
139 |  * @return TRUE or FALSE
140 |  * @stable ICU 2.4
141 |  */
142 | #define CBU8_IS_TRAIL(c) (((c)&0xc0)==0x80)
143 | 
144 | /**
145 |  * How many code units (bytes) are used for the UTF-8 encoding
146 |  * of this Unicode code point?
147 |  * @param c 32-bit code point
148 |  * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
149 |  * @stable ICU 2.4
150 |  */
151 | #define CBU8_LENGTH(c) \
152 |     ((uint32)(c)<=0x7f ? 1 : \
153 |         ((uint32)(c)<=0x7ff ? 2 : \
154 |             ((uint32)(c)<=0xd7ff ? 3 : \
155 |                 ((uint32)(c)<=0xdfff || (uint32)(c)>0x10ffff ? 0 : \
156 |                     ((uint32)(c)<=0xffff ? 3 : 4)\
157 |                 ) \
158 |             ) \
159 |         ) \
160 |     )
161 | 
162 | /**
163 |  * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
164 |  * @return 4
165 |  * @stable ICU 2.4
166 |  */
167 | #define CBU8_MAX_LENGTH 4
168 | 
169 | /**
170 |  * Function for handling "next code point" with error-checking.
171 |  * @internal
172 |  */
173 | UChar32 utf8_nextCharSafeBody(const uint8 *s, int32 *pi, int32 length, UChar32 c, UBool strict);
174 | 
175 | /**
176 |  * Get a code point from a string at a code point boundary offset,
177 |  * and advance the offset to the next code point boundary.
178 |  * (Post-incrementing forward iteration.)
179 |  * "Safe" macro, checks for illegal sequences and for string boundaries.
180 |  *
181 |  * The offset may point to the lead byte of a multi-byte sequence,
182 |  * in which case the macro will read the whole sequence.
183 |  * If the offset points to a trail byte or an illegal UTF-8 sequence, then
184 |  * c is set to a negative value.
185 |  *
186 |  * @param s const uint8 * string (may be any pointer expression; it is parenthesized where used)
187 |  * @param i string offset, i<length; must be a modifiable lvalue (it is incremented)
188 |  * @param length string length
189 |  * @param c output UChar32 variable, set to <0 (CBU_SENTINEL) in case of an error
190 |  * @see CBU8_NEXT_UNSAFE
191 |  * @stable ICU 2.4
192 |  */
193 | #define CBU8_NEXT(s, i, length, c) { \
194 |     (c)=(s)[(i)++]; \
195 |     if(((uint8)(c))>=0x80) { \
196 |         if(CBU8_IS_LEAD(c)) { \
197 |             (c)=base_icu::utf8_nextCharSafeBody((const uint8 *)(s), &(i), (int32)(length), (c), -1); \
198 |         } else { \
199 |             (c)=CBU_SENTINEL; \
200 |         } \
201 |     } \
202 | }
203 | 
204 | /**
205 |  * Append a code point to a string, overwriting 1 to 4 bytes.
206 |  * The offset points to the current end of the string contents
207 |  * and is advanced (post-increment).
208 |  * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
209 |  * Otherwise, the result is undefined.
210 |  *
211 |  * @param s uint8 * string buffer (written to)
212 |  * @param i string offset
213 |  * @param c code point to append
214 |  * @see CBU8_APPEND
215 |  * @stable ICU 2.4
216 |  */
217 | #define CBU8_APPEND_UNSAFE(s, i, c) { \
218 |     if((uint32)(c)<=0x7f) { \
219 |         (s)[(i)++]=(uint8)(c); \
220 |     } else { \
221 |         if((uint32)(c)<=0x7ff) { \
222 |             (s)[(i)++]=(uint8)(((c)>>6)|0xc0); \
223 |         } else { \
224 |             if((uint32)(c)<=0xffff) { \
225 |                 (s)[(i)++]=(uint8)(((c)>>12)|0xe0); \
226 |             } else { \
227 |                 (s)[(i)++]=(uint8)(((c)>>18)|0xf0); \
228 |                 (s)[(i)++]=(uint8)((((c)>>12)&0x3f)|0x80); \
229 |             } \
230 |             (s)[(i)++]=(uint8)((((c)>>6)&0x3f)|0x80); \
231 |         } \
232 |         (s)[(i)++]=(uint8)(((c)&0x3f)|0x80); \
233 |     } \
234 | }
235 | 
236 | // UTF-16 macros ---------------------------------------------------------------
237 | // from utf16.h
238 | 
239 | /**
240 |  * Does this code unit alone encode a code point (BMP, not a surrogate)?
241 |  * @param c 16-bit code unit
242 |  * @return TRUE or FALSE
243 |  * @stable ICU 2.4
244 |  */
245 | #define CBU16_IS_SINGLE(c) !CBU_IS_SURROGATE(c)
246 | 
247 | /**
248 |  * Is this code unit a lead surrogate (U+d800..U+dbff)?
249 |  * @param c 16-bit code unit
250 |  * @return TRUE or FALSE
251 |  * @stable ICU 2.4
252 |  */
253 | #define CBU16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
254 | 
255 | /**
256 |  * Is this code unit a trail surrogate (U+dc00..U+dfff)?
257 |  * @param c 16-bit code unit
258 |  * @return TRUE or FALSE
259 |  * @stable ICU 2.4
260 |  */
261 | #define CBU16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
262 | 
263 | /**
264 |  * Is this code unit a surrogate (U+d800..U+dfff)?
265 |  * @param c 16-bit code unit
266 |  * @return TRUE or FALSE
267 |  * @stable ICU 2.4
268 |  */
269 | #define CBU16_IS_SURROGATE(c) CBU_IS_SURROGATE(c)
270 | 
271 | /**
272 |  * Assuming c is a surrogate code point (CBU16_IS_SURROGATE(c)),
273 |  * is it a lead surrogate?
274 |  * @param c 16-bit code unit
275 |  * @return TRUE or FALSE
276 |  * @stable ICU 2.4
277 |  */
278 | #define CBU16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
279 | 
280 | /**
281 |  * Helper constant for CBU16_GET_SUPPLEMENTARY.
282 |  * @internal
283 |  */
284 | #define CBU16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
285 | 
286 | /**
287 |  * Get a supplementary code point value (U+10000..U+10ffff)
288 |  * from its lead and trail surrogates.
289 |  * The result is undefined if the input values are not
290 |  * lead and trail surrogates.
291 |  *
292 |  * @param lead lead surrogate (U+d800..U+dbff)
293 |  * @param trail trail surrogate (U+dc00..U+dfff)
294 |  * @return supplementary code point (U+10000..U+10ffff)
295 |  * @stable ICU 2.4
296 |  */
297 | #define CBU16_GET_SUPPLEMENTARY(lead, trail) \
298 |     (((base_icu::UChar32)(lead)<<10UL)+(base_icu::UChar32)(trail)-CBU16_SURROGATE_OFFSET)
299 | 
300 | 
301 | /**
302 |  * Get the lead surrogate (0xd800..0xdbff) for a
303 |  * supplementary code point (0x10000..0x10ffff).
304 |  * @param supplementary 32-bit code point (U+10000..U+10ffff)
305 |  * @return lead surrogate (U+d800..U+dbff) for supplementary
306 |  * @stable ICU 2.4
307 |  */
308 | #define CBU16_LEAD(supplementary) \
309 |     (base_icu::UChar)(((supplementary)>>10)+0xd7c0)
310 | 
311 | /**
312 |  * Get the trail surrogate (0xdc00..0xdfff) for a
313 |  * supplementary code point (0x10000..0x10ffff).
314 |  * @param supplementary 32-bit code point (U+10000..U+10ffff)
315 |  * @return trail surrogate (U+dc00..U+dfff) for supplementary
316 |  * @stable ICU 2.4
317 |  */
318 | #define CBU16_TRAIL(supplementary) \
319 |     (base_icu::UChar)(((supplementary)&0x3ff)|0xdc00)
320 | 
321 | /**
322 |  * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
323 |  * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
324 |  * @param c 32-bit code point
325 |  * @return 1 or 2
326 |  * @stable ICU 2.4
327 |  */
328 | #define CBU16_LENGTH(c) ((uint32)(c)<=0xffff ? 1 : 2)
329 | 
330 | /**
331 |  * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
332 |  * @return 2
333 |  * @stable ICU 2.4
334 |  */
335 | #define CBU16_MAX_LENGTH 2
336 | 
337 | /**
338 |  * Get a code point from a string at a code point boundary offset,
339 |  * and advance the offset to the next code point boundary.
340 |  * (Post-incrementing forward iteration.)
341 |  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
342 |  *
343 |  * The offset may point to the lead surrogate unit
344 |  * for a supplementary code point, in which case the macro will read
345 |  * the following trail surrogate as well.
346 |  * If the offset points to a trail surrogate or
347 |  * to a single, unpaired lead surrogate, then that itself
348 |  * will be returned as the code point.
349 |  *
350 |  * @param s const UChar * string
351 |  * @param i string offset, i<length
352 |  * @param length string length
353 |  * @param c output UChar32 variable
354 |  * @stable ICU 2.4
355 |  */
356 | #define CBU16_NEXT(s, i, length, c) { \
357 |     (c)=(s)[(i)++]; \
358 |     if(CBU16_IS_LEAD(c)) { \
359 |         uint16 __c2; \
360 |         if((i)<(length) && CBU16_IS_TRAIL(__c2=(s)[(i)])) { \
361 |             ++(i); \
362 |             (c)=CBU16_GET_SUPPLEMENTARY((c), __c2); \
363 |         } \
364 |     } \
365 | }
366 | 
367 | /**
368 |  * Append a code point to a string, overwriting 1 or 2 code units.
369 |  * The offset points to the current end of the string contents
370 |  * and is advanced (post-increment).
371 |  * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
372 |  * Otherwise, the result is undefined.
373 |  *
374 |  * @param s UChar * string buffer (written to)
375 |  * @param i string offset
376 |  * @param c code point to append
377 |  * @see CBU16_APPEND
378 |  * @stable ICU 2.4
379 |  */
380 | #define CBU16_APPEND_UNSAFE(s, i, c) { \
381 |     if((uint32)(c)<=0xffff) { \
382 |         (s)[(i)++]=(uint16)(c); \
383 |     } else { \
384 |         (s)[(i)++]=(uint16)(((c)>>10)+0xd7c0); \
385 |         (s)[(i)++]=(uint16)(((c)&0x3ff)|0xdc00); \
386 |     } \
387 | }
388 | 
389 | }  // namespace base_icu
390 | 
391 | #endif  // BASE_THIRD_PARTY_ICU_ICU_UTF_H_
392 | 


--------------------------------------------------------------------------------
/vendor/gurl/build/build_config.h:
--------------------------------------------------------------------------------
  1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | // This file adds defines about the platform we're currently building on.
  6 | //  Operating System:
  7 | //    OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX or LINUX) /
  8 | //    OS_NACL (NACL_SFI or NACL_NONSFI) / OS_NACL_SFI / OS_NACL_NONSFI
  9 | //  Compiler:
 10 | //    COMPILER_MSVC / COMPILER_GCC
 11 | //  Processor:
 12 | //    ARCH_CPU_X86 / ARCH_CPU_X86_64 / ARCH_CPU_X86_FAMILY (X86 or X86_64)
 13 | //    ARCH_CPU_32_BITS / ARCH_CPU_64_BITS
 14 | 
 15 | #ifndef BUILD_BUILD_CONFIG_H_
 16 | #define BUILD_BUILD_CONFIG_H_
 17 | 
 18 | // A set of macros to use for platform detection.
 19 | #if defined(__native_client__)
 20 | // __native_client__ must be first, so that other OS_ defines are not set.
 21 | #define OS_NACL 1
 22 | // OS_NACL comes in two sandboxing technology flavors, SFI or Non-SFI.
 23 | // PNaCl toolchain defines __native_client_nonsfi__ macro in Non-SFI build
 24 | // mode, while it does not in SFI build mode.
 25 | #if defined(__native_client_nonsfi__)
 26 | #define OS_NACL_NONSFI
 27 | #else
 28 | #define OS_NACL_SFI
 29 | #endif
 30 | #elif defined(ANDROID)
 31 | #define OS_ANDROID 1
 32 | #elif defined(__APPLE__)
 33 | // only include TargetConditionals.h after testing ANDROID as some android builds
 34 | // on mac don't have this header available and it's not needed unless the target
 35 | // is really mac/ios.
 36 | #include <TargetConditionals.h>
 37 | #define OS_MACOSX 1
 38 | #if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
 39 | #define OS_IOS 1
 40 | #endif  // defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE
 41 | #elif defined(__linux__)
 42 | #define OS_LINUX 1
 43 | // include a system header to pull in features.h for glibc/uclibc macros.
 44 | #include <unistd.h>
 45 | #if defined(__GLIBC__) && !defined(__UCLIBC__)
 46 | // we really are using glibc, not uClibc pretending to be glibc
 47 | #define LIBC_GLIBC 1
 48 | #endif
 49 | #elif defined(_WIN32)
 50 | #define OS_WIN 1
 51 | #define TOOLKIT_VIEWS 1
 52 | #elif defined(__FreeBSD__)
 53 | #define OS_FREEBSD 1
 54 | #elif defined(__OpenBSD__)
 55 | #define OS_OPENBSD 1
 56 | #elif defined(__sun)
 57 | #define OS_SOLARIS 1
 58 | #elif defined(__QNXNTO__)
 59 | #define OS_QNX 1
 60 | #else
 61 | #error Please add support for your platform in build/build_config.h
 62 | #endif
 63 | 
 64 | #if defined(USE_OPENSSL_CERTS) && defined(USE_NSS_CERTS)
 65 | #error Cannot use both OpenSSL and NSS for certificates
 66 | #endif
 67 | 
 68 | // For access to standard BSD features, use OS_BSD instead of a
 69 | // more specific macro.
 70 | #if defined(OS_FREEBSD) || defined(OS_OPENBSD)
 71 | #define OS_BSD 1
 72 | #endif
 73 | 
 74 | // For access to standard POSIXish features, use OS_POSIX instead of a
 75 | // more specific macro.
 76 | #if defined(OS_MACOSX) || defined(OS_LINUX) || defined(OS_FREEBSD) ||     \
 77 |     defined(OS_OPENBSD) || defined(OS_SOLARIS) || defined(OS_ANDROID) ||  \
 78 |     defined(OS_NACL) || defined(OS_QNX)
 79 | #define OS_POSIX 1
 80 | #endif
 81 | 
 82 | // Use tcmalloc
 83 | #if (defined(OS_WIN) || defined(OS_LINUX) || defined(OS_ANDROID)) && \
 84 |     !defined(NO_TCMALLOC)
 85 | #define USE_TCMALLOC 1
 86 | #endif
 87 | 
 88 | // Compiler detection.
 89 | #if defined(__GNUC__)
 90 | #define COMPILER_GCC 1
 91 | #elif defined(_MSC_VER)
 92 | #define COMPILER_MSVC 1
 93 | #else
 94 | #error Please add support for your compiler in build/build_config.h
 95 | #endif
 96 | 
 97 | // Processor architecture detection.  For more info on what's defined, see:
 98 | //   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
 99 | //   http://www.agner.org/optimize/calling_conventions.pdf
100 | //   or with gcc, run: "echo | gcc -E -dM -"
101 | #if defined(_M_X64) || defined(__x86_64__)
102 | #define ARCH_CPU_X86_FAMILY 1
103 | #define ARCH_CPU_X86_64 1
104 | #define ARCH_CPU_64_BITS 1
105 | #define ARCH_CPU_LITTLE_ENDIAN 1
106 | #elif defined(_M_IX86) || defined(__i386__)
107 | #define ARCH_CPU_X86_FAMILY 1
108 | #define ARCH_CPU_X86 1
109 | #define ARCH_CPU_32_BITS 1
110 | #define ARCH_CPU_LITTLE_ENDIAN 1
111 | #elif defined(__ARMEL__)
112 | #define ARCH_CPU_ARM_FAMILY 1
113 | #define ARCH_CPU_ARMEL 1
114 | #define ARCH_CPU_32_BITS 1
115 | #define ARCH_CPU_LITTLE_ENDIAN 1
116 | #elif defined(__aarch64__)
117 | #define ARCH_CPU_ARM_FAMILY 1
118 | #define ARCH_CPU_ARM64 1
119 | #define ARCH_CPU_64_BITS 1
120 | #define ARCH_CPU_LITTLE_ENDIAN 1
121 | #elif defined(__pnacl__)
122 | #define ARCH_CPU_32_BITS 1
123 | #define ARCH_CPU_LITTLE_ENDIAN 1
124 | #elif defined(__MIPSEL__)
125 | #if defined(__LP64__)
126 | #define ARCH_CPU_MIPS64_FAMILY 1
127 | #define ARCH_CPU_MIPS64EL 1
128 | #define ARCH_CPU_64_BITS 1
129 | #define ARCH_CPU_LITTLE_ENDIAN 1
130 | #else
131 | #define ARCH_CPU_MIPS_FAMILY 1
132 | #define ARCH_CPU_MIPSEL 1
133 | #define ARCH_CPU_32_BITS 1
134 | #define ARCH_CPU_LITTLE_ENDIAN 1
135 | #endif
136 | #else
137 | #error Please add support for your architecture in build/build_config.h
138 | #endif
139 | 
140 | // Type detection for wchar_t.
141 | #if defined(OS_WIN)
142 | #define WCHAR_T_IS_UTF16
143 | #elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
144 |     defined(__WCHAR_MAX__) && \
145 |     (__WCHAR_MAX__ == 0x7fffffff || __WCHAR_MAX__ == 0xffffffff)
146 | #define WCHAR_T_IS_UTF32
147 | #elif defined(OS_POSIX) && defined(COMPILER_GCC) && \
148 |     defined(__WCHAR_MAX__) && \
149 |     (__WCHAR_MAX__ == 0x7fff || __WCHAR_MAX__ == 0xffff)
150 | // On Posix, we'll detect short wchar_t, but projects aren't guaranteed to
151 | // compile in this mode (in particular, Chrome doesn't). This is intended for
152 | // other projects using base who manage their own dependencies and make sure
153 | // short wchar works for them.
154 | #define WCHAR_T_IS_UTF16
155 | #else
156 | #error Please add support for your compiler in build/build_config.h
157 | #endif
158 | 
159 | #if defined(OS_ANDROID)
160 | // The compiler thinks std::string::const_iterator and "const char*" are
161 | // equivalent types.
162 | #define STD_STRING_ITERATOR_IS_CHAR_POINTER
163 | // The compiler thinks base::string16::const_iterator and "char16*" are
164 | // equivalent types.
165 | #define BASE_STRING16_ITERATOR_IS_CHAR16_POINTER
166 | #endif
167 | 
168 | #endif  // BUILD_BUILD_CONFIG_H_


--------------------------------------------------------------------------------
/vendor/gurl/url/third_party/mozilla/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright 2007, Google Inc.
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are
 6 | met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 | notice, this list of conditions and the following disclaimer.
10 |     * Redistributions in binary form must reproduce the above
11 | copyright notice, this list of conditions and the following disclaimer
12 | in the documentation and/or other materials provided with the
13 | distribution.
14 |     * Neither the name of Google Inc. nor the names of its
15 | contributors may be used to endorse or promote products derived from
16 | this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 
30 | -------------------------------------------------------------------------------
31 | 
32 | The file url_parse.cc is based on nsURLParsers.cc from Mozilla. This file is
33 | licensed separately as follows:
34 | 
35 | The contents of this file are subject to the Mozilla Public License Version
36 | 1.1 (the "License"); you may not use this file except in compliance with
37 | the License. You may obtain a copy of the License at
38 | http://www.mozilla.org/MPL/
39 | 
40 | Software distributed under the License is distributed on an "AS IS" basis,
41 | WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
42 | for the specific language governing rights and limitations under the
43 | License.
44 | 
45 | The Original Code is mozilla.org code.
46 | 
47 | The Initial Developer of the Original Code is
48 | Netscape Communications Corporation.
49 | Portions created by the Initial Developer are Copyright (C) 1998
50 | the Initial Developer. All Rights Reserved.
51 | 
52 | Contributor(s):
53 |   Darin Fisher (original author)
54 | 
55 | Alternatively, the contents of this file may be used under the terms of
56 | either the GNU General Public License Version 2 or later (the "GPL"), or
57 | the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
58 | in which case the provisions of the GPL or the LGPL are applicable instead
59 | of those above. If you wish to allow use of your version of this file only
60 | under the terms of either the GPL or the LGPL, and not to allow others to
61 | use your version of this file under the terms of the MPL, indicate your
62 | decision by deleting the provisions above and replace them with the notice
63 | and other provisions required by the GPL or the LGPL. If you do not delete
64 | the provisions above, a recipient may use your version of this file under
65 | the terms of any one of the MPL, the GPL or the LGPL.
66 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/third_party/mozilla/README.chromium:
--------------------------------------------------------------------------------
1 | Name: url_parse
2 | URL: http://mxr.mozilla.org/comm-central/source/mozilla/netwerk/base/src/nsURLParsers.cpp
3 | License: BSD and MPL 1.1/GPL 2.0/LGPL 2.1
4 | License File: LICENSE.txt
5 | 
6 | Description:
7 | 
8 | The file url_parse.cc is based on nsURLParsers.cc from Mozilla.
9 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/third_party/mozilla/a.out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/commonsearch/urlparse4/fda910309aa189d57473dbb12e2d2acde49c1736/vendor/gurl/url/third_party/mozilla/a.out


--------------------------------------------------------------------------------
/vendor/gurl/url/url_canon_filesystemurl.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | // Functions for canonicalizing "filesystem:file:" URLs.
  6 | 
  7 | #include "url/url_canon.h"
  8 | #include "url/url_canon_internal.h"
  9 | #include "url/url_file.h"
 10 | #include "url/url_parse_internal.h"
 11 | #include "url/url_util.h"
 12 | #include "url/url_util_internal.h"
 13 | 
 14 | namespace url {
 15 | 
 16 | namespace {
 17 | 
 18 | // Canonicalizes a "filesystem:" URL into |output| and records the component
 19 | // ranges in |new_parsed|. The inner URL is read directly from |spec|; the
 20 | // outer path/query/ref go through |source|, which may carry replacements.
 21 | // Fails when the inner URL is missing, has an unsupported scheme or a path
 22 | // of at most one character, or when a step other than query/ref fails.
 23 | template<typename CHAR, typename UCHAR>
 24 | bool DoCanonicalizeFileSystemURL(const CHAR* spec,
 25 |                                  const URLComponentSource<CHAR>& source,
 26 |                                  const Parsed& parsed,
 27 |                                  CharsetConverter* charset_converter,
 28 |                                  CanonOutput* output,
 29 |                                  Parsed* new_parsed) {
 30 |   // Only scheme, path, query and ref apply to filesystem URLs; blank the rest.
 31 |   new_parsed->username.reset();
 32 |   new_parsed->password.reset();
 33 |   new_parsed->host.reset();
 34 |   new_parsed->port.reset();
 35 | 
 36 |   const Parsed* inner = parsed.inner_parsed();
 37 |   Parsed canonical_inner;
 38 | 
 39 |   // The outer scheme is known, so it is written literally instead of going
 40 |   // through the scheme canonicalizer. Its recorded length excludes the colon.
 41 |   new_parsed->scheme.begin = output->length();
 42 |   output->Append("filesystem:", 11);
 43 |   new_parsed->scheme.len = 10;
 44 | 
 45 |   if (!inner || !inner->scheme.is_valid())
 46 |     return false;
 47 | 
 48 |   bool ok = false;
 49 |   if (CompareSchemeComponent(spec, inner->scheme, url::kFileScheme)) {
 50 |     canonical_inner.scheme.begin = output->length();
 51 |     output->Append("file://", 7);
 52 |     canonical_inner.scheme.len = 4;
 53 |     ok = CanonicalizePath(spec, inner->path, output, &canonical_inner.path);
 54 |   } else if (IsStandard(spec, inner->scheme)) {
 55 |     ok = CanonicalizeStandardURL(spec, inner->Length(), *inner,
 56 |                                  charset_converter, output, &canonical_inner);
 57 |   } else {
 58 |     // TODO(ericu): The URL is wrong, but should more of the input be echoed
 59 |     // back? Emitting filesystem:mailto etc. doesn't seem all that useful.
 60 |     return false;
 61 |   }
 62 |   // The filesystem type must be more than just a leading slash for validity.
 63 |   const bool has_type = inner->path.len > 1;
 64 |   const bool outer_path_ok = CanonicalizePath(source.path, parsed.path, output,
 65 |                                               &new_parsed->path);
 66 | 
 67 |   // Query/ref failures are ignored: the URL can probably still be loaded.
 68 |   CanonicalizeQuery(source.query, parsed.query, charset_converter, output,
 69 |                     &new_parsed->query);
 70 |   CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
 71 | 
 72 |   if (!(ok && has_type && outer_path_ok))
 73 |     return false;
 74 |   new_parsed->set_inner_parsed(canonical_inner);
 75 |   return true;
 76 | }
 77 | 
 78 | }  // namespace
 79 | 
 80 | bool CanonicalizeFileSystemURL(const char* spec,  // 8-bit overload of the filesystem: canonicalizer.
 81 |                                int spec_len,  // Not referenced in this function body.
 82 |                                const Parsed& parsed,
 83 |                                CharsetConverter* charset_converter,
 84 |                                CanonOutput* output,
 85 |                                Parsed* new_parsed) {
 86 |   return DoCanonicalizeFileSystemURL<char, unsigned char>(
 87 |       spec, URLComponentSource<char>(spec), parsed, charset_converter, output,  // |spec| is both the inner-URL text and the component source.
 88 |       new_parsed);
 89 | }
 90 | 
 91 | bool CanonicalizeFileSystemURL(const base::char16* spec,  // 16-bit overload of the filesystem: canonicalizer.
 92 |                                int spec_len,  // Not referenced in this function body.
 93 |                                const Parsed& parsed,
 94 |                                CharsetConverter* charset_converter,
 95 |                                CanonOutput* output,
 96 |                                Parsed* new_parsed) {
 97 |   return DoCanonicalizeFileSystemURL<base::char16, base::char16>(
 98 |       spec, URLComponentSource<base::char16>(spec), parsed, charset_converter,  // |spec| is both the inner-URL text and the component source.
 99 |       output, new_parsed);
100 | }
101 | 
102 | bool ReplaceFileSystemURL(const char* base,  // Canonicalizes |base| again after applying 8-bit |replacements|.
103 |                           const Parsed& base_parsed,
104 |                           const Replacements<char>& replacements,
105 |                           CharsetConverter* charset_converter,
106 |                           CanonOutput* output,
107 |                           Parsed* new_parsed) {
108 |   URLComponentSource<char> source(base);
109 |   Parsed parsed(base_parsed);  // Mutable copy; the const |base_parsed| of the caller is left untouched.
110 |   SetupOverrideComponents(base, replacements, &source, &parsed);  // Presumably redirects overridden components to the replacement text -- TODO confirm.
111 |   return DoCanonicalizeFileSystemURL<char, unsigned char>(
112 |       base, source, parsed, charset_converter, output, new_parsed);
113 | }
114 | 
115 | bool ReplaceFileSystemURL(const char* base,  // Canonicalizes |base| again after applying 16-bit |replacements|.
116 |                           const Parsed& base_parsed,
117 |                           const Replacements<base::char16>& replacements,
118 |                           CharsetConverter* charset_converter,
119 |                           CanonOutput* output,
120 |                           Parsed* new_parsed) {
121 |   RawCanonOutput<1024> utf8;  // Scratch buffer handed to the setup call; presumably holds the replacements as UTF-8 -- TODO confirm.
122 |   URLComponentSource<char> source(base);
123 |   Parsed parsed(base_parsed);  // Mutable copy; the const |base_parsed| of the caller is left untouched.
124 |   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
125 |   return DoCanonicalizeFileSystemURL<char, unsigned char>(  // The 8-bit worker is used even though the replacements are 16-bit.
126 |       base, source, parsed, charset_converter, output, new_parsed);
127 | }
128 | 
129 | }  // namespace url
130 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_canon_fileurl.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | // Functions for canonicalizing "file:" URLs.
  6 | 
  7 | #include "url/url_canon.h"
  8 | #include "url/url_canon_internal.h"
  9 | #include "url/url_file.h"
 10 | #include "url/url_parse_internal.h"
 11 | 
 12 | namespace url {
 13 | 
 14 | namespace {
 15 | 
 16 | #ifdef WIN32
 17 | 
 18 | // Given a pointer into the spec, this copies and canonicalizes the drive
 19 | // letter and colon to the output, if one is found. If there is not a drive
 20 | // spec, it won't do anything. The index of the next character in the input
 21 | // spec is returned (after the colon when a drive spec is found, the begin
 22 | // offset if one is not). Slashes before the drive letter are not copied.
 23 | template<typename CHAR>
 24 | int FileDoDriveSpec(const CHAR* spec, int begin, int end,
 25 |                     CanonOutput* output) {
 26 |   // The path could be one of several things: /foo/bar, c:/foo/bar, /c:/foo,
 27 |   // (with backslashes instead of slashes as well).
 28 |   int num_slashes = CountConsecutiveSlashes(spec, begin, end);
 29 |   int after_slashes = begin + num_slashes;  // Index where a drive letter would have to start.
 30 | 
 31 |   if (!DoesBeginWindowsDriveSpec(spec, after_slashes, end))
 32 |     return begin;  // Haven't consumed any characters
 33 | 
 34 |   // A drive spec is the start of a path, so we need to add a slash for the
 35 |   // authority terminator (typically the third slash).
 36 |   output->push_back('/');
 37 | 
 38 |   // DoesBeginWindowsDriveSpec will ensure that the drive letter is valid
 39 |   // and that it is followed by a colon/pipe.
 40 | 
 41 |   // Normalize Windows drive letters to uppercase
 42 |   if (spec[after_slashes] >= 'a' && spec[after_slashes] <= 'z')
 43 |     output->push_back(static_cast<char>(spec[after_slashes] - 'a' + 'A'));
 44 |   else
 45 |     output->push_back(static_cast<char>(spec[after_slashes]));
 46 | 
 47 |   // Normalize the character following it to a colon rather than pipe.
 48 |   output->push_back(':');
 49 |   return after_slashes + 2;  // Resume after the drive letter and its colon/pipe.
 50 | }
 51 | 
 52 | #endif  // WIN32
 53 | 
 54 | template<typename CHAR, typename UCHAR>  // Worker behind both FileCanonicalizePath overloads and DoCanonicalizeFileURL.
 55 | bool DoFileCanonicalizePath(const CHAR* spec,
 56 |                             const Component& path,
 57 |                             CanonOutput* output,
 58 |                             Component* out_path) {  // Set to the range of |output| written by this call.
 59 |   // Copies and normalizes the "c:" at the beginning, if present.
 60 |   out_path->begin = output->length();
 61 |   int after_drive;
 62 | #ifdef WIN32
 63 |   after_drive = FileDoDriveSpec(spec, path.begin, path.end(), output);
 64 | #else
 65 |   after_drive = path.begin;  // Builds without WIN32 do no drive-letter handling.
 66 | #endif
 67 | 
 68 |   // Copies the rest of the path, starting from the slash following the
 69 |   // drive colon (if any, Windows only), or the first slash of the path.
 70 |   bool success = true;
 71 |   if (after_drive < path.end()) {
 72 |     // Use the regular path canonicalizer to canonicalize the rest of the
 73 |     // path. Give it a fake output component to write into; the full path
 74 |     // component (including any drive spec) is computed after this branch.
 75 |     Component sub_path = MakeRange(after_drive, path.end());
 76 |     Component fake_output_path;
 77 |     success = CanonicalizePath(spec, sub_path, output, &fake_output_path);
 78 |   } else {
 79 |     // No input path, canonicalize to a slash.
 80 |     output->push_back('/');
 81 |   }
 82 | 
 83 |   out_path->len = output->length() - out_path->begin;  // Everything appended since |begin| was recorded.
 84 |   return success;  // False only when the regular path canonicalizer reported failure.
 85 | }
 86 | 
 87 | template<typename CHAR, typename UCHAR>  // Worker behind the CanonicalizeFileURL and ReplaceFileURL overloads.
 88 | bool DoCanonicalizeFileURL(const URLComponentSource<CHAR>& source,
 89 |                            const Parsed& parsed,
 90 |                            CharsetConverter* query_converter,  // Forwarded to CanonicalizeQuery only.
 91 |                            CanonOutput* output,
 92 |                            Parsed* new_parsed) {
 93 |   // Things we don't set in file: URLs.
 94 |   new_parsed->username = Component();
 95 |   new_parsed->password = Component();
 96 |   new_parsed->port = Component();
 97 | 
 98 |   // Scheme (known, so we don't bother running it through the more
 99 |   // complicated scheme canonicalizer).
100 |   new_parsed->scheme.begin = output->length();
101 |   output->Append("file://", 7);
102 |   new_parsed->scheme.len = 4;  // Covers the scheme name only, not the separator characters.
103 | 
104 |   // Append the host. For many file URLs, this will be empty. For UNC, this
105 |   // will be present.
106 |   // TODO(brettw) This doesn't do any checking for host name validity. We
107 |   // should probably handle validity checking of UNC hosts differently than
108 |   // for regular IP hosts.
109 |   bool success = CanonicalizeHost(source.host, parsed.host,
110 |                                   output, &new_parsed->host);
111 |   success &= DoFileCanonicalizePath<CHAR, UCHAR>(source.path, parsed.path,
112 |                                     output, &new_parsed->path);
113 |   CanonicalizeQuery(source.query, parsed.query, query_converter,
114 |                     output, &new_parsed->query);
115 | 
116 |   // Ignore failure for refs since the URL can probably still be loaded.
117 |   CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
118 | 
119 |   return success;  // Reflects only the host and path results.
120 | }
121 | 
122 | } // namespace
123 | 
124 | bool CanonicalizeFileURL(const char* spec,  // 8-bit overload of the file: URL canonicalizer.
125 |                          int spec_len,  // Not referenced in this function body.
126 |                          const Parsed& parsed,
127 |                          CharsetConverter* query_converter,
128 |                          CanonOutput* output,
129 |                          Parsed* new_parsed) {
130 |   return DoCanonicalizeFileURL<char, unsigned char>(
131 |       URLComponentSource<char>(spec), parsed, query_converter,  // |spec| is wrapped as the component source.
132 |       output, new_parsed);
133 | }
134 | 
135 | bool CanonicalizeFileURL(const base::char16* spec,  // 16-bit overload of the file: URL canonicalizer.
136 |                          int spec_len,  // Not referenced in this function body.
137 |                          const Parsed& parsed,
138 |                          CharsetConverter* query_converter,
139 |                          CanonOutput* output,
140 |                          Parsed* new_parsed) {
141 |   return DoCanonicalizeFileURL<base::char16, base::char16>(
142 |       URLComponentSource<base::char16>(spec), parsed, query_converter,  // |spec| is wrapped as the component source.
143 |       output, new_parsed);
144 | }
145 | 
146 | bool FileCanonicalizePath(const char* spec,  // 8-bit overload; forwards unchanged to the shared template.
147 |                           const Component& path,
148 |                           CanonOutput* output,
149 |                           Component* out_path) {
150 |   return DoFileCanonicalizePath<char, unsigned char>(spec, path,
151 |                                                      output, out_path);
152 | }
153 | 
154 | bool FileCanonicalizePath(const base::char16* spec,  // 16-bit overload; forwards unchanged to the shared template.
155 |                           const Component& path,
156 |                           CanonOutput* output,
157 |                           Component* out_path) {
158 |   return DoFileCanonicalizePath<base::char16, base::char16>(spec, path,
159 |                                                             output, out_path);
160 | }
161 | 
162 | bool ReplaceFileURL(const char* base,  // Canonicalizes |base| again after applying 8-bit |replacements|.
163 |                     const Parsed& base_parsed,
164 |                     const Replacements<char>& replacements,
165 |                     CharsetConverter* query_converter,
166 |                     CanonOutput* output,
167 |                     Parsed* new_parsed) {
168 |   URLComponentSource<char> source(base);
169 |   Parsed parsed(base_parsed);  // Mutable copy; the const |base_parsed| of the caller is left untouched.
170 |   SetupOverrideComponents(base, replacements, &source, &parsed);  // Presumably redirects overridden components to the replacement text -- TODO confirm.
171 |   return DoCanonicalizeFileURL<char, unsigned char>(
172 |       source, parsed, query_converter, output, new_parsed);
173 | }
174 | 
175 | bool ReplaceFileURL(const char* base,  // Canonicalizes |base| again after applying 16-bit |replacements|.
176 |                     const Parsed& base_parsed,
177 |                     const Replacements<base::char16>& replacements,
178 |                     CharsetConverter* query_converter,
179 |                     CanonOutput* output,
180 |                     Parsed* new_parsed) {
181 |   RawCanonOutput<1024> utf8;  // Scratch buffer handed to the setup call; presumably holds the replacements as UTF-8 -- TODO confirm.
182 |   URLComponentSource<char> source(base);
183 |   Parsed parsed(base_parsed);  // Mutable copy; the const |base_parsed| of the caller is left untouched.
184 |   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
185 |   return DoCanonicalizeFileURL<char, unsigned char>(  // The 8-bit worker is used even though the replacements are 16-bit.
186 |       source, parsed, query_converter, output, new_parsed);
187 | }
188 | 
189 | }  // namespace url
190 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_canon_ip.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #ifndef URL_URL_CANON_IP_H_
 6 | #define URL_URL_CANON_IP_H_
 7 | 
 8 | #include "base/strings/string16.h"
 9 | #include "url/third_party/mozilla/url_parse.h"
10 | #include "url/url_canon.h"
11 | #include "url/url_export.h"
12 | 
13 | namespace url {
14 | 
15 | // Writes the given IPv4 address (the four bytes in |address|) to |output|.
16 | URL_EXPORT void AppendIPv4Address(const unsigned char address[4],
17 |                                   CanonOutput* output);
18 | 
19 | // Writes the given IPv6 address (the sixteen bytes in |address|) to |output|.
20 | URL_EXPORT void AppendIPv6Address(const unsigned char address[16],
21 |                                   CanonOutput* output);
22 | 
23 | // Searches the host name for the portions of the IPv4 address. On success,
24 | // each component will be placed into |components| and it will return true.
25 | // It will return false if the host cannot be separated as an IPv4 address
26 | // or if there are any non-7-bit characters or other characters that cannot
27 | // be in an IP address. (This is important so we fail as early as possible for
28 | // common non-IP hostnames.)
29 | //
30 | // Not all components may exist. If there are only 3 components, for example,
31 | // the last one will have a length of -1 or 0 to indicate it does not exist.
32 | //
33 | // Note that many platforms' inet_addr will ignore everything after a space
34 | // in certain circumstances if the stuff before the space looks like an IP
35 | // address. IE6 is included in this. We do NOT handle this case. In many cases,
36 | // the browser's canonicalization will get run before this which converts
37 | // spaces to %20 (in the case of IE7) or rejects them (in the case of Mozilla),
38 | // so this code path never gets hit. Our host canonicalization will notice
39 | // these spaces and escape them, which will make IP address finding fail. This
40 | // seems like better behavior than stripping after a space.
41 | URL_EXPORT bool FindIPv4Components(const char* spec,
42 |                                    const Component& host,
43 |                                    Component components[4]);
44 | URL_EXPORT bool FindIPv4Components(const base::char16* spec,
45 |                                    const Component& host,
46 |                                    Component components[4]);
47 | 
48 | // Converts an IPv4 address to a 32-bit number (network byte order) in |address|.
49 | //
50 | // Possible return values:
51 | //   IPV4    - IPv4 address was successfully parsed.
52 | //   BROKEN  - Input was formatted like an IPv4 address, but overflow occurred
53 | //             during parsing.
54 | //   NEUTRAL - Input couldn't possibly be interpreted as an IPv4 address.
55 | //             It might be an IPv6 address, or a hostname.
56 | //
57 | // On success, |num_ipv4_components| will be populated with the number of
58 | // components in the IPv4 address.
59 | URL_EXPORT CanonHostInfo::Family IPv4AddressToNumber(const char* spec,
60 |                                                      const Component& host,
61 |                                                      unsigned char address[4],
62 |                                                      int* num_ipv4_components);
63 | URL_EXPORT CanonHostInfo::Family IPv4AddressToNumber(const base::char16* spec,
64 |                                                      const Component& host,
65 |                                                      unsigned char address[4],
66 |                                                      int* num_ipv4_components);
67 | 
68 | // Converts an IPv6 address to a 128-bit number (network byte order), returning
69 | // true on success. False means that the input was not a valid IPv6 address.
70 | //
71 | // NOTE that |host| is expected to be surrounded by square brackets,
72 | // i.e. "[::1]" rather than "::1".
73 | URL_EXPORT bool IPv6AddressToNumber(const char* spec,
74 |                                     const Component& host,
75 |                                     unsigned char address[16]);
76 | URL_EXPORT bool IPv6AddressToNumber(const base::char16* spec,
77 |                                     const Component& host,
78 |                                     unsigned char address[16]);
79 | 
80 | }  // namespace url
81 | 
82 | #endif  // URL_URL_CANON_IP_H_
83 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_canon_mailtourl.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | // Functions for canonicalizing "mailto:" URLs.
  6 | 
  7 | #include "url/url_canon.h"
  8 | #include "url/url_canon_internal.h"
  9 | #include "url/url_file.h"
 10 | #include "url/url_parse_internal.h"
 11 | 
 12 | namespace url {
 13 | 
 14 | namespace {
 15 | 
 16 | template <typename CHAR, typename UCHAR>  // Worker behind the CanonicalizeMailtoURL and ReplaceMailtoURL overloads.
 17 | bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
 18 |                              const Parsed& parsed,
 19 |                              CanonOutput* output,
 20 |                              Parsed* new_parsed) {
 21 |   // mailto: only uses {scheme, path, query} -- clear the rest.
 22 |   new_parsed->username = Component();
 23 |   new_parsed->password = Component();
 24 |   new_parsed->host = Component();
 25 |   new_parsed->port = Component();
 26 |   new_parsed->ref = Component();
 27 | 
 28 |   // Scheme (known, so we don't bother running it through the more
 29 |   // complicated scheme canonicalizer).
 30 |   new_parsed->scheme.begin = output->length();
 31 |   output->Append("mailto:", 7);
 32 |   new_parsed->scheme.len = 6;  // Covers the scheme name only; the appended colon is excluded.
 33 | 
 34 |   bool success = true;
 35 | 
 36 |   // Path
 37 |   if (parsed.path.is_valid()) {
 38 |     new_parsed->path.begin = output->length();
 39 | 
 40 |     // Copy the path using path URL's more lax escaping rules.
 41 |     // We convert to UTF-8 and escape non-ASCII and control characters
 42 |     // (below 0x20), but leave all other ASCII characters alone.
 43 |     int end = parsed.path.end();
 44 |     for (int i = parsed.path.begin; i < end; ++i) {
 45 |       UCHAR uch = static_cast<UCHAR>(source.path[i]);
 46 |       if (uch < 0x20 || uch >= 0x80)
 47 |         success &= AppendUTF8EscapedChar(source.path, &i, end, output);  // Receives &i, so it may move the index itself -- TODO confirm.
 48 |       else
 49 |         output->push_back(static_cast<char>(uch));
 50 |     }
 51 | 
 52 |     new_parsed->path.len = output->length() - new_parsed->path.begin;
 53 |   } else {
 54 |     // No path at all
 55 |     new_parsed->path.reset();
 56 |   }
 57 | 
 58 |   // Query -- always use the default UTF8 charset converter.
 59 |   CanonicalizeQuery(source.query, parsed.query, NULL,
 60 |                     output, &new_parsed->query);
 61 | 
 62 |   return success;  // Only the path escaping step can clear |success|.
 63 | }
 64 | 
 65 | } // namespace
 66 | 
 67 | bool CanonicalizeMailtoURL(const char* spec,  // 8-bit overload of the mailto: canonicalizer.
 68 |                            int spec_len,  // Not referenced in this function body.
 69 |                            const Parsed& parsed,
 70 |                            CanonOutput* output,
 71 |                            Parsed* new_parsed) {
 72 |   return DoCanonicalizeMailtoURL<char, unsigned char>(
 73 |       URLComponentSource<char>(spec), parsed, output, new_parsed);
 74 | }
 75 | 
 76 | bool CanonicalizeMailtoURL(const base::char16* spec,  // 16-bit overload of the mailto: canonicalizer.
 77 |                            int spec_len,  // Not referenced in this function body.
 78 |                            const Parsed& parsed,
 79 |                            CanonOutput* output,
 80 |                            Parsed* new_parsed) {
 81 |   return DoCanonicalizeMailtoURL<base::char16, base::char16>(
 82 |       URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
 83 | }
 84 | 
 85 | bool ReplaceMailtoURL(const char* base,  // Canonicalizes |base| again after applying 8-bit |replacements|.
 86 |                       const Parsed& base_parsed,
 87 |                       const Replacements<char>& replacements,
 88 |                       CanonOutput* output,
 89 |                       Parsed* new_parsed) {
 90 |   URLComponentSource<char> source(base);
 91 |   Parsed parsed(base_parsed);  // Mutable copy; the const |base_parsed| of the caller is left untouched.
 92 |   SetupOverrideComponents(base, replacements, &source, &parsed);  // Presumably redirects overridden components to the replacement text -- TODO confirm.
 93 |   return DoCanonicalizeMailtoURL<char, unsigned char>(
 94 |       source, parsed, output, new_parsed);
 95 | }
 96 | 
 97 | bool ReplaceMailtoURL(const char* base,  // Canonicalizes |base| again after applying 16-bit |replacements|.
 98 |                       const Parsed& base_parsed,
 99 |                       const Replacements<base::char16>& replacements,
100 |                       CanonOutput* output,
101 |                       Parsed* new_parsed) {
102 |   RawCanonOutput<1024> utf8;  // Scratch buffer handed to the setup call; presumably holds the replacements as UTF-8 -- TODO confirm.
103 |   URLComponentSource<char> source(base);
104 |   Parsed parsed(base_parsed);  // Mutable copy; the const |base_parsed| of the caller is left untouched.
105 |   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
106 |   return DoCanonicalizeMailtoURL<char, unsigned char>(  // The 8-bit worker is used even though the replacements are 16-bit.
107 |       source, parsed, output, new_parsed);
108 | }
109 | 
110 | }  // namespace url
111 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_canon_pathurl.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | // Functions for canonicalizing "path" URLs. Not to be confused with the path
  6 | // of a URL, these are URLs that have no authority section, only a path. For
  7 | // example, "javascript:" and "data:".
  8 | 
  9 | #include "url/url_canon.h"
 10 | #include "url/url_canon_internal.h"
 11 | 
 12 | namespace url {
 13 | 
 14 | namespace {
 15 | 
 16 | // Canonicalize the given |component| from |source| into |output| and
 17 | // |new_component|. If |separator| is non-zero, it is prepended to |output|
 18 | // prior to the canonicalized component; i.e. for the '?' or '#' characters.
 19 | template<typename CHAR, typename UCHAR>
 20 | bool DoCanonicalizePathComponent(const CHAR* source,
 21 |                                  const Component& component,
 22 |                                  char separator,
 23 |                                  CanonOutput* output,
 24 |                                  Component* new_component) {
 25 |   bool success = true;
 26 |   if (component.is_valid()) {
 27 |     if (separator)
 28 |       output->push_back(separator);
 29 |     // Copy the path using path URL's more lax escaping rules (think for
 30 |     // javascript:). We convert to UTF-8 and escape non-ASCII and control chars
 31 |     // (below 0x20), leaving other ASCII alone. This helps JavaScript readability.
 32 |     new_component->begin = output->length();  // Recorded after the separator, so the separator is not in the range.
 33 |     int end = component.end();
 34 |     for (int i = component.begin; i < end; i++) {
 35 |       UCHAR uch = static_cast<UCHAR>(source[i]);
 36 |       if (uch < 0x20 || uch >= 0x80)
 37 |         success &= AppendUTF8EscapedChar(source, &i, end, output);  // Receives &i, so it may move the index itself -- TODO confirm.
 38 |       else
 39 |         output->push_back(static_cast<char>(uch));
 40 |     }
 41 |     new_component->len = output->length() - new_component->begin;
 42 |   } else {
 43 |     // Empty part.
 44 |     new_component->reset();
 45 |   }
 46 |   return success;
 47 | }
 48 | 
 49 | template <typename CHAR, typename UCHAR>  // Worker behind the CanonicalizePathURL and ReplacePathURL overloads.
 50 | bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
 51 |                            const Parsed& parsed,
 52 |                            CanonOutput* output,
 53 |                            Parsed* new_parsed) {
 54 |   // Scheme: this will append the colon.
 55 |   bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
 56 |                                     output, &new_parsed->scheme);
 57 | 
 58 |   // We assume there's no authority for path URLs. Note that hosts should never
 59 |   // have -1 length. NOTE(review): host.reset() is called below -- confirm intent.
 60 |   new_parsed->username.reset();
 61 |   new_parsed->password.reset();
 62 |   new_parsed->host.reset();
 63 |   new_parsed->port.reset();
 64 |   // We allow path URLs to have the path, query and fragment components, but we
 65 |   // will canonicalize each of them via the weaker path URL rules.
 66 |   success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
 67 |       source.path, parsed.path, '\0', output, &new_parsed->path);
 68 |   success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
 69 |       source.query, parsed.query, '?', output, &new_parsed->query);
 70 |   success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
 71 |       source.ref, parsed.ref, '#', output, &new_parsed->ref);
 72 | 
 73 |   return success;  // False if the scheme or any of the three components failed.
 74 | }
 75 | 
 76 | }  // namespace
 77 | 
 78 | bool CanonicalizePathURL(const char* spec,  // 8-bit overload of the path-URL canonicalizer.
 79 |                          int spec_len,  // Not referenced in this function body.
 80 |                          const Parsed& parsed,
 81 |                          CanonOutput* output,
 82 |                          Parsed* new_parsed) {
 83 |   return DoCanonicalizePathURL<char, unsigned char>(
 84 |       URLComponentSource<char>(spec), parsed, output, new_parsed);
 85 | }
 86 | 
 87 | bool CanonicalizePathURL(const base::char16* spec,  // 16-bit overload of the path-URL canonicalizer.
 88 |                          int spec_len,  // Not referenced in this function body.
 89 |                          const Parsed& parsed,
 90 |                          CanonOutput* output,
 91 |                          Parsed* new_parsed) {
 92 |   return DoCanonicalizePathURL<base::char16, base::char16>(
 93 |       URLComponentSource<base::char16>(spec), parsed, output, new_parsed);
 94 | }
 95 | 
 96 | bool ReplacePathURL(const char* base,  // Canonicalizes |base| again after applying 8-bit |replacements|.
 97 |                     const Parsed& base_parsed,
 98 |                     const Replacements<char>& replacements,
 99 |                     CanonOutput* output,
100 |                     Parsed* new_parsed) {
101 |   URLComponentSource<char> source(base);
102 |   Parsed parsed(base_parsed);  // Mutable copy; the const |base_parsed| of the caller is left untouched.
103 |   SetupOverrideComponents(base, replacements, &source, &parsed);  // Presumably redirects overridden components to the replacement text -- TODO confirm.
104 |   return DoCanonicalizePathURL<char, unsigned char>(
105 |       source, parsed, output, new_parsed);
106 | }
107 | 
108 | bool ReplacePathURL(const char* base,  // Canonicalizes |base| again after applying 16-bit |replacements|.
109 |                     const Parsed& base_parsed,
110 |                     const Replacements<base::char16>& replacements,
111 |                     CanonOutput* output,
112 |                     Parsed* new_parsed) {
113 |   RawCanonOutput<1024> utf8;  // Scratch buffer handed to the setup call; presumably holds the replacements as UTF-8 -- TODO confirm.
114 |   URLComponentSource<char> source(base);
115 |   Parsed parsed(base_parsed);  // Mutable copy; the const |base_parsed| of the caller is left untouched.
116 |   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
117 |   return DoCanonicalizePathURL<char, unsigned char>(  // The 8-bit worker is used even though the replacements are 16-bit.
118 |       source, parsed, output, new_parsed);
119 | }
120 | 
121 | }  // namespace url
122 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_canon_query.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | #include "url/url_canon.h"
  6 | #include "url/url_canon_internal.h"
  7 | 
  8 | // Query canonicalization in IE
  9 | // ----------------------------
 10 | // IE is very permissive for query parameters specified in links on the page
 11 | // (in contrast to links that it constructs itself based on form data). It does
 12 | // not unescape any character. It does not reject any escape sequence (be they
 13 | // invalid like "%2y" or freaky like %00).
 14 | //
 15 | // IE only escapes spaces and nothing else. Embedded NULLs, tabs (0x09),
 16 | // LF (0x0a), and CR (0x0d) are removed (this probably happens at an earlier
 17 | // layer since they are removed from all portions of the URL). All other
 18 | // characters are passed unmodified. Invalid UTF-16 sequences are preserved as
 19 | // well, with each character in the input being converted to UTF-8. It is the
 20 | // server's job to make sense of this invalid query.
 21 | //
 22 | // Invalid multibyte sequences (for example, invalid UTF-8 on a UTF-8 page)
 23 | // are converted to the invalid character and sent as unescaped UTF-8 (0xef,
 24 | // 0xbf, 0xbd). This may not be canonicalization; the parser may generate these
 25 | // strings before the URL handler ever sees them.
 26 | //
 27 | // Our query canonicalization
 28 | // --------------------------
 29 | // We escape all non-ASCII characters and control characters, like Firefox.
 30 | // This is more conformant to the URL spec, and there do not seem to be many
 31 | // problems relating to Firefox's behavior.
 32 | //
 33 | // Like IE, we will never unescape (although the application may want to try
 34 | // unescaping to present the user with a more understandable URL). We will
 35 | // replace all invalid sequences (including invalid UTF-16 sequences, which IE
 36 | // doesn't) with the "invalid character," and we will escape it.
 37 | 
 38 | namespace url {
 39 | 
 40 | namespace {
 41 | 
 42 | // Returns true if the characters of |spec| in the range given by |query|
 43 | // (query.begin up to, but not including, query.end()) all fit in 7 bits.
 44 | template<typename CHAR, typename UCHAR>
 45 | bool IsAllASCII(const CHAR* spec, const Component& query) {
 46 |   int end = query.end();
 47 |   for (int i = query.begin; i < end; i++) {
 48 |     if (static_cast<UCHAR>(spec[i]) >= 0x80)  // Compared as UCHAR so the test is done on an unsigned value.
 49 |       return false;
 50 |   }
 51 |   return true;  // Also reached when the range is empty.
 52 | }
 53 | 
 54 | // Appends the given string to the output, escaping every character for which
 55 | // IsQueryChar() is false. This version will accept 8 or 16 bit characters,
 56 | // but casts each one to unsigned char, so 16-bit input must already fit. It
 57 | // does no encoding validation of its own.
 58 | template<typename CHAR>
 59 | void AppendRaw8BitQueryString(const CHAR* source, int length,
 60 |                               CanonOutput* output) {
 61 |   for (int i = 0; i < length; i++) {
 62 |     if (!IsQueryChar(static_cast<unsigned char>(source[i])))
 63 |       AppendEscapedChar(static_cast<unsigned char>(source[i]), output);
 64 |     else  // Doesn't need escaping.
 65 |       output->push_back(static_cast<char>(source[i]));
 66 |   }
 67 | }
 68 | 
 69 | // Runs the converter on the given UTF-8 input. Since the converter expects
 70 | // UTF-16, we have to convert first. The converter must be non-NULL.
 71 | void RunConverter(const char* spec,
 72 |                   const Component& query,
 73 |                   CharsetConverter* converter,
 74 |                   CanonOutput* output) {
 75 |   // This function will replace any misencoded values with the invalid
 76 |   // character. This is what we want so we don't have to check for error.
 77 |   RawCanonOutputW<1024> utf16;  // Temporary holding the UTF-16 form of the query text.
 78 |   ConvertUTF8ToUTF16(&spec[query.begin], query.len, &utf16);
 79 |   converter->ConvertFromUTF16(utf16.data(), utf16.length(), output);
 80 | }
 81 | 
 82 | // Runs the converter with the given UTF-16 input. We don't have to do
 83 | // anything, but this overloaded function allows us to use the same code
 84 | // for both UTF-8 and UTF-16 input. The converter must be non-NULL.
 85 | void RunConverter(const base::char16* spec,
 86 |                   const Component& query,
 87 |                   CharsetConverter* converter,
 88 |                   CanonOutput* output) {
 89 |   converter->ConvertFromUTF16(&spec[query.begin], query.len, output);
 90 | }
 91 | 
 92 | template<typename CHAR, typename UCHAR>  // Appends the escaped query text to |output|; the leading question mark is not written here.
 93 | void DoConvertToQueryEncoding(const CHAR* spec,
 94 |                               const Component& query,
 95 |                               CharsetConverter* converter,  // May be NULL; see the last branch.
 96 |                               CanonOutput* output) {
 97 |   if (IsAllASCII<CHAR, UCHAR>(spec, query)) {
 98 |     // Easy: the input can just be appended with no character set conversions.
 99 |     AppendRaw8BitQueryString(&spec[query.begin], query.len, output);
100 | 
101 |   } else {
102 |     // Harder: convert to the proper encoding first.
103 |     if (converter) {
104 |       // Run the converter to get an 8-bit string, then append it, escaping
105 |       // necessary values.
106 |       RawCanonOutput<1024> eight_bit;
107 |       RunConverter(spec, query, converter, &eight_bit);
108 |       AppendRaw8BitQueryString(eight_bit.data(), eight_bit.length(), output);
109 | 
110 |     } else {
111 |       // No converter, do our own UTF-8 conversion.
112 |       AppendStringOfType(&spec[query.begin], query.len, CHAR_QUERY, output);
113 |     }
114 |   }
115 | }
116 | 
117 | template<typename CHAR, typename UCHAR>
118 | void DoCanonicalizeQuery(const CHAR* spec,
119 |                          const Component& query,
120 |                          CharsetConverter* converter,
121 |                          CanonOutput* output,
122 |                          Component* out_query) {
123 |   if (query.len >= 0) {
124 |     // Write the separator first so that the reported component starts
125 |     // right after the '?' rather than on it.
126 |     output->push_back('?');
127 |     out_query->begin = output->length();
128 |     DoConvertToQueryEncoding<CHAR, UCHAR>(spec, query, converter, output);
129 |     out_query->len = output->length() - out_query->begin;
130 |   } else {
131 |     // Negative length: no query. Write nothing, report a default Component.
132 |     *out_query = Component();
133 |   }
134 | }
135 | 
136 | }  // namespace
137 | 
138 | // 8-bit entry point for query canonicalization; all of the work happens in
139 | // the shared DoCanonicalizeQuery template.
140 | void CanonicalizeQuery(const char* spec, const Component& query,
141 |                        CharsetConverter* converter, CanonOutput* output,
142 |                        Component* out_query) {
143 |   DoCanonicalizeQuery<char, unsigned char>(
144 |       spec, query, converter, output, out_query);
145 | }
146 | 
147 | // UTF-16 entry point for query canonicalization; all of the work happens in
148 | // the shared DoCanonicalizeQuery template.
149 | void CanonicalizeQuery(const base::char16* spec, const Component& query,
150 |                        CharsetConverter* converter, CanonOutput* output,
151 |                        Component* out_query) {
152 |   DoCanonicalizeQuery<base::char16, base::char16>(
153 |       spec, query, converter, output, out_query);
154 | }
155 | 
156 | // UTF-16 wrapper over the shared conversion routine; writes no leading '?'.
157 | void ConvertUTF16ToQueryEncoding(
158 |     const base::char16* input, const Component& query,
159 |     CharsetConverter* converter, CanonOutput* output) {
160 |   DoConvertToQueryEncoding<base::char16, base::char16>(
161 |       input, query, converter, output);
162 | }
163 | 
164 | }  // namespace url
165 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_canon_stdstring.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #include "url/url_canon_stdstring.h"
 6 | 
 7 | namespace url {
 8 | 
 9 | StdStringCanonOutput::StdStringCanonOutput(std::string* str)
10 |     : CanonOutput(), str_(str) {
11 |   cur_len_ = static_cast<int>(str_->size());  // New output goes after this.
12 |   str_->resize(str_->capacity());  // Expose all of the capacity for writing.
13 |   buffer_len_ = static_cast<int>(str_->size());
14 |   buffer_ = str_->empty() ? NULL : &(*str_)[0];
15 | }
16 | 
17 | StdStringCanonOutput::~StdStringCanonOutput() {
18 |   // |str_| belongs to the caller, so there is nothing to release here.
19 | }
20 | 
21 | void StdStringCanonOutput::Complete() {
22 |   str_->resize(cur_len_);  // Cut the string back to the bytes written.
23 |   buffer_len_ = cur_len_;  // The usable buffer now ends at the same point.
24 | }
25 | 
26 | void StdStringCanonOutput::Resize(int sz) {
27 |   str_->resize(sz);  // May reallocate, so |buffer_| is refreshed below.
28 |   buffer_len_ = sz;
29 |   buffer_ = str_->empty() ? NULL : &(*str_)[0];
30 | }
31 | 
32 | }  // namespace url
33 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_canon_stdstring.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #ifndef URL_URL_CANON_STDSTRING_H_
 6 | #define URL_URL_CANON_STDSTRING_H_
 7 | 
 8 | // This header file defines a canonicalizer output method class for STL
 9 | // strings. Because the canonicalizer tries not to be dependent on the STL,
10 | // we have segregated it here.
11 | 
12 | #include <string>
13 | 
14 | #include "base/compiler_specific.h"
15 | #include "base/strings/string_piece.h"
16 | #include "url/url_canon.h"
17 | #include "url/url_export.h"
18 | 
19 | namespace url {
20 | 
21 | // Write into a std::string given in the constructor. This object does not own
22 | // the string itself, and the user must ensure that the string stays alive
23 | // throughout the lifetime of this object.
24 | //
25 | // The given string will be appended to; any existing data in the string will
26 | // be preserved. The caller should reserve() the amount of data in the string
27 | // they expect to be written. We will resize if necessary, but that's slow.
28 | //
29 | // Note that when canonicalization is complete, the string will likely have
30 | // unused space at the end because the constructor grows the string to its
31 | // full capacity() up front. This ends up being important because resize
32 | // operations are slow, and because the base class needs to write directly
33 | // into the buffer.
34 | //
35 | // Therefore, the user should call Complete() before using the string that
36 | // this class wrote into.
37 | class URL_EXPORT StdStringCanonOutput : public CanonOutput {
38 |  public:
39 |   StdStringCanonOutput(std::string* str);
40 |   ~StdStringCanonOutput() override;
41 | 
42 |   // Must be called after writing has completed but before the string is used.
43 |   void Complete();
44 | 
45 |   void Resize(int sz) override;  // Resizes |*str_| to |sz| characters.
46 | 
47 |  protected:
48 |   std::string* str_;  // Not owned; supplied by the caller.
49 | };
50 | 
51 | // Extends Replacements with setters that take StringPieces, which lets
52 | // callers pass strings or char*s through implicit conversion.
53 | //
54 | // Nothing is copied: the data behind every StringPiece handed to a setter
55 | // must remain valid until the StringPieceReplacements object goes out of
56 | // scope.
57 | template<typename STR>
58 | class StringPieceReplacements : public Replacements<typename STR::value_type> {
59 |   // Shorthand for the piece type that every setter accepts.
60 |   typedef base::BasicStringPiece<STR> Piece;
61 | 
62 |  public:
63 |   // Each setter replaces the named URL component with the whole of |s|, by
64 |   // forwarding to the pointer-plus-Component setter of the same name (minus
65 |   // the "Str" suffix).
66 |   void SetSchemeStr(const Piece& s) { this->SetScheme(s.data(), All(s)); }
67 | 
68 |   void SetUsernameStr(const Piece& s) { this->SetUsername(s.data(), All(s)); }
69 |   void SetPasswordStr(const Piece& s) { this->SetPassword(s.data(), All(s)); }
70 |   void SetHostStr(const Piece& s) { this->SetHost(s.data(), All(s)); }
71 |   void SetPortStr(const Piece& s) { this->SetPort(s.data(), All(s)); }
72 | 
73 |   void SetPathStr(const Piece& s) { this->SetPath(s.data(), All(s)); }
74 |   void SetQueryStr(const Piece& s) { this->SetQuery(s.data(), All(s)); }
75 |   void SetRefStr(const Piece& s) { this->SetRef(s.data(), All(s)); }
76 | 
77 |  private:
78 |   // Returns the Component covering all of |s|: offset 0 and a length of
79 |   // s.length().
80 |   static Component All(const Piece& s) {
81 |     return Component(0, static_cast<int>(s.length()));
82 |   }
83 | };
84 | 
85 | }  // namespace url
86 | 
87 | #endif  // URL_URL_CANON_STDSTRING_H_
88 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_canon_stdurl.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | // Functions to canonicalize "standard" URLs, which are ones that have an
  6 | // authority section including a host name.
  7 | 
  8 | #include "url/url_canon.h"
  9 | #include "url/url_canon_internal.h"
 10 | #include "url/url_constants.h"
 11 | 
 12 | namespace url {
 13 | 
 14 | namespace {
 15 | 
 16 | template<typename CHAR, typename UCHAR>
 17 | bool DoCanonicalizeStandardURL(const URLComponentSource<CHAR>& source,
 18 |                                const Parsed& parsed,
 19 |                                CharsetConverter* query_converter,
 20 |                                CanonOutput* output,
 21 |                                Parsed* new_parsed) {
 22 |   // Components are handled strictly in URL order because every step appends
 23 |   // to |output|. The scheme canonicalizer writes the trailing colon itself.
 24 |   bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
 25 |                                     output, &new_parsed->scheme);
 26 | 
 27 |   // Authority: present if there is a username, a password, a port, or a
 28 |   // non-empty host.
 29 |   const bool have_authority =
 30 |       parsed.username.is_valid() || parsed.password.is_valid() ||
 31 |       parsed.host.is_nonempty() || parsed.port.is_valid();
 32 |   if (!have_authority) {
 33 |     // Standard URLs must have an authority: clear its components and fail.
 34 |     new_parsed->host.reset();
 35 |     new_parsed->username.reset();
 36 |     new_parsed->password.reset();
 37 |     new_parsed->port.reset();
 38 |     success = false;
 39 |   } else {
 40 |     // The "//" separator is only written when a scheme precedes it.
 41 |     if (parsed.scheme.is_valid()) {
 42 |       output->push_back('/');
 43 |       output->push_back('/');
 44 |     }
 45 | 
 46 |     // User info: the canonicalizer will handle the ':' and '@'.
 47 |     success &= CanonicalizeUserInfo(source.username, parsed.username,
 48 |                                     source.password, parsed.password,
 49 |                                     output,
 50 |                                     &new_parsed->username,
 51 |                                     &new_parsed->password);
 52 | 
 53 |     // Host: besides canonicalizing cleanly it must also be non-empty.
 54 |     if (!CanonicalizeHost(source.host, parsed.host,
 55 |                           output, &new_parsed->host) ||
 56 |         !parsed.host.is_nonempty()) {
 57 |       success = false;
 58 |     }
 59 | 
 60 |     // Port: the port canonicalizer will handle the colon. The default port
 61 |     // is looked up from the canonical scheme already written to |output|.
 62 |     const int default_port = DefaultPortForScheme(
 63 |         &output->data()[new_parsed->scheme.begin], new_parsed->scheme.len);
 64 |     success &= CanonicalizePort(source.port, parsed.port, default_port,
 65 |                                 output, &new_parsed->port);
 66 |   }
 67 | 
 68 |   // Path
 69 |   if (parsed.path.is_valid()) {
 70 |     success &= CanonicalizePath(source.path, parsed.path,
 71 |                                 output, &new_parsed->path);
 72 |   } else if (!have_authority &&
 73 |              !parsed.query.is_valid() && !parsed.ref.is_valid()) {
 74 |     // Nothing else is present at all. This is the only time an empty output
 75 |     // path is allowed.
 76 |     new_parsed->path.reset();
 77 |   } else {
 78 |     // An authority or something after the path exists, so make up a "/".
 79 |     new_parsed->path = Component(output->length(), 1);
 80 |     output->push_back('/');
 81 |   }
 82 | 
 83 |   // Query
 84 |   CanonicalizeQuery(source.query, parsed.query, query_converter,
 85 |                     output, &new_parsed->query);
 86 | 
 87 |   // Ref: ignore failure for this, since the page can probably still be loaded.
 88 |   CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);
 89 | 
 90 |   return success;
 91 | }
 92 | 
 93 | }  // namespace
 94 | 
 95 | 
 96 | // Maps a canonical scheme, given as |scheme_len| characters starting at
 97 | // |scheme|, to its default port. Returns PORT_UNSPECIFIED if the scheme is
 98 | // not one of the known ones listed below.
 99 | int DefaultPortForScheme(const char* scheme, int scheme_len) {
100 |   // Every scheme that has a default port, with its length and that port.
101 |   static const struct {
102 |     const char* name;
103 |     int name_len;
104 |     int port;
105 |   } kKnownSchemes[] = {
106 |     { kHttpScheme, 4, 80 },
107 |     { kHttpsScheme, 5, 443 },
108 |     { kFtpScheme, 3, 21 },
109 |     { kWssScheme, 3, 443 },
110 |     { kGopherScheme, 6, 70 },
111 |     { kWsScheme, 2, 80 },
112 |   };
113 |   const int kNumKnownSchemes =
114 |       static_cast<int>(sizeof(kKnownSchemes) / sizeof(kKnownSchemes[0]));
115 | 
116 |   for (int i = 0; i < kNumKnownSchemes; ++i) {
117 |     // Checking the length first means strncmp only ever compares candidates
118 |     // that are exactly as long as the input.
119 |     if (scheme_len == kKnownSchemes[i].name_len &&
120 |         !strncmp(scheme, kKnownSchemes[i].name, scheme_len)) {
121 |       return kKnownSchemes[i].port;
122 |     }
123 |   }
124 |   return PORT_UNSPECIFIED;
125 | }
126 | 
127 | // 8-bit entry point: wraps |spec| in a component source and defers to the
128 | // shared template. |spec_len| is not used by this wrapper.
129 | bool CanonicalizeStandardURL(const char* spec, int spec_len,
130 |                              const Parsed& parsed,
131 |                              CharsetConverter* query_converter,
132 |                              CanonOutput* output, Parsed* new_parsed) {
133 |   const URLComponentSource<char> source(spec);
134 |   return DoCanonicalizeStandardURL<char, unsigned char>(
135 |       source, parsed, query_converter, output, new_parsed);
136 | }
137 | 
138 | // UTF-16 entry point: wraps |spec| in a component source and defers to the
139 | // shared template. |spec_len| is not used by this wrapper.
140 | bool CanonicalizeStandardURL(const base::char16* spec, int spec_len,
141 |                              const Parsed& parsed,
142 |                              CharsetConverter* query_converter,
143 |                              CanonOutput* output, Parsed* new_parsed) {
144 |   const URLComponentSource<base::char16> source(spec);
145 |   return DoCanonicalizeStandardURL<base::char16, base::char16>(
146 |       source, parsed, query_converter, output, new_parsed);
147 | }
148 | 
149 | // Possible future optimization: skip recanonicalizing components that did
150 | // not change. The common uses of ReplaceComponents remove things we don't
151 | // want, like reference fragments and usernames, and those would get cheaper
152 | // if the rest of the URL could be assumed OK as-is (or if only the modified
153 | // parts were recanonicalized). It would be much more complex to implement.
154 | //
155 | // DoReplaceComponents in url_util.cc would need updating as well, because it
156 | // relies on this function re-checking everything (see the comment there).
157 | bool ReplaceStandardURL(const char* base,
158 |                         const Parsed& base_parsed,
159 |                         const Replacements<char>& replacements,
160 |                         CharsetConverter* query_converter,
161 |                         CanonOutput* output,
162 |                         Parsed* new_parsed) {
163 |   // Working copies that SetupOverrideComponents is free to modify.
164 |   Parsed parsed(base_parsed);
165 |   URLComponentSource<char> source(base);
166 |   SetupOverrideComponents(base, replacements, &source, &parsed);
167 |   return DoCanonicalizeStandardURL<char, unsigned char>(
168 |       source, parsed, query_converter, output, new_parsed);
169 | }
170 | 
171 | // For 16-bit replacements: the replacements are all turned into UTF-8
172 | // first (|utf8| is the buffer for that) so the regular code path is used.
173 | bool ReplaceStandardURL(const char* base,
174 |                         const Parsed& base_parsed,
175 |                         const Replacements<base::char16>& replacements,
176 |                         CharsetConverter* query_converter,
177 |                         CanonOutput* output,
178 |                         Parsed* new_parsed) {
179 |   RawCanonOutput<1024> utf8;
180 |   Parsed parsed(base_parsed);
181 |   URLComponentSource<char> source(base);
182 |   SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
183 |   return DoCanonicalizeStandardURL<char, unsigned char>(
184 |       source, parsed, query_converter, output, new_parsed);
185 | }
186 | 
187 | }  // namespace url
188 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_constants.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #include "url/url_constants.h"
 6 | 
 7 | namespace url {
 8 | 
 9 | const char kAboutBlankURL[] = "about:blank";  // A full URL, not a scheme.
10 | 
11 | const char kAboutScheme[] = "about";  // Scheme names carry no trailing ':'.
12 | const char kBlobScheme[] = "blob";
13 | const char kContentScheme[] = "content";
14 | const char kDataScheme[] = "data";
15 | const char kFileScheme[] = "file";
16 | const char kFileSystemScheme[] = "filesystem";
17 | const char kFtpScheme[] = "ftp";
18 | const char kGopherScheme[] = "gopher";
19 | const char kHttpScheme[] = "http";
20 | const char kHttpsScheme[] = "https";
21 | const char kJavaScriptScheme[] = "javascript";
22 | const char kMailToScheme[] = "mailto";
23 | const char kWsScheme[] = "ws";
24 | const char kWssScheme[] = "wss";
25 | 
26 | const char kStandardSchemeSeparator[] = "://";  // Goes after the scheme.
27 | 
28 | }  // namespace url
29 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_constants.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2014 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #ifndef URL_URL_CONSTANTS_H_
 6 | #define URL_URL_CONSTANTS_H_
 7 | 
 8 | #include "url/url_export.h"
 9 | 
10 | namespace url {
11 | 
12 | URL_EXPORT extern const char kAboutBlankURL[];  // The URL "about:blank".
13 | 
14 | URL_EXPORT extern const char kAboutScheme[];  // Schemes: no trailing ':'.
15 | URL_EXPORT extern const char kBlobScheme[];
16 | // The content scheme is specific to Android for identifying a stored file.
17 | URL_EXPORT extern const char kContentScheme[];
18 | URL_EXPORT extern const char kDataScheme[];
19 | URL_EXPORT extern const char kFileScheme[];
20 | URL_EXPORT extern const char kFileSystemScheme[];
21 | URL_EXPORT extern const char kFtpScheme[];
22 | URL_EXPORT extern const char kGopherScheme[];
23 | URL_EXPORT extern const char kHttpScheme[];
24 | URL_EXPORT extern const char kHttpsScheme[];
25 | URL_EXPORT extern const char kJavaScriptScheme[];
26 | URL_EXPORT extern const char kMailToScheme[];
27 | URL_EXPORT extern const char kWsScheme[];
28 | URL_EXPORT extern const char kWssScheme[];
29 | 
30 | // Used to separate a standard scheme and the hostname: "://".
31 | URL_EXPORT extern const char kStandardSchemeSeparator[];
32 | 
33 | }  // namespace url
34 | 
35 | #endif  // URL_URL_CONSTANTS_H_
36 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_export.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #ifndef URL_URL_EXPORT_H_
 6 | #define URL_URL_EXPORT_H_
 7 | 
 8 | #if defined(COMPONENT_BUILD)
 9 | #if defined(WIN32)
10 | // Windows: dllexport when URL_IMPLEMENTATION is defined, dllimport otherwise.
11 | #if defined(URL_IMPLEMENTATION)
12 | #define URL_EXPORT __declspec(dllexport)
13 | #else
14 | #define URL_EXPORT __declspec(dllimport)
15 | #endif  // defined(URL_IMPLEMENTATION)
16 | 
17 | #else  // !defined(WIN32)
18 | // Elsewhere: default visibility, but only when URL_IMPLEMENTATION is defined.
19 | #if defined(URL_IMPLEMENTATION)
20 | #define URL_EXPORT __attribute__((visibility("default")))
21 | #else
22 | #define URL_EXPORT
23 | #endif  // defined(URL_IMPLEMENTATION)
24 | 
25 | #endif  // defined(WIN32)
26 | 
27 | #else  // !defined(COMPONENT_BUILD)
28 | // Without COMPONENT_BUILD the macro expands to nothing.
29 | #define URL_EXPORT
30 | 
31 | #endif  // defined(COMPONENT_BUILD)
32 | 
33 | #endif  // URL_URL_EXPORT_H_
34 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_file.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #ifndef URL_URL_FILE_H_
 6 | #define URL_URL_FILE_H_
 7 | 
 8 | // Provides shared functions used by the internals of the parser and
 9 | // canonicalizer for file URLs. Do not use outside of these modules.
10 | 
11 | #include "url/url_parse_internal.h"
12 | 
13 | namespace url {
14 | 
15 | #ifdef WIN32
16 | 
17 | // Both "c:" and "c|" are accepted as drive identifiers.
18 | inline bool IsWindowsDriveSeparator(base::char16 ch) {
19 |   return ch == '|' || ch == ':';
20 | }
21 | inline bool IsWindowsDriveLetter(base::char16 ch) {  // ASCII a-z or A-Z.
22 |   return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
23 | }
24 | 
25 | #endif  // WIN32
26 | 
27 | // Returns the index of the first slash at or after |begin_index|. If there
28 | // is none, the result is the index at which the scan stopped for lack of
29 | // input: |spec_len|, or |begin_index| itself when that is already past it.
30 | template<typename CHAR>
31 | inline int FindNextSlash(const CHAR* spec, int begin_index, int spec_len) {
32 |   int pos = begin_index;
33 |   for (; pos < spec_len && !IsURLSlash(spec[pos]); ++pos) {}
34 |   return pos;
35 | }
36 | 
37 | #ifdef WIN32
38 | 
39 | // Returns true if the text at |start_offset| in |spec| looks like the start
40 | // of a drive spec, for example "c:". A |start_offset| at or beyond |spec_len|
41 | // is explicitly tolerated (the answer is false) to simplify callers.
42 | //
43 | // A true result guarantees that |spec| holds a valid drive letter followed
44 | // by a drive separator, which may be ':' or '|', starting at |start_offset|.
45 | template<typename CHAR>
46 | inline bool DoesBeginWindowsDriveSpec(const CHAR* spec, int start_offset,
47 |                                       int spec_len) {
48 |   // Not enough room unless both the letter and the separator fit.
49 |   const bool has_room = spec_len - start_offset >= 2;
50 | 
51 |   // The short-circuit order matters: nothing is read from |spec| until the
52 |   // length check has passed.
53 |   return has_room &&
54 |          IsWindowsDriveLetter(spec[start_offset]) &&
55 |          IsWindowsDriveSeparator(spec[start_offset + 1]);
56 | }
57 | 
58 | // Returns true if the text at |start_offset| looks like the start of a UNC
59 | // path, for example "\\". A |start_offset| at or beyond |len| is explicitly
60 | // tolerated (the answer is false) to simplify callers.
61 | //
62 | // With |strict_slashes| set only backslashes are accepted, as is standard
63 | // for Windows. Otherwise anything IsURLSlash accepts will do, which is what
64 | // a lot of URL handling relies on.
65 | template<typename CHAR>
66 | inline bool DoesBeginUNCPath(const CHAR* text,
67 |                              int start_offset,
68 |                              int len,
69 |                              bool strict_slashes) {
70 |   if (len - start_offset < 2)
71 |     return false;  // Two characters are needed.
72 | 
73 |   const CHAR* lead = text + start_offset;
74 |   if (!strict_slashes)
75 |     return IsURLSlash(lead[0]) && IsURLSlash(lead[1]);
76 |   return lead[0] == '\\' && lead[1] == '\\';
77 | }
78 | 
79 | #endif  // WIN32
80 | 
81 | }  // namespace url
82 | 
83 | #endif  // URL_URL_FILE_H_
84 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_parse_file.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | //#include "base/logging.h"
  6 | #include "url/third_party/mozilla/url_parse.h"
  7 | #include "url/url_file.h"
  8 | #include "url/url_parse_internal.h"
  9 | 
 10 | // Interesting IE file:isms...
 11 | //
 12 | //  INPUT                      OUTPUT
 13 | //  =========================  ==============================
 14 | //  file:/foo/bar              file:///foo/bar
 15 | //      The result here seems totally invalid!?!? This isn't UNC.
 16 | //
 17 | //  file:/
 18 | //  file:// or any other number of slashes
 19 | //      IE6 doesn't do anything at all if you click on this link. No error:
 20 | //      nothing. IE6's history system seems to always color this link, so I'm
 21 | //      guessing that it maps internally to the empty URL.
 22 | //
 23 | //  C:\                        file:///C:/
 24 | //      When on a file: URL source page, this link will work. When over HTTP,
 25 | //      the file: URL will appear in the status bar but the link will not work
 26 | //      (security restriction for all file URLs).
 27 | //
 28 | //  file:foo/                  file:foo/     (invalid?!?!?)
 29 | //  file:/foo/                 file:///foo/  (invalid?!?!?)
 30 | //  file://foo/                file://foo/   (UNC to server "foo")
 31 | //  file:///foo/               file:///foo/  (invalid, seems to be a file)
 32 | //  file:////foo/              file://foo/   (UNC to server "foo")
 33 | //      Any more than four slashes is also treated as UNC.
 34 | //
 35 | //  file:C:/                   file://C:/
 36 | //  file:/C:/                  file://C:/
 37 | //      The number of slashes after "file:" doesn't matter if the thing
 38 | //      following it looks like an absolute drive path. Also, slashes and
 39 | //      backslashes are equally valid here.
 40 | 
 41 | namespace url {
 42 | 
 43 | namespace {
 44 | 
 45 | // A subcomponent of DoParseFileURL. The input should be a UNC path name,
 46 | // with |after_slashes| giving the index of the first character after the
 47 | // slashes that follow the scheme. This fills in the host and the path, and
 48 | // the query and ref too whenever a path is found; every other output
 49 | // component is left untouched (DoParseFileURL handles those).
 50 | template<typename CHAR>
 51 | void DoParseUNC(const CHAR* spec,
 52 |                 int after_slashes,
 53 |                 int spec_len,
 54 |                 Parsed* parsed) {
 55 |   // The candidate host runs up to the next slash. When no further slash
 56 |   // exists, as in "file://foo", it runs to the end of the input and there
 57 |   // is no path (this will end up being UNC to server "foo").
 58 |   const int host_end = FindNextSlash(spec, after_slashes, spec_len);
 59 | 
 60 | #ifdef WIN32
 61 |   // See if we have something that looks like a path following the first
 62 |   // component. As in "file://localhost/c:/", we get "c:/" out. We want to
 63 |   // treat this as having no host but the path given. Works on Windows only.
 64 |   // (The drive-spec test is false by itself when nothing follows the host.)
 65 |   if (DoesBeginWindowsDriveSpec(spec, host_end + 1, spec_len)) {
 66 |     parsed->host.reset();
 67 |     ParsePathInternal(spec, MakeRange(host_end, spec_len),
 68 |                       &parsed->path, &parsed->query, &parsed->ref);
 69 |     return;
 70 |   }
 71 | #endif
 72 | 
 73 |   // Otherwise everything before |host_end| is the host name, which will end
 74 |   // up being the UNC host. For example "file://foo/bar.txt" gets a server
 75 |   // name of "foo" and a path of "/bar.txt"; later, on Windows, this should
 76 |   // be treated as the filename "\\foo\bar.txt" in proper UNC notation.
 77 |   if (host_end == after_slashes)
 78 |     parsed->host.reset();  // Nothing between the slashes: no host.
 79 |   else
 80 |     parsed->host = MakeRange(after_slashes, host_end);
 81 | 
 82 |   // Whatever remains, starting with the slash itself, is the path.
 83 |   if (host_end < spec_len) {
 84 |     ParsePathInternal(spec, MakeRange(host_end, spec_len),
 85 |                       &parsed->path, &parsed->query, &parsed->ref);
 86 |   } else {
 87 |     parsed->path.reset();
 88 |   }
 89 | }
 97 | 
 98 | // A subcomponent of DoParseFileURL for local files. |path_begin| is the
 99 | // index at which the path starts. The host is cleared, and the path, query
100 | // and ref outputs are passed to ParsePathInternal; every other component is
101 | // left untouched (DoParseFileURL handles those).
102 | template<typename CHAR>
103 | void DoParseLocalFile(const CHAR* spec, int path_begin, int spec_len,
104 |                       Parsed* parsed) {
105 |   // Everything from |path_begin| to the end of the input is path material.
106 |   const Component remainder = MakeRange(path_begin, spec_len);
107 |   parsed->host.reset();
108 |   ParsePathInternal(spec, remainder, &parsed->path, &parsed->query,
109 |                     &parsed->ref);
110 | }
111 | 
112 | // Backend for the external functions that operates on either char type.
113 | // Handles cases where there is a scheme, but also when handed the first
114 | // character following the "file:" at the beginning of the spec. If so,
115 | // this is usually a slash, but needn't be; we allow paths like "file:c:\foo".
116 | template<typename CHAR>
117 | void DoParseFileURL(const CHAR* spec, int spec_len, Parsed* parsed) {
118 |   //DCHECK(spec_len >= 0);
119 | 
120 |   // Get the parts we never use for file URLs out of the way.
121 |   parsed->username.reset();
122 |   parsed->password.reset();
123 |   parsed->port.reset();
124 | 
125 |   // Many of the code paths don't set these, so it's convenient to just clear
126 |   // them. We'll write them in those cases we need them.
127 |   parsed->query.reset();
128 |   parsed->ref.reset();
129 | 
130 |   // Strip leading & trailing spaces and control characters.
131 |   int begin = 0;
132 |   TrimURL(spec, &begin, &spec_len);
133 | 
134 |   // Find the scheme, if any.
135 |   int num_slashes = CountConsecutiveSlashes(spec, begin, spec_len);
136 |   int after_scheme;
137 |   int after_slashes;
138 | #ifdef WIN32
139 |   // See how many slashes there are. We want to handle cases like UNC but also
140 |   // "/c:/foo". This is when there is no scheme, so we can allow pages to do
141 |   // links like "c:/foo/bar" or "//foo/bar". This is also called by the
142 |   // relative URL resolver when it determines there is an absolute URL, which
143 |   // may give us input like "/c:/foo".
144 |   after_slashes = begin + num_slashes;
145 |   if (DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len)) {
146 |     // Windows path, don't try to extract the scheme (for example, "c:\foo").
147 |     parsed->scheme.reset();
148 |     after_scheme = after_slashes;
149 |   } else if (DoesBeginUNCPath(spec, begin, spec_len, false)) {
150 |     // Windows UNC path: don't try to extract the scheme, but keep the slashes.
151 |     parsed->scheme.reset();
152 |     after_scheme = begin;
153 |   } else
154 | #endif
155 |   {
156 |     // ExtractScheme doesn't understand the possibility of filenames with
157 |     // colons in them, in which case it returns the entire spec up to the
158 |     // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as
159 |     // the foo.c: scheme.
160 |     if (!num_slashes &&
161 |         ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
162 |       // Offset the results since we gave ExtractScheme a substring.
163 |       parsed->scheme.begin += begin;
164 |       after_scheme = parsed->scheme.end() + 1;
165 |     } else {
166 |       // No scheme found, remember that.
167 |       parsed->scheme.reset();
168 |       after_scheme = begin;
169 |     }
170 |   }
171 | 
  // Handle empty specs, ones that contain only whitespace or control chars,
  // or ones that are just the scheme (for example "file:").
174 |   if (after_scheme == spec_len) {
175 |     parsed->host.reset();
176 |     parsed->path.reset();
177 |     return;
178 |   }
179 | 
180 |   num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
181 |   after_slashes = after_scheme + num_slashes;
182 | #ifdef WIN32
183 |   // Check whether the input is a drive again. We checked above for windows
184 |   // drive specs, but that's only at the very beginning to see if we have a
185 |   // scheme at all. This test will be duplicated in that case, but will
186 |   // additionally handle all cases with a real scheme such as "file:///C:/".
187 |   if (!DoesBeginWindowsDriveSpec(spec, after_slashes, spec_len) &&
188 |       num_slashes != 3) {
189 |     // Anything not beginning with a drive spec ("c:\") on Windows is treated
190 |     // as UNC, with the exception of three slashes which always means a file.
191 |     // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
192 |     DoParseUNC(spec, after_slashes, spec_len, parsed);
193 |     return;
194 |   }
195 | #else
196 |   // file: URL with exactly 2 slashes is considered to have a host component.
197 |   if (num_slashes == 2) {
198 |     DoParseUNC(spec, after_slashes, spec_len, parsed);
199 |     return;
200 |   }
201 | #endif  // WIN32
202 | 
203 |   // Easy and common case, the full path immediately follows the scheme
204 |   // (modulo slashes), as in "file://c:/foo". Just treat everything from
205 |   // there to the end as the path. Empty hosts have 0 length instead of -1.
206 |   // We include the last slash as part of the path if there is one.
207 |   DoParseLocalFile(spec,
208 |       num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme,
209 |       spec_len, parsed);
210 | }
211 | 
212 | }  // namespace
213 | 
// 8-bit entry point for file URL parsing. Forwards |url|, |url_len| and
// |parsed| unchanged to DoParseFileURL (declared earlier in this file), which
// does all of the work; the results are written into |parsed|.
void ParseFileURL(const char* url, int url_len, Parsed* parsed) {
  DoParseFileURL(url, url_len, parsed);
}
217 | 
// base::char16 entry point for file URL parsing. Forwards |url|, |url_len|
// and |parsed| unchanged to DoParseFileURL (declared earlier in this file),
// which does all of the work; the results are written into |parsed|.
void ParseFileURL(const base::char16* url, int url_len, Parsed* parsed) {
  DoParseFileURL(url, url_len, parsed);
}
221 | 
222 | }  // namespace url
223 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_parse_internal.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #ifndef URL_URL_PARSE_INTERNAL_H_
 6 | #define URL_URL_PARSE_INTERNAL_H_
 7 | 
 8 | // Contains common inline helper functions used by the URL parsing routines.
 9 | 
10 | #include "url/third_party/mozilla/url_parse.h"
11 | 
12 | namespace url {
13 | 
14 | // We treat slashes and backslashes the same for IE compatibility.
15 | inline bool IsURLSlash(base::char16 ch) {
16 |   return ch == '/' || ch == '\\';
17 | }
18 | 
19 | // Returns true if we should trim this character from the URL because it is a
20 | // space or a control character.
21 | inline bool ShouldTrimFromURL(base::char16 ch) {
22 |   return ch <= ' ';
23 | }
24 | 
// Narrows the range [*begin, *len) of |spec| so that it no longer starts
// with "should-be-trimmed" characters and, when |trim_path_end| is set, no
// longer ends with them either. Both values must already be initialized.
// Note that |*len| is an end position within |spec|, *not* the number of
// characters counted from |*begin|.
template<typename CHAR>
inline void TrimURL(const CHAR* spec, int* begin, int* len,
                    bool trim_path_end = true) {
  int first = *begin;
  int limit = *len;

  // Skip over leading whitespace and control characters.
  for (; first < limit && ShouldTrimFromURL(spec[first]); ++first) {
  }

  if (trim_path_end) {
    // Drop trailing whitespace and control characters. The |limit > first|
    // guard stops us from backing up past the start of the range when the
    // whole input is blank.
    for (; limit > first && ShouldTrimFromURL(spec[limit - 1]); --limit) {
    }
  }

  *begin = first;
  *len = limit;
}
45 | 
// Returns how many slash characters (as defined by IsURLSlash) appear in a
// row in |str|, starting at |begin_offset| and never reading at or beyond
// |str_len|.
template<typename CHAR>
inline int CountConsecutiveSlashes(const CHAR *str,
                                   int begin_offset, int str_len) {
  int pos = begin_offset;
  // Advance the cursor across the run of slashes.
  for (; pos < str_len && IsURLSlash(str[pos]); ++pos) {
  }
  return pos - begin_offset;
}
57 | 
// Internal functions in url_parse.cc that parse the path, that is, everything
// following the authority section. The input |path| is the range of
// everything following the authority section, and the outputs |filepath|,
// |query| and |ref| receive the identified ranges.
//
// This is designed for the file URL parser or other consumers who may do
// special stuff at the beginning but want regular path parsing; it just
// maps to the internal parsing function for paths.
void ParsePathInternal(const char* spec,
                       const Component& path,
                       Component* filepath,
                       Component* query,
                       Component* ref);
void ParsePathInternal(const base::char16* spec,
                       const Component& path,
                       Component* filepath,
                       Component* query,
                       Component* ref);
75 | 
76 | 
// Given a spec and the offset |after_scheme| of the character after the colon
// following the scheme, this parses it and fills in the structure. Every item
// in the parsed structure is filled EXCEPT for the scheme, which is untouched.
void ParseAfterScheme(const char* spec,
                      int spec_len,
                      int after_scheme,
                      Parsed* parsed);
void ParseAfterScheme(const base::char16* spec,
                      int spec_len,
                      int after_scheme,
                      Parsed* parsed);
88 | 
89 | }  // namespace url
90 | 
91 | #endif  // URL_URL_PARSE_INTERNAL_H_
92 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_util.h:
--------------------------------------------------------------------------------
  1 | // Copyright 2013 The Chromium Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style license that can be
  3 | // found in the LICENSE file.
  4 | 
  5 | #ifndef URL_URL_UTIL_H_
  6 | #define URL_URL_UTIL_H_
  7 | 
  8 | #include <string>
  9 | 
 10 | #include "base/strings/string16.h"
 11 | #include "url/third_party/mozilla/url_parse.h"
 12 | #include "url/url_canon.h"
 13 | #include "url/url_constants.h"
 14 | #include "url/url_export.h"
 15 | 
 16 | namespace url {
 17 | 
 18 | // Init ------------------------------------------------------------------------
 19 | 
 20 | // Initialization is NOT required, it will be implicitly initialized when first
 21 | // used. However, this implicit initialization is NOT threadsafe. If you are
 22 | // using this library in a threaded environment and don't have a consistent
 23 | // "first call" (an example might be calling AddStandardScheme with your special
 24 | // application-specific schemes) then you will want to call initialize before
 25 | // spawning any threads.
 26 | //
 27 | // It is OK to call this function more than once, subsequent calls will be
 28 | // no-ops, unless Shutdown was called in the mean time. This will also be a
 29 | // no-op if other calls to the library have forced an initialization beforehand.
 30 | URL_EXPORT void Initialize();
 31 | 
// Cleanup is not required, except some strings may leak. For most user
// applications, this is fine. If you're using it in a library that may get
// loaded and unloaded, you'll want to call Shutdown before unloading to
// properly clean up your library.
URL_EXPORT void Shutdown();
 37 | 
 38 | // Schemes --------------------------------------------------------------------
 39 | 
// Types of a scheme representing the requirements on the data represented by
// the authority component of a URL with the scheme. The enumerators have no
// explicit initializers, so they take the values 0, 1 and 2 in order.
enum SchemeType {
  // The authority component of a URL with the scheme, if any, has a port
  // (default values may be omitted in a serialization).
  SCHEME_WITH_PORT,
  // The authority component of a URL with the scheme, if any, doesn't have a
  // port.
  SCHEME_WITHOUT_PORT,
  // A URL with the scheme doesn't have an authority component.
  SCHEME_WITHOUT_AUTHORITY,
};
 52 | 
// A pair for representing a standard scheme name and the SchemeType for it.
struct URL_EXPORT SchemeWithType {
  // The scheme name as a C string. NOTE(review): the struct only stores the
  // pointer; who owns the string and how long it must live is not visible
  // here -- confirm against the users of this struct.
  const char* scheme;
  // The SchemeType that goes with |scheme|.
  SchemeType type;
};
 58 | 
 59 | // Adds an application-defined scheme to the internal list of "standard-format"
 60 | // URL schemes. A standard-format scheme adheres to what RFC 3986 calls "generic
 61 | // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3).
 62 | //
 63 | // This function is not threadsafe and can not be called concurrently with any
 64 | // other url_util function. It will assert if the list of standard schemes has
 65 | // been locked (see LockStandardSchemes).
 66 | URL_EXPORT void AddStandardScheme(const char* new_scheme,
 67 |                                   SchemeType scheme_type);
 68 | 
 69 | // Sets a flag to prevent future calls to AddStandardScheme from succeeding.
 70 | //
 71 | // This is designed to help prevent errors for multithreaded applications.
 72 | // Normal usage would be to call AddStandardScheme for your custom schemes at
 73 | // the beginning of program initialization, and then LockStandardSchemes. This
 74 | // prevents future callers from mistakenly calling AddStandardScheme when the
 75 | // program is running with multiple threads, where such usage would be
 76 | // dangerous.
 77 | //
 78 | // We could have had AddStandardScheme use a lock instead, but that would add
 79 | // some platform-specific dependencies we don't otherwise have now, and is
 80 | // overkill considering the normal usage is so simple.
 81 | URL_EXPORT void LockStandardSchemes();
 82 | 
 83 | // Locates the scheme in the given string and places it into |found_scheme|,
 84 | // which may be NULL to indicate the caller does not care about the range.
 85 | //
 86 | // Returns whether the given |compare| scheme matches the scheme found in the
 87 | // input (if any). The |compare| scheme must be a valid canonical scheme or
 88 | // the result of the comparison is undefined.
 89 | URL_EXPORT bool FindAndCompareScheme(const char* str,
 90 |                                      int str_len,
 91 |                                      const char* compare,
 92 |                                      Component* found_scheme);
 93 | URL_EXPORT bool FindAndCompareScheme(const base::char16* str,
 94 |                                      int str_len,
 95 |                                      const char* compare,
 96 |                                      Component* found_scheme);
 97 | inline bool FindAndCompareScheme(const std::string& str,
 98 |                                  const char* compare,
 99 |                                  Component* found_scheme) {
100 |   return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
101 |                               compare, found_scheme);
102 | }
103 | inline bool FindAndCompareScheme(const base::string16& str,
104 |                                  const char* compare,
105 |                                  Component* found_scheme) {
106 |   return FindAndCompareScheme(str.data(), static_cast<int>(str.size()),
107 |                               compare, found_scheme);
108 | }
109 | 
110 | // Returns true if the given scheme identified by |scheme| within |spec| is in
111 | // the list of known standard-format schemes (see AddStandardScheme).
112 | URL_EXPORT bool IsStandard(const char* spec, const Component& scheme);
113 | URL_EXPORT bool IsStandard(const base::char16* spec, const Component& scheme);
114 | 
// Returns true and sets |type| to the SchemeType of the given scheme
// identified by |scheme| within |spec| if the scheme is in the list of known
// standard-format schemes (see AddStandardScheme).
//
// Only an 8-bit |spec| overload is declared in this header.
// NOTE(review): the value of |*type| after a false return is not specified
// here -- confirm against the implementation before relying on it.
URL_EXPORT bool GetStandardSchemeType(const char* spec,
                                      const Component& scheme,
                                      SchemeType* type);
121 | 
122 | // URL library wrappers -------------------------------------------------------
123 | 
// Parses the given spec according to the extracted scheme type. Normal users
// should use the URL object, although this may be useful if performance is
// critical and you don't want to do the heap allocation for the std::string.
//
// As with the Canonicalize* functions, the charset converter can
// be NULL to use UTF-8 (it will be faster in this case).
//
// NOTE(review): |trim_path_end| is not described here. It presumably has the
// same meaning as the identically named parameter of TrimURL() in
// url_parse_internal.h (whether trailing whitespace/control characters are
// stripped) -- confirm against the implementation.
//
// Returns true if a valid URL was produced, false if not. On failure, the
// output and parsed structures will still be filled and will be consistent,
// but they will not represent a loadable URL.
URL_EXPORT bool Canonicalize(const char* spec,
                             int spec_len,
                             bool trim_path_end,
                             CharsetConverter* charset_converter,
                             CanonOutput* output,
                             Parsed* output_parsed);
URL_EXPORT bool Canonicalize(const base::char16* spec,
                             int spec_len,
                             bool trim_path_end,
                             CharsetConverter* charset_converter,
                             CanonOutput* output,
                             Parsed* output_parsed);
146 | 
// Resolves a potentially relative URL relative to the given parsed base URL.
// The base MUST be valid. The resulting canonical URL and parsed information
// will be placed into the given out variables.
//
// The relative need not be relative. If we discover that it's absolute, this
// will produce a canonical version of that URL. See Canonicalize() for more
// about the charset_converter.
//
// The base spec is always 8-bit; the two overloads differ only in the
// character type of |relative|.
//
// Returns true if the output is valid, false if the input could not produce
// a valid URL.
URL_EXPORT bool ResolveRelative(const char* base_spec,
                                int base_spec_len,
                                const Parsed& base_parsed,
                                const char* relative,
                                int relative_length,
                                CharsetConverter* charset_converter,
                                CanonOutput* output,
                                Parsed* output_parsed);
URL_EXPORT bool ResolveRelative(const char* base_spec,
                                int base_spec_len,
                                const Parsed& base_parsed,
                                const base::char16* relative,
                                int relative_length,
                                CharsetConverter* charset_converter,
                                CanonOutput* output,
                                Parsed* output_parsed);
173 | 
// Replaces components in the given VALID input URL. The new canonical URL info
// is written to output and out_parsed.
//
// Both overloads take an 8-bit |spec|; they differ only in the character type
// of |replacements|.
//
// Returns true if the resulting URL is valid.
URL_EXPORT bool ReplaceComponents(const char* spec,
                                  int spec_len,
                                  const Parsed& parsed,
                                  const Replacements<char>& replacements,
                                  CharsetConverter* charset_converter,
                                  CanonOutput* output,
                                  Parsed* out_parsed);
URL_EXPORT bool ReplaceComponents(
    const char* spec,
    int spec_len,
    const Parsed& parsed,
    const Replacements<base::char16>& replacements,
    CharsetConverter* charset_converter,
    CanonOutput* output,
    Parsed* out_parsed);
193 | 
194 | // String helper functions ----------------------------------------------------
195 | 
// Unescapes the given string using URL escaping rules. The input is an 8-bit
// buffer of |length| characters; the result goes to |output|, which is a
// CanonOutputW rather than the CanonOutput used by EncodeURIComponent.
URL_EXPORT void DecodeURLEscapeSequences(const char* input,
                                         int length,
                                         CanonOutputW* output);
200 | 
201 | // Escapes the given string as defined by the JS method encodeURIComponent. See
202 | // https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/encodeURIComponent
203 | URL_EXPORT void EncodeURIComponent(const char* input,
204 |                                    int length,
205 |                                    CanonOutput* output);
206 | 
207 | }  // namespace url
208 | 
209 | #endif  // URL_URL_UTIL_H_
210 | 


--------------------------------------------------------------------------------
/vendor/gurl/url/url_util_internal.h:
--------------------------------------------------------------------------------
 1 | // Copyright 2013 The Chromium Authors. All rights reserved.
 2 | // Use of this source code is governed by a BSD-style license that can be
 3 | // found in the LICENSE file.
 4 | 
 5 | #ifndef URL_URL_UTIL_INTERNAL_H_
 6 | #define URL_URL_UTIL_INTERNAL_H_
 7 | 
 8 | #include <string>
 9 | 
10 | #include "base/strings/string16.h"
11 | #include "url/third_party/mozilla/url_parse.h"
12 | 
13 | namespace url {
14 | 
15 | // Given a string and a range inside the string, compares it to the given
16 | // lower-case |compare_to| buffer.
17 | bool CompareSchemeComponent(const char* spec,
18 |                             const Component& component,
19 |                             const char* compare_to);
20 | bool CompareSchemeComponent(const base::char16* spec,
21 |                             const Component& component,
22 |                             const char* compare_to);
23 | 
24 | }  // namespace url
25 | 
26 | #endif  // URL_URL_UTIL_INTERNAL_H_
27 | 


--------------------------------------------------------------------------------