├── .gitattributes ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE └── scripts ├── github_pull_request.sh ├── requirements.txt ├── tests ├── __init__.py ├── test_validate_format.py └── test_validate_links.py └── validate ├── __init__.py ├── format.py └── links.py /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | 3 | /.github export-ignore 4 | /build export-ignore 5 | .travis.yml export-ignore 6 | README.md export-ignore 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | .pypirc 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to public-apis 2 | 3 | > While the masses of pull requests and community involvement are appreciated, some pull requests have been specifically 4 | opened to market company APIs that offer paid solutions. This API list is not a marketing tool, but a tool to help the 5 | community build applications and use free, public APIs quickly and easily. Pull requests that are identified as marketing attempts will not be accepted. 6 | > 7 | > Please make sure the API you want to add has full, free access or at least a free tier and does not depend on the purchase of a device/service before submitting. An example that would be rejected is an API that is used to control a smart outlet - the API is free, but you must purchase the smart device. 8 | > 9 | > Thanks for understanding! :) 10 | 11 | ## Formatting 12 | 13 | Current API entry format: 14 | 15 | | API | Description | Auth | HTTPS | CORS | Call this API | 16 | | --- | --- | --- | --- | --- | --- | 17 | | API Title(Link to API documentation) | Description of API | Does this API require authentication? * | Does the API support HTTPS? | Does the API support [CORS](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS)? * | [Does this API have a public Postman Collection?](https://learning.postman.com/docs/publishing-your-api/run-in-postman/creating-run-button/) | 18 | 19 | Example entry: 20 | 21 | ``` 22 | | [NASA](https://api.nasa.gov) | NASA data, including imagery | No | Yes | Yes | [Run in Postman Button] 23 | ``` 24 | 25 | \* Currently, the only accepted inputs for the `Auth` field are as follows: 26 | 27 | * `OAuth` - _the API supports OAuth_ 28 | * `apiKey` - _the API uses a private key string/token for authentication - try and use the correct parameter_ 29 | * `X-Mashape-Key` - _the name of the header which may need to be sent_ 30 | * `No` - _the API requires no authentication to run_ 31 | * `User-Agent` - _the name of the header to be sent with requests to the API_ 32 | 33 | \* Currently, the only accepted inputs for the `CORS` field are as follows: 34 | 35 | * `Yes` - _the API supports CORS_ 36 | * `No` - _the API does not support CORS_ 37 | * `Unknown` - _it is unknown if the API supports CORS_ 38 | 39 | \* For the Call this API column, add a link to a Postman collection. You may need to [create a collection](https://learning.postman.com/docs/getting-started/first-steps/creating-the-first-collection/) to create a Run in Postman Button. 40 | 41 | 42 | _Without proper [CORS configuration](https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS) an API will only be usable server side._ 43 | 44 | After you've created a branch on your fork with your changes, it's time to [make a pull request][pr-link]. 45 | 46 | 47 | *Please follow the guidelines given below while making a Pull Request to the Public APIs* 48 | 49 | ## Pull Request Guidelines 50 | 51 | * Never put an update/new version of an API that is already listed, the old version of the API gets deprecated. 52 | * Continue to follow the alphabetical ordering that is in place per section. 53 | * Each table column should be padded with one space on either side. 54 | * The Description should not exceed 100 characters. 55 | * If an API seems to fall into multiple categories, please place the listing within the section most in line with the services offered through the API. For example, the Instagram API is listed under `Social` since it is mainly a social network, even though it could also apply to `Photography`. 56 | * Add one link per Pull Request. 57 | * Make sure the PR title is in the format of `Add Api-name API` *for e.g.*: `Add Blockchain API` 58 | * Use a short descriptive commit message. *for e.g.*: ❌`Update Readme.md` ✔ `Add Blockchain API to Cryptocurrency` 59 | * Search previous Pull Requests or Issues before making a new one, as yours may be a duplicate. 60 | * Don't mention the TLD(Top Level Domain) in the name of the API. *for e.g.*: ❌Gmail.com ✔Gmail 61 | * Please make sure the API name does not end with `API`. *for e.g.*: ❌Gmail API ✔Gmail 62 | * Please make sure the API has proper documentation. 63 | * Please make sure you squash all commits together before opening a pull request. If your pull request requires changes upon review, please be sure to squash all additional commits as well. [This wiki page][squash-link] outlines the squash process. 64 | * Target your Pull Request to the `master` branch of the `public-apis` 65 | 66 | Once you’ve submitted a pull request, the collaborators can review your proposed changes and decide whether or not to incorporate (pull in) your changes. 67 | 68 | ### Pull Request Pro Tips 69 | 70 | * [Fork][fork-link] the repository and [clone][clone-link] it locally. 71 | Connect your local repository to the original `upstream` repository by adding it as a [remote][remote-link]. 72 | Pull in changes from `upstream` often so that you stay up to date and so when you submit your pull request, 73 | merge conflicts will be less likely. See more detailed instructions [here][syncing-link]. 74 | * Create a [branch][branch-link] for your edits. 75 | * Contribute in the style of the project as outlined above. This makes it easier for the collaborators to merge 76 | and for others to understand and maintain in the future. 77 | 78 | ### Open Pull Requests 79 | 80 | Once you’ve opened a pull request, a discussion will start around your proposed changes. 81 | 82 | Other contributors and users may chime in, but ultimately the decision is made by the collaborators. 83 | 84 | During the discussion, you may be asked to make some changes to your pull request. 85 | 86 | If so, add more commits to your branch and push them – they will automatically go into the existing pull request. But don't forget to squash them. 87 | 88 | Opening a pull request will trigger a build to check the validity of all links in the project. After the build completes, **please ensure that the build has passed**. If the build did not pass, please view the build logs and correct any errors that were found in your contribution. 89 | 90 | *Thanks for being a part of this project, and we look forward to hearing from you soon!* 91 | 92 | [branch-link]: <http://guides.github.com/introduction/flow/> 93 | [clone-link]: <https://help.github.com/articles/cloning-a-repository/> 94 | [fork-link]: <http://guides.github.com/activities/forking/> 95 | [oauth-link]: <https://en.wikipedia.org/wiki/OAuth> 96 | [pr-link]: <https://help.github.com/articles/creating-a-pull-request/> 97 | [remote-link]: <https://help.github.com/articles/configuring-a-remote-for-a-fork/> 98 | [syncing-link]: <https://help.github.com/articles/syncing-a-fork> 99 | [squash-link]: <https://github.com/todotxt/todo.txt-android/wiki/Squash-All-Commits-Related-to-a-Single-Issue-into-a-Single-Commit> 100 | 101 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 public-apis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/github_pull_request.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Argument validation 6 | if [ $# -ne 3 ]; then 7 | echo "Usage: $0 <github-repo> <pull-number> <filename>" 8 | exit 1 9 | fi 10 | 11 | # Assign variables 12 | GITHUB_REPOSITORY="$1" 13 | GITHUB_PULL_REQUEST="$2" 14 | FILENAME="$3" 15 | 16 | # Move to root of project 17 | cd "$GITHUB_WORKSPACE" 18 | 19 | # Determine files 20 | FILENAME="$( realpath "${FILENAME}" )" 21 | 22 | # Skip if build number could not be determined 23 | if [ -z "$GITHUB_REPOSITORY" -o -z "$GITHUB_PULL_REQUEST" ]; then 24 | echo "No pull request and/or repository is provided" 25 | exit 1 26 | fi 27 | 28 | # Pull changes on PR 29 | echo "running on Pull Request #$GITHUB_PULL_REQUEST" 30 | 31 | # Trick the URL validator python script into not seeing this as a URL 32 | DUMMY_SCHEME="https" 33 | DIFF_URL="$DUMMY_SCHEME://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$GITHUB_PULL_REQUEST.diff" 34 | curl -L "$DIFF_URL" -o diff.txt 35 | 36 | # Construct diff 37 | echo "------- BEGIN DIFF -------" 38 | cat diff.txt 39 | echo "-------- END DIFF --------" 40 | cat diff.txt | egrep "\+" > additions.txt 41 | 42 | echo "------ BEGIN ADDITIONS -----" 43 | cat additions.txt 44 | echo "------- END ADDITIONS ------" 45 | LINK_FILE=additions.txt 46 | 47 | # Validate links 48 | echo "Running link validation on additions..." 49 | python scripts/validate/links.py "$LINK_FILE" 50 | 51 | # Vebosity 52 | if [[ $? != 0 ]]; then 53 | echo "link validation failed on additions!" 54 | exit 1 55 | else 56 | echo "link validation passed on additions!" 57 | fi 58 | -------------------------------------------------------------------------------- /scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2021.10.8 2 | charset-normalizer==2.0.10 3 | idna==3.3 4 | requests==2.27.1 5 | urllib3==1.26.8 6 | -------------------------------------------------------------------------------- /scripts/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /scripts/tests/test_validate_format.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import unittest 4 | 5 | from validate.format import error_message 6 | from validate.format import get_categories_content 7 | from validate.format import check_alphabetical_order 8 | from validate.format import check_title 9 | from validate.format import check_description, max_description_length 10 | from validate.format import check_auth, auth_keys 11 | from validate.format import check_https, https_keys 12 | from validate.format import check_cors, cors_keys 13 | from validate.format import check_entry 14 | from validate.format import check_file_format, min_entries_per_category, num_segments 15 | 16 | 17 | class TestValidadeFormat(unittest.TestCase): 18 | 19 | def test_error_message_return_and_return_type(self): 20 | line_num_unity = 1 21 | line_num_ten = 10 22 | line_num_hundred = 100 23 | line_num_thousand = 1000 24 | 25 | msg = 'This is a unit test' 26 | 27 | err_msg_unity = error_message(line_num_unity, msg) 28 | err_msg_ten = error_message(line_num_ten, msg) 29 | err_msg_hundred = error_message(line_num_hundred, msg) 30 | err_msg_thousand = error_message(line_num_thousand, msg) 31 | 32 | self.assertIsInstance(err_msg_unity, str) 33 | self.assertIsInstance(err_msg_ten, str) 34 | self.assertIsInstance(err_msg_hundred, str) 35 | self.assertIsInstance(err_msg_thousand, str) 36 | 37 | self.assertEqual(err_msg_unity, '(L002) This is a unit test') 38 | self.assertEqual(err_msg_ten, '(L011) This is a unit test') 39 | self.assertEqual(err_msg_hundred, '(L101) This is a unit test') 40 | self.assertEqual(err_msg_thousand, '(L1001) This is a unit test') 41 | 42 | def test_if_get_categories_content_return_correct_data_of_categories(self): 43 | fake_contents = [ 44 | '### A', 45 | 'API | Description | Auth | HTTPS | CORS |', 46 | '|---|---|---|---|---|', 47 | '| [AA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 48 | '| [AB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 49 | '', 50 | '### B', 51 | 'API | Description | Auth | HTTPS | CORS |', 52 | '|---|---|---|---|---|', 53 | '| [BA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 54 | '| [BB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |' 55 | ] 56 | 57 | result = get_categories_content(fake_contents) 58 | self.assertIsInstance(result, tuple) 59 | 60 | categories, category_line_num = result 61 | self.assertIsInstance(categories, dict) 62 | self.assertIsInstance(category_line_num, dict) 63 | 64 | expected_result = ({'A': ['AA', 'AB'], 'B': ['BA', 'BB']}, {'A': 0, 'B': 6}) 65 | 66 | for res, ex_res in zip(result, expected_result): 67 | 68 | with self.subTest(): 69 | self.assertEqual(res, ex_res) 70 | 71 | def test_if_check_alphabetical_order_return_correct_msg_error(self): 72 | correct_lines = [ 73 | '### A', 74 | 'API | Description | Auth | HTTPS | CORS |', 75 | '|---|---|---|---|---|', 76 | '| [AA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 77 | '| [AB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 78 | '', 79 | '### B', 80 | 'API | Description | Auth | HTTPS | CORS |', 81 | '|---|---|---|---|---|', 82 | '| [BA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 83 | '| [BB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |' 84 | ] 85 | 86 | incorrect_lines = [ 87 | '### A', 88 | 'API | Description | Auth | HTTPS | CORS |', 89 | '|---|---|---|---|---|', 90 | '| [AB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 91 | '| [AA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 92 | '', 93 | '### B', 94 | 'API | Description | Auth | HTTPS | CORS |', 95 | '|---|---|---|---|---|', 96 | '| [BB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 97 | '| [BA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |' 98 | ] 99 | 100 | 101 | err_msgs_1 = check_alphabetical_order(correct_lines) 102 | err_msgs_2 = check_alphabetical_order(incorrect_lines) 103 | 104 | self.assertIsInstance(err_msgs_1, list) 105 | self.assertIsInstance(err_msgs_2, list) 106 | 107 | self.assertEqual(len(err_msgs_1), 0) 108 | self.assertEqual(len(err_msgs_2), 2) 109 | 110 | expected_err_msgs = [ 111 | '(L001) A category is not alphabetical order', 112 | '(L007) B category is not alphabetical order' 113 | ] 114 | 115 | for err_msg, ex_err_msg in zip(err_msgs_2, expected_err_msgs): 116 | 117 | with self.subTest(): 118 | self.assertEqual(err_msg, ex_err_msg) 119 | 120 | def test_check_title_with_correct_title(self): 121 | raw_title = '[A](https://www.ex.com)' 122 | 123 | err_msgs = check_title(0, raw_title) 124 | 125 | self.assertIsInstance(err_msgs, list) 126 | self.assertEqual(len(err_msgs), 0) 127 | self.assertEqual(err_msgs, []) 128 | 129 | def test_check_title_with_markdown_syntax_incorrect(self): 130 | raw_title = '[A(https://www.ex.com)' 131 | 132 | err_msgs = check_title(0, raw_title) 133 | 134 | self.assertIsInstance(err_msgs, list) 135 | self.assertEqual(len(err_msgs), 1) 136 | 137 | err_msg = err_msgs[0] 138 | expected_err_msg = '(L001) Title syntax should be "[TITLE](LINK)"' 139 | 140 | self.assertEqual(err_msg, expected_err_msg) 141 | 142 | def test_check_title_with_api_at_the_end_of_the_title(self): 143 | raw_title = '[A API](https://www.ex.com)' 144 | 145 | err_msgs = check_title(0, raw_title) 146 | 147 | self.assertIsInstance(err_msgs, list) 148 | self.assertEqual(len(err_msgs), 1) 149 | 150 | err_msg = err_msgs[0] 151 | expected_err_msg = '(L001) Title should not end with "... API". Every entry is an API here!' 152 | 153 | self.assertEqual(err_msg, expected_err_msg) 154 | 155 | def test_check_description_with_correct_description(self): 156 | desc = 'This is a fake description' 157 | 158 | err_msgs = check_description(0, desc) 159 | 160 | self.assertIsInstance(err_msgs, list) 161 | self.assertEqual(len(err_msgs), 0) 162 | self.assertEqual(err_msgs, []) 163 | 164 | def test_check_description_with_first_char_is_not_capitalized(self): 165 | desc = 'this is a fake description' 166 | 167 | err_msgs = check_description(0, desc) 168 | 169 | self.assertIsInstance(err_msgs, list) 170 | self.assertEqual(len(err_msgs), 1) 171 | 172 | err_msg = err_msgs[0] 173 | expected_err_msg = '(L001) first character of description is not capitalized' 174 | 175 | self.assertIsInstance(err_msg, str) 176 | self.assertEqual(err_msg, expected_err_msg) 177 | 178 | def test_check_description_with_punctuation_in_the_end(self): 179 | base_desc = 'This is a fake description' 180 | punctuation = r"""!"#$%&'*+,-./:;<=>?@[\]^_`{|}~""" 181 | desc_with_punc = [base_desc + punc for punc in punctuation] 182 | 183 | for desc in desc_with_punc: 184 | 185 | with self.subTest(): 186 | err_msgs = check_description(0, desc) 187 | 188 | self.assertIsInstance(err_msgs, list) 189 | self.assertEqual(len(err_msgs), 1) 190 | 191 | err_msg = err_msgs[0] 192 | expected_err_msg = f'(L001) description should not end with {desc[-1]}' 193 | 194 | self.assertIsInstance(err_msg, str) 195 | self.assertEqual(err_msg, expected_err_msg) 196 | 197 | def test_check_description_that_exceeds_the_character_limit(self): 198 | long_desc = 'Desc' * max_description_length 199 | long_desc_length = len(long_desc) 200 | 201 | err_msgs = check_description(0, long_desc) 202 | 203 | self.assertIsInstance(err_msgs, list) 204 | self.assertEqual(len(err_msgs), 1) 205 | 206 | err_msg = err_msgs[0] 207 | expected_err_msg = f'(L001) description should not exceed {max_description_length} characters (currently {long_desc_length})' 208 | 209 | self.assertIsInstance(err_msg, str) 210 | self.assertEqual(err_msg, expected_err_msg) 211 | 212 | def test_check_auth_with_valid_auth(self): 213 | auth_valid = [f'`{auth}`' for auth in auth_keys if auth != 'No'] 214 | auth_valid.append('No') 215 | 216 | for auth in auth_valid: 217 | with self.subTest(): 218 | err_msgs = check_auth(0, auth) 219 | self.assertIsInstance(err_msgs, list) 220 | self.assertEqual(len(err_msgs), 0) 221 | self.assertEqual(err_msgs, []) 222 | 223 | def test_check_auth_without_backtick(self): 224 | auth_without_backtick = [auth for auth in auth_keys if auth != 'No'] 225 | 226 | for auth in auth_without_backtick: 227 | with self.subTest(): 228 | err_msgs = check_auth(0, auth) 229 | self.assertIsInstance(err_msgs, list) 230 | self.assertEqual(len(err_msgs), 1) 231 | 232 | err_msg = err_msgs[0] 233 | expected_err_msg = '(L001) auth value is not enclosed with `backticks`' 234 | 235 | self.assertIsInstance(err_msg, str) 236 | self.assertEqual(err_msg, expected_err_msg) 237 | 238 | def test_check_auth_with_invalid_auth(self): 239 | auth_invalid_without_backtick = ['Yes', 'yes', 'no', 'random', 'Unknown'] 240 | auth_invalid_with_backtick = ['`Yes`', '`yes`', '`no`', '`random`', '`Unknown`'] 241 | 242 | for auth in auth_invalid_without_backtick: 243 | with self.subTest(): 244 | err_msgs = check_auth(0, auth) 245 | self.assertIsInstance(err_msgs, list) 246 | self.assertEqual(len(err_msgs), 2) 247 | 248 | err_msg_1 = err_msgs[0] 249 | err_msg_2 = err_msgs[1] 250 | 251 | expected_err_msg_1 = f'(L001) auth value is not enclosed with `backticks`' 252 | expected_err_msg_2 = f'(L001) {auth} is not a valid Auth option' 253 | 254 | self.assertIsInstance(err_msg_1, str) 255 | self.assertIsInstance(err_msg_2, str) 256 | self.assertEqual(err_msg_1, expected_err_msg_1) 257 | self.assertEqual(err_msg_2, expected_err_msg_2) 258 | 259 | for auth in auth_invalid_with_backtick: 260 | with self.subTest(): 261 | err_msgs = check_auth(0, auth) 262 | self.assertIsInstance(err_msgs, list) 263 | self.assertEqual(len(err_msgs), 1) 264 | 265 | err_msg = err_msgs[0] 266 | expected_err_msg = f'(L001) {auth} is not a valid Auth option' 267 | 268 | self.assertIsInstance(err_msg, str) 269 | self.assertEqual(err_msg, expected_err_msg) 270 | 271 | def test_check_https_with_valid_https(self): 272 | for https in https_keys: 273 | with self.subTest(): 274 | err_msgs = check_https(0, https) 275 | self.assertIsInstance(err_msgs, list) 276 | self.assertEqual(len(err_msgs), 0) 277 | self.assertEqual(err_msgs, []) 278 | 279 | def test_check_https_with_invalid_https(self): 280 | invalid_https_keys = ['yes', 'no', 'Unknown', 'https', 'http'] 281 | 282 | for https in invalid_https_keys: 283 | with self.subTest(): 284 | err_msgs = check_https(0, https) 285 | self.assertIsInstance(err_msgs, list) 286 | self.assertEqual(len(err_msgs), 1) 287 | 288 | err_msg = err_msgs[0] 289 | expected_err_msg = f'(L001) {https} is not a valid HTTPS option' 290 | 291 | self.assertIsInstance(err_msg, str) 292 | self.assertEqual(err_msg, expected_err_msg) 293 | 294 | def test_check_cors_with_valid_cors(self): 295 | for cors in cors_keys: 296 | with self.subTest(): 297 | err_msgs = check_cors(0, cors) 298 | self.assertIsInstance(err_msgs, list) 299 | self.assertEqual(len(err_msgs), 0) 300 | self.assertEqual(err_msgs, []) 301 | 302 | def test_check_cors_with_invalid_cors(self): 303 | invalid_cors_keys = ['yes', 'no', 'unknown', 'cors'] 304 | 305 | for cors in invalid_cors_keys: 306 | with self.subTest(): 307 | err_msgs = check_cors(0, cors) 308 | self.assertIsInstance(err_msgs, list) 309 | self.assertEqual(len(err_msgs), 1) 310 | 311 | err_msg = err_msgs[0] 312 | expected_err_msg = f'(L001) {cors} is not a valid CORS option' 313 | 314 | self.assertIsInstance(err_msg, str) 315 | self.assertEqual(err_msg, expected_err_msg) 316 | 317 | def test_check_entry_with_correct_segments(self): 318 | correct_segments = ['[A](https://www.ex.com)', 'Desc', '`apiKey`', 'Yes', 'Yes'] 319 | 320 | err_msgs = check_entry(0, correct_segments) 321 | 322 | self.assertIsInstance(err_msgs, list) 323 | self.assertEqual(len(err_msgs), 0) 324 | self.assertEqual(err_msgs, []) 325 | 326 | def test_check_entry_with_incorrect_segments(self): 327 | incorrect_segments = ['[A API](https://www.ex.com)', 'desc.', 'yes', 'yes', 'yes'] 328 | 329 | err_msgs = check_entry(0, incorrect_segments) 330 | expected_err_msgs = [ 331 | '(L001) Title should not end with "... API". Every entry is an API here!', 332 | '(L001) first character of description is not capitalized', 333 | '(L001) description should not end with .', 334 | '(L001) auth value is not enclosed with `backticks`', 335 | '(L001) yes is not a valid Auth option', 336 | '(L001) yes is not a valid HTTPS option', 337 | '(L001) yes is not a valid CORS option' 338 | ] 339 | 340 | self.assertIsInstance(err_msgs, list) 341 | self.assertEqual(len(err_msgs), 7) 342 | for err_msg in err_msgs: 343 | with self.subTest(): 344 | self.assertIsInstance(err_msg, str) 345 | self.assertEqual(err_msgs, expected_err_msgs) 346 | 347 | def test_check_file_format_with_correct_format(self): 348 | correct_format = [ 349 | '## Index', 350 | '* [A](#a)', 351 | '* [B](#b)', 352 | '', 353 | '### A', 354 | 'API | Description | Auth | HTTPS | CORS |', 355 | '|---|---|---|---|---|', 356 | '| [AA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 357 | '| [AB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 358 | '| [AC](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 359 | '', 360 | '### B', 361 | 'API | Description | Auth | HTTPS | CORS |', 362 | '|---|---|---|---|---|', 363 | '| [BA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 364 | '| [BB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 365 | '| [BC](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |' 366 | ] 367 | 368 | err_msgs = check_file_format(lines=correct_format) 369 | 370 | self.assertIsInstance(err_msgs, list) 371 | self.assertEqual(len(err_msgs), 0) 372 | self.assertEqual(err_msgs, []) 373 | 374 | def test_check_file_format_with_category_header_not_added_to_index(self): 375 | incorrect_format = [ 376 | '## Index', 377 | '', 378 | '### A', 379 | 'API | Description | Auth | HTTPS | CORS |', 380 | '|---|---|---|---|---|', 381 | '| [AA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 382 | '| [AB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 383 | '| [AC](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 384 | ] 385 | 386 | err_msgs = check_file_format(lines=incorrect_format) 387 | expected_err_msg = '(L003) category header (A) not added to Index section' 388 | 389 | self.assertIsInstance(err_msgs, list) 390 | self.assertEqual(len(err_msgs), 1) 391 | err_msg = err_msgs[0] 392 | self.assertEqual(err_msg, expected_err_msg) 393 | 394 | def test_check_file_format_with_category_without_min_entries(self): 395 | incorrect_format = [ 396 | '## Index', 397 | '* [A](#a)', 398 | '* [B](#b)', 399 | '', 400 | '### A', 401 | 'API | Description | Auth | HTTPS | CORS |', 402 | '|---|---|---|---|---|', 403 | '| [AA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 404 | '', 405 | '### B', 406 | 'API | Description | Auth | HTTPS | CORS |', 407 | '|---|---|---|---|---|', 408 | '| [BA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 409 | '| [BB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 410 | '| [BC](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |' 411 | ] 412 | 413 | category_with_err = 'A' 414 | num_in_category = 1 415 | 416 | err_msgs = check_file_format(lines=incorrect_format) 417 | expected_err_msg = f'(L005) {category_with_err} category does not have the minimum {min_entries_per_category} entries (only has {num_in_category})' 418 | 419 | self.assertIsInstance(err_msgs, list) 420 | self.assertEqual(len(err_msgs), 1) 421 | err_msg = err_msgs[0] 422 | self.assertEqual(err_msg, expected_err_msg) 423 | 424 | def test_check_file_format_entry_without_all_necessary_columns(self): 425 | incorrect_format = [ 426 | '## Index', 427 | '* [A](#a)', 428 | '', 429 | '### A', 430 | 'API | Description | Auth | HTTPS | CORS |', 431 | '|---|---|---|---|---|', 432 | '| [AA](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 433 | '| [AB](https://www.ex.com) | Desc | `apiKey` |', # missing https and cors 434 | '| [AC](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 435 | ] 436 | 437 | current_segments_num = 3 438 | 439 | err_msgs = check_file_format(lines=incorrect_format) 440 | expected_err_msg = f'(L008) entry does not have all the required columns (have {current_segments_num}, need {num_segments})' 441 | 442 | self.assertIsInstance(err_msgs, list) 443 | self.assertEqual(len(err_msgs), 1) 444 | err_msg = err_msgs[0] 445 | self.assertEqual(err_msg, expected_err_msg) 446 | 447 | def test_check_file_format_without_1_space_between_the_segments(self): 448 | incorrect_format = [ 449 | '## Index', 450 | '* [A](#a)', 451 | '', 452 | '### A', 453 | 'API | Description | Auth | HTTPS | CORS |', 454 | '|---|---|---|---|---|', 455 | '| [AA](https://www.ex.com) | Desc |`apiKey`| Yes | Yes |', # space between segment of auth column missing 456 | '| [AB](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 457 | '| [AC](https://www.ex.com) | Desc | `apiKey` | Yes | Yes |', 458 | ] 459 | 460 | err_msgs = check_file_format(lines=incorrect_format) 461 | expected_err_msg = f'(L007) each segment must start and end with exactly 1 space' 462 | 463 | self.assertIsInstance(err_msgs, list) 464 | self.assertEqual(len(err_msgs), 1) 465 | err_msg = err_msgs[0] 466 | self.assertEqual(err_msg, expected_err_msg) 467 | -------------------------------------------------------------------------------- /scripts/tests/test_validate_links.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import unittest 4 | 5 | from validate.links import find_links_in_text 6 | from validate.links import check_duplicate_links 7 | from validate.links import fake_user_agent 8 | from validate.links import get_host_from_link 9 | from validate.links import has_cloudflare_protection 10 | 11 | 12 | class FakeResponse(): 13 | def __init__(self, code: int, headers: dict, text: str) -> None: 14 | self.status_code = code 15 | self.headers = headers 16 | self.text = text 17 | 18 | 19 | class TestValidateLinks(unittest.TestCase): 20 | 21 | def setUp(self): 22 | self.duplicate_links = [ 23 | 'https://www.example.com', 24 | 'https://www.example.com', 25 | 'https://www.example.com', 26 | 'https://www.anotherexample.com', 27 | ] 28 | self.no_duplicate_links = [ 29 | 'https://www.firstexample.com', 30 | 'https://www.secondexample.com', 31 | 'https://www.anotherexample.com', 32 | ] 33 | 34 | self.code_200 = 200 35 | self.code_403 = 403 36 | self.code_503 = 503 37 | 38 | self.cloudflare_headers = {'Server': 'cloudflare'} 39 | self.no_cloudflare_headers = {'Server': 'google'} 40 | 41 | self.text_with_cloudflare_flags = '403 Forbidden Cloudflare We are checking your browser...' 42 | self.text_without_cloudflare_flags = 'Lorem Ipsum' 43 | 44 | def test_find_link_in_text(self): 45 | text = """ 46 | # this is valid 47 | 48 | http://example.com?param1=1¶m2=2#anchor 49 | https://www.example.com?param1=1¶m2=2#anchor 50 | https://www.example.com.br 51 | https://www.example.com.gov.br 52 | [Example](https://www.example.com?param1=1¶m2=2#anchor) 53 | lorem ipsum https://www.example.com?param1=1¶m2=2#anchor 54 | https://www.example.com?param1=1¶m2=2#anchor lorem ipsum 55 | 56 | # this not is valid 57 | 58 | example.com 59 | https:example.com 60 | https:/example.com 61 | https//example.com 62 | https//.com 63 | """ 64 | 65 | links = find_links_in_text(text) 66 | 67 | self.assertIsInstance(links, list) 68 | self.assertEqual(len(links), 7) 69 | 70 | for link in links: 71 | with self.subTest(): 72 | self.assertIsInstance(link, str) 73 | 74 | def test_find_link_in_text_with_invalid_argument(self): 75 | with self.assertRaises(TypeError): 76 | find_links_in_text() 77 | find_links_in_text(1) 78 | find_links_in_text(True) 79 | 80 | def test_if_check_duplicate_links_has_the_correct_return(self): 81 | result_1 = check_duplicate_links(self.duplicate_links) 82 | result_2 = check_duplicate_links(self.no_duplicate_links) 83 | 84 | self.assertIsInstance(result_1, tuple) 85 | self.assertIsInstance(result_2, tuple) 86 | 87 | has_duplicate_links, links = result_1 88 | no_duplicate_links, no_links = result_2 89 | 90 | self.assertTrue(has_duplicate_links) 91 | self.assertFalse(no_duplicate_links) 92 | 93 | self.assertIsInstance(links, list) 94 | self.assertIsInstance(no_links, list) 95 | 96 | self.assertEqual(len(links), 2) 97 | self.assertEqual(len(no_links), 0) 98 | 99 | def test_if_fake_user_agent_has_a_str_as_return(self): 100 | user_agent = fake_user_agent() 101 | self.assertIsInstance(user_agent, str) 102 | 103 | def test_get_host_from_link(self): 104 | links = [ 105 | 'example.com', 106 | 'https://example.com', 107 | 'https://www.example.com', 108 | 'https://www.example.com.br', 109 | 'https://www.example.com/route', 110 | 'https://www.example.com?p=1&q=2', 111 | 'https://www.example.com#anchor' 112 | ] 113 | 114 | for link in links: 115 | host = get_host_from_link(link) 116 | 117 | with self.subTest(): 118 | self.assertIsInstance(host, str) 119 | 120 | self.assertNotIn('://', host) 121 | self.assertNotIn('/', host) 122 | self.assertNotIn('?', host) 123 | self.assertNotIn('#', host) 124 | 125 | with self.assertRaises(TypeError): 126 | get_host_from_link() 127 | 128 | def test_has_cloudflare_protection_with_code_403_and_503_in_response(self): 129 | resp_with_cloudflare_protection_code_403 = FakeResponse( 130 | code=self.code_403, 131 | headers=self.cloudflare_headers, 132 | text=self.text_with_cloudflare_flags 133 | ) 134 | 135 | resp_with_cloudflare_protection_code_503 = FakeResponse( 136 | code=self.code_503, 137 | headers=self.cloudflare_headers, 138 | text=self.text_with_cloudflare_flags 139 | ) 140 | 141 | result1 = has_cloudflare_protection(resp_with_cloudflare_protection_code_403) 142 | result2 = has_cloudflare_protection(resp_with_cloudflare_protection_code_503) 143 | 144 | self.assertTrue(result1) 145 | self.assertTrue(result2) 146 | 147 | def test_has_cloudflare_protection_when_there_is_no_protection(self): 148 | resp_without_cloudflare_protection1 = FakeResponse( 149 | code=self.code_200, 150 | headers=self.no_cloudflare_headers, 151 | text=self.text_without_cloudflare_flags 152 | ) 153 | 154 | resp_without_cloudflare_protection2 = FakeResponse( 155 | code=self.code_403, 156 | headers=self.no_cloudflare_headers, 157 | text=self.text_without_cloudflare_flags 158 | ) 159 | 160 | resp_without_cloudflare_protection3 = FakeResponse( 161 | code=self.code_503, 162 | headers=self.no_cloudflare_headers, 163 | text=self.text_without_cloudflare_flags 164 | ) 165 | 166 | result1 = has_cloudflare_protection(resp_without_cloudflare_protection1) 167 | result2 = has_cloudflare_protection(resp_without_cloudflare_protection2) 168 | result3 = has_cloudflare_protection(resp_without_cloudflare_protection3) 169 | 170 | self.assertFalse(result1) 171 | self.assertFalse(result2) 172 | self.assertFalse(result3) 173 | -------------------------------------------------------------------------------- /scripts/validate/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from validate import format 4 | from validate import links 5 | -------------------------------------------------------------------------------- /scripts/validate/format.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import re 4 | import sys 5 | from string import punctuation 6 | from typing import List, Tuple, Dict 7 | 8 | # Temporary replacement 9 | # The descriptions that contain () at the end must adapt to the new policy later 10 | punctuation = punctuation.replace('()', '') 11 | 12 | anchor = '###' 13 | auth_keys = ['apiKey', 'OAuth', 'X-Mashape-Key', 'User-Agent', 'No'] 14 | https_keys = ['Yes', 'No'] 15 | cors_keys = ['Yes', 'No', 'Unknown'] 16 | 17 | index_title = 0 18 | index_desc = 1 19 | index_auth = 2 20 | index_https = 3 21 | index_cors = 4 22 | 23 | num_segments = 5 24 | min_entries_per_category = 3 25 | max_description_length = 100 26 | 27 | anchor_re = re.compile(anchor + '\s(.+)') 28 | category_title_in_index_re = re.compile('\*\s\[(.*)\]') 29 | link_re = re.compile('\[(.+)\]\((http.*)\)') 30 | 31 | # Type aliases 32 | APIList = List[str] 33 | Categories = Dict[str, APIList] 34 | CategoriesLineNumber = Dict[str, int] 35 | 36 | 37 | def error_message(line_number: int, message: str) -> str: 38 | line = line_number + 1 39 | return f'(L{line:03d}) {message}' 40 | 41 | 42 | def get_categories_content(contents: List[str]) -> Tuple[Categories, CategoriesLineNumber]: 43 | 44 | categories = {} 45 | category_line_num = {} 46 | 47 | for line_num, line_content in enumerate(contents): 48 | 49 | if line_content.startswith(anchor): 50 | category = line_content.split(anchor)[1].strip() 51 | categories[category] = [] 52 | category_line_num[category] = line_num 53 | continue 54 | 55 | if not line_content.startswith('|') or line_content.startswith('|---'): 56 | continue 57 | 58 | raw_title = [ 59 | raw_content.strip() for raw_content in line_content.split('|')[1:-1] 60 | ][0] 61 | 62 | title_match = link_re.match(raw_title) 63 | if title_match: 64 | title = title_match.group(1).upper() 65 | categories[category].append(title) 66 | 67 | return (categories, category_line_num) 68 | 69 | 70 | def check_alphabetical_order(lines: List[str]) -> List[str]: 71 | 72 | err_msgs = [] 73 | 74 | categories, category_line_num = get_categories_content(contents=lines) 75 | 76 | for category, api_list in categories.items(): 77 | if sorted(api_list) != api_list: 78 | err_msg = error_message( 79 | category_line_num[category], 80 | f'{category} category is not alphabetical order' 81 | ) 82 | err_msgs.append(err_msg) 83 | 84 | return err_msgs 85 | 86 | 87 | def check_title(line_num: int, raw_title: str) -> List[str]: 88 | 89 | err_msgs = [] 90 | 91 | title_match = link_re.match(raw_title) 92 | 93 | # url should be wrapped in "[TITLE](LINK)" Markdown syntax 94 | if not title_match: 95 | err_msg = error_message(line_num, 'Title syntax should be "[TITLE](LINK)"') 96 | err_msgs.append(err_msg) 97 | else: 98 | # do not allow "... API" in the entry title 99 | title = title_match.group(1) 100 | if title.upper().endswith(' API'): 101 | err_msg = error_message(line_num, 'Title should not end with "... API". Every entry is an API here!') 102 | err_msgs.append(err_msg) 103 | 104 | return err_msgs 105 | 106 | 107 | def check_description(line_num: int, description: str) -> List[str]: 108 | 109 | err_msgs = [] 110 | 111 | first_char = description[0] 112 | if first_char.upper() != first_char: 113 | err_msg = error_message(line_num, 'first character of description is not capitalized') 114 | err_msgs.append(err_msg) 115 | 116 | last_char = description[-1] 117 | if last_char in punctuation: 118 | err_msg = error_message(line_num, f'description should not end with {last_char}') 119 | err_msgs.append(err_msg) 120 | 121 | desc_length = len(description) 122 | if desc_length > max_description_length: 123 | err_msg = error_message(line_num, f'description should not exceed {max_description_length} characters (currently {desc_length})') 124 | err_msgs.append(err_msg) 125 | 126 | return err_msgs 127 | 128 | 129 | def check_auth(line_num: int, auth: str) -> List[str]: 130 | 131 | err_msgs = [] 132 | 133 | backtick = '`' 134 | if auth != 'No' and (not auth.startswith(backtick) or not auth.endswith(backtick)): 135 | err_msg = error_message(line_num, 'auth value is not enclosed with `backticks`') 136 | err_msgs.append(err_msg) 137 | 138 | if auth.replace(backtick, '') not in auth_keys: 139 | err_msg = error_message(line_num, f'{auth} is not a valid Auth option') 140 | err_msgs.append(err_msg) 141 | 142 | return err_msgs 143 | 144 | 145 | def check_https(line_num: int, https: str) -> List[str]: 146 | 147 | err_msgs = [] 148 | 149 | if https not in https_keys: 150 | err_msg = error_message(line_num, f'{https} is not a valid HTTPS option') 151 | err_msgs.append(err_msg) 152 | 153 | return err_msgs 154 | 155 | 156 | def check_cors(line_num: int, cors: str) -> List[str]: 157 | 158 | err_msgs = [] 159 | 160 | if cors not in cors_keys: 161 | err_msg = error_message(line_num, f'{cors} is not a valid CORS option') 162 | err_msgs.append(err_msg) 163 | 164 | return err_msgs 165 | 166 | 167 | def check_entry(line_num: int, segments: List[str]) -> List[str]: 168 | 169 | raw_title = segments[index_title] 170 | description = segments[index_desc] 171 | auth = segments[index_auth] 172 | https = segments[index_https] 173 | cors = segments[index_cors] 174 | 175 | title_err_msgs = check_title(line_num, raw_title) 176 | desc_err_msgs = check_description(line_num, description) 177 | auth_err_msgs = check_auth(line_num, auth) 178 | https_err_msgs = check_https(line_num, https) 179 | cors_err_msgs = check_cors(line_num, cors) 180 | 181 | err_msgs = [ 182 | *title_err_msgs, 183 | *desc_err_msgs, 184 | *auth_err_msgs, 185 | *https_err_msgs, 186 | *cors_err_msgs 187 | ] 188 | 189 | return err_msgs 190 | 191 | 192 | def check_file_format(lines: List[str]) -> List[str]: 193 | 194 | err_msgs = [] 195 | category_title_in_index = [] 196 | 197 | alphabetical_err_msgs = check_alphabetical_order(lines) 198 | err_msgs.extend(alphabetical_err_msgs) 199 | 200 | num_in_category = min_entries_per_category + 1 201 | category = '' 202 | category_line = 0 203 | 204 | for line_num, line_content in enumerate(lines): 205 | 206 | category_title_match = category_title_in_index_re.match(line_content) 207 | if category_title_match: 208 | category_title_in_index.append(category_title_match.group(1)) 209 | 210 | # check each category for the minimum number of entries 211 | if line_content.startswith(anchor): 212 | category_match = anchor_re.match(line_content) 213 | if category_match: 214 | if category_match.group(1) not in category_title_in_index: 215 | err_msg = error_message(line_num, f'category header ({category_match.group(1)}) not added to Index section') 216 | err_msgs.append(err_msg) 217 | else: 218 | err_msg = error_message(line_num, 'category header is not formatted correctly') 219 | err_msgs.append(err_msg) 220 | 221 | if num_in_category < min_entries_per_category: 222 | err_msg = error_message(category_line, f'{category} category does not have the minimum {min_entries_per_category} entries (only has {num_in_category})') 223 | err_msgs.append(err_msg) 224 | 225 | category = line_content.split(' ')[1] 226 | category_line = line_num 227 | num_in_category = 0 228 | continue 229 | 230 | # skips lines that we do not care about 231 | if not line_content.startswith('|') or line_content.startswith('|---'): 232 | continue 233 | 234 | num_in_category += 1 235 | segments = line_content.split('|')[1:-1] 236 | if len(segments) < num_segments: 237 | err_msg = error_message(line_num, f'entry does not have all the required columns (have {len(segments)}, need {num_segments})') 238 | err_msgs.append(err_msg) 239 | continue 240 | 241 | for segment in segments: 242 | # every line segment should start and end with exactly 1 space 243 | if len(segment) - len(segment.lstrip()) != 1 or len(segment) - len(segment.rstrip()) != 1: 244 | err_msg = error_message(line_num, 'each segment must start and end with exactly 1 space') 245 | err_msgs.append(err_msg) 246 | 247 | segments = [segment.strip() for segment in segments] 248 | entry_err_msgs = check_entry(line_num, segments) 249 | err_msgs.extend(entry_err_msgs) 250 | 251 | return err_msgs 252 | 253 | 254 | def main(filename: str) -> None: 255 | 256 | with open(filename, mode='r', encoding='utf-8') as file: 257 | lines = list(line.rstrip() for line in file) 258 | 259 | file_format_err_msgs = check_file_format(lines) 260 | 261 | if file_format_err_msgs: 262 | for err_msg in file_format_err_msgs: 263 | print(err_msg) 264 | sys.exit(1) 265 | 266 | 267 | if __name__ == '__main__': 268 | 269 | num_args = len(sys.argv) 270 | 271 | if num_args < 2: 272 | print('No .md file passed (file should contain Markdown table syntax)') 273 | sys.exit(1) 274 | 275 | filename = sys.argv[1] 276 | 277 | main(filename) 278 | -------------------------------------------------------------------------------- /scripts/validate/links.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import re 4 | import sys 5 | import random 6 | from typing import List, Tuple 7 | 8 | import requests 9 | from requests.models import Response 10 | 11 | 12 | def find_links_in_text(text: str) -> List[str]: 13 | """Find links in a text and return a list of URLs.""" 14 | 15 | link_pattern = re.compile(r'((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))') 16 | 17 | raw_links = re.findall(link_pattern, text) 18 | 19 | links = [ 20 | str(raw_link[0]) for raw_link in raw_links 21 | ] 22 | 23 | return links 24 | 25 | 26 | def find_links_in_file(filename: str) -> List[str]: 27 | """Find links in a file and return a list of URLs from text file.""" 28 | 29 | with open(filename, mode='r', encoding='utf-8') as file: 30 | readme = file.read() 31 | index_section = readme.find('## Index') 32 | if index_section == -1: 33 | index_section = 0 34 | content = readme[index_section:] 35 | 36 | links = find_links_in_text(content) 37 | 38 | return links 39 | 40 | 41 | def check_duplicate_links(links: List[str]) -> Tuple[bool, List]: 42 | """Check for duplicated links. 43 | 44 | Returns a tuple with True or False and duplicate list. 45 | """ 46 | 47 | seen = {} 48 | duplicates = [] 49 | has_duplicate = False 50 | 51 | for link in links: 52 | link = link.rstrip('/') 53 | if link not in seen: 54 | seen[link] = 1 55 | else: 56 | if seen[link] == 1: 57 | duplicates.append(link) 58 | 59 | if duplicates: 60 | has_duplicate = True 61 | 62 | return (has_duplicate, duplicates) 63 | 64 | 65 | def fake_user_agent() -> str: 66 | """Faking user agent as some hosting services block not-whitelisted UA.""" 67 | 68 | user_agents = [ 69 | 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36', 70 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko)', 71 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36', 72 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36', 73 | ] 74 | 75 | return random.choice(user_agents) 76 | 77 | 78 | def get_host_from_link(link: str) -> str: 79 | 80 | host = link.split('://', 1)[1] if '://' in link else link 81 | 82 | # Remove routes, arguments and anchors 83 | if '/' in host: 84 | host = host.split('/', 1)[0] 85 | 86 | elif '?' in host: 87 | host = host.split('?', 1)[0] 88 | 89 | elif '#' in host: 90 | host = host.split('#', 1)[0] 91 | 92 | return host 93 | 94 | 95 | def has_cloudflare_protection(resp: Response) -> bool: 96 | """Checks if there is any cloudflare protection in the response. 97 | 98 | Cloudflare implements multiple network protections on a given link, 99 | this script tries to detect if any of them exist in the response from request. 100 | 101 | Common protections have the following HTTP code as a response: 102 | - 403: When host header is missing or incorrect (and more) 103 | - 503: When DDOS protection exists 104 | 105 | See more about it at: 106 | - https://support.cloudflare.com/hc/en-us/articles/115003014512-4xx-Client-Error 107 | - https://support.cloudflare.com/hc/en-us/articles/115003011431-Troubleshooting-Cloudflare-5XX-errors 108 | - https://www.cloudflare.com/ddos/ 109 | - https://superuser.com/a/888526 110 | 111 | Discussions in issues and pull requests: 112 | - https://github.com/public-apis/public-apis/pull/2409 113 | - https://github.com/public-apis/public-apis/issues/2960 114 | """ 115 | 116 | code = resp.status_code 117 | server = resp.headers.get('Server') or resp.headers.get('server') 118 | cloudflare_flags = [ 119 | '403 Forbidden', 120 | 'cloudflare', 121 | 'Cloudflare', 122 | 'Security check', 123 | 'Please Wait... | Cloudflare', 124 | 'We are checking your browser...', 125 | 'Please stand by, while we are checking your browser...', 126 | 'Checking your browser before accessing', 127 | 'This process is automatic.', 128 | 'Your browser will redirect to your requested content shortly.', 129 | 'Please allow up to 5 seconds', 130 | 'DDoS protection by', 131 | 'Ray ID:', 132 | 'Cloudflare Ray ID:', 133 | '_cf_chl', 134 | '_cf_chl_opt', 135 | '__cf_chl_rt_tk', 136 | 'cf-spinner-please-wait', 137 | 'cf-spinner-redirecting' 138 | ] 139 | 140 | if code in [403, 503] and server == 'cloudflare': 141 | html = resp.text 142 | 143 | flags_found = [flag in html for flag in cloudflare_flags] 144 | any_flag_found = any(flags_found) 145 | 146 | if any_flag_found: 147 | return True 148 | 149 | return False 150 | 151 | 152 | def check_if_link_is_working(link: str) -> Tuple[bool, str]: 153 | """Checks if a link is working. 154 | 155 | If an error is identified when the request for the link occurs, 156 | the return will be a tuple with the first value True and the second 157 | value a string containing the error message. 158 | 159 | If no errors are identified, the return will be a tuple with the 160 | first value False and the second an empty string. 161 | """ 162 | 163 | has_error = False 164 | error_message = '' 165 | 166 | try: 167 | resp = requests.get(link, timeout=25, headers={ 168 | 'User-Agent': fake_user_agent(), 169 | 'host': get_host_from_link(link) 170 | }) 171 | 172 | code = resp.status_code 173 | 174 | if code >= 400 and not has_cloudflare_protection(resp): 175 | has_error = True 176 | error_message = f'ERR:CLT: {code} : {link}' 177 | 178 | except requests.exceptions.SSLError as error: 179 | has_error = True 180 | error_message = f'ERR:SSL: {error} : {link}' 181 | 182 | except requests.exceptions.ConnectionError as error: 183 | has_error = True 184 | error_message = f'ERR:CNT: {error} : {link}' 185 | 186 | except (TimeoutError, requests.exceptions.ConnectTimeout): 187 | has_error = True 188 | error_message = f'ERR:TMO: {link}' 189 | 190 | except requests.exceptions.TooManyRedirects as error: 191 | has_error = True 192 | error_message = f'ERR:TMR: {error} : {link}' 193 | 194 | except (Exception, requests.exceptions.RequestException) as error: 195 | has_error = True 196 | error_message = f'ERR:UKN: {error} : {link}' 197 | 198 | return (has_error, error_message) 199 | 200 | 201 | def check_if_list_of_links_are_working(list_of_links: List[str]) -> List[str]: 202 | error_messages = [] 203 | for link in list_of_links: 204 | has_error, error_message = check_if_link_is_working(link) 205 | 206 | if has_error: 207 | error_messages.append(error_message) 208 | 209 | return error_messages 210 | 211 | 212 | def start_duplicate_links_checker(links: List[str]) -> None: 213 | 214 | print('Checking for duplicate links...') 215 | 216 | has_duplicate_link, duplicates_links = check_duplicate_links(links) 217 | 218 | if has_duplicate_link: 219 | print(f'Found duplicate links:') 220 | 221 | for duplicate_link in duplicates_links: 222 | print(duplicate_link) 223 | 224 | sys.exit(1) 225 | else: 226 | print('No duplicate links.') 227 | 228 | 229 | def start_links_working_checker(links: List[str]) -> None: 230 | 231 | print(f'Checking if {len(links)} links are working...') 232 | 233 | errors = check_if_list_of_links_are_working(links) 234 | if errors: 235 | 236 | num_errors = len(errors) 237 | print(f'Apparently {num_errors} links are not working properly. See in:') 238 | 239 | for error_message in errors: 240 | print(error_message) 241 | 242 | sys.exit(1) 243 | 244 | 245 | def main(filename: str, only_duplicate_links_checker: bool) -> None: 246 | 247 | links = find_links_in_file(filename) 248 | 249 | start_duplicate_links_checker(links) 250 | 251 | if not only_duplicate_links_checker: 252 | start_links_working_checker(links) 253 | 254 | 255 | if __name__ == '__main__': 256 | num_args = len(sys.argv) 257 | only_duplicate_links_checker = False 258 | 259 | if num_args < 2: 260 | print('No .md file passed') 261 | sys.exit(1) 262 | elif num_args == 3: 263 | third_arg = sys.argv[2].lower() 264 | 265 | if third_arg == '-odlc' or third_arg == '--only_duplicate_links_checker': 266 | only_duplicate_links_checker = True 267 | else: 268 | print(f'Third invalid argument. Usage: python {__file__} [-odlc | --only_duplicate_links_checker]') 269 | sys.exit(1) 270 | 271 | filename = sys.argv[1] 272 | 273 | main(filename, only_duplicate_links_checker) 274 | --------------------------------------------------------------------------------