├── .github └── workflows │ ├── linting.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG ├── LICENSE ├── MANIFEST.in ├── README.rst ├── examples ├── linkcheck.jpg └── linklists.py ├── linkcheck.jpg ├── linkcheck ├── __init__.py ├── admin_blocks.py ├── apps.py ├── build_meta.py ├── cron.py ├── dashboard.py ├── filebrowser.py ├── linkcheck_settings.py ├── listeners.py ├── locale │ ├── de │ │ └── LC_MESSAGES │ │ │ └── django.po │ └── fr │ │ └── LC_MESSAGES │ │ └── django.po ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ ├── checkexternal.py │ │ ├── checkinternal.py │ │ ├── checklinks.py │ │ ├── findlinks.py │ │ ├── linkcheck_suggest_config.py │ │ └── unignore_links.py ├── migrations │ ├── 0001_initial.py │ ├── 0002_url_redirect_to.py │ ├── 0003_redirect_to_as_textfield.py │ ├── 0004_remove_url_still_exists.py │ ├── 0005_default_big_auto_field.py │ ├── 0006_url_add_status_code.py │ ├── 0007_url_add_redirect_status_code.py │ ├── 0008_url_add_anchor_status.py │ ├── 0009_url_add_ssl_status.py │ ├── 0010_url_add_error_message.py │ ├── 0011_link_add_content_object_index.py │ └── __init__.py ├── models.py ├── templates │ └── linkcheck │ │ ├── base_linkcheck.html │ │ ├── paginator.html │ │ └── report.html ├── templatetags │ ├── __init__.py │ └── linkcheck_model_tags.py ├── tests │ ├── __init__.py │ ├── media │ │ └── found │ ├── sampleapp │ │ ├── __init__.py │ │ ├── fixture.json │ │ ├── linklists.py │ │ ├── models.py │ │ └── views.py │ ├── test_linkcheck.py │ └── urls.py ├── urls.py ├── utils.py └── views.py ├── pyproject.toml └── runtests.py /.github/workflows/linting.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | flake8: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions/setup-python@v5 15 | - name: Install 
dependencies 16 | run: pip install flake8 17 | - name: Run flake8 18 | run: flake8 --max-line-length=120 linkcheck 19 | isort: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: actions/setup-python@v5 24 | - uses: jamescurtin/isort-action@master 25 | with: 26 | configuration: --multi-line=3 --trailing-comma --check-only 27 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | max-parallel: 5 14 | matrix: 15 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 16 | django-version: ['4.2', '5.0', '5.1'] 17 | exclude: 18 | - python-version: '3.9' 19 | django-version: '5.0' 20 | - python-version: '3.9' 21 | django-version: '5.1' 22 | - python-version: '3.13' 23 | django-version: '4.2' 24 | - python-version: '3.13' 25 | django-version: '5.0' 26 | 27 | steps: 28 | - uses: actions/checkout@v4 29 | 30 | - name: Set up Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | 35 | - name: Install dependencies 36 | run: | 37 | python -m pip install --upgrade pip 38 | python -m pip install --upgrade django~=${{ matrix.django-version }}.0 39 | python -m pip install --upgrade requests 40 | python -m pip install --upgrade requests_mock 41 | 42 | - name: Run tests 43 | run: python runtests.py 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | .venv/ 11 | env/ 12 | 
build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | 57 | # PyBuilder 58 | target/ 59 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pycqa/flake8 3 | rev: 6.1.0 4 | hooks: 5 | - id: flake8 6 | args: [--max-line-length=120] 7 | - repo: https://github.com/PyCQA/isort 8 | rev: 5.12.0 9 | hooks: 10 | - id: isort 11 | args: [--multi-line=3, --trailing-comma] 12 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | Unreleased 2 | 3 | * Add index to Link (David Venhoff, #202) 4 | * Add support for Django 5.1 5 | * Add support for Python 3.13 6 | * Remove support for Django < 4.2 7 | * Remove support for Python 3.8 8 | 9 | 2.3.0 (2023-12-27) 10 | 11 | * Fix encoding of utf-8 domain names (Timo Brembeck, #190) 12 | * Move coverage view to management command (Timo Brembeck, #187) 13 | * Add new management command `linkcheck_suggest_config` 14 | * Delete coverage view 15 | * Improve formatting for `NameResolutionError` (Timo Brembeck, #192) 16 | * Fix internal redirect 
checker (Timo Ludwig, #180) 17 | * Fix SSL status of unreachable domains (Timo Ludwig, #184) 18 | * Fix URL message for internal server errors (Timo Ludwig, #182) 19 | * Add support for Django 4.2 and 5.0 20 | * Add support for Python 3.12 21 | * Remove support for Django 4.0 22 | * Remove support for Python 3.7 23 | 24 | 2.2.1 (2023-04-03) 25 | 26 | * Include the compiled translations in the released package (#177) 27 | 28 | 2.2.0 (2023-04-01) 29 | 30 | * Enable internationalization for URL status messages (Timo Ludwig, #125) 31 | * Enable re-checking after rate limit was hit (Timo Ludwig, #153) 32 | * Ignore raw `post_save` signal (Timo Ludwig, #106) 33 | * Retry with fallback user agent on forbidden response (Timo Ludwig, #159) 34 | * Also set `redirect_to` on internal redirects (Timo Ludwig, #163) 35 | * Add new fields to `Url` model: 36 | * `status_code`: The HTTP status code of the initial request 37 | * `redirect_status_code`: The HTTP status code of the final request 38 | * `anchor_status`: The validity of the HTML hash anchor 39 | * `ssl_status`: The validity of the SSL certificate 40 | * `error_message`: The error message if the request failed 41 | * Add new properties to `Url` model: 42 | * `anchor_message`: The human-readable meaning of the `anchor_status` 43 | * `ssl_message`: The human-readable meaning of the `ssl_status` 44 | * Add French translations. 
45 | 46 | 2.1.0 (2023-02-05) 47 | 48 | * Fix `SSL Error` for missing root certificates (Timo Ludwig, #157) 49 | * Fix `NotImplementedError`/`AssertionError` when checking 50 | video links with hash anchors (Timo Ludwig, #150) 51 | * Skip checking of hash anchors for non-html files 52 | * Avoid decorating the `report` view with `csrf_exempt` (#155) 53 | * recheck/ignore/unignore requests were using an obsolete `request.is_ajax` call 54 | (#147) 55 | 56 | 2.0.0 (2022-12-17) 57 | 58 | * Add German translations for filebrowser integration 59 | * Fix django-filebrowser integration (Timo Ludwig, #144) 60 | * Use `django.db.models.BigAutoField` as default auto field 61 | (Timo Ludwig, #137) 62 | * Add German translations for the templates 63 | * Fix `type` property for internal URLs (Timo Ludwig, #141) 64 | * Fix incorrect message when redirect has broken anchor 65 | (Timo Ludwig, #128) 66 | * Breaking change: Treat broken hash anchors as valid 67 | unless `LINKCHECK_TOLERATE_BROKEN_ANCHOR` is manually 68 | set to `False` (Timo Ludwig, #98) 69 | * Remove unused field `still_exists` from `Url` model 70 | * Delete outdated `Url` and `Link` objects when 71 | running `findlinks` command (Timo Ludwig, #101) 72 | * Avoid crash when unexpected error in signal listener occurs 73 | (Sven Seeberg, #117) 74 | * Ignore Urls longer than `MAX_URL_LENGTH` in signal listeners 75 | (Timo Ludwig, #115) 76 | * Verify SSL certificates (Timo Ludwig, #118) 77 | * Added support for Python 3.10/3.11 and Django 4.1. 78 | * Dropped support for Python 3.6 and Django < 3.2. 79 | 80 | 1.9.1 (2022-03-23) 81 | 82 | * Added `Linklist.filter_callable` optional hook to allow for more 83 | flexible link list filtering (Giuliano Mele). 84 | 85 | 1.9 (2021-12-23) 86 | 87 | * Added support for Django 3.2 and 4.0 and removed support for Django < 2.2. 88 | * Ignore raw `pre_save` signal (Timo Ludwig, #106). 
89 | 90 | 1.8.1 (2021-04-01) 91 | 92 | * The 1.8 release contained unwanted temporary stuff and was 93 | therefore a broken release. Many thanks to Stefan Borer for 94 | noticing that. 95 | 96 | 1.8 (2021-02-25) 97 | 98 | * Added explicit `listeners.register_listeners` and 99 | `listeners.unregister_listeners` functions. 100 | * Added `listeners.enable_listeners` and `listeners.disable_listeners` context 101 | managers. 102 | * Avoid crash when looking for anchors in response content. 103 | * Avoid possible failures when checking internal links depending on 104 | ALLOWED_HOSTS setting. 105 | * Confirmed compatibility with Django 3.1. 106 | * Dropped support for Python 3.4. 107 | 108 | 1.7 (2020-01-13) 109 | 110 | * Dropped support for Python 2 and Django < 1.11. 111 | * Added support for Django 3.0. 112 | * Made more usage of the requests library. 113 | 114 | 1.6 (2019-03-20) 115 | 116 | * Use requests library when getting 'certificate verify failed' errors. 117 | * Fixed compatibility issues with newer versions of Django. 118 | * Fixed pip installation issues with encoding errors (#87). 119 | 120 | 1.5 (2017-09-16) 121 | 122 | * Added support for `tel:` links. 123 | * For redirecting links, linkcheck now reports the status of the redirect 124 | target (#78). 125 | * Dropped South migrations. 126 | * 'Url.redirect_to' was migrated to a TextField to not limit its length (#75). 127 | * Fixed handling of the '--limit' argument of the 'checklinks' command (#73). 128 | * Fixed the task queue of links to check (#69). 129 | 130 | 1.4 (2017-01-13) 131 | 132 | * Dropped support for Django 1.6 and Django 1.7, the minimal Python version is 133 | now Python 2.7. Django 1.10 is also supported. 134 | * Listeners registration and post_delete signal are now happening in the app 135 | config ready() method. This means that the process can be customized by 136 | having custom AppConfig classes and referring to those classes in the 137 | INSTALLED_APPS setting. 
138 | * A new DISABLE_LISTENERS setting has been added to ease deactivation of 139 | listeners registration. 140 | * A task queue is now used to process link checking, so as to prevent exhaustion 141 | of available threads during massive updates. 142 | 143 | 1.3 (2016-06-05) 144 | 145 | * Django 1.9 compatibility added. 146 | * When checking internal links, redirects are not followed any longer. 147 | * Added support for the django-admin-tools dashboard, if present. 148 | * Fixed a bug where internal links were skipped based on the external interval 149 | setting. 150 | * Handle situations where content_type.model_class() returns None. 151 | * Allow extra field types to be added via settings. (Used for coverage view only). 152 | * Improve coverage suggested configs - include 'ignore_empty' settings plus a 153 | raw code view via /linkcheck/coverage?config=1. 154 | * Fix - correctly handle tags that are inside tags. 155 | * Fix - don't run pre_save if it's a new instance. 156 | 157 | 1.2 (2015-11-13) 158 | 159 | * Added migration folders (missing in the 1.1 package). Also added support for 160 | South migrations (compatibility). 161 | * When a link produces a 301 Moved Permanently redirection, the redirect target 162 | is stored in Url.redirect_to and displayed in the link report. 163 | * Better support for URLs containing non-ASCII characters. 164 | 165 | 1.1 (2015-06-03) 166 | 167 | * Minimal software requirements are now Python 2.6 / Django 1.6 (and South 1.0 if 168 | you still use Django 1.6). 169 | * Python 3 is supported. 170 | * Django 1.7 / 1.8 compatibility added. 171 | * notifications.py is now based on django-admin-blocks. 172 | * Linklist classes now support an ignore_empty list to ignore empty URLField values. 
173 | 174 | 1.0 175 | 176 | Changes: 177 | 178 | Bug fixes since 0.6. Please see commit log here: https://github.com/andybak/django-linkcheck/commits/master 179 | 180 | 0.6.0 181 | 182 | Changes: 183 | 184 | * Support ignoring (and unignoring) external broken links via buttons in the linkcheck report 185 | * 'Recheck' button in the linkcheck report 186 | * External links with anchors were being reported as broken because we switched to a HEAD request 187 | * One particular url caused a crash in urllib2 when doing a HEAD request. Implemented a workaround: catch the exception and run a normal GET 188 | * Inconsistent use of seconds in some places and minutes in others. Switch to minutes for all parameters. 189 | * Clean up CSS 190 | * Use normal links for navigating between report types instead of javascript+radio buttons 191 | * Removed some unused javascript 192 | * Fixed some issues with anchor links 193 | * Broken link notification count was counting urls rather than links 194 | * Make length of url field configurable for those not cursed with MySQL 195 | * Remove the pointless distinguishing images/documents/other in Url.type 196 | * Document settings properly 197 | * Remove unused pagination tag from template and thus dependency on django-pagination 198 | * All tests now pass ( because I commented out the one that didn't :-P ) 199 | 200 | 0.5.0 201 | 202 | Start this changelog 203 | Added some more comments throughout 204 | Fixed dependency on django-filebrowser by wrapping it in an exception check 205 | Handle get_absolute_url returning None 206 | Use HEAD requests for checking external URLs 207 | Handle HREF="#" correctly 208 | Cleaner display of hashtag links in reports 209 | Handle 'Bad Status Line' responses from remote servers. 210 | Don't spawn a thread if running from tests as this prevents the new thread from seeing the same database transaction as the parent thread 211 | Fix some tests from prior to the big refactor. 
nb Tests are still incomplete and many are broken :( 212 | document filebrowser dependency 213 | 214 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009-2010, Andy Baker and contributors 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following 12 | disclaimer in the documentation and/or other materials provided 13 | with the distribution. 14 | * Neither the name of the author nor the names of other 15 | contributors may be used to endorse or promote products derived 16 | from this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include CHANGELOG 3 | include README.rst 4 | include linkcheck/locale/*/LC_MESSAGES/django.mo 5 | exclude linkcheck/locale/*/LC_MESSAGES/django.po 6 | recursive-include linkcheck/templates/linkcheck * 7 | recursive-include linkcheck/tests/media * 8 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | django-linkcheck 3 | =================== 4 | 5 | .. image:: https://github.com/DjangoAdminHackers/django-linkcheck/workflows/Test/badge.svg 6 | :target: https://github.com/DjangoAdminHackers/django-linkcheck/actions 7 | :alt: GitHub Actions 8 | 9 | .. image:: https://img.shields.io/pypi/v/django-linkcheck.svg 10 | :alt: PyPI version 11 | :target: https://pypi.org/project/django-linkcheck/ 12 | 13 | A fairly flexible app that will analyze and report on links in any model that 14 | you register with it. 15 | 16 | .. image:: https://github.com/DjangoAdminHackers/django-linkcheck/raw/master/linkcheck.jpg 17 | 18 | Links can be bare (urls or image and file fields) or 19 | embedded in HTML (linkcheck handles the parsing). It's fairly easy to override 20 | methods of the Linkcheck object should you need to do anything more 21 | complicated (like generate URLs from slug fields etc). 22 | 23 | You should run its management command via cron or similar to check external 24 | links regularly to see if their status changes. All links are checked 25 | automatically when objects are saved. This is handled by signals. 26 | 27 | Minimal requirements 28 | -------------------- 29 | 30 | django-linkcheck requires Python 3.9 and Django 4.2. 31 | 32 | Basic usage 33 | ----------- 34 | 35 | #. Install app to somewhere on your Python path (e.g. 
``pip install 36 | django-linkcheck``). 37 | 38 | #. Add ``'linkcheck'`` to your ``settings.INSTALLED_APPS``. 39 | 40 | #. Add a file named ``linklists.py`` to every app (see an example in ``examples/linklists.py``) that either: 41 | 42 | #) has models that contain content (e.g. url/image fields, chunks of markup 43 | or anything that gets transformed into a IMG or HREF when displayed 44 | #) can be the target of a link - i.e. is addressed by a url - in this case 45 | make sure it has an instance method named 'get_absolute_url' 46 | 47 | *Hint:* You can create a sample config for your model with:: 48 | 49 | manage.py linkcheck_suggest_config --model sampleapp.SampleModel > sampleapp/linklists.py 50 | 51 | #. Run ``./manage.py migrate``. 52 | 53 | #. Add to your root url config:: 54 | 55 | path('admin/linkcheck/', include('linkcheck.urls')) 56 | 57 | #. View ``/admin/linkcheck/`` from your browser. 58 | 59 | We are aware that this documentation is on the brief side of things so any 60 | suggestions for elaboration or clarification would be gratefully accepted. 61 | 62 | Linklist classes 63 | ---------------- 64 | 65 | The following class attributes can be added to your ``Linklist`` subclasses to 66 | customize the extracted links: 67 | 68 | ``object_filter``: a dictionary which will be passed as a filter argument to 69 | the ``filter`` applied to the default queryset of the target class. This 70 | allows you to filter the objects from which the links will be extracted. 71 | (example: ``{'active': True}``) 72 | 73 | ``object_exclude``: a dictionary which will be passed as a filter argument to 74 | the ``exclude`` applied to the default queryset of the target class. As with 75 | ``object_filter``, this allows you to exclude objects from which the links 76 | will be extracted. 77 | 78 | ``html_fields``: a list of field names which will be searched for links. 79 | 80 | ``url_fields``: a list of ``URLField`` field names whose content will be 81 | considered as links. 
If the field content is empty and the field name is 82 | in ``ignore_empty``, the content is ignored. 83 | 84 | ``ignore_empty``: a list of fields from ``url_fields``. See the explanation 85 | above. (new in django-linkcheck 1.1) 86 | 87 | ``image_fields``: a list of ``ImageField`` field names whose content will be 88 | considered as links. Empty ``ImageField`` content is always ignored. 89 | 90 | ``filter_callable``: a callable which allows to pass a function as filter 91 | for your linklist class. It allows to apply more advanced filter operations. 92 | This function must be a class method and it should be passed the objects query 93 | set and return the filtered objects. 94 | Example usage in your linklists.py - only check latest versions:: 95 | 96 | @classmethod 97 | def filter_callable(cls, objects): 98 | latest = Model.objects.filter(id=OuterRef('id')).order_by('-version') 99 | return objects.filter(version=Subquery(latest.values('version')[:1])) 100 | 101 | Management commands 102 | ------------------- 103 | 104 | findlinks 105 | ~~~~~~~~~ 106 | 107 | This command goes through all registered fields and records the URLs it finds. 108 | This command does not validate anything. Typically run just after installing 109 | and configuring django-linkcheck. 110 | 111 | checklinks 112 | ~~~~~~~~~~ 113 | 114 | For each recorded URL, check and report the validity of the URL. All internal 115 | links are checked, but only external links that have not been checked during 116 | the last ``LINKCHECK_EXTERNAL_RECHECK_INTERVAL`` minutes are checked. This 117 | interval can be adapted per-invocation by using the ``--externalinterval`` 118 | (``-e``) command option (in minutes). 119 | 120 | You can also limit the maximum number of links to be checked by passing a number 121 | to the ``--limit`` (``--l``) command option. 
122 | 123 | linkcheck_suggest_config 124 | ~~~~~~~~~~~~~~~~~~~~~~~~ 125 | 126 | This command goes through all models and checks whether they contain fields that 127 | can potentially be checked by linkcheck. 128 | If they are not yet registered, a sample config is suggested. 129 | 130 | You can also pass the option ``--model`` to generate a sample config for the given model. 131 | 132 | Settings 133 | -------- 134 | 135 | LINKCHECK_DISABLE_LISTENERS 136 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 137 | 138 | A setting to totally disable linkcheck, typically when running tests. See also 139 | the context managers below. 140 | 141 | LINKCHECK_EXTERNAL_RECHECK_INTERVAL 142 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 143 | 144 | Default: 10080 (1 week in minutes) 145 | 146 | Will not recheck any external link that has been checked more recently than this value. 147 | 148 | LINKCHECK_EXTERNAL_REGEX_STRING 149 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 150 | 151 | Default: r'^https?://' 152 | 153 | A string applied as a regex to a URL to determine whether it's internal or external. 154 | 155 | LINKCHECK_MEDIA_PREFIX 156 | ~~~~~~~~~~~~~~~~~~~~~~ 157 | 158 | Default: '/media/' 159 | 160 | Currently linkcheck tests whether links to internal static media are correct by wrangling the URL to be a local filesystem path. 161 | 162 | It strips MEDIA_PREFIX off the internal link, concatenates the result onto settings.MEDIA_ROOT and tests that path using os.path.exists. 163 | 164 | This 'works for me' but it is probably going to break for other people's setups. Patches welcome. 165 | 166 | LINKCHECK_RESULTS_PER_PAGE 167 | ~~~~~~~~~~~~~~~~~~~~~~~~~~ 168 | 169 | Controls pagination. 170 | 171 | Pagination is slightly peculiar at the moment due to the way links are grouped by object. 172 | 173 | 174 | LINKCHECK_MAX_URL_LENGTH 175 | ~~~~~~~~~~~~~~~~~~~~~~~~ 176 | 177 | Default: 255 178 | 179 | The length of the URL field. 
Defaults to 255 for compatibility with MySQL (see http://docs.djangoproject.com/en/dev/ref/databases/#notes-on-specific-fields ) 180 | 181 | 182 | LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT 183 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 184 | 185 | Default: 10 186 | 187 | The timeout in seconds for each connection attempt. Sometimes it is useful to limit check time per connection in order to keep the total check time at bay. 188 | 189 | 190 | SITE_DOMAIN and LINKCHECK_SITE_DOMAINS 191 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 192 | 193 | Linkcheck tests external and internal links differently. Internal links use the Django test client whereas external links are tested using urllib2. 194 | 195 | Testing internal links as if they were external can cause errors in some circumstances so Linkcheck needs to know which external urls are to be treated as internal. 196 | 197 | Linkcheck looks for either of the settings above. It only uses SITE_DOMAIN if LINKCHECK_SITE_DOMAINS isn't present. 198 | 199 | 200 | SITE_DOMAIN = "mysite.com" 201 | 202 | would tell linkchecker to treat the following as internal links: 203 | 204 | mysite.com 205 | www.mysite.com 206 | test.mysite.com 207 | 208 | If you instead set LINKCHECK_SITE_DOMAINS to be a list or tuple then you can explicitly list the domains that should be treated as internal. 209 | 210 | 211 | LINKCHECK_TOLERATE_BROKEN_ANCHOR 212 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 213 | 214 | Default: ``True`` 215 | 216 | Whether links with broken hash anchors should be marked as valid. 217 | Disable this if you want links to anchors that are not contained in the link target's HTML source to be marked as invalid. 
218 | 219 | 220 | django-filebrowser integration 221 | ------------------------------ 222 | 223 | If django-filebrowser is present on your path then linkcheck will listen to the post-upload, delete and rename signals and update itself accordingly. 224 | 225 | 226 | Contributing 227 | ------------ 228 | 229 | You can install all requirements of the development setup with the extra ``dev``: 230 | 231 | .. code-block:: bash 232 | 233 | $ python3 -m venv .venv 234 | $ source .venv/bin/activate 235 | $ pip install -e .[dev] 236 | $ django-admin compilemessages --ignore=.venv # Optionally compile translation file 237 | 238 | If you want to make use of the flake8 and isort pre-commit hooks, enable them with: 239 | 240 | .. code-block:: bash 241 | 242 | $ pre-commit install 243 | 244 | Running tests 245 | ~~~~~~~~~~~~~ 246 | 247 | Tests can be run standalone by using the ``runtests.py`` script in linkcheck root: 248 | 249 | .. code-block:: bash 250 | 251 | $ python runtests.py 252 | 253 | If you want to run linkcheck tests in the context of your project, you should include ``'linkcheck.tests.sampleapp'`` in your ``INSTALLED_APPS`` setting. 254 | 255 | Linkcheck gives you two context managers to enable or disable listeners in your 256 | own tests. For example: 257 | 258 | .. code-block:: python3 259 | 260 | def test_something_without_listeners(self): 261 | with listeners.disable_listeners(): 262 | # Create/update here without linkcheck intervening. 263 | 264 | In the case you defined the ``LINKCHECK_DISABLE_LISTENERS`` setting, you can 265 | temporarily enable it by: 266 | 267 | .. code-block:: python3 268 | 269 | def test_something_with_listeners(self): 270 | with listeners.enable_listeners(): 271 | # Create/update here and see linkcheck activated. 272 | 273 | Translations 274 | ~~~~~~~~~~~~ 275 | 276 | At the moment this app is available in English, German, and French. 277 | If you want to contribute translations for ``LOCALE``, run: 278 | 279 | .. 
code-block:: bash 280 | 281 | django-admin makemessages --locale LOCALE 282 | 283 | and edit the corresponding file in ``linkcheck/locale/LOCALE/LC_MESSAGES/django.po``. 284 | 285 | Create new release 286 | ~~~~~~~~~~~~~~~~~~ 287 | 288 | 1. Bump version in `pyproject.toml <./pyproject.toml>`_ 289 | 2. Update `CHANGELOG <./CHANGELOG>`_ 290 | 3. Create release commit: ``git commit --message "Release vX.Y.Z"`` 291 | 4. Create git tag: ``git tag -a "X.Y.Z" -m "Release vX.Y.Z"`` 292 | 5. Push the commit and tag to the repository: ``git push && git push --tags`` 293 | 6. Build the source distribution: ``python -m build`` 294 | 7. Publish the package to PyPI: ``twine upload dist/django-linkcheck-X.Y.Z*`` 295 | -------------------------------------------------------------------------------- /examples/linkcheck.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoAdminHackers/django-linkcheck/bae5ac0e140521b7f3d7b26f91afc714cfa1439b/examples/linkcheck.jpg -------------------------------------------------------------------------------- /examples/linklists.py: -------------------------------------------------------------------------------- 1 | from cms.models import Page 2 | 3 | from linkcheck import Linklist 4 | 5 | 6 | class PageLinklist(Linklist): 7 | 8 | model = Page 9 | object_filter = {'active': True} 10 | html_fields = ['content', 'extra_content'] 11 | 12 | 13 | linklists = {'Pages': PageLinklist} 14 | -------------------------------------------------------------------------------- /linkcheck.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoAdminHackers/django-linkcheck/bae5ac0e140521b7f3d7b26f91afc714cfa1439b/linkcheck.jpg -------------------------------------------------------------------------------- /linkcheck/__init__.py: -------------------------------------------------------------------------------- 1 | import 
threading 2 | from html.parser import HTMLParser 3 | 4 | # A global lock, showing whether linkcheck is busy 5 | update_lock = threading.Lock() 6 | 7 | 8 | class Lister(HTMLParser): 9 | 10 | def reset(self): 11 | HTMLParser.reset(self) 12 | self.urls = [] 13 | 14 | 15 | class URLLister(Lister): 16 | 17 | def __init__(self): 18 | self.in_a = False 19 | self.text = '' 20 | self.url = '' 21 | HTMLParser.__init__(self) 22 | 23 | def handle_starttag(self, tag, attrs): 24 | if tag == 'a': 25 | href = [v for k, v in attrs if k == 'href'] 26 | if href: 27 | self.in_a = True 28 | self.url = href[0] 29 | elif tag == 'img' and self.in_a: 30 | src = [v for k, v in attrs if k == 'src'] 31 | if src: 32 | self.text += f' [image:{src[0]}] ' 33 | 34 | def handle_endtag(self, tag): 35 | if tag == 'a' and self.in_a: 36 | self.urls.append((self.text[:256], self.url)) 37 | self.in_a = False 38 | self.text = '' 39 | self.url = '' 40 | 41 | def handle_data(self, data): 42 | if self.in_a: 43 | self.text += data 44 | 45 | 46 | class ImageLister(Lister): 47 | 48 | def handle_starttag(self, tag, attrs): 49 | if tag == 'img': 50 | src = [v for k, v in attrs if k == 'src'] 51 | if src: 52 | self.urls.append(('', src[0])) 53 | 54 | 55 | class AnchorLister(HTMLParser): 56 | def __init__(self): 57 | self.names = [] 58 | HTMLParser.__init__(self) 59 | 60 | def reset(self): 61 | HTMLParser.reset(self) 62 | self.names = [] 63 | 64 | def handle_starttag(self, tag, attributes): 65 | name = [v for k, v in attributes if k == 'id'] 66 | if name: 67 | self.names.append(name[0]) 68 | if tag == 'a': 69 | name = [v for k, v in attributes if k == 'name'] 70 | if name: 71 | self.names.append(name[0]) 72 | 73 | 74 | def parse(obj, field, parser): 75 | html = getattr(obj, field) 76 | if html: 77 | parser.feed(html) 78 | parser.close() 79 | return parser.urls 80 | else: 81 | return [] 82 | 83 | 84 | def parse_urls(obj, field): 85 | parser = URLLister() 86 | return parse(obj, field, parser) 87 | 88 | 89 | def 
class Linklist:
    """Describe where the links of a model live.

    Subclasses set ``model`` and list the names of the fields holding HTML
    (``html_fields``), plain URLs (``url_fields``) and images
    (``image_fields``). Fields named in ``ignore_empty`` are skipped when
    they yield an empty URL.
    """

    html_fields = []
    url_fields = []
    ignore_empty = []
    image_fields = []

    # ``object_filter`` and ``object_exclude`` may be overridden with a
    # dictionary used as a Django lookup; ``filter_callable`` receives the
    # queryset and returns the one to use.
    # They only restrict which objects are scanned as link *sources*; they do
    # not affect whether an object is a valid link target.
    # Example: object_filter = {'active': True} only scans active objects.
    object_filter = None
    object_exclude = None
    filter_callable = None

    def __get(self, name, obj, default=None):
        """Return attribute ``name``, calling it with ``obj`` when callable."""
        missing = object()
        attr = getattr(self, name, missing)
        if attr is missing:
            return default
        return attr(obj) if callable(attr) else attr

    @staticmethod
    def extract_url_from_field(obj, field_name):
        """Return the URL held by ``field_name`` on ``obj`` (never ``None``)."""
        value = getattr(obj, field_name)
        try:
            # FileField and ImageField values expose a ``url`` property...
            resolved = value.url
        except ValueError:
            # ...which raises for empty fields.
            resolved = ''
        except AttributeError:
            # Anything else is assumed to be the URL itself.
            resolved = value
        return resolved if resolved else ''

    def get_urls_from_field_list(self, obj, field_list):
        """Return ``(field_name, '', url)`` for each field in ``field_list``."""
        collected = []
        for name in field_list:
            url = self.extract_url_from_field(obj, name)
            if name not in self.ignore_empty or url:
                collected.append((name, '', url))
        return collected

    def urls(self, obj):
        """Return ``(field, text, url)`` triples for the links of ``obj``."""
        found = []
        # HREFs inside the HTML fields come first...
        for field_name in self.html_fields:
            found.extend(
                (field_name, text, url)
                for text, url in parse_urls(obj, field_name)
            )
        # ...followed by the plain URL fields.
        found.extend(self.get_urls_from_field_list(obj, self.url_fields))
        return found

    def images(self, obj):
        """Return ``(field, text, url)`` triples for the images of ``obj``."""
        found = []
        # IMG tags inside the HTML fields come first...
        for field_name in self.html_fields:
            found.extend(
                (field_name, text, url)
                for text, url in parse_images(obj, field_name)
            )
        # ...followed by the image fields.
        found.extend(self.get_urls_from_field_list(obj, self.image_fields))
        return found

    @classmethod
    def objects(cls):
        """Return the objects of ``model`` that should be scanned for links."""
        queryset = cls.model.objects.all()
        if cls.object_filter:
            queryset = queryset.filter(**cls.object_filter).distinct()
        if cls.object_exclude:
            queryset = queryset.exclude(**cls.object_exclude).distinct()
        if cls.filter_callable:
            queryset = cls.filter_callable(queryset)
        return queryset

    def get_linklist(self, extra_filter=None):
        """Return one ``{'object', 'urls', 'images'}`` dict per scanned object.

        ``extra_filter`` is an optional lookup dictionary applied on top of
        the class-level filters.
        """
        candidates = self.objects()
        if extra_filter:
            candidates = candidates.filter(**extra_filter)
        return [
            {
                'object': obj,
                'urls': self.urls(obj),
                'images': self.images(obj),
            }
            for obj in candidates
        ]

    @classmethod
    def content_type(cls):
        """Return the ``ContentType`` of ``model``."""
        from django.contrib.contenttypes.models import ContentType
        return ContentType.objects.get_for_model(cls.model)
class BaseLinkcheckConfig(AppConfig):
    """App configuration that discovers the ``linklists`` modules of all apps."""

    name = 'linkcheck'
    verbose_name = "Linkcheck"

    default_auto_field = "django.db.models.BigAutoField"

    # Registry mapping each linklist key to its linklist class. It is a
    # class-level dict, so it is shared by every config instance.
    all_linklists = {}

    def ready(self):
        """Populate ``all_linklists`` once the app registry is ready."""
        self.build_linklists()

    def build_linklists(self):
        """Autodiscovery of linkLists

        Imports ``<app>.linklists`` for every installed app that has such a
        module, merges its ``linklists`` dict into ``all_linklists`` and
        attaches each linklist class to its model as ``_linklist``.

        Raises ``AlreadyRegistered`` when a key or a model is registered twice.
        """
        # ``import importlib`` alone does not guarantee that the ``util``
        # submodule has been loaded, so it is imported explicitly here.
        from importlib.util import find_spec

        for app in apps.get_app_configs():
            module_name = f"{app.name}.linklists"
            try:
                if not find_spec(module_name):
                    continue
            except ModuleNotFoundError:
                # Raised when the parent package itself cannot be imported.
                continue
            the_module = importlib.import_module(module_name)
            try:
                for k in the_module.linklists.keys():
                    if k in self.all_linklists.keys():
                        raise AlreadyRegistered(f'The key {k} is already registered in all_linklists')

                for link_list in the_module.linklists.values():
                    for link_list2 in self.all_linklists.values():
                        if link_list.model == link_list2.model:
                            raise AlreadyRegistered(f"The LinkList {link_list} is already registered in all_linklists")
                self.all_linklists.update(the_module.linklists)
            except AttributeError:
                # A module without a usable ``linklists`` mapping is skipped.
                pass
        # Add a reference to the linklist in the model. This change is for internal hash link,
        # But might also be useful elsewhere in the future
        for linklist in self.all_linklists.values():
            setattr(linklist.model, '_linklist', linklist)
class RunLinkCheckExternal(Job):
    """Scheduled job that runs ``check_links`` with internal checks disabled."""

    # Scheduling interval, expressed with django_cron's ``WEEK`` constant.
    run_every = WEEK

    def job(self):
        """Check links, limited and throttled by the linkcheck settings."""
        check_links(
            external_recheck_interval=EXTERNAL_RECHECK_INTERVAL,
            limit=MAX_CHECKS_PER_RUN,
            check_internal=False,
        )
def handle_upload(sender, path=None, **kwargs):
    """Mark broken links pointing to a freshly uploaded file as working.

    ``kwargs['file']`` must expose a ``url`` attribute. ``sender`` is handed
    to ``django.contrib.messages`` as its first argument (presumably the
    current request; confirm against the filebrowser signal).
    """
    # URLs always use forward slashes. ``os.path.join`` would insert
    # backslashes on Windows, so the POSIX flavour is used explicitly.
    import posixpath

    logger.debug('uploaded path %s with kwargs %r', path, kwargs)

    url = posixpath.join(get_relative_media_url(), kwargs['file'].url)
    url_qs = Url.objects.filter(url=url).filter(status=False)
    # Count before updating: the update removes the rows from this queryset.
    count = url_qs.count()
    if count:
        url_qs.update(status=True, message="Working document link")
        msg = ngettext(
            "Uploading {} has corrected {} broken link.",
            "Uploading {} has corrected {} broken links.",
            count,
        ).format(url, count)
        messages.success(sender, '{}: {} {}'.format(
            _('Please note'),
            msg,
            _('See the Link Checker for more details.')
        ))
def handle_delete(sender, path=None, **kwargs):
    """Mark working links pointing to a deleted file as broken.

    ``path`` is joined to the relative media URL and the filebrowser
    ``DIRECTORY`` to build the URL to look up. ``sender`` is handed to
    ``django.contrib.messages`` as its first argument (presumably the
    current request; confirm against the filebrowser signal).
    """
    # URLs always use forward slashes. ``os.path.join`` would insert
    # backslashes on Windows, so the POSIX flavour is used explicitly.
    import posixpath

    logger.debug('deleted path %s with kwargs %r', path, kwargs)

    url = posixpath.join(get_relative_media_url(), DIRECTORY, path)
    url_qs = Url.objects.filter(url=url).filter(status=True)
    # Count before updating: the update removes the rows from this queryset.
    count = url_qs.count()
    if count:
        url_qs.update(status=False, message="Missing Document")
        msg = ngettext(
            "Deleting {} has caused {} link to break.",
            "Deleting {} has caused {} links to break.",
            count,
        ).format(url, count)
        messages.warning(sender, '{}: {} {}'.format(
            _('Warning'),
            msg,
            _('Please use the Link Checker to fix them.')
        ))
You can add your own via the LINKCHECK_EXTRA_xxx_FIELD_CLASSES setting 13 | # Pull requests welcome 14 | 15 | try: 16 | from sorl.thumbnail import ImageField 17 | DEFAULT_IMAGE_FIELD_CLASSES.append(ImageField) 18 | except ImportError: 19 | pass 20 | 21 | try: 22 | from mcefield.custom_fields import MCEField 23 | DEFAULT_HTML_FIELD_CLASSES.append(MCEField) 24 | except ImportError: 25 | pass 26 | 27 | try: 28 | from select_url_field.fields import SelectURLField 29 | DEFAULT_URL_FIELD_CLASSES.append(SelectURLField) 30 | except ImportError: 31 | pass 32 | 33 | try: 34 | from filebrowser.fields import FileBrowseField 35 | DEFAULT_URL_FIELD_CLASSES.append(FileBrowseField) 36 | except ImportError: 37 | pass 38 | 39 | try: 40 | from browse_and_upload_field.fields import FileBrowseAndUploadField 41 | DEFAULT_URL_FIELD_CLASSES.append(FileBrowseAndUploadField) 42 | except ImportError: 43 | pass 44 | 45 | 46 | HTML_FIELD_CLASSES = getattr(settings, 'LINKCHECK_EXTRA_HTML_FIELD_CLASSES', []) + DEFAULT_HTML_FIELD_CLASSES 47 | IMAGE_FIELD_CLASSES = getattr(settings, 'LINKCHECK_EXTRA_IMAGE_FIELD_CLASSES', []) + DEFAULT_IMAGE_FIELD_CLASSES 48 | URL_FIELD_CLASSES = getattr(settings, 'LINKCHECK_EXTRA_URL_FIELD_CLASSES', []) + DEFAULT_URL_FIELD_CLASSES 49 | 50 | # Main (non-coverage related) settings 51 | 52 | EXTERNAL_RECHECK_INTERVAL = getattr(settings, 'LINKCHECK_EXTERNAL_RECHECK_INTERVAL', 10080) # 1 week 53 | EXTERNAL_REGEX_STRING = getattr(settings, 'LINKCHECK_EXTERNAL_REGEX_STRING', r'^https?://') 54 | LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT = getattr(settings, 'LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT', 10) 55 | MAX_CHECKS_PER_RUN = getattr(settings, 'LINKCHECK_MAX_CHECKS_PER_RUN', -1) 56 | MAX_URL_LENGTH = getattr(settings, 'LINKCHECK_MAX_URL_LENGTH', 255) 57 | MEDIA_PREFIX = getattr(settings, 'LINKCHECK_MEDIA_PREFIX', settings.MEDIA_URL) 58 | RESULTS_PER_PAGE = getattr(settings, 'LINKCHECK_RESULTS_PER_PAGE', 500) 59 | SITE_DOMAINS = getattr(settings, 'LINKCHECK_SITE_DOMAINS', []) 60 
def linkcheck_worker(block=True):
    """Run queued tasks until the queue reports ``Empty``.

    Each task is a dict with ``target``, ``args`` and ``kwargs`` entries.
    With ``block=True`` the call to ``get`` waits for new tasks, so the loop
    only ends in non-blocking mode, once the queue has been drained.
    ``worker_running`` is reset to ``False`` when the loop ends.
    """
    global worker_running
    # The loop is ended by the ``Empty`` exception below. The former
    # condition, ``tasks_queue.not_empty``, is a ``threading.Condition``
    # object and therefore always truthy.
    while True:
        try:
            task = tasks_queue.get(block=block)
        except Empty:
            break
        # An error in any task should not stop the worker from continuing with the queue
        try:
            task['target'](*task['args'], **task['kwargs'])
        except Exception as e:
            logger.exception(
                "%s while running %s with args=%r and kwargs=%r: %s",
                type(e).__name__,
                task['target'].__name__,
                task['args'],
                task['kwargs'],
                e
            )
        finally:
            # Every successful ``get`` is matched by exactly one ``task_done``.
            tasks_queue.task_done()
    worker_running = False
link/urls are created, checked 63 | existing link/urls are checked 64 | Removed links are deleted 65 | """ 66 | linklist_cls = sender._linklist 67 | 68 | def do_check_instance_links(sender, instance, wait=False): 69 | # On some installations, this wait time might be enough for the 70 | # thread transaction to account for the object change (GH #41). 71 | # A candidate for the future post_commit signal. 72 | 73 | global worker_running 74 | 75 | if wait: 76 | time.sleep(0.1) 77 | with update_lock: 78 | content_type = linklist_cls.content_type() 79 | new_links = [] 80 | old_links = Link.objects.filter(content_type=content_type, object_id=instance.pk) 81 | 82 | linklists = linklist_cls().get_linklist(extra_filter={'pk': instance.pk}) 83 | 84 | if not linklists: 85 | # This object is no longer watched by linkcheck according to object_filter 86 | links = [] 87 | else: 88 | linklist = linklists[0] 89 | links = linklist['urls']+linklist['images'] 90 | 91 | for link in links: 92 | # url structure = (field, link text, url) 93 | url = link[2] 94 | if url.startswith('#'): 95 | url = instance.get_absolute_url() + url 96 | 97 | if len(url) > MAX_URL_LENGTH: 98 | # We cannot handle url longer than MAX_URL_LENGTH at the moment 99 | logger.warning('URL exceeding max length will be skipped: %s', url) 100 | continue 101 | 102 | u, created = Url.objects.get_or_create(url=url) 103 | l, created = Link.objects.get_or_create( 104 | url=u, field=link[0], text=link[1], content_type=content_type, object_id=instance.pk 105 | ) 106 | new_links.append(l.id) 107 | u.check_url() 108 | 109 | gone_links = old_links.exclude(id__in=new_links) 110 | gone_links.delete() 111 | 112 | # Don't run in a separate thread if we are running tests 113 | if tests_running: 114 | do_check_instance_links(sender, instance) 115 | else: 116 | tasks_queue.put({ 117 | 'target': do_check_instance_links, 118 | 'args': (sender, instance, True), 119 | 'kwargs': {} 120 | }) 121 | start_worker() 122 | 123 | 124 | def 
def instance_pre_save(sender, instance, raw=False, **kwargs):
    """Flag URLs affected by a change of the instance's absolute URL.

    The URL of the stored version is remembered on the instance as
    ``__previous_url``. When it differs from the current one, URLs starting
    with either value are marked as broken so that they get rechecked.

    Unsaved instances, raw imports and instances whose primary key does not
    exist in the database yet are ignored.
    """
    if not instance.pk or raw:
        # Ignore unsaved instances or raw imports
        return
    current_url = instance.get_absolute_url()
    try:
        stored_instance = sender.objects.get(pk=instance.pk)
    except sender.DoesNotExist:
        # The pk was assigned before the first save (explicit pk, UUID
        # default, ...): there is no previous URL to compare with.
        return
    previous_url = stored_instance.get_absolute_url()
    setattr(instance, '__previous_url', previous_url)
    if previous_url == current_url:
        return
    if previous_url is not None:
        old_urls = Url.objects.filter(url__startswith=previous_url)
        old_urls.update(status=False, message='Broken internal link')
    if current_url is not None:
        new_urls = Url.objects.filter(url__startswith=current_url)
        # Mark these urls' status as False, so that post_save will check them
        new_urls.update(status=False, message='Should be checked now!')
def instance_pre_delete(sender, instance, **kwargs):
    """Mark URLs pointing to an instance as broken before it is deleted.

    The instance is tagged with ``linkcheck_deleting = True``. When it has a
    non-empty absolute URL, every URL starting with it that is not already
    broken gets ``status=False``.
    """
    instance.linkcheck_deleting = True
    deleted_url = instance.get_absolute_url()
    if deleted_url:
        # ``update()`` is a no-op on an empty queryset, so there is no need
        # to evaluate the queryset first just to test whether it has rows.
        Url.objects.filter(url__startswith=deleted_url).exclude(status=False).update(
            status=False, message='Broken internal link'
        )
@contextmanager
def disable_listeners(*args, **kwargs):
    """Context manager running its body with the linkcheck listeners disconnected.

    Positional and keyword arguments are accepted but not used.
    """
    unregister_listeners()
    try:
        yield
    finally:
        # Runs even when the body raises. The listeners are registered
        # unconditionally, whether or not they were connected beforehand.
        register_listeners()
15 | msgstr[1] "Das Hochladen von {} hat {} fehlerhafte Links korrigiert." 16 | 17 | #: filebrowser.py:48 filebrowser.py:103 18 | msgid "Please note" 19 | msgstr "Bitte beachten Sie" 20 | 21 | #: filebrowser.py:50 filebrowser.py:105 22 | msgid "See the Link Checker for more details." 23 | msgstr "Weitere Einzelheiten finden Sie im Link Checker." 24 | 25 | #: filebrowser.py:74 26 | msgid "Renaming {} has caused {} link to break." 27 | msgid_plural "Renaming {} has caused {} links to break." 28 | msgstr[0] "" 29 | "Das Umbenennen von {} hat dazu geführt, dass {} Link nicht mehr funktioniert." 30 | msgstr[1] "" 31 | "Das Umbenennen von {} hat dazu geführt, dass {} Links nicht mehr " 32 | "funktionieren." 33 | 34 | #: filebrowser.py:79 filebrowser.py:123 35 | msgid "Warning" 36 | msgstr "Warnung" 37 | 38 | #: filebrowser.py:81 filebrowser.py:125 39 | msgid "Please use the Link Checker to fix them." 40 | msgstr "Bitte verwenden Sie den Link Checker, um sie zu korrigieren." 41 | 42 | #: filebrowser.py:98 43 | msgid "Renaming {} has corrected {} broken link." 44 | msgid_plural "Renaming {} has corrected {} broken links." 45 | msgstr[0] "Das Umbenennen von {} hat {} fehlerhaften Link korrigiert." 46 | msgstr[1] "Das Umbenennen von {} hat {} fehlerhafte Links korrigiert." 47 | 48 | #: filebrowser.py:118 49 | msgid "Deleting {} has caused {} link to break." 50 | msgid_plural "Deleting {} has caused {} links to break." 51 | msgstr[0] "" 52 | "Das Löschen von {} hat dazu geführt, dass {} Link nicht mehr funktioniert." 53 | msgstr[1] "" 54 | "Das Löschen von {} hat dazu geführt, dass {} Links nicht mehr funktionieren." 
55 | 56 | #: models.py:118 57 | msgid "Working empty anchor" 58 | msgstr "Funktionierender leerer Anker" 59 | 60 | #: models.py:120 61 | msgid "Anchor could not be checked" 62 | msgstr "Anker konnte nicht geprüft werden" 63 | 64 | #: models.py:122 65 | msgid "Broken anchor" 66 | msgstr "Ungültiger Anker" 67 | 68 | #: models.py:123 69 | msgid "Working anchor" 70 | msgstr "Funktionierender Anker" 71 | 72 | #: models.py:130 73 | msgid "Insecure link" 74 | msgstr "Unsicherer Link" 75 | 76 | #: models.py:132 77 | msgid "SSL certificate could not be checked" 78 | msgstr "SSL-Zertifikat konnte nicht überprüft werden" 79 | 80 | #: models.py:134 81 | msgid "Broken SSL certificate" 82 | msgstr "Fehlerhaftes SSL-Zertifikat" 83 | 84 | #: models.py:135 85 | msgid "Valid SSL certificate" 86 | msgstr "Valides SSL-Zertifikat" 87 | 88 | #: models.py:140 89 | msgid "URL Not Yet Checked" 90 | msgstr "URL noch nicht geprüft" 91 | 92 | #: models.py:142 93 | msgid "Empty link" 94 | msgstr "Leerer Link" 95 | 96 | #: models.py:144 97 | msgid "Invalid URL" 98 | msgstr "Ungültige URL" 99 | 100 | #: models.py:146 101 | msgid "Email link" 102 | msgstr "Email-Link" 103 | 104 | #: models.py:146 models.py:148 models.py:150 105 | msgid "not automatically checked" 106 | msgstr "nicht automatisch geprüft" 107 | 108 | #: models.py:148 109 | msgid "Phone number link" 110 | msgstr "Telefonnummern-Link" 111 | 112 | #: models.py:150 113 | msgid "Anchor link" 114 | msgstr "Anker-Link" 115 | 116 | #: models.py:152 117 | msgid "Working file link" 118 | msgstr "Funktionierender Datei-Link" 119 | 120 | #: models.py:152 121 | msgid "Missing file" 122 | msgstr "Fehlende Datei" 123 | 124 | #: models.py:156 125 | msgid "Working external link" 126 | msgstr "Funktionierender externer Link" 127 | 128 | #: models.py:156 129 | msgid "Working internal link" 130 | msgstr "Funktionierender interner Link" 131 | 132 | #: models.py:160 133 | msgid "Working permanent redirect" 134 | msgstr "Funktionierende dauerhafte 
Weiterleitung" 135 | 136 | #: models.py:160 137 | msgid "Working temporary redirect" 138 | msgstr "Funktionierende temporäre Weiterleitung" 139 | 140 | #: models.py:162 141 | msgid "Broken permanent redirect" 142 | msgstr "Fehlerhafte dauerhafte Weiterleitung" 143 | 144 | #: models.py:162 145 | msgid "Broken temporary redirect" 146 | msgstr "Fehlerhafte temporäre Weiterleitung" 147 | 148 | #: models.py:163 149 | msgid "Broken external link" 150 | msgstr "Fehlerhafter externer Link" 151 | 152 | #: models.py:163 153 | msgid "Broken internal link" 154 | msgstr "Fehlerhafter interner Link" 155 | 156 | #: templates/linkcheck/base_linkcheck.html:5 157 | #: templates/linkcheck/base_linkcheck.html:11 158 | #: templates/linkcheck/base_linkcheck.html:17 159 | #: templates/linkcheck/coverage.html:14 160 | msgid "Link Checker" 161 | msgstr "" 162 | 163 | #: templates/linkcheck/base_linkcheck.html:10 164 | #: templates/linkcheck/coverage.html:13 165 | msgid "Home" 166 | msgstr "" 167 | 168 | #: templates/linkcheck/coverage.html:8 templates/linkcheck/coverage.html:15 169 | msgid "Coverage" 170 | msgstr "Abdeckung" 171 | 172 | #: templates/linkcheck/coverage.html:22 173 | msgid "Model" 174 | msgstr "Datenbank-Modell" 175 | 176 | #: templates/linkcheck/coverage.html:23 177 | msgid "Covered" 178 | msgstr "Überprüft" 179 | 180 | #: templates/linkcheck/coverage.html:24 181 | msgid "Suggested config" 182 | msgstr "Empfohlene Konfiguration" 183 | 184 | #: templates/linkcheck/coverage.html:30 185 | msgid "Yes,No" 186 | msgstr "Ja,Nein" 187 | 188 | #: templates/linkcheck/paginator.html:7 189 | msgid "First" 190 | msgstr "Erste" 191 | 192 | #: templates/linkcheck/paginator.html:11 templates/linkcheck/paginator.html:13 193 | msgid "Previous" 194 | msgstr "Vorherige" 195 | 196 | #: templates/linkcheck/paginator.html:17 197 | #, python-format 198 | msgid "Page %(current)s of %(max)s" 199 | msgstr "Seite %(current)s von %(max)s" 200 | 201 | #: templates/linkcheck/paginator.html:21 
templates/linkcheck/paginator.html:23 202 | msgid "Next" 203 | msgstr "Nächste" 204 | 205 | #: templates/linkcheck/paginator.html:27 templates/linkcheck/paginator.html:29 206 | msgid "Last" 207 | msgstr "Letzte" 208 | 209 | #: templates/linkcheck/report.html:125 210 | msgid "Show" 211 | msgstr "Anzeigen" 212 | 213 | #: templates/linkcheck/report.html:126 views.py:83 214 | msgid "Valid links" 215 | msgstr "Gültige Links" 216 | 217 | #: templates/linkcheck/report.html:127 views.py:92 218 | msgid "Broken links" 219 | msgstr "Ungültige Links" 220 | 221 | #: templates/linkcheck/report.html:128 views.py:86 222 | msgid "Untested links" 223 | msgstr "Ungetestete Links" 224 | 225 | #: templates/linkcheck/report.html:129 views.py:89 226 | msgid "Ignored links" 227 | msgstr "Ignorierte Links" 228 | 229 | #: templates/linkcheck/report.html:140 230 | #, python-format 231 | msgid "View %(content_type_name)s" 232 | msgstr "%(content_type_name)s anzeigen" 233 | 234 | #: templates/linkcheck/report.html:141 235 | #, python-format 236 | msgid "Edit %(content_type_name)s" 237 | msgstr "%(content_type_name)s bearbeiten" 238 | 239 | #: templates/linkcheck/report.html:143 240 | msgid "Destination" 241 | msgstr "Ziel" 242 | 243 | #: templates/linkcheck/report.html:144 244 | msgid "Linked Text" 245 | msgstr "Link-Text" 246 | 247 | #: templates/linkcheck/report.html:145 248 | msgid "Field to edit" 249 | msgstr "Zu bearbeitendes Feld" 250 | 251 | #: templates/linkcheck/report.html:146 252 | msgid "Status" 253 | msgstr "" 254 | 255 | #: templates/linkcheck/report.html:157 256 | msgid "Recheck" 257 | msgstr "Erneut prüfen" 258 | 259 | #: templates/linkcheck/report.html:164 260 | msgid "Ignore" 261 | msgstr "Ignorieren" 262 | 263 | #: templates/linkcheck/report.html:166 264 | msgid "Unignore" 265 | msgstr "Nicht ignorieren" 266 | 267 | #: templates/linkcheck/report.html:173 268 | msgid "Redirects to" 269 | msgstr "Leitet weiter zu" 270 | 271 | #~ msgid "Link to section on same page" 272 | #~ 
msgstr "Link zu Abschnitt auf derselben Seite" 273 | -------------------------------------------------------------------------------- /linkcheck/locale/fr/LC_MESSAGES/django.po: -------------------------------------------------------------------------------- 1 | # This file is distributed under the same license as the django-linkcheck package. 2 | # Paroz Claude , 2023 3 | # 4 | msgid "" 5 | msgstr "" 6 | "Project-Id-Version: django-linkcheck master\n" 7 | "Report-Msgid-Bugs-To: \n" 8 | "POT-Creation-Date: 2023-02-05 11:05+0100\n" 9 | "PO-Revision-Date: 2023-02-05 12:00+0100\n" 10 | "Last-Translator: Paroz Claude \n" 11 | "Language-Team: French\n" 12 | "Language: fr\n" 13 | "MIME-Version: 1.0\n" 14 | "Content-Type: text/plain; charset=UTF-8\n" 15 | "Content-Transfer-Encoding: 8bit\n" 16 | "Plural-Forms: nplurals=2; plural=(n > 1);\n" 17 | 18 | #: linkcheck/filebrowser.py:43 19 | msgid "Uploading {} has corrected {} broken link." 20 | msgid_plural "Uploading {} has corrected {} broken links." 21 | msgstr[0] "L’envoi de {} a corrigé {} lien brisé." 22 | msgstr[1] "L’envoi de {} a corrigé {} liens brisés." 23 | 24 | #: linkcheck/filebrowser.py:48 linkcheck/filebrowser.py:103 25 | msgid "Please note" 26 | msgstr "Prenez note" 27 | 28 | #: linkcheck/filebrowser.py:50 linkcheck/filebrowser.py:105 29 | msgid "See the Link Checker for more details." 30 | msgstr "Consultez le Contrôleur de liens pour plus de détails." 31 | 32 | #: linkcheck/filebrowser.py:74 33 | msgid "Renaming {} has caused {} link to break." 34 | msgid_plural "Renaming {} has caused {} links to break." 35 | msgstr[0] "Le renommage de {} a brisé {} lien." 36 | msgstr[1] "Le renommage de {} a brisé {} liens." 37 | 38 | #: linkcheck/filebrowser.py:79 linkcheck/filebrowser.py:123 39 | msgid "Warning" 40 | msgstr "Avertissement" 41 | 42 | #: linkcheck/filebrowser.py:81 linkcheck/filebrowser.py:125 43 | msgid "Please use the Link Checker to fix them." 
44 | msgstr "Veuillez utiliser le Contrôleur de liens pour les corriger." 45 | 46 | #: linkcheck/filebrowser.py:98 47 | msgid "Renaming {} has corrected {} broken link." 48 | msgid_plural "Renaming {} has corrected {} broken links." 49 | msgstr[0] "Le renommage de {} a corrigé {} lien brisé." 50 | msgstr[1] "Le renommage de {} a corrigé {} liens brisés." 51 | 52 | #: linkcheck/filebrowser.py:118 53 | msgid "Deleting {} has caused {} link to break." 54 | msgid_plural "Deleting {} has caused {} links to break." 55 | msgstr[0] "La suppression de {} a brisé {} lien." 56 | msgstr[1] "La suppression de {} a brisé {} liens." 57 | 58 | #: linkcheck/templates/linkcheck/base_linkcheck.html:5 59 | #: linkcheck/templates/linkcheck/base_linkcheck.html:11 60 | #: linkcheck/templates/linkcheck/base_linkcheck.html:17 61 | #: linkcheck/templates/linkcheck/coverage.html:14 62 | msgid "Link Checker" 63 | msgstr "Contrôleur de liens" 64 | 65 | #: linkcheck/templates/linkcheck/base_linkcheck.html:10 66 | #: linkcheck/templates/linkcheck/coverage.html:13 67 | msgid "Home" 68 | msgstr "Accueil" 69 | 70 | #: linkcheck/templates/linkcheck/coverage.html:8 71 | #: linkcheck/templates/linkcheck/coverage.html:15 72 | msgid "Coverage" 73 | msgstr "Couverture" 74 | 75 | #: linkcheck/templates/linkcheck/coverage.html:22 76 | msgid "Model" 77 | msgstr "Modèle" 78 | 79 | #: linkcheck/templates/linkcheck/coverage.html:23 80 | msgid "Covered" 81 | msgstr "Couvert" 82 | 83 | #: linkcheck/templates/linkcheck/coverage.html:24 84 | msgid "Suggested config" 85 | msgstr "Configuration suggérée" 86 | 87 | #: linkcheck/templates/linkcheck/coverage.html:30 88 | msgid "Yes,No" 89 | msgstr "Oui,Non" 90 | 91 | #: linkcheck/templates/linkcheck/paginator.html:7 92 | msgid "First" 93 | msgstr "Première" 94 | 95 | #: linkcheck/templates/linkcheck/paginator.html:11 96 | #: linkcheck/templates/linkcheck/paginator.html:13 97 | msgid "Previous" 98 | msgstr "Précédente" 99 | 100 | #: 
linkcheck/templates/linkcheck/paginator.html:17 101 | #, python-format 102 | msgid "Page %(current)s of %(max)s" 103 | msgstr "Page %(current)s sur %(max)s" 104 | 105 | #: linkcheck/templates/linkcheck/paginator.html:21 106 | #: linkcheck/templates/linkcheck/paginator.html:23 107 | msgid "Next" 108 | msgstr "Suivante" 109 | 110 | #: linkcheck/templates/linkcheck/paginator.html:27 111 | #: linkcheck/templates/linkcheck/paginator.html:29 112 | msgid "Last" 113 | msgstr "Dernière" 114 | 115 | #: linkcheck/templates/linkcheck/report.html:125 116 | msgid "Show" 117 | msgstr "Afficher" 118 | 119 | #: linkcheck/templates/linkcheck/report.html:126 linkcheck/views.py:83 120 | msgid "Valid links" 121 | msgstr "Liens valables" 122 | 123 | #: linkcheck/templates/linkcheck/report.html:127 linkcheck/views.py:92 124 | msgid "Broken links" 125 | msgstr "Liens brisés" 126 | 127 | #: linkcheck/templates/linkcheck/report.html:128 linkcheck/views.py:86 128 | msgid "Untested links" 129 | msgstr "Liens non testés" 130 | 131 | #: linkcheck/templates/linkcheck/report.html:129 linkcheck/views.py:89 132 | msgid "Ignored links" 133 | msgstr "Liens ignorés" 134 | 135 | #: linkcheck/templates/linkcheck/report.html:140 136 | #, python-format 137 | msgid "View %(content_type_name)s" 138 | msgstr "Voir %(content_type_name)s" 139 | 140 | #: linkcheck/templates/linkcheck/report.html:141 141 | #, python-format 142 | msgid "Edit %(content_type_name)s" 143 | msgstr "Modifier %(content_type_name)s" 144 | 145 | #: linkcheck/templates/linkcheck/report.html:143 146 | msgid "Destination" 147 | msgstr "Destination" 148 | 149 | #: linkcheck/templates/linkcheck/report.html:144 150 | msgid "Linked Text" 151 | msgstr "Texte de lien" 152 | 153 | #: linkcheck/templates/linkcheck/report.html:145 154 | msgid "Field to edit" 155 | msgstr "Champ à modifier" 156 | 157 | #: linkcheck/templates/linkcheck/report.html:146 158 | msgid "Status" 159 | msgstr "Statut" 160 | 161 | #: 
linkcheck/templates/linkcheck/report.html:157 162 | msgid "Recheck" 163 | msgstr "Recontrôler" 164 | 165 | #: linkcheck/templates/linkcheck/report.html:164 166 | msgid "Ignore" 167 | msgstr "Ignorer" 168 | 169 | #: linkcheck/templates/linkcheck/report.html:166 170 | msgid "Unignore" 171 | msgstr "Ne plus ignorer" 172 | 173 | #: linkcheck/templates/linkcheck/report.html:173 174 | msgid "Redirects to" 175 | msgstr "Redirige vers" 176 | -------------------------------------------------------------------------------- /linkcheck/management/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoAdminHackers/django-linkcheck/bae5ac0e140521b7f3d7b26f91afc714cfa1439b/linkcheck/management/__init__.py -------------------------------------------------------------------------------- /linkcheck/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoAdminHackers/django-linkcheck/bae5ac0e140521b7f3d7b26f91afc714cfa1439b/linkcheck/management/commands/__init__.py -------------------------------------------------------------------------------- /linkcheck/management/commands/checkexternal.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import BaseCommand 2 | 3 | from linkcheck.linkcheck_settings import ( 4 | EXTERNAL_RECHECK_INTERVAL, 5 | MAX_CHECKS_PER_RUN, 6 | ) 7 | from linkcheck.utils import check_links 8 | 9 | 10 | class Command(BaseCommand): 11 | 12 | help = 'Check and record external link status' 13 | 14 | def add_arguments(self, parser): 15 | parser.add_argument( 16 | '-e', '--externalinterval', type=int, 17 | help='Specifies the length of time in minutes until external links are rechecked. 
' 18 | 'Defaults to linkcheck_config setting' 19 | ) 20 | parser.add_argument( 21 | '-l', '--limit', type=int, 22 | help='Specifies the maximum number (int) of links to be checked. ' 23 | 'Defaults to linkcheck_config setting. Value less than 1 will check all' 24 | ) 25 | 26 | def handle(self, *args, **options): 27 | externalinterval = options['externalinterval'] or EXTERNAL_RECHECK_INTERVAL 28 | limit = options.get('limit', None) or MAX_CHECKS_PER_RUN 29 | 30 | self.stdout.write(f"Checking all external links that haven't been tested for {externalinterval} minutes.") 31 | if limit != -1: 32 | self.stdout.write(f"Will run maximum of {limit} checks this run.") 33 | 34 | check_count = check_links(external_recheck_interval=externalinterval, limit=limit, check_internal=False) 35 | return f"{check_count} external URLs have been checked." 36 | -------------------------------------------------------------------------------- /linkcheck/management/commands/checkinternal.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import BaseCommand 2 | 3 | from linkcheck.linkcheck_settings import MAX_CHECKS_PER_RUN 4 | from linkcheck.utils import check_links 5 | 6 | 7 | class Command(BaseCommand): 8 | 9 | help = 'Check and record internal link status' 10 | 11 | def add_arguments(self, parser): 12 | parser.add_argument( 13 | '-l', '--limit', type=int, 14 | help='Specifies the maximum number (int) of links to be checked. ' 15 | 'Defaults to linkcheck_config setting. Value less than 1 will check all') 16 | 17 | def handle(self, *args, **options): 18 | limit = options.get('limit', None) or MAX_CHECKS_PER_RUN 19 | 20 | self.stdout.write("Checking all internal links.") 21 | if limit != -1: 22 | self.stdout.write(f"Will run maximum of {limit} checks this run.") 23 | 24 | check_count = check_links(limit=limit, check_external=False) 25 | return f"{check_count} internal URLs have been checked." 
# linkcheck/management/commands/checklinks.py
from django.core.management.base import BaseCommand

from linkcheck.linkcheck_settings import (
    EXTERNAL_RECHECK_INTERVAL,
    MAX_CHECKS_PER_RUN,
)
from linkcheck.utils import check_links


class Command(BaseCommand):
    """Check internal links, then external links, and report how many were checked."""

    help = 'Check and record internal and external link status'

    def add_arguments(self, parser):
        """Register the ``--externalinterval`` and ``--limit`` command line options."""
        parser.add_argument(
            '-e', '--externalinterval', type=int,
            help='Specifies the length of time in minutes until external links are rechecked. '
                 'Defaults to linkcheck_config setting'
        )
        parser.add_argument(
            '-l', '--limit', type=int,
            help='Specifies the maximum number (int) of links to be checked. '
                 'Defaults to linkcheck_config setting. Value less than 1 will check all'
        )

    def handle(self, *args, **options):
        """
        Run the internal check followed by the external check.

        Returns the summary string that Django writes to stdout.
        """
        externalinterval = options['externalinterval'] or EXTERNAL_RECHECK_INTERVAL

        # Only fall back to the setting when the option was not given at all.
        # The previous ``options['limit'] or MAX_CHECKS_PER_RUN`` also discarded an
        # explicit ``--limit 0``, although the help text promises that any value
        # below 1 checks everything.
        limit = options['limit']
        if limit is None:
            limit = MAX_CHECKS_PER_RUN
        # Normalise every "no limit" value to the -1 sentinel so that the message
        # below is never printed with a zero or negative maximum.
        if limit is None or limit < 1:
            limit = -1

        self.stdout.write(f"Checking all links that haven't been tested for {externalinterval} minutes.")
        if limit != -1:
            self.stdout.write(f"Will run maximum of {limit} checks this run.")

        # NOTE(review): the same limit is handed to both passes, so the total for one
        # run can exceed ``limit`` -- confirm whether a shared budget is intended.
        internal_checked = check_links(limit=limit, check_external=False)
        external_checked = check_links(external_recheck_interval=externalinterval, limit=limit, check_internal=False)
        return f"{internal_checked} internal URLs and {external_checked} external URLs have been checked."
37 | -------------------------------------------------------------------------------- /linkcheck/management/commands/findlinks.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import BaseCommand 2 | 3 | from linkcheck.utils import find_all_links 4 | 5 | 6 | class Command(BaseCommand): 7 | 8 | help = ( 9 | "Goes through all models registered with Linkcheck, records any new links found " 10 | "and removes all outdated links" 11 | ) 12 | 13 | def handle(self, *args, **options): 14 | self.stdout.write("Updating all links...") 15 | return "\n".join( 16 | [ 17 | f"{model.capitalize()}: {', '.join([f'{count} {label}' for label, count in data.items()])}" 18 | for model, data in find_all_links().items() 19 | ] 20 | ) 21 | -------------------------------------------------------------------------------- /linkcheck/management/commands/linkcheck_suggest_config.py: -------------------------------------------------------------------------------- 1 | from django.apps import apps 2 | from django.core.management.base import BaseCommand, CommandError 3 | from django.utils.termcolors import make_style 4 | 5 | from linkcheck.utils import get_coverage_data, get_suggested_linklist_config 6 | 7 | 8 | class Command(BaseCommand): 9 | 10 | cyan = staticmethod(make_style(fg='cyan')) 11 | 12 | help = 'Go through all models and check whether they are registered with linkcheck' 13 | 14 | def add_arguments(self, parser): 15 | parser.add_argument( 16 | '--model', 17 | help="Generate the suggested config for this model", 18 | ) 19 | 20 | def handle(self, *args, model, **options): 21 | if model: 22 | try: 23 | model_class = apps.get_model(model) 24 | except Exception as e: 25 | raise CommandError( 26 | f'Model "{model}" does not exist.' 
27 | ) from e 28 | self.stdout.write(get_suggested_linklist_config(model_class)) 29 | else: 30 | covered, uncovered = get_coverage_data() 31 | self.stdout.write('All covered models:\n') 32 | self.stdout.write(', '.join(map(self.cyan, covered))) 33 | for model, suggested_config in uncovered: 34 | self.stdout.write(f'\nSuggested config for model {model}:') 35 | self.stdout.write(self.cyan(suggested_config)) 36 | -------------------------------------------------------------------------------- /linkcheck/management/commands/unignore_links.py: -------------------------------------------------------------------------------- 1 | from django.core.management.base import BaseCommand 2 | 3 | from linkcheck.utils import unignore 4 | 5 | 6 | class Command(BaseCommand): 7 | 8 | help = "Updates the `ignore` status of all links to `False`" 9 | 10 | def execute(self, *args, **options): 11 | print("Unignoring all links") 12 | unignore() 13 | -------------------------------------------------------------------------------- /linkcheck/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations, models 2 | 3 | 4 | class Migration(migrations.Migration): 5 | 6 | dependencies = [ 7 | ('contenttypes', '0001_initial'), 8 | ] 9 | 10 | operations = [ 11 | migrations.CreateModel( 12 | name='Link', 13 | fields=[ 14 | ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), 15 | ('object_id', models.PositiveIntegerField()), 16 | ('field', models.CharField(max_length=128)), 17 | ('text', models.CharField(default='', max_length=256)), 18 | ('ignore', models.BooleanField(default=False)), 19 | ('content_type', models.ForeignKey(to='contenttypes.ContentType', on_delete=models.CASCADE)), 20 | ], 21 | ), 22 | migrations.CreateModel( 23 | name='Url', 24 | fields=[ 25 | ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), 26 | ('url', 
models.CharField(unique=True, max_length=255)), 27 | ('last_checked', models.DateTimeField(null=True, blank=True)), 28 | ('status', models.BooleanField(null=True)), 29 | ('message', models.CharField(max_length=1024, null=True, blank=True)), 30 | ('still_exists', models.BooleanField(default=False)), 31 | ], 32 | ), 33 | migrations.AddField( 34 | model_name='link', 35 | name='url', 36 | field=models.ForeignKey(related_name='links', to='linkcheck.Url', on_delete=models.CASCADE), 37 | ), 38 | ] 39 | -------------------------------------------------------------------------------- /linkcheck/migrations/0002_url_redirect_to.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations, models 2 | 3 | 4 | class Migration(migrations.Migration): 5 | 6 | dependencies = [ 7 | ('linkcheck', '0001_initial'), 8 | ] 9 | 10 | operations = [ 11 | migrations.AddField( 12 | model_name='url', 13 | name='redirect_to', 14 | field=models.CharField(default='', max_length=255), 15 | ), 16 | ] 17 | -------------------------------------------------------------------------------- /linkcheck/migrations/0003_redirect_to_as_textfield.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations, models 2 | 3 | 4 | class Migration(migrations.Migration): 5 | 6 | dependencies = [ 7 | ('linkcheck', '0002_url_redirect_to'), 8 | ] 9 | 10 | operations = [ 11 | migrations.AlterField( 12 | model_name='url', 13 | name='redirect_to', 14 | field=models.TextField(blank=True), 15 | ), 16 | ] 17 | -------------------------------------------------------------------------------- /linkcheck/migrations/0004_remove_url_still_exists.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations 2 | 3 | 4 | class Migration(migrations.Migration): 5 | 6 | dependencies = [ 7 | ('linkcheck', '0003_redirect_to_as_textfield'), 8 | ] 9 | 10 | 
operations = [ 11 | migrations.RemoveField( 12 | model_name='url', 13 | name='still_exists', 14 | ), 15 | ] 16 | -------------------------------------------------------------------------------- /linkcheck/migrations/0005_default_big_auto_field.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations, models 2 | 3 | 4 | class Migration(migrations.Migration): 5 | 6 | dependencies = [ 7 | ('linkcheck', '0004_remove_url_still_exists'), 8 | ] 9 | 10 | operations = [ 11 | migrations.AlterField( 12 | model_name='link', 13 | name='id', 14 | field=models.BigAutoField( 15 | auto_created=True, primary_key=True, serialize=False, verbose_name='ID' 16 | ), 17 | ), 18 | migrations.AlterField( 19 | model_name='url', 20 | name='id', 21 | field=models.BigAutoField( 22 | auto_created=True, primary_key=True, serialize=False, verbose_name='ID' 23 | ), 24 | ), 25 | ] 26 | -------------------------------------------------------------------------------- /linkcheck/migrations/0006_url_add_status_code.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations, models 2 | 3 | from linkcheck.models import STATUS_CODE_CHOICES 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ("linkcheck", "0005_default_big_auto_field"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name="url", 15 | name="status_code", 16 | field=models.IntegerField( 17 | choices=STATUS_CODE_CHOICES, 18 | null=True, 19 | ), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /linkcheck/migrations/0007_url_add_redirect_status_code.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations, models 2 | 3 | from linkcheck.models import STATUS_CODE_CHOICES 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | 
("linkcheck", "0006_url_add_status_code"), 10 | ] 11 | 12 | operations = [ 13 | migrations.AddField( 14 | model_name="url", 15 | name="redirect_status_code", 16 | field=models.IntegerField( 17 | choices=STATUS_CODE_CHOICES, 18 | null=True, 19 | ), 20 | ), 21 | ] 22 | -------------------------------------------------------------------------------- /linkcheck/migrations/0008_url_add_anchor_status.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations, models 2 | 3 | 4 | class Migration(migrations.Migration): 5 | 6 | dependencies = [ 7 | ("linkcheck", "0007_url_add_redirect_status_code"), 8 | ] 9 | 10 | operations = [ 11 | migrations.AddField( 12 | model_name="url", 13 | name="anchor_status", 14 | field=models.BooleanField(null=True), 15 | ), 16 | ] 17 | -------------------------------------------------------------------------------- /linkcheck/migrations/0009_url_add_ssl_status.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations, models 2 | 3 | 4 | class Migration(migrations.Migration): 5 | 6 | dependencies = [ 7 | ("linkcheck", "0008_url_add_anchor_status"), 8 | ] 9 | 10 | operations = [ 11 | migrations.AddField( 12 | model_name="url", 13 | name="ssl_status", 14 | field=models.BooleanField(null=True), 15 | ), 16 | ] 17 | -------------------------------------------------------------------------------- /linkcheck/migrations/0010_url_add_error_message.py: -------------------------------------------------------------------------------- 1 | from django.db import migrations, models 2 | 3 | 4 | class Migration(migrations.Migration): 5 | 6 | dependencies = [ 7 | ('linkcheck', '0009_url_add_ssl_status'), 8 | ] 9 | 10 | operations = [ 11 | migrations.AddField( 12 | model_name='url', 13 | name='error_message', 14 | field=models.CharField(blank=True, default='', max_length=1024), 15 | ), 16 | ] 17 | 
-------------------------------------------------------------------------------- /linkcheck/migrations/0011_link_add_content_object_index.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 5.1.3 on 2024-11-25 18:00 2 | 3 | from django.db import migrations, models 4 | 5 | 6 | class Migration(migrations.Migration): 7 | 8 | dependencies = [ 9 | ('contenttypes', '0002_remove_content_type_name'), 10 | ('linkcheck', '0010_url_add_error_message'), 11 | ] 12 | 13 | operations = [ 14 | migrations.AddIndex( 15 | model_name='link', 16 | index=models.Index(fields=['content_type', 'object_id'], name='content_type_and_object_id'), 17 | ), 18 | ] 19 | -------------------------------------------------------------------------------- /linkcheck/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoAdminHackers/django-linkcheck/bae5ac0e140521b7f3d7b26f91afc714cfa1439b/linkcheck/migrations/__init__.py -------------------------------------------------------------------------------- /linkcheck/models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os.path 3 | import re 4 | from datetime import timedelta 5 | from http import HTTPStatus 6 | from urllib.parse import unquote, urlparse 7 | 8 | import requests 9 | from django.conf import settings 10 | from django.contrib.contenttypes.fields import GenericForeignKey 11 | from django.contrib.contenttypes.models import ContentType 12 | from django.db import models 13 | from django.test.client import Client 14 | from django.test.utils import modify_settings 15 | from django.utils.encoding import iri_to_uri 16 | from django.utils.functional import cached_property 17 | from django.utils.timezone import now 18 | from django.utils.translation import gettext as _ 19 | from requests.exceptions import ConnectionError, ReadTimeout 20 | 21 | 
try: 22 | from reversion.revisions import revision_context_manager 23 | USE_REVERSION = True 24 | except ImportError: 25 | USE_REVERSION = False 26 | 27 | from .linkcheck_settings import ( 28 | EXTERNAL_RECHECK_INTERVAL, 29 | EXTERNAL_REGEX_STRING, 30 | LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, 31 | MAX_URL_LENGTH, 32 | MEDIA_PREFIX, 33 | SITE_DOMAINS, 34 | TOLERATE_BROKEN_ANCHOR, 35 | ) 36 | 37 | logger = logging.getLogger(__name__) 38 | 39 | 40 | EXTERNAL_REGEX = re.compile(EXTERNAL_REGEX_STRING) 41 | 42 | 43 | def html_decode(s): 44 | """ 45 | Returns the ASCII decoded version of the given HTML string. This does 46 | NOT remove normal HTML tags like <p>.
47 | """ 48 | html_codes = ( 49 | ("'", '&#39;'), 50 | ('"', '&quot;'), 51 | ('>', '&gt;'), 52 | ('<', '&lt;'), 53 | ('&', '&amp;') 54 | ) 55 | for code in html_codes: 56 | s = s.replace(code[1], code[0]) 57 | return s 58 | 59 | 60 | STATUS_CODE_CHOICES = [(s.value, f'{s.value} {s.phrase}') for s in HTTPStatus] 61 | DEFAULT_USER_AGENT = f'{settings.SITE_DOMAIN} Linkchecker' 62 | FALLBACK_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0' 63 | 64 | 65 | class Url(models.Model): 66 | """ 67 | Represents a distinct URL found somewhere in the models registered with linkcheck 68 | A single Url can have multiple Links associated with it. 69 | """ 70 | # See http://www.boutell.com/newfaq/misc/urllength.html 71 | url = models.CharField(max_length=MAX_URL_LENGTH, unique=True) 72 | last_checked = models.DateTimeField(blank=True, null=True) 73 | anchor_status = models.BooleanField(null=True) 74 | ssl_status = models.BooleanField(null=True) 75 | status = models.BooleanField(null=True) 76 | status_code = models.IntegerField(choices=STATUS_CODE_CHOICES, null=True) 77 | redirect_status_code = models.IntegerField(choices=STATUS_CODE_CHOICES, null=True) 78 | message = models.CharField(max_length=1024, blank=True, null=True) 79 | error_message = models.CharField(max_length=1024, default='', blank=True) 80 | redirect_to = models.TextField(blank=True) 81 | 82 | @property 83 | def redirect_ok(self): 84 | return self.redirect_status_code < 300 if self.redirect_status_code else None 85 | 86 | @property 87 | def type(self): 88 | if self.external: 89 | return 'external' 90 | if self.url.startswith('mailto:'): 91 | return 'mailto' 92 | if self.url.startswith('tel:'): 93 | return 'phone' 94 | elif self.internal_url == '': 95 | return 'empty' 96 | elif self.internal_url.startswith('#'): 97 | return 'anchor' 98 | elif self.internal_url.startswith(MEDIA_PREFIX): 99 | return 'file' 100 | elif self.internal_url.startswith('/'): 101 | return 'internal' 102 | else: 103 | return 
'invalid' 104 | 105 | @property 106 | def has_anchor(self): 107 | return '#' in self.url 108 | 109 | @property 110 | def anchor(self): 111 | return self.url.split('#')[1] if self.has_anchor else None 112 | 113 | @property 114 | def anchor_message(self): 115 | if not self.has_anchor or not self.last_checked: 116 | return '' 117 | if self.anchor == '': 118 | return _('Working empty anchor') 119 | if self.anchor_status is None: 120 | return _('Anchor could not be checked') 121 | elif self.anchor_status is False: 122 | return _('Broken anchor') 123 | return _('Working anchor') 124 | 125 | @property 126 | def ssl_message(self): 127 | if self.internal: 128 | return '' 129 | if self.external_url.startswith('http://'): 130 | return _('Insecure link') 131 | if self.ssl_status is None: 132 | return _('SSL certificate could not be checked') 133 | elif self.ssl_status is False: 134 | return _('Broken SSL certificate') 135 | return _('Valid SSL certificate') 136 | 137 | @property 138 | def get_message(self): 139 | if not self.last_checked and self.status is None: 140 | return _('URL Not Yet Checked') 141 | elif self.type == 'empty': 142 | return _('Empty link') 143 | elif self.type == 'invalid': 144 | return _('Invalid URL') 145 | elif self.type == 'mailto': 146 | return '{} ({})'.format(_("Email link"), _("not automatically checked")) 147 | elif self.type == 'phone': 148 | return '{} ({})'.format(_("Phone number link"), _("not automatically checked")) 149 | elif self.type == 'anchor': 150 | return '{} ({})'.format(_("Anchor link"), _("not automatically checked")) 151 | elif self.type == 'file': 152 | return _('Working file link') if self.status else _('Missing file') 153 | elif not self.status_code: 154 | return self.error_message 155 | elif self.status_code < 300: 156 | return _('Working external link') if self.external else _('Working internal link') 157 | elif self.status_code < 400: 158 | permanent = self.status_code in [HTTPStatus.MOVED_PERMANENTLY, 
HTTPStatus.PERMANENT_REDIRECT] 159 | if self.redirect_ok: 160 | return _('Working permanent redirect') if permanent else _('Working temporary redirect') 161 | else: 162 | return _('Broken permanent redirect') if permanent else _('Broken temporary redirect') 163 | return _('Broken external link') if self.external else _('Broken internal link') 164 | 165 | @property 166 | def colour(self): 167 | if not self.last_checked: 168 | return 'blue' 169 | elif self.status is True: 170 | return 'green' 171 | else: 172 | return 'red' 173 | 174 | def __str__(self): 175 | return self.url 176 | 177 | def __repr__(self): 178 | return f"" 179 | 180 | @cached_property 181 | def internal_url(self): 182 | """ 183 | Remove current domain from URLs as the test client chokes when trying to test them during a page save 184 | They shouldn't generally exist but occasionally slip through 185 | If settings.SITE_DOMAINS isn't set then use settings.SITE_DOMAIN 186 | but also check for variants: example.org, www.example.org, test.example.org 187 | 188 | In case the URLs is external, `None` is returned. 
189 | """ 190 | 191 | # If the URL is not external, directly return it without processing 192 | if not EXTERNAL_REGEX.match(self.url): 193 | return self.url 194 | 195 | # May receive transformation before being checked 196 | prepared_url = self.url 197 | 198 | internal_exceptions = [] 199 | if SITE_DOMAINS: # If the setting is present 200 | internal_exceptions = SITE_DOMAINS 201 | elif getattr(settings, 'SITE_DOMAIN', None): # try using SITE_DOMAIN 202 | root_domain = settings.SITE_DOMAIN 203 | if root_domain.startswith('www.'): 204 | root_domain = root_domain[4:] 205 | elif root_domain.startswith('test.'): 206 | root_domain = root_domain[5:] 207 | internal_exceptions = [ 208 | f'{protocol}://{sub}{root_domain}' for sub in ['', 'www.', 'test.'] for protocol in ['http', 'https'] 209 | ] 210 | 211 | for ex in internal_exceptions: 212 | if ex and prepared_url.startswith(ex): 213 | prepared_url = prepared_url.replace(ex, '', 1) 214 | 215 | # If the URL is still external, return `None` 216 | if EXTERNAL_REGEX.match(prepared_url): 217 | return None 218 | 219 | logger.debug('Internal URL: %s', prepared_url) 220 | return prepared_url 221 | 222 | @cached_property 223 | def external_url(self): 224 | """ 225 | Prepare an external URL to be checked with requests: 226 | - Remove hash anchors 227 | - Ensure correct encoding 228 | """ 229 | # If the URL is internal, return `None` 230 | if self.internal: 231 | return None 232 | 233 | # Encode path and query and remove anchor fragment 234 | parsed = urlparse(self.url) 235 | external_url = parsed._replace( 236 | path=iri_to_uri(parsed.path), 237 | query=iri_to_uri(parsed.query), 238 | fragment="" 239 | ).geturl() 240 | 241 | logger.debug('External URL: %s', external_url) 242 | return external_url 243 | 244 | @property 245 | def internal(self): 246 | """ 247 | Check whether this URL is internal 248 | """ 249 | return self.internal_url is not None 250 | 251 | @property 252 | def external(self): 253 | """ 254 | Check whether this URL 
is external 255 | """ 256 | return not self.internal 257 | 258 | def reset_for_check(self): 259 | """ 260 | Reset all fields which depend on the status after checking a URL. 261 | This is done to ensure that results from the last check do not remain if the fields are not overwritten. 262 | """ 263 | # Reset all database fields 264 | self.anchor_status = None 265 | self.status = None 266 | self.status_code = None 267 | self.redirect_status_code = None 268 | self.ssl_status = None 269 | self.error_message = '' 270 | self.message = '' 271 | 272 | def check_url(self, check_internal=True, check_external=True, external_recheck_interval=EXTERNAL_RECHECK_INTERVAL): 273 | """ 274 | Return: 275 | * True if the link was checked and found valid 276 | * False if the link was checked and found invalid 277 | * None if the link was not checked 278 | """ 279 | 280 | if check_internal and self.internal: 281 | return self.check_internal() 282 | elif check_external and self.external: 283 | return self.check_external(external_recheck_interval) 284 | else: 285 | return None 286 | 287 | def check_internal(self): 288 | """ 289 | Check an internal URL 290 | """ 291 | if not self.internal: 292 | logger.info('URL %r is not internal', self) 293 | return None 294 | 295 | logger.debug('checking internal link: %s', self.internal_url) 296 | 297 | # Reset all fields in case they were already set 298 | self.reset_for_check() 299 | 300 | from linkcheck.utils import LinkCheckHandler 301 | 302 | if self.type == 'empty': 303 | self.status = False 304 | self.message = 'Empty link' 305 | 306 | elif self.type == 'mailto': 307 | self.message = 'Email link (not automatically checked)' 308 | 309 | elif self.type == 'phone': 310 | self.message = 'Phone number (not automatically checked)' 311 | 312 | elif self.type == 'anchor': 313 | self.message = 'Link to within the same page (not automatically checked)' 314 | 315 | elif self.type == 'file': 316 | # TODO: Assumes a direct mapping from media url to local 
filesystem path. 317 | # This will break quite easily for alternate setups 318 | path = settings.MEDIA_ROOT + unquote(self.internal_url)[len(MEDIA_PREFIX) - 1:] 319 | decoded_path = html_decode(path) 320 | self.status = os.path.exists(path) or os.path.exists(decoded_path) 321 | self.message = 'Working file link' if self.status else 'Missing Document' 322 | 323 | elif self.type == 'internal': 324 | old_prepend_setting = settings.PREPEND_WWW 325 | settings.PREPEND_WWW = False 326 | c = Client() 327 | c.handler = LinkCheckHandler() 328 | with modify_settings(ALLOWED_HOSTS={'append': 'testserver'}): 329 | response = c.get(self.internal_url) 330 | self.status_code = response.status_code 331 | if response.status_code < 300: 332 | self.message = 'Working internal link' 333 | self.status = True 334 | elif response.status_code < 400: 335 | initial_location = response.get('Location') 336 | redirect_type = "permanent" if response.status_code == 301 else "temporary" 337 | with modify_settings(ALLOWED_HOSTS={'append': 'testserver'}): 338 | response = c.get(self.internal_url, follow=True) 339 | if response.redirect_chain: 340 | self.redirect_to, _ = response.redirect_chain[-1] 341 | else: 342 | self.redirect_to = initial_location 343 | self.redirect_status_code = response.status_code 344 | self.status = response.status_code < 300 345 | redirect_result = "Working" if self.status else "Broken" 346 | self.message = f'{redirect_result} {redirect_type} redirect' 347 | else: 348 | self.status = False 349 | self.message = 'Broken internal link' 350 | 351 | # Check the anchor (if it exists) 352 | self.check_anchor(response.content) 353 | 354 | settings.PREPEND_WWW = old_prepend_setting 355 | else: 356 | self.status = False 357 | self.message = 'Invalid URL' 358 | 359 | if USE_REVERSION: 360 | # using test client will clear the RevisionContextManager stack. 
361 | revision_context_manager.start() 362 | 363 | self.last_checked = now() 364 | self.save() 365 | return self.status 366 | 367 | def check_external(self, external_recheck_interval=EXTERNAL_RECHECK_INTERVAL): 368 | """ 369 | Check an external URL 370 | """ 371 | if not self.external: 372 | logger.info('URL %r is not external', self) 373 | return None 374 | 375 | logger.info('checking external link: %s', self.url) 376 | external_recheck_datetime = now() - timedelta(minutes=external_recheck_interval) 377 | 378 | if self.last_checked and (self.last_checked > external_recheck_datetime): 379 | logger.debug( 380 | 'URL was last checked in the last %s minutes, so not checking it again', 381 | external_recheck_interval 382 | ) 383 | return self.status 384 | 385 | # Reset all fields in case they were already set 386 | self.reset_for_check() 387 | 388 | request_params = { 389 | 'allow_redirects': True, 390 | 'headers': {'User-Agent': DEFAULT_USER_AGENT}, 391 | 'timeout': LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT, 392 | 'verify': True, 393 | } 394 | try: 395 | try: 396 | # At first try a HEAD request 397 | fetch = requests.head 398 | response = fetch(self.external_url, **request_params) 399 | # If no exceptions occur, the SSL certificate is valid 400 | if self.external_url.startswith('https://'): 401 | self.ssl_status = True 402 | except ConnectionError as e: 403 | # This error could also be caused by an incomplete root certificate bundle, 404 | # so let's retry without verifying the certificate 405 | if "unable to get local issuer certificate" in str(e): 406 | request_params['verify'] = False 407 | response = fetch(self.external_url, **request_params) 408 | else: 409 | # Re-raise exception if it's definitely not a false positive 410 | raise 411 | # If HEAD is not allowed, let's try with GET 412 | if response.status_code in [HTTPStatus.BAD_REQUEST, HTTPStatus.METHOD_NOT_ALLOWED]: 413 | logger.debug('HEAD is not allowed, retry with GET') 414 | fetch = requests.get 415 | response 
= fetch(self.external_url, **request_params) 416 | # If access is denied, possibly the user agent is blocked 417 | if response.status_code == HTTPStatus.FORBIDDEN: 418 | logger.debug('Forbidden, retry with different user agent') 419 | request_params['headers'] = {'User-Agent': FALLBACK_USER_AGENT} 420 | response = fetch(self.external_url, **request_params) 421 | # If URL contains hash anchor and is a valid HTML document, let's repeat with GET 422 | elif ( 423 | self.has_anchor and 424 | response.ok and 425 | fetch == requests.head and 426 | 'text/html' in response.headers.get('content-type') 427 | ): 428 | logger.debug('Retrieve content for anchor check') 429 | fetch = requests.get 430 | response = fetch(self.external_url, **request_params) 431 | except ReadTimeout: 432 | self.status = False 433 | self.message = 'Other Error: The read operation timed out' 434 | self.error_message = 'The read operation timed out' 435 | except ConnectionError as e: 436 | self.status = False 437 | self.message = self.error_message = format_connection_error(e) 438 | if 'SSLError' in str(e): 439 | self.ssl_status = False 440 | except Exception as e: 441 | self.status = False 442 | self.message = f'Other Error: {e}' 443 | self.error_message = str(e) 444 | else: 445 | self.status = response.status_code < 300 446 | self.message = f"{response.status_code} {response.reason}" 447 | logger.debug('Response message: %s', self.message) 448 | 449 | # If initial response was a redirect, return the initial return code 450 | if response.history: 451 | logger.debug('Redirect history: %r', response.history) 452 | if response.ok: 453 | self.message = f'{response.history[0].status_code} {response.history[0].reason}' 454 | self.redirect_to = response.url 455 | self.redirect_status_code = response.status_code 456 | self.status_code = response.history[0].status_code 457 | else: 458 | self.status_code = response.status_code 459 | 460 | # Check the anchor (if it exists) 461 | if fetch == requests.get: 462 | 
self.check_anchor(response.text) 463 | if not request_params['verify']: 464 | self.message += ', SSL certificate could not be verified' 465 | 466 | # When a rate limit was hit or the server returned an internal error, do not update 467 | # the last_checked date so the result is not cached for EXTERNAL_RECHECK_INTERVAL minutes 468 | if ( 469 | not self.status_code or 470 | self.status_code != HTTPStatus.TOO_MANY_REQUESTS and 471 | self.status_code < 500 472 | ): 473 | self.last_checked = now() 474 | self.save() 475 | return self.status 476 | 477 | def check_anchor(self, html): 478 | from linkcheck import parse_anchors 479 | 480 | scope = "internal" if self.internal else "external" 481 | 482 | # Only check when the URL contains an anchor 483 | if self.has_anchor: 484 | # Empty fragment '#' is always valid 485 | if not self.anchor: 486 | self.anchor_status = True 487 | self.message += f', working {scope} hash anchor' 488 | else: 489 | try: 490 | names = parse_anchors(html) 491 | # Known possible errors include: AssertionError, NotImplementedError, UnicodeDecodeError 492 | except Exception as e: 493 | logger.debug( 494 | '%s while parsing anchors: %s', 495 | type(e).__name__, 496 | e 497 | ) 498 | self.message += ', failed to parse HTML for anchor' 499 | if not TOLERATE_BROKEN_ANCHOR: 500 | self.status = False 501 | else: 502 | if self.anchor in names: 503 | self.anchor_status = True 504 | self.message += f', working {scope} hash anchor' 505 | else: 506 | self.anchor_status = False 507 | self.message += f', broken {scope} hash anchor' 508 | if not TOLERATE_BROKEN_ANCHOR: 509 | self.status = False 510 | return self.anchor_status, self.anchor_message 511 | 512 | 513 | class Link(models.Model): 514 | """ 515 | A Link represents a specific URL in a specific field in a specific model 516 | It can be come from a single field such as a URLField or a field containing multiple links 517 | Such as a HTML or Rich Text field. 
518 | Multiple Links can reference a single Url 519 | """ 520 | content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE) 521 | object_id = models.PositiveIntegerField() 522 | content_object = GenericForeignKey('content_type', 'object_id') 523 | field = models.CharField(max_length=128) 524 | url = models.ForeignKey(Url, related_name="links", on_delete=models.CASCADE) 525 | text = models.CharField(max_length=256, default='') 526 | ignore = models.BooleanField(default=False) 527 | 528 | class Meta: 529 | indexes = [ 530 | models.Index(fields=["content_type", "object_id"], name="content_type_and_object_id"), 531 | ] 532 | 533 | @property 534 | def display_url(self): 535 | # when page /test/ has a anchor link to /test/#anchor, we display it 536 | # as "#anchor" rather than "/test/#anchor" 537 | if self.url.url.count('#') and hasattr(self.content_object, 'get_absolute_url'): 538 | url_part, anchor_part = self.url.url.split('#') 539 | absolute_url = self.content_object.get_absolute_url() 540 | if url_part == absolute_url: 541 | return '#' + anchor_part 542 | return self.url.url 543 | 544 | def __str__(self): 545 | return f"{self.url.url} ({self.content_object})" 546 | 547 | def __repr__(self): 548 | return f"" 549 | 550 | 551 | def link_post_delete(sender, instance, **kwargs): 552 | try: 553 | # url.delete() => link.delete() => link_post_delete 554 | # in this case link.url is already deleted from db, so we need a try here. 
555 | url = instance.url 556 | count = url.links.all().count() 557 | if count == 0: 558 | logger.debug('This was the last link for %r, so deleting it', url) 559 | url.delete() 560 | except Url.DoesNotExist: 561 | pass 562 | 563 | 564 | def format_connection_error(e): 565 | """ 566 | Helper function to provide better readable output of connection errors 567 | """ 568 | # If the exception message is wrapped in an "HTTPSConnectionPool", only give the underlying cause 569 | reason = re.search(r"\(Caused by ([a-zA-Z]+\(.+\))\)", str(e)) 570 | if not reason: 571 | return f"Connection Error: {e}" 572 | reason = reason[1] 573 | # If the underlying cause is a new connection error, provide additional formatting 574 | if reason.startswith("NewConnectionError"): 575 | return format_new_connection_error(reason) 576 | # If the underlying cause is a name resolution error, provide additional formatting 577 | if reason.startswith("NameResolutionError"): 578 | return format_name_resolution_error(reason) 579 | # If the underlying cause is an SSL error, provide additional formatting 580 | if reason.startswith("SSLError"): 581 | return format_ssl_error(reason) 582 | return f"Connection Error: {reason}" 583 | 584 | 585 | def format_new_connection_error(reason): 586 | """ 587 | Helper function to provide better readable output of new connection errors thrown by urllib3 588 | """ 589 | connection_reason = re.search( 590 | r"NewConnectionError\(': (.+)'\)", 591 | reason, 592 | ) 593 | if connection_reason: 594 | return f"New Connection Error: {connection_reason[1]}" 595 | return reason 596 | 597 | 598 | def format_name_resolution_error(reason): 599 | """ 600 | Helper function to provide better readable output of name resolution errors thrown by urllib3 601 | """ 602 | resolution_reason = re.search( 603 | r"NameResolutionError\([\"']: (.+)[\"']\)", 604 | reason, 605 | ) 606 | if resolution_reason: 607 | return f"Name Resolution Error: {resolution_reason[1]}" 608 | return reason 609 | 610 | 
611 | def format_ssl_error(reason): 612 | """ 613 | Helper function to provide better readable output of SSL errors thrown by urllib3 614 | """ 615 | ssl_reason = re.search(r"SSLError\([a-zA-Z]+\((.+)\)\)", reason) 616 | if ssl_reason: 617 | # If the reason lies within the ssl c library, hide additional debug output 618 | ssl_c_reason = re.search(r"1, '\[SSL: [A-Z\d_]+\] (.+) \(_ssl\.c:\d+\)'", ssl_reason[1]) 619 | if ssl_c_reason: 620 | return f"SSL Error: {ssl_c_reason[1]}" 621 | return f"SSL Error: {ssl_reason[1]}" 622 | return reason 623 | -------------------------------------------------------------------------------- /linkcheck/templates/linkcheck/base_linkcheck.html: -------------------------------------------------------------------------------- 1 | {% extends "admin/change_list.html" %} 2 | {% load i18n %} 3 | 4 | {% block title %} 5 | {% translate "Link Checker" %} {{ block.super }} 6 | {% endblock %} 7 | 8 | {% block breadcrumbs %} 9 |

13 | {% endblock %} 14 | 15 | {% block content %} 16 |
17 |

{% translate "Link Checker" %}

18 |
19 | {% block innercontent %} 20 | {% endblock %} 21 |
22 |
23 | {% endblock %} 24 | -------------------------------------------------------------------------------- /linkcheck/templates/linkcheck/paginator.html: -------------------------------------------------------------------------------- 1 | {% load i18n %} 2 |
3 | 4 | {% if pages.number > 1 %} 5 | < {% translate "First" %} 6 | {% else %} 7 | < {% translate "First" %} 8 | {% endif %} 9 | 10 | {% if pages.has_previous %} 11 | << {% translate "Previous" %} 12 | {% else %} 13 | << {% translate "Previous" %} 14 | {% endif %} 15 | 16 | 17 | {% blocktrans with current=pages.number max=pages.paginator.num_pages %}Page {{ current }} of {{ max }}{% endblocktrans %} 18 | 19 | 20 | {% if pages.has_next %} 21 | {% translate "Next" %} >> 22 | {% else %} 23 | {% translate "Next" %} >> 24 | {% endif %} 25 | 26 | {% if pages.number != pages.paginator.num_pages %} 27 | {% translate "Last" %} > 28 | {% else %} 29 | {% translate "Last" %} > 30 | {% endif %} 31 | 32 |
33 | -------------------------------------------------------------------------------- /linkcheck/templates/linkcheck/report.html: -------------------------------------------------------------------------------- 1 | {% extends "linkcheck/base_linkcheck.html" %} 2 | {% load i18n %} 3 | {% load linkcheck_model_tags %} 4 | {% block extrahead %} 5 | {{ block.super }} 6 | 63 | 64 | 120 | {% endblock %} 121 | 122 | {% block innercontent %} 123 | 124 |
125 | {% translate "Show" %}:   126 | {% if filter == 'show_valid' %}{% translate "Valid links" %}{% else %}{% translate "Valid links" %}{% endif %}   127 | {% if filter == 'show_invalid' %}{% translate "Broken links" %}{% else %}{% translate "Broken links" %}{% endif %}   128 | {% if filter == 'show_unchecked' %}{% translate "Untested links" %}{% else %}{% translate "Untested links" %}{% endif %}   129 | {% if filter == 'ignored' %}{% translate "Ignored links" %}{% else %}{% translate "Ignored links" %}{% endif %} 130 | ({{ ignored_count }}) 131 |
132 |
133 | 134 | {% if content_types_list %} 135 | {% for content_type in content_types_list %} 136 | 137 | 181 |

{{content_type.content_type|get_verbose_name_plural}}

138 | {% for object in content_type.object_list %} 139 |
140 |

{{report_type}} in '{{object.object}}'

   141 | {% blocktrans with content_type_name=content_type.content_type.name %}View {{ content_type_name }}{% endblocktrans %}   142 | {% if object.admin_url %}{% blocktrans with content_type_name=content_type.content_type.name %}Edit {{ content_type_name }}{% endblocktrans %}{% endif %} 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | {% for link in object.link_list %} 151 | 152 | 153 | 154 | 155 | 156 | 161 | 172 | 173 | {% if link.url.redirect_to %} 174 | 175 | {% endif %} 176 | {% endfor %} 177 | 178 |
179 | {% endfor %} 180 |
182 | {% endfor %} 183 | {% else %} 184 |

No results

185 | {% endif %} 186 | {% csrf_token %} 187 | {% if content_types_list %} 188 | {% include "linkcheck/paginator.html" %} 189 | {% endif %} 190 | {% endblock %} 191 | -------------------------------------------------------------------------------- /linkcheck/templatetags/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoAdminHackers/django-linkcheck/bae5ac0e140521b7f3d7b26f91afc714cfa1439b/linkcheck/templatetags/__init__.py -------------------------------------------------------------------------------- /linkcheck/templatetags/linkcheck_model_tags.py: -------------------------------------------------------------------------------- 1 | from django import template 2 | 3 | register = template.Library() 4 | 5 | 6 | @register.filter 7 | def get_verbose_name_plural(content_type): 8 | """ 9 | Returns verbose_name_plural for a content type. 10 | """ 11 | return content_type.model_class()._meta.verbose_name_plural.title() 12 | -------------------------------------------------------------------------------- /linkcheck/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoAdminHackers/django-linkcheck/bae5ac0e140521b7f3d7b26f91afc714cfa1439b/linkcheck/tests/__init__.py -------------------------------------------------------------------------------- /linkcheck/tests/media/found: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DjangoAdminHackers/django-linkcheck/bae5ac0e140521b7f3d7b26f91afc714cfa1439b/linkcheck/tests/media/found -------------------------------------------------------------------------------- /linkcheck/tests/sampleapp/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DjangoAdminHackers/django-linkcheck/bae5ac0e140521b7f3d7b26f91afc714cfa1439b/linkcheck/tests/sampleapp/__init__.py -------------------------------------------------------------------------------- /linkcheck/tests/sampleapp/fixture.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "sampleapp.Page", 4 | "pk": 1, 5 | "fields": { 6 | "book": 1 7 | } 8 | }, 9 | { 10 | "model": "sampleapp.Book", 11 | "pk": 1, 12 | "fields": { 13 | "title": "My Title", 14 | "description": "My description" 15 | } 16 | } 17 | ] 18 | -------------------------------------------------------------------------------- /linkcheck/tests/sampleapp/linklists.py: -------------------------------------------------------------------------------- 1 | from django.db.models import OuterRef, Subquery 2 | 3 | from linkcheck import Linklist 4 | from linkcheck.tests.sampleapp.models import Author, Book, Journal, Page 5 | 6 | 7 | class BookLinklist(Linklist): 8 | """ Class to let linkcheck app discover fields containing links """ 9 | model = Book 10 | object_filter = {} 11 | html_fields = ['description'] 12 | 13 | 14 | class PageLinklist(Linklist): 15 | """ Class to let linkcheck app discover fields containing links """ 16 | model = Page 17 | 18 | 19 | class AuthorLinklist(Linklist): 20 | """ Class to let linkcheck app discover fields containing links """ 21 | model = Author 22 | object_filter = {} 23 | url_fields = ['website'] 24 | 25 | 26 | class JournalLinklist(Linklist): 27 | """ Class to let linkcheck app discover fields containing links """ 28 | model = Journal 29 | html_fields = ['description'] 30 | 31 | @classmethod 32 | def filter_callable(cls, objects): 33 | latest = Journal.objects.filter(title=OuterRef('title')).order_by('-version') 34 | return objects.filter(version=Subquery(latest.values('version')[:1])) 35 | 36 | 37 | linklists = { 38 | 'Books': BookLinklist, 39 | 'Pages': PageLinklist, 40 | 'Authors': 
AuthorLinklist, 41 | 'Journals': JournalLinklist, 42 | } 43 | -------------------------------------------------------------------------------- /linkcheck/tests/sampleapp/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | 4 | class Book(models.Model): 5 | title = models.CharField(max_length=50) 6 | description = models.TextField() 7 | 8 | def get_absolute_url(self): 9 | return f"/book/{self.id}/" 10 | 11 | 12 | class Page(models.Model): 13 | book = models.ForeignKey(Book, on_delete=models.CASCADE) 14 | 15 | def get_absolute_url(self): 16 | return f"/book/{self.book.id}/{self.id}" 17 | 18 | 19 | class Author(models.Model): 20 | # This model has purposefully no get_absolute_url 21 | name = models.CharField(max_length=50) 22 | website = models.URLField(blank=True) 23 | 24 | 25 | class Journal(models.Model): 26 | title = models.CharField(max_length=50) 27 | description = models.TextField() 28 | version = models.PositiveIntegerField(default=0) 29 | 30 | 31 | class UncoveredModel(models.Model): 32 | book = models.ForeignKey(Book, on_delete=models.CASCADE) 33 | website = models.URLField(blank=True) 34 | 35 | def get_absolute_url(self): 36 | return f'/uncovered/{self.id}' 37 | -------------------------------------------------------------------------------- /linkcheck/tests/sampleapp/views.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from django.core.exceptions import PermissionDenied 4 | from django.http import ( 5 | HttpResponse, 6 | HttpResponsePermanentRedirect, 7 | HttpResponseRedirect, 8 | ) 9 | 10 | 11 | def http_response(request, code): 12 | return HttpResponse("", status=int(code)) 13 | 14 | 15 | def http_response_get_only(request, code): 16 | status = int(code) if request.method == 'HEAD' else 200 17 | return HttpResponse("", status=status) 18 | 19 | 20 | def http_block_user_agent(request, block_head=False): 21 | if 
block_head and request.method == 'HEAD': 22 | return HttpResponse('', status=405) 23 | if 'Linkchecker' in request.headers.get('User-Agent', ''): 24 | raise PermissionDenied() 25 | return HttpResponse('') 26 | 27 | 28 | def http_redirect(request, code): 29 | return HttpResponseRedirect("/http/200/", status=int(code)) 30 | 31 | 32 | def http_redirect_to_404(request): 33 | return HttpResponsePermanentRedirect("/http/404/") 34 | 35 | 36 | def timeout(request): 37 | time.sleep(2) 38 | return HttpResponse("") 39 | 40 | 41 | def http_response_with_anchor(request): 42 | return HttpResponse("

Anchor

") 43 | 44 | 45 | def http_redirect_to_anchor(request): 46 | return HttpResponseRedirect("/http/anchor/") 47 | 48 | 49 | def static_video(request): 50 | return HttpResponse(b'', content_type='video/mp4') 51 | 52 | 53 | def static_video_forged_content_type(request): 54 | return HttpResponse(b': " 452 | "Failed to resolve 'name-resolution-error.example.com' ([Errno -2] Name or service not known)\"))" 453 | ) 454 | mocked_url = 'https://name-resolution-error.example.com/' 455 | mocker.register_uri('HEAD', mocked_url, exc=exc), 456 | uv = Url(url=mocked_url) 457 | uv.check_url() 458 | formatted_message = ( 459 | "Name Resolution Error: Failed to resolve 'name-resolution-error.example.com' " 460 | "([Errno -2] Name or service not known)" 461 | ) 462 | self.assertEqual(uv.message, formatted_message) 463 | self.assertEqual(uv.get_message, formatted_message) 464 | self.assertEqual(uv.error_message, formatted_message) 465 | self.assertEqual(uv.status, False) 466 | self.assertEqual(uv.anchor_message, '') 467 | self.assertEqual(uv.ssl_status, None) 468 | self.assertEqual(uv.ssl_message, 'SSL certificate could not be checked') 469 | self.assertEqual(uv.get_status_code_display(), None) 470 | self.assertEqual(uv.get_redirect_status_code_display(), None) 471 | self.assertEqual(uv.redirect_to, '') 472 | self.assertEqual(uv.type, 'external') 473 | 474 | def test_external_check_200_utf8(self): 475 | uv = Url(url=f"{self.live_server_url}/http/200/r%C3%BCckmeldung/") 476 | uv.check_url() 477 | self.assertEqual(uv.message, '200 OK') 478 | self.assertEqual(uv.get_message, 'Working external link') 479 | self.assertEqual(uv.error_message, '') 480 | self.assertEqual(uv.status, True) 481 | self.assertEqual(uv.anchor_message, '') 482 | self.assertEqual(uv.ssl_status, None) 483 | self.assertEqual(uv.ssl_message, 'Insecure link') 484 | self.assertEqual(uv.get_status_code_display(), '200 OK') 485 | self.assertEqual(uv.get_redirect_status_code_display(), None) 486 | 
self.assertEqual(uv.redirect_to, '') 487 | self.assertEqual(uv.type, 'external') 488 | 489 | def test_external_check_200_utf8_not_encoded(self): 490 | uv = Url(url=f"{self.live_server_url}/http/200/rückmeldung/") 491 | uv.check_url() 492 | self.assertEqual(uv.message, '200 OK') 493 | self.assertEqual(uv.get_message, 'Working external link') 494 | self.assertEqual(uv.error_message, '') 495 | self.assertEqual(uv.status, True) 496 | self.assertEqual(uv.anchor_message, '') 497 | self.assertEqual(uv.ssl_status, None) 498 | self.assertEqual(uv.ssl_message, 'Insecure link') 499 | self.assertEqual(uv.get_status_code_display(), '200 OK') 500 | self.assertEqual(uv.get_redirect_status_code_display(), None) 501 | self.assertEqual(uv.redirect_to, '') 502 | self.assertEqual(uv.type, 'external') 503 | 504 | @requests_mock.Mocker() 505 | def test_external_check_200_utf8_domain(self, mocker): 506 | mocker.register_uri('HEAD', 'https://xn--utf8-test--z5a0txc.example.com/', reason='OK'), 507 | uv = Url(url='https://utf8-test-äüö.example.com/') 508 | uv.check_url() 509 | self.assertEqual(uv.message, '200 OK') 510 | self.assertEqual(uv.get_message, 'Working external link') 511 | self.assertEqual(uv.error_message, '') 512 | self.assertEqual(uv.status, True) 513 | self.assertEqual(uv.anchor_message, '') 514 | self.assertEqual(uv.ssl_status, True) 515 | self.assertEqual(uv.ssl_message, 'Valid SSL certificate') 516 | self.assertEqual(uv.get_status_code_display(), '200 OK') 517 | self.assertEqual(uv.get_redirect_status_code_display(), None) 518 | self.assertEqual(uv.redirect_to, '') 519 | self.assertEqual(uv.type, 'external') 520 | 521 | @requests_mock.Mocker() 522 | def test_external_check_200_punycode_domain(self, mocker): 523 | punycode_domain = 'https://xn--utf8-test--z5a0txc.example.com/' 524 | mocker.register_uri('HEAD', punycode_domain, reason='OK'), 525 | uv = Url(url=punycode_domain) 526 | uv.check_url() 527 | self.assertEqual(uv.message, '200 OK') 528 | 
self.assertEqual(uv.get_message, 'Working external link') 529 | self.assertEqual(uv.error_message, '') 530 | self.assertEqual(uv.status, True) 531 | self.assertEqual(uv.anchor_message, '') 532 | self.assertEqual(uv.ssl_status, True) 533 | self.assertEqual(uv.ssl_message, 'Valid SSL certificate') 534 | self.assertEqual(uv.get_status_code_display(), '200 OK') 535 | self.assertEqual(uv.get_redirect_status_code_display(), None) 536 | self.assertEqual(uv.redirect_to, '') 537 | self.assertEqual(uv.type, 'external') 538 | 539 | def test_external_check_301(self): 540 | uv = Url(url=f"{self.live_server_url}/http/301/") 541 | uv.check_url() 542 | self.assertEqual(uv.message, '301 Moved Permanently') 543 | self.assertEqual(uv.get_message, 'Broken permanent redirect') 544 | self.assertEqual(uv.error_message, '') 545 | self.assertEqual(uv.status, False) 546 | self.assertEqual(uv.anchor_message, '') 547 | self.assertEqual(uv.ssl_status, None) 548 | self.assertEqual(uv.ssl_message, 'Insecure link') 549 | self.assertEqual(uv.get_status_code_display(), '301 Moved Permanently') 550 | self.assertEqual(uv.get_redirect_status_code_display(), None) 551 | self.assertEqual(uv.redirect_to, '') 552 | self.assertEqual(uv.type, 'external') 553 | 554 | def test_external_check_301_followed(self): 555 | uv = Url(url=f"{self.live_server_url}/http/redirect/301/") 556 | uv.check_url() 557 | self.assertEqual(uv.message, '301 Moved Permanently') 558 | self.assertEqual(uv.get_message, 'Working permanent redirect') 559 | self.assertEqual(uv.error_message, '') 560 | self.assertEqual(uv.status, True) 561 | self.assertEqual(uv.anchor_message, '') 562 | self.assertEqual(uv.ssl_status, None) 563 | self.assertEqual(uv.ssl_message, 'Insecure link') 564 | self.assertEqual(uv.get_status_code_display(), '301 Moved Permanently') 565 | self.assertEqual(uv.get_redirect_status_code_display(), '200 OK') 566 | self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/200/') 567 | self.assertEqual(uv.type, 
'external') 568 | 569 | def test_external_check_302_followed(self): 570 | uv = Url(url=f"{self.live_server_url}/http/redirect/302/") 571 | uv.check_url() 572 | self.assertEqual(uv.message, '302 Found') 573 | self.assertEqual(uv.get_message, 'Working temporary redirect') 574 | self.assertEqual(uv.error_message, '') 575 | self.assertEqual(uv.status, True) 576 | self.assertEqual(uv.anchor_message, '') 577 | self.assertEqual(uv.ssl_status, None) 578 | self.assertEqual(uv.ssl_message, 'Insecure link') 579 | self.assertEqual(uv.get_status_code_display(), '302 Found') 580 | self.assertEqual(uv.get_redirect_status_code_display(), '200 OK') 581 | self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/200/') 582 | self.assertEqual(uv.type, 'external') 583 | 584 | def test_external_check_404(self): 585 | uv = Url(url=f"{self.live_server_url}/whatever/") 586 | uv.check_url() 587 | self.assertEqual(uv.message, '404 Not Found') 588 | self.assertEqual(uv.get_message, 'Broken external link') 589 | self.assertEqual(uv.error_message, '') 590 | self.assertEqual(uv.status, False) 591 | self.assertEqual(uv.anchor_message, '') 592 | self.assertEqual(uv.ssl_status, None) 593 | self.assertEqual(uv.ssl_message, 'Insecure link') 594 | self.assertEqual(uv.get_status_code_display(), '404 Not Found') 595 | self.assertEqual(uv.get_redirect_status_code_display(), None) 596 | self.assertEqual(uv.redirect_to, '') 597 | self.assertEqual(uv.type, 'external') 598 | 599 | def test_external_check_redirect_final_404(self): 600 | uv = Url(url=f"{self.live_server_url}/http/redirect_to_404/") 601 | uv.check_url() 602 | self.assertEqual(uv.message, '404 Not Found') 603 | self.assertEqual(uv.get_message, 'Broken permanent redirect') 604 | self.assertEqual(uv.error_message, '') 605 | self.assertEqual(uv.status, False) 606 | self.assertEqual(uv.anchor_message, '') 607 | self.assertEqual(uv.ssl_status, None) 608 | self.assertEqual(uv.ssl_message, 'Insecure link') 609 | 
self.assertEqual(uv.get_status_code_display(), '301 Moved Permanently') 610 | self.assertEqual(uv.get_redirect_status_code_display(), '404 Not Found') 611 | self.assertEqual(uv.redirect_to, f'{self.live_server_url}/http/404/') 612 | self.assertEqual(uv.type, 'external') 613 | 614 | def test_external_check_get_only_405(self): 615 | # An URL that allows GET but not HEAD, linkcheck should fallback on GET. 616 | uv = Url(url=f"{self.live_server_url}/http/getonly/405/") 617 | uv.check_url() 618 | self.assertEqual(uv.message, '200 OK') 619 | self.assertEqual(uv.get_message, 'Working external link') 620 | self.assertEqual(uv.error_message, '') 621 | self.assertEqual(uv.status, True) 622 | self.assertEqual(uv.anchor_message, '') 623 | self.assertEqual(uv.ssl_status, None) 624 | self.assertEqual(uv.ssl_message, 'Insecure link') 625 | self.assertEqual(uv.get_status_code_display(), '200 OK') 626 | self.assertEqual(uv.get_redirect_status_code_display(), None) 627 | self.assertEqual(uv.redirect_to, '') 628 | self.assertEqual(uv.type, 'external') 629 | 630 | def test_external_check_get_only_400(self): 631 | uv = Url(url=f"{self.live_server_url}/http/getonly/400/") 632 | uv.check_url() 633 | self.assertEqual(uv.message, '200 OK') 634 | self.assertEqual(uv.get_message, 'Working external link') 635 | self.assertEqual(uv.error_message, '') 636 | self.assertEqual(uv.status, True) 637 | self.assertEqual(uv.anchor_message, '') 638 | self.assertEqual(uv.ssl_status, None) 639 | self.assertEqual(uv.ssl_message, 'Insecure link') 640 | self.assertEqual(uv.get_status_code_display(), '200 OK') 641 | self.assertEqual(uv.get_redirect_status_code_display(), None) 642 | self.assertEqual(uv.redirect_to, '') 643 | self.assertEqual(uv.type, 'external') 644 | 645 | def test_external_check_blocked_user_agent(self): 646 | uv = Url(url=f"{self.live_server_url}/http/block-user-agent/") 647 | uv.check_url() 648 | self.assertEqual(uv.message, '200 OK') 649 | self.assertEqual(uv.get_message, 'Working 
def test_external_check_blocked_user_agent_blocked_head(self):
    """The block-user-agent/block-head endpoint must still end up as a working link."""
    checked = Url(url=f"{self.live_server_url}/http/block-user-agent/block-head/")
    checked.check_url()
    # Overall verdict and messages
    self.assertEqual(checked.message, '200 OK')
    self.assertEqual(checked.get_message, 'Working external link')
    self.assertEqual(checked.error_message, '')
    self.assertEqual(checked.status, True)
    # No anchor in the URL, plain-HTTP live server
    self.assertEqual(checked.anchor_message, '')
    self.assertEqual(checked.ssl_status, None)
    self.assertEqual(checked.ssl_message, 'Insecure link')
    # Status codes and redirect bookkeeping
    self.assertEqual(checked.get_status_code_display(), '200 OK')
    self.assertEqual(checked.get_redirect_status_code_display(), None)
    self.assertEqual(checked.redirect_to, '')
    self.assertEqual(checked.type, 'external')
def test_working_external_anchor(self):
    """An existing hash anchor on a 200 page is reported as a working anchor."""
    checked = Url(url=f"{self.live_server_url}/http/anchor/#anchor")
    checked.check_url()
    # Overall verdict and messages
    self.assertEqual(checked.message, "200 OK, working external hash anchor")
    self.assertEqual(checked.get_message, 'Working external link')
    self.assertEqual(checked.error_message, '')
    self.assertEqual(checked.status, True)
    # Anchor and SSL details
    self.assertEqual(checked.anchor_message, 'Working anchor')
    self.assertEqual(checked.ssl_status, None)
    self.assertEqual(checked.ssl_message, 'Insecure link')
    # Status codes and redirect bookkeeping
    self.assertEqual(checked.get_status_code_display(), '200 OK')
    self.assertEqual(checked.get_redirect_status_code_display(), None)
    self.assertEqual(checked.redirect_to, '')
    self.assertEqual(checked.type, 'external')
def test_broken_external_anchor_tolerated(self):
    """
    With TOLERATE_BROKEN_ANCHOR left unpatched, a missing anchor is still
    reported as broken but the URL status is expected to stay True.
    """
    checked = Url(url=f"{self.live_server_url}/http/anchor/#broken-anchor")
    checked.check_url()
    # Overall verdict and messages
    self.assertEqual(checked.message, "200 OK, broken external hash anchor")
    self.assertEqual(checked.get_message, 'Working external link')
    self.assertEqual(checked.error_message, '')
    self.assertEqual(checked.status, True)
    # Anchor and SSL details
    self.assertEqual(checked.anchor_message, 'Broken anchor')
    self.assertEqual(checked.ssl_status, None)
    self.assertEqual(checked.ssl_message, 'Insecure link')
    # Status codes and redirect bookkeeping
    self.assertEqual(checked.get_status_code_display(), '200 OK')
    self.assertEqual(checked.get_redirect_status_code_display(), None)
    self.assertEqual(checked.redirect_to, '')
    self.assertEqual(checked.type, 'external')
def test_redirect_broken_external_anchor_tolerated(self):
    """
    Same as the non-redirect tolerated case, but the anchor page is reached
    through a 302 redirect; the status is expected to stay True.
    """
    checked = Url(url=f"{self.live_server_url}/http/redirect_to_anchor/#broken-anchor")
    checked.check_url()
    # Overall verdict and messages
    self.assertEqual(checked.message, "302 Found, broken external hash anchor")
    self.assertEqual(checked.get_message, 'Working temporary redirect')
    self.assertEqual(checked.error_message, '')
    self.assertEqual(checked.status, True)
    # Anchor and SSL details
    self.assertEqual(checked.anchor_message, 'Broken anchor')
    self.assertEqual(checked.ssl_status, None)
    self.assertEqual(checked.ssl_message, 'Insecure link')
    # Status codes and redirect bookkeeping
    self.assertEqual(checked.get_status_code_display(), '302 Found')
    self.assertEqual(checked.get_redirect_status_code_display(), '200 OK')
    self.assertEqual(checked.redirect_to, f'{self.live_server_url}/http/anchor/')
    self.assertEqual(checked.type, 'external')
def test_forged_video_with_time_anchor(self):
    """
    The fake-video endpoint with a ``#t=`` fragment: the link works, but the
    anchor is reported as not checkable because the HTML could not be parsed.
    """
    checked = Url(url=f"{self.live_server_url}/static-files/fake-video.mp4#t=2.0")
    checked.check_url()
    # Overall verdict and messages
    self.assertEqual(checked.message, "200 OK, failed to parse HTML for anchor")
    self.assertEqual(checked.get_message, 'Working external link')
    self.assertEqual(checked.error_message, '')
    self.assertEqual(checked.status, True)
    # Anchor and SSL details
    self.assertEqual(checked.anchor_message, 'Anchor could not be checked')
    self.assertEqual(checked.ssl_status, None)
    self.assertEqual(checked.ssl_message, 'Insecure link')
    # Status codes and redirect bookkeeping
    self.assertEqual(checked.get_status_code_display(), '200 OK')
    self.assertEqual(checked.get_redirect_status_code_display(), None)
    self.assertEqual(checked.redirect_to, '')
    self.assertEqual(checked.type, 'external')
Title', description=""" 861 | Here's an external link: External, 862 | an internal link: Internal, 863 | and an image: logo""") 864 | 865 | out = StringIO() 866 | call_command('checklinks', stdout=out) 867 | self.assertEqual( 868 | out.getvalue(), 869 | "Checking all links that haven't been tested for 10080 minutes.\n" 870 | "1 internal URLs and 0 external URLs have been checked.\n" 871 | ) 872 | 873 | yesterday = datetime.now() - timedelta(days=1) 874 | Url.objects.all().update(last_checked=yesterday) 875 | out = StringIO() 876 | call_command('checklinks', externalinterval=20, stdout=out) 877 | self.assertEqual( 878 | out.getvalue(), 879 | "Checking all links that haven't been tested for 20 minutes.\n" 880 | "1 internal URLs and 2 external URLs have been checked.\n" 881 | ) 882 | 883 | Url.objects.all().update(last_checked=yesterday) 884 | out = StringIO() 885 | call_command('checklinks', externalinterval=20, limit=1, stdout=out) 886 | self.assertEqual( 887 | out.getvalue(), 888 | "Checking all links that haven't been tested for 20 minutes.\n" 889 | "Will run maximum of 1 checks this run.\n" 890 | "1 internal URLs and 1 external URLs have been checked.\n" 891 | ) 892 | 893 | 894 | class FindingLinksTestCase(TestCase): 895 | def test_found_links(self): 896 | self.assertEqual(Url.objects.all().count(), 0) 897 | Book.objects.create(title='My Title', description=""" 898 | Here's a link: Example, 899 | and an image: logo""") 900 | self.assertEqual(Url.objects.all().count(), 2) 901 | self.assertQuerySetEqual( 902 | Url.objects.all().order_by('url'), 903 | ["http://www.example.org", "http://www.example.org/logo.png"], 904 | transform=lambda obj: obj.url 905 | ) 906 | 907 | def test_urls_exceeding_max_length(self): 908 | self.assertEqual(Url.objects.all().count(), 0) 909 | with self.assertLogs(logger="linkcheck", level="WARN") as cm: 910 | Book.objects.create( 911 | title="My Title", 912 | description=( 913 | "Here's a link: Example, and here's a url exceeding " 914 | 
def test_empty_url_field(self):
    """
    Test that empty URLField content is skipped for fields listed in
    ignore_empty, and extracted again once the list is cleared.
    """
    authors_linklist = apps.get_app_config('linkcheck').all_linklists['Authors']
    authors_linklist.ignore_empty = ['website']
    try:
        # Only the author with a website should yield a Url
        Author.objects.create(name="William Shakespeare")
        Author.objects.create(name="John Smith", website="http://www.example.org/smith")
        self.assertEqual(Url.objects.all().count(), 1)
    finally:
        # Always restore the shared linklist configuration
        authors_linklist.ignore_empty = []
    Author.objects.create(name="Alphonse Daudet")
    # This time, the empty 'website' is extracted
    self.assertEqual(Url.objects.all().count(), 2)
def test_linkcheck_suggest_config_model_non_existing(self):
    """
    Test that the command raises a CommandError when the model does not exist
    """
    command_args = ('linkcheck_suggest_config', '--model', 'non-existing')
    with self.assertRaises(CommandError) as raised:
        get_command_output(*command_args)
    # The error message must name the model that was requested
    self.assertEqual(str(raised.exception), 'Model "non-existing" does not exist.')
class ObjectsUpdateTestCase(TestCase):
    def test_update_object(self):
        """
        Test that updating a broken URL in an object also updates the
        corresponding Link, and doesn't leak the old URL.
        """
        broken, working = "/broken/internal/link", "/public/"

        def link_count(status):
            # Non-ignored links whose Url currently has the given status
            return Link.objects.filter(ignore=False, url__status=status).count()

        author = Author.objects.create(name="John Smith", website=broken)
        self.assertEqual(link_count(False), 1)
        self.assertEqual(link_count(True), 0)
        self.assertEqual(Url.objects.all().count(), 1)
        self.assertEqual(Url.objects.all()[0].url, broken)

        # Fix the link
        author.website = working
        author.save()
        self.assertEqual(link_count(False), 0)
        self.assertEqual(link_count(True), 1)
        self.assertEqual(Url.objects.all().count(), 1)
        self.assertEqual(Url.objects.all()[0].url, working)
class QueueTests(TestCase):
    def test_queue_handling_continue_on_task_crash(self):
        """
        Queue a raising task followed by a passing one, run the worker
        non-blocking, and check that the first log record reports the
        RuntimeError raised by the failing task.
        """
        # Use a unittest assertion rather than a bare ``assert``: it is not
        # stripped under ``python -O`` and matches the rest of this module.
        self.assertIs(tasks_queue.empty(), True)

        def raising():
            raise RuntimeError("Failing task")

        def passing():
            pass

        for func in (raising, passing):
            tasks_queue.put({
                'target': func,
                'args': (),
                'kwargs': {},
            })
        with self.assertLogs() as cm:
            linkcheck_worker(block=False)
        # Only the first line of the record is compared (the rest is traceback)
        self.assertEqual(
            cm.output[0].split('\n')[0],
            'ERROR:linkcheck.listeners:RuntimeError while running raising with '
            'args=() and kwargs={}: Failing task'
        )

Link Checker

") 1135 | 1136 | def test_report_ignore_unignore(self): 1137 | Author.objects.create(name="John Smith", website="http://www.example.org/john") 1138 | self.client.force_login(self.user) 1139 | link = Link.objects.first() 1140 | self.assertFalse(link.ignore) 1141 | response = self.client.post( 1142 | reverse('linkcheck_report') + f"?ignore={link.pk}", 1143 | HTTP_X_REQUESTED_WITH='XMLHttpRequest' 1144 | ) 1145 | self.assertEqual(response.json(), {'link': link.pk}) 1146 | link.refresh_from_db() 1147 | self.assertTrue(link.ignore) 1148 | response = self.client.post( 1149 | reverse('linkcheck_report') + f"?unignore={link.pk}", 1150 | HTTP_X_REQUESTED_WITH='XMLHttpRequest' 1151 | ) 1152 | self.assertEqual(response.json(), {'link': link.pk}) 1153 | link.refresh_from_db() 1154 | self.assertFalse(link.ignore) 1155 | 1156 | def test_report_recheck(self): 1157 | Author.objects.create(name="John Smith", website="http://www.example.org/john") 1158 | self.client.force_login(self.user) 1159 | link = Link.objects.first() 1160 | response = self.client.post( 1161 | reverse('linkcheck_report') + f"?recheck={link.pk}", 1162 | HTTP_X_REQUESTED_WITH='XMLHttpRequest' 1163 | ) 1164 | self.assertEqual(response.json(), { 1165 | 'colour': 'red', 1166 | 'links': [link.pk], 1167 | 'message': '404 Not Found', 1168 | }) 1169 | 1170 | 1171 | class GetJqueryMinJsTestCase(TestCase): 1172 | def test(self): 1173 | self.assertEqual( 1174 | 'admin/js/vendor/jquery/jquery.min.js', get_jquery_min_js() 1175 | ) 1176 | 1177 | 1178 | class FixtureTestCase(TestCase): 1179 | fixtures = ['linkcheck/tests/sampleapp/fixture.json'] 1180 | 1181 | def test_fixture(self): 1182 | self.assertEqual(Book.objects.count(), 1) 1183 | self.assertEqual(Page.objects.count(), 1) 1184 | 1185 | 1186 | class FilterCallableTestCase(TestCase): 1187 | def test_filter_callable(self): 1188 | all_linklists = apps.get_app_config('linkcheck').all_linklists 1189 | all_linklists['Journals'].html_fields = [] 1190 | 
def get_command_output(command, *args, **kwargs):
    """
    Run a management command with captured streams and return the
    ``(stdout, stderr)`` text it produced.
    """
    captured_out, captured_err = StringIO(), StringIO()
    call_command(command, *args, stdout=captured_out, stderr=captured_err, **kwargs)
    return captured_out.getvalue(), captured_err.getvalue()
class LinkCheckHandler(ClientHandler):
    """
    ClientHandler variant that removes selected middleware after loading.

    Any loaded middleware method whose string representation contains one of
    ``ignore_keywords`` is dropped from the handler's middleware lists.
    """

    # Lists that may be missing or empty; the original comment notes that
    # ``_request_middleware`` is removed in newer Django. They are only
    # rewritten when present and non-empty.
    _OPTIONAL_MIDDLEWARE_ATTRS = (
        '_request_middleware',
        '_response_middleware',
        '_template_response_middleware',
    )
    # Lists that are always filtered and reassigned.
    _REQUIRED_MIDDLEWARE_ATTRS = (
        '_view_middleware',
        '_exception_middleware',
    )

    def load_middleware(self):
        """Load the middleware as usual, then filter out the ignored entries."""
        self.ignore_keywords = ['reversion.middleware', 'MaintenanceModeMiddleware', 'raven_compat']
        super().load_middleware()

        for attr in self._OPTIONAL_MIDDLEWARE_ATTRS:
            methods = getattr(self, attr, None)
            if methods:
                setattr(self, attr, self._without_ignored(methods))

        for attr in self._REQUIRED_MIDDLEWARE_ATTRS:
            setattr(self, attr, self._without_ignored(getattr(self, attr)))

    def _without_ignored(self, methods):
        """Return the methods whose string form matches none of the ignore keywords."""
        return [
            method for method in methods
            if not any(keyword in str(method) for keyword in self.ignore_keywords)
        ]
def find_all_links(linklists=None):
    """
    Re-extract every URL and link from the given linklists (defaulting to all
    linklists registered on the linkcheck app config), delete the Url and Link
    rows that were not seen, and return created/deleted/unchanged counts for
    both models.
    """
    if linklists is None:
        linklists = apps.get_app_config('linkcheck').all_linklists

    created = {"urls": 0, "links": 0}
    seen_ids = {"urls": set(), "links": set()}

    urls_before = Url.objects.count()
    links_before = Link.objects.count()

    for _name, linklist_cls in linklists.items():
        content_type = linklist_cls.content_type()

        for entry in linklist_cls().get_linklist():
            object_id = entry['object'].id
            found = entry['urls'] + entry['images']
            if not found:
                continue

            result = update_urls(found, content_type, object_id)
            for kind in ("urls", "links"):
                created[kind] += result[kind]["created"]
                seen_ids[kind].update(result[kind]["ids"])

    # Delete all urls and links which are no longer part of the link lists
    Url.objects.all().exclude(id__in=seen_ids["urls"]).delete()
    Link.objects.all().exclude(id__in=seen_ids["links"]).delete()

    # Calculate diff
    urls_after = Url.objects.count()
    links_after = Link.objects.count()

    urls_summary = {
        "created": created["urls"],
        "deleted": urls_before + created["urls"] - urls_after,
        "unchanged": urls_after - created["urls"],
    }
    links_summary = {
        "created": created["links"],
        "deleted": links_before + created["links"] - links_after,
        "unchanged": links_after - created["links"],
    }
    return {"urls": urls_summary, "links": links_summary}
check_funcs[the_type] 268 | return [field for field in klass._meta.fields if check_func(field)] 269 | 270 | 271 | def is_model_covered(klass): 272 | app = apps.get_app_config('linkcheck') 273 | return any(linklist[1].model == klass for linklist in app.all_linklists.items()) 274 | 275 | 276 | def format_config(meta, active_field, html_fields, image_fields, url_fields, ignore_empty_fields): 277 | config = f'from { meta.app_label }.models import { meta.object_name }\n\n' 278 | config += f'class { meta.object_name }Linklist(Linklist):\n' 279 | config += f' model = { meta.object_name }\n' 280 | if html_fields: 281 | config += f' html_fields = [{", ".join(map(str, html_fields))}]\n' 282 | if image_fields: 283 | config += f' image_fields = [{", ".join(map(str, image_fields))}]\n' 284 | if url_fields: 285 | config += f' url_fields = [{", ".join(map(str, url_fields))}]\n' 286 | if ignore_empty_fields: 287 | config += f' ignore_empty = [{", ".join(map(str, ignore_empty_fields))}]\n' 288 | if active_field: 289 | config += ' object_filter = {"active": True}\n' 290 | config += f'\nlinklists = {{\n "{ meta.object_name }": { meta.object_name }Linklist,\n}}\n' 291 | return config 292 | 293 | 294 | def get_suggested_linklist_config(klass): 295 | meta = klass._meta 296 | html_fields = get_type_fields(klass, 'html') 297 | url_fields = get_type_fields(klass, 'url') 298 | image_fields = get_type_fields(klass, 'image') 299 | active_field = has_active_field(klass) 300 | ignore_empty_fields = get_ignore_empty_fields(klass) 301 | return format_config(**{ 302 | 'meta': meta, 303 | 'html_fields': html_fields, 304 | 'url_fields': url_fields, 305 | 'image_fields': image_fields, 306 | 'active_field': active_field, 307 | 'ignore_empty_fields': ignore_empty_fields, 308 | }) 309 | 310 | 311 | def get_coverage_data(): 312 | """ 313 | Check which models are covered by linkcheck 314 | This view assumes the key for link 315 | """ 316 | covered = [] 317 | uncovered = [] 318 | for app in 
@staff_member_required
def coverage(request):
    """
    Show which models are covered by linkcheck.

    With a truthy ``config`` query parameter only the suggested linklist
    source for the uncovered models is rendered; otherwise the full coverage
    data is passed to the report template.
    """
    # get_coverage_data() returns a (covered, uncovered) pair, where every
    # uncovered entry is a (model label, suggested config) tuple.
    covered, uncovered = get_coverage_data()

    if request.GET.get('config', False):
        # Just render the suggested linklist code
        template = 'linkcheck/suggested_configs.html'
        context = {'coverage_data': [config for _label, config in uncovered]}
    else:
        # Render a nice report
        template = 'linkcheck/coverage.html'
        context = {'coverage_data': (covered, uncovered)}

    return render(request, template, context)


def _set_link_ignored(link_id, ignored):
    """Store the given ignore flag on the link with this id and return the link."""
    link = Link.objects.get(id=link_id)
    link.ignore = ignored
    link.save()
    return link


def _handle_link_action(request):
    """
    Apply the ``ignore``, ``unignore`` and ``recheck`` actions from the query string.

    Returns a JsonResponse as soon as an action was applied for an AJAX
    request; returns None otherwise so the caller renders the normal report.
    """
    ajax = is_ajax(request)

    ignore_link_id = request.GET.get('ignore', None)
    if ignore_link_id is not None:
        link = _set_link_ignored(ignore_link_id, True)
        if ajax:
            return JsonResponse({'link': link.pk})

    unignore_link_id = request.GET.get('unignore', None)
    if unignore_link_id is not None:
        link = _set_link_ignored(unignore_link_id, False)
        if ajax:
            return JsonResponse({'link': link.pk})

    recheck_link_id = request.GET.get('recheck', None)
    if recheck_link_id is not None:
        url = Link.objects.get(id=recheck_link_id).url
        # An interval of 0 is passed so the url is checked again right now.
        url.check_url(external_recheck_interval=0)
        if ajax:
            return JsonResponse({
                'links': list(url.links.values_list('id', flat=True)),
                'message': url.message,
                'colour': url.colour,
            })

    return None


def _get_admin_url(obj, content_type, object_id):
    """Return the admin change url for the object, or None if it cannot be built."""
    try:
        return obj.get_admin_url()  # TODO allow method name to be configurable
    except AttributeError:
        # Also reached when obj is None because the linked object is gone.
        pass
    try:
        return reverse(f'admin:{content_type.app_label}_{content_type.model}_change', args=[object_id])
    except NoReverseMatch:
        return None


def _group_links_by_object(page_links):
    """
    Group a page of links by content type and then by object.

    groupby() needs subscriptable, pre-sorted rows, so the queryset is turned
    into values() dicts first and the Link instances are fetched again per
    object with in_bulk().
    """
    by_content_type = itemgetter('content_type_id')
    by_object = itemgetter('object_id')
    content_types_list = []

    rows = sorted(page_links.values(), key=by_content_type)
    for content_type_id, type_rows in groupby(rows, by_content_type):
        # Looked up once per content type (and served from Django's
        # ContentType cache) rather than once per object group.
        content_type = ContentType.objects.get_for_id(content_type_id)
        model_class = content_type.model_class()
        objects = []
        for object_id, object_rows in groupby(sorted(type_rows, key=by_object), by_object):
            obj = None
            if model_class:
                try:
                    obj = model_class.objects.get(pk=object_id)
                except ObjectDoesNotExist:
                    # The linked object was deleted; its links are still listed.
                    pass
            objects.append({
                'object': obj,
                'link_list': Link.objects.in_bulk([row['id'] for row in object_rows]).values(),
                'admin_url': _get_admin_url(obj, content_type, object_id),
            })
        content_types_list.append({
            'content_type': content_type,
            'object_list': objects,
        })

    return content_types_list


@staff_member_required
def report(request):
    """
    Render the link report and handle the per-link actions.

    POST requests may carry ``ignore``, ``unignore`` or ``recheck`` link ids
    in the query string; AJAX callers get a JSON answer for them. The
    ``filters`` query parameter selects valid, unchecked, ignored or (by
    default) broken links, and ``page`` selects the result page.
    """
    if request.method == 'POST':
        response = _handle_link_action(request)
        if response is not None:
            return response

    link_filter = request.GET.get('filters', 'show_invalid')

    qset = Link.objects.order_by('-url__last_checked')
    if link_filter == 'show_valid':
        qset = qset.filter(ignore=False, url__status__exact=True)
        report_type = _('Valid links')
    elif link_filter == 'show_unchecked':
        qset = qset.filter(ignore=False, url__last_checked__exact=None)
        report_type = _('Untested links')
    elif link_filter == 'ignored':
        qset = qset.filter(ignore=True)
        report_type = _('Ignored links')
    else:
        qset = qset.filter(ignore=False, url__status__exact=False)
        report_type = _('Broken links')

    paginated_links = Paginator(qset, RESULTS_PER_PAGE, orphans=0, allow_empty_first_page=True)
    # get_page() falls back to the first page for a non-numeric value and to
    # the last page for an out-of-range one, instead of raising EmptyPage.
    links = paginated_links.get_page(request.GET.get('page', 1))

    content_types_list = _group_links_by_object(links.object_list)

    # Pass any querystring data back to the form minus page
    rqst = request.GET.copy()
    if 'page' in rqst:
        del rqst['page']

    return render(request, 'linkcheck/report.html', {
        'content_types_list': content_types_list,
        'pages': links,
        'filter': link_filter,
        'media': forms.Media(js=[static(get_jquery_min_js())]),
        'qry_data': rqst.urlencode(),
        'report_type': report_type,
        'ignored_count': Link.objects.filter(ignore=True).count(),
    })


def get_jquery_min_js():
    """
    Return the location of jquery.min.js. It's an entry point to adapt the path
    when it changes in Django.
    """
    return 'admin/js/vendor/jquery/jquery.min.js'


def get_status_message():
    """
    Return a short status text about the link check.

    The text says that a check is still running while ``update_lock`` is
    held; otherwise it reports the number of broken, non-ignored links with a
    link to the report, or is empty when there are none.
    """
    if update_lock.locked():
        return "Still checking. Please refresh this page in a short while. "

    broken_links = Link.objects.filter(ignore=False, url__status=False).count()
    if not broken_links:
        return ''

    # NOTE(review): the report url was passed to format() without a
    # placeholder, so it never appeared in the message. The <br> and <a>
    # markup is a reconstruction - confirm it against what the consumers of
    # this message expect.
    return (
        "We've found {} broken link{}.<br>"
        "<a href='{}'>View/fix broken links</a>"
    ).format(
        broken_links,
        "s" if broken_links > 1 else "",
        reverse('linkcheck_report'),
    )


def is_ajax(request):
    """Return True if the request carries the ``X-Requested-With: XMLHttpRequest`` header."""
    return request.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'
# Minimal stand-alone settings so the test suite can run without a project.
if not settings.configured:
    test_settings = {
        'DATABASES': {'default': {'ENGINE': 'django.db.backends.sqlite3'}},
        'STATIC_URL': '/static/',
        'MEDIA_URL': '/media/',
        'INSTALLED_APPS': [
            'django.contrib.admin', 'django.contrib.auth',
            'django.contrib.sessions', 'django.contrib.contenttypes',
            'django.contrib.messages',
            'linkcheck', 'linkcheck.tests.sampleapp',
        ],
        'ROOT_URLCONF': "linkcheck.tests.urls",
        'SITE_DOMAIN': "localhost",
        'MIDDLEWARE': [
            'django.contrib.sessions.middleware.SessionMiddleware',
            'django.contrib.messages.middleware.MessageMiddleware',
            'django.middleware.common.CommonMiddleware',
            'django.middleware.csrf.CsrfViewMiddleware',
            'django.contrib.auth.middleware.AuthenticationMiddleware',
        ],
        'TEMPLATES': [{
            'BACKEND': 'django.template.backends.django.DjangoTemplates',
            'APP_DIRS': True,
            'OPTIONS': {
                'context_processors': [
                    'django.contrib.auth.context_processors.auth',
                    'django.contrib.messages.context_processors.messages',
                    'django.template.context_processors.static',
                    'django.template.context_processors.request',
                ],
            },
        }],
        'DEFAULT_AUTO_FIELD': 'django.db.models.AutoField',
        'SECRET_KEY': 'arandomstring',
        'LINKCHECK_CONNECTION_ATTEMPT_TIMEOUT': 1,
    }
    settings.configure(**test_settings)


def runtests(*test_args):
    """
    Run the given test labels with Django's DiscoverRunner and exit.

    The directory containing this script is put first on ``sys.path``. The
    process exits with status 1 if any test failed and 0 otherwise.
    """
    from django.test.runner import DiscoverRunner

    parent = dirname(abspath(__file__))
    sys.path.insert(0, parent)
    test_runner = DiscoverRunner(verbosity=1, interactive=True)
    failures = test_runner.run_tests(test_args)
    # Exit with a plain 0/1 status: POSIX keeps only the low 8 bits of an
    # exit status, so a raw count of e.g. 256 failures would read as success.
    sys.exit(bool(failures))


if __name__ == '__main__':
    django.setup()
    runtests(*sys.argv[1:])