├── .github ├── dependabot.yml └── workflows │ └── build-publish-docker.yml ├── .gitignore ├── .idea ├── .gitignore ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── reddit-anti-amp-bot.iml └── vcs.xml ├── Dockerfile ├── LICENSE ├── main.py ├── readme.md ├── requirements.txt └── utils.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /.github/workflows/build-publish-docker.yml: -------------------------------------------------------------------------------- 1 | name: Build and publish a Docker image on GHCR 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | docker: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Set up Docker Buildx 13 | uses: docker/setup-buildx-action@v1 14 | - name: Login to GitHub Container Registry 15 | uses: docker/login-action@v1 16 | with: 17 | registry: ghcr.io 18 | username: ${{ github.repository_owner }} 19 | password: ${{ secrets.GITHUB_TOKEN }} 20 | - name: Build and push 21 | id: docker_build 22 | uses: docker/build-push-action@v2 23 | with: 24 | push: true 25 | tags: ghcr.io/${{ github.repository_owner }}/reddit-no-google-amp-bot:latest 26 | - name: Image digest 27 | run: echo ${{ steps.docker_build.outputs.digest }} 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | praw.ini -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/reddit-anti-amp-bot.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | 3 | WORKDIR /usr/src/app 4 | 5 | COPY requirements.txt ./ 6 | RUN pip install --no-cache-dir -r requirements.txt 7 | 8 | COPY . . 9 | 10 | CMD [ "python", "./main.py" ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Laurin Neff 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import praw 2 | import utils 3 | import re 4 | import traceback 5 | 6 | comment_template_single = "Non-AMP Link: {links}\n\n" \ 7 | "I'm a bot. [Why?](https://np.reddit.com/user/NoGoogleAMPBot/comments/lbz2sg/) " \ 8 | "| [Code](https://github.com/laurinneff/no-google-amp-bot) " \ 9 | "| [Report issues](https://github.com/laurinneff/no-google-amp-bot/issues)" 10 | comment_template_multi = "Non-AMP Links:\n\n" \ 11 | "{links}\n\n" \ 12 | "I'm a bot. [Why?](https://np.reddit.com/user/NoGoogleAMPBot/comments/lbz2sg/) " \ 13 | "| [Code](https://github.com/laurinneff/no-google-amp-bot) " \ 14 | "| [Report issues](https://github.com/laurinneff/no-google-amp-bot/issues)" 15 | 16 | link_regex = r'\[([^\[\]\(\)]+)\]\((https?:\/\/[\w\d./?=#%+&-]+)\)' 17 | implicit_link_regex = r'(? 1 else "" 46 | if fixed_arr: 47 | out += '\n- '.join(fixed_arr) if len(fixed_arr) > 1 else fixed_arr[0] 48 | print(f'Comment by {comment.author} with ID {comment.id} (https://reddit.com{comment.permalink})') 49 | reply_body = comment_template_multi.format(links=out) if len(fixed_arr) > 1 else comment_template_single.format(links=out) 50 | print(reply_body) 51 | reply = comment.reply(reply_body) 52 | print(f'Reply: https://reddit.com{reply.permalink}') 53 | 54 | 55 | def process_link(link, implicit=False): 56 | if not implicit: 57 | text = link[0] 58 | url = link[1] 59 | else: 60 | text = link 61 | url = link 62 | if utils.is_amp(url): 63 | fixed = utils.amp_to_normal(url) 64 | # Sometimes, amp_to_normal returns Google redirects (https://www.google.com/url?q=...) 65 | if utils.is_google_redirect(fixed): 66 | fixed = utils.follow_google_redirect(fixed) 67 | if implicit: 68 | text = fixed 69 | return "[{text}]({fixed})".format(text=text, fixed=fixed) 70 | 71 | 72 | print("Anti AMP Bot is running!") 73 | for comment in subreddit.stream.comments(): 74 | try: 75 | process_comments(comment) 76 | except Exception as e: 77 | print('Error:', e, f'Comment: https://reddit.com{comment.permalink}') 78 | traceback.print_tb(e.__traceback__) 79 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # u/NoGoogleAMPBot 2 | A Reddit bot which provides normal links when people post Google AMP links 3 | 4 | ## Running in Docker 5 | ``` 6 | docker run --rm -t -v /path/to/praw.ini:/usr/src/app/praw.ini ghcr.io/laurinneff/reddit-no-google-amp-bot 7 | ``` -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests~=2.25.1 2 | tld~=0.12.2 3 | praw~=7.2.0 -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from urllib.parse import urlparse, parse_qs 2 | import requests 3 | from tld import get_tld 4 | 5 | 6 | def is_amp(url): 7 | """ 8 | Check if the given URL is an AMP URL 9 | :param url: The URL to check 10 | :type url: string 11 | :returns: Returns a boolean if it's an AMP URL 12 | :rtype: bool 13 | """ 14 | parsed = urlparse(url) 15 | tld = get_tld(parsed.hostname, as_object=True, fix_protocol=True, fail_silently=True) 16 | 17 | if tld and tld.domain == 'google' \ 18 | and parsed.path.startswith('/amp/'): 19 | return True 20 | return False 21 | 22 | 23 | def amp_to_normal(url): 24 | """ 25 | Check if the given URL is an AMP url. If it is, send a request to find the normal URL 26 | 27 | :param url: The URL to check 28 | :type url: string 29 | :returns: Returns the non-AMP version of the given URL if it's an AMP URL. Otherwise, it returns None 30 | :rtype: str or None 31 | """ 32 | if is_amp(url): 33 | r = requests.get(url) 34 | return r.url 35 | else: 36 | return None 37 | 38 | 39 | def is_google_redirect(url): 40 | """ 41 | Check if the given URL is a Google redirect (https://www.google.com/url?q=...) 42 | :param url: The URL to check 43 | :type url: string 44 | :returns: Returns a boolean if it's a redirect 45 | :rtype: bool 46 | """ 47 | parsed = urlparse(url) 48 | tld = get_tld(parsed.hostname, as_object=True, fix_protocol=True, fail_silently=True) 49 | 50 | if tld and tld.domain == 'google' \ 51 | and parsed.path.startswith('/url'): 52 | return True 53 | return False 54 | 55 | 56 | def follow_google_redirect(url): 57 | """ 58 | Check if the given URL is a Google redirect (https://www.google.com/url?q=...). If it is, extract the q query 59 | parameter to find the real link 60 | :param url: The URL to check 61 | :type url: string 62 | :returns: Returns the real link if it's a redirect. Otherwise, it returns None 63 | :rtype: str or None 64 | """ 65 | if is_google_redirect(url): 66 | parsed = parse_qs(urlparse(url).query) 67 | q = parsed.get('q') 68 | if isinstance(q, list): 69 | return q[0] 70 | return q 71 | return None 72 | --------------------------------------------------------------------------------