├── .github
├── dependabot.yml
└── workflows
│ └── build-publish-docker.yml
├── .gitignore
├── .idea
├── .gitignore
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
├── reddit-anti-amp-bot.iml
└── vcs.xml
├── Dockerfile
├── LICENSE
├── main.py
├── readme.md
├── requirements.txt
└── utils.py
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "pip" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "daily"
12 |
--------------------------------------------------------------------------------
/.github/workflows/build-publish-docker.yml:
--------------------------------------------------------------------------------
1 | name: Build and publish a Docker image on GHCR
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 |
8 | jobs:
9 | docker:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - name: Set up Docker Buildx
13 | uses: docker/setup-buildx-action@v1
14 | - name: Login to GitHub Container Registry
15 | uses: docker/login-action@v1
16 | with:
17 | registry: ghcr.io
18 | username: ${{ github.repository_owner }}
19 | password: ${{ secrets.GITHUB_TOKEN }}
20 | - name: Build and push
21 | id: docker_build
22 | uses: docker/build-push-action@v2
23 | with:
24 | push: true
25 | tags: ghcr.io/${{ github.repository_owner }}/reddit-no-google-amp-bot:latest
26 | - name: Image digest
27 | run: echo ${{ steps.docker_build.outputs.digest }}
28 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | venv/
2 | praw.ini
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/reddit-anti-amp-bot.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3
2 |
3 | WORKDIR /usr/src/app
4 |
5 | COPY requirements.txt ./
6 | RUN pip install --no-cache-dir -r requirements.txt
7 |
8 | COPY . .
9 |
10 | CMD [ "python", "./main.py" ]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Laurin Neff
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import praw
2 | import utils
3 | import re
4 | import traceback
5 |
# Reply body used when at most one link was fixed; {links} receives that link.
comment_template_single = (
    "Non-AMP Link: {links}\n\n"
    "I'm a bot. [Why?](https://np.reddit.com/user/NoGoogleAMPBot/comments/lbz2sg/) "
    "| [Code](https://github.com/laurinneff/no-google-amp-bot) "
    "| [Report issues](https://github.com/laurinneff/no-google-amp-bot/issues)"
)

# Reply body used when more than one link was fixed; {links} receives the
# already-joined list of links.
comment_template_multi = (
    "Non-AMP Links:\n\n"
    "{links}\n\n"
    "I'm a bot. [Why?](https://np.reddit.com/user/NoGoogleAMPBot/comments/lbz2sg/) "
    "| [Code](https://github.com/laurinneff/no-google-amp-bot) "
    "| [Report issues](https://github.com/laurinneff/no-google-amp-bot/issues)"
)

# Markdown link "[text](http(s)://url)": group 1 is the text, group 2 the URL.
link_regex = r'\[([^\[\]\(\)]+)\]\((https?:\/\/[\w\d./?=#%+&-]+)\)'
17 | implicit_link_regex = r'(? 1 else ""
46 | if fixed_arr:
47 | out += '\n- '.join(fixed_arr) if len(fixed_arr) > 1 else fixed_arr[0]
48 | print(f'Comment by {comment.author} with ID {comment.id} (https://reddit.com{comment.permalink})')
49 | reply_body = comment_template_multi.format(links=out) if len(fixed_arr) > 1 else comment_template_single.format(links=out)
50 | print(reply_body)
51 | reply = comment.reply(reply_body)
52 | print(f'Reply: https://reddit.com{reply.permalink}')
53 |
54 |
def process_link(link, implicit=False):
    """
    Build a Markdown link to the non-AMP version of a link

    :param link: For a Markdown link, an indexable with the link text at index 0 and the URL at index 1.
        For an implicit link, the bare URL itself
    :type link: sequence or str
    :param implicit: Whether ``link`` is a bare URL instead of a (text, URL) pair
    :type implicit: bool
    :returns: ``[text](url)`` pointing at the resolved URL. Returns None if the URL is not an AMP URL
        or if no target URL could be determined
    :rtype: str or None
    """
    if implicit:
        text = url = link
    else:
        text, url = link[0], link[1]

    if not utils.is_amp(url):
        return None

    fixed = utils.amp_to_normal(url)
    # Sometimes, amp_to_normal returns Google redirects (https://www.google.com/url?q=...)
    if utils.is_google_redirect(fixed):
        fixed = utils.follow_google_redirect(fixed)
    if not fixed:
        # follow_google_redirect yields None when the redirect has no "q" parameter;
        # returning None avoids emitting a broken "[text](None)" link.
        return None

    if implicit:
        # A bare URL has no separate label, so show the fixed URL as the text.
        text = fixed
    return "[{text}]({fixed})".format(text=text, fixed=fixed)
70 |
71 |
# Announce start-up, then handle every comment yielded by the subreddit stream.
print("Anti AMP Bot is running!")
for comment in subreddit.stream.comments():
    try:
        process_comments(comment)
    except Exception as err:
        # Report the failure and move on so one bad comment does not stop the bot.
        print(f'Error: {err} Comment: https://reddit.com{comment.permalink}')
        traceback.print_tb(err.__traceback__)
79 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # u/NoGoogleAMPBot
2 | A Reddit bot which provides normal links when people post Google AMP links
3 |
4 | ## Running in Docker
5 | ```
6 | docker run --rm -t -v /path/to/praw.ini:/usr/src/app/praw.ini ghcr.io/laurinneff/reddit-no-google-amp-bot
7 | ```
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests~=2.25.1
2 | tld~=0.12.2
3 | praw~=7.2.0
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | from urllib.parse import urlparse, parse_qs
2 | import requests
3 | from tld import get_tld
4 |
5 |
def is_amp(url):
    """
    Tell whether the given URL is a Google AMP URL

    A URL qualifies when ``get_tld`` reports ``google`` as the domain of its hostname
    and its path starts with ``/amp/``.

    :param url: The URL to check
    :type url: string
    :returns: True if the URL is an AMP URL, False otherwise
    :rtype: bool
    """
    parts = urlparse(url)
    # fail_silently makes get_tld return a falsy value instead of raising on unknown hosts.
    host_info = get_tld(parts.hostname, as_object=True, fix_protocol=True, fail_silently=True)
    on_google = bool(host_info) and host_info.domain == 'google'
    return on_google and parts.path.startswith('/amp/')
21 |
22 |
def amp_to_normal(url, timeout=10):
    """
    Check if the given URL is an AMP URL. If it is, send a request to find the normal URL

    :param url: The URL to check
    :type url: string
    :param timeout: Seconds to wait for the server, passed straight to ``requests.get``
    :type timeout: float or tuple
    :returns: The URL of the final response if the given URL is an AMP URL. Otherwise, it returns None
    :rtype: str or None
    :raises requests.exceptions.RequestException: If the request fails or times out
    """
    if not is_amp(url):
        return None
    # requests waits indefinitely without an explicit timeout, so a single
    # unresponsive server would otherwise block the caller forever.
    response = requests.get(url, timeout=timeout)
    # After redirects have been followed, response.url is the final location.
    return response.url
37 |
38 |
def is_google_redirect(url):
    """
    Tell whether the given URL is a Google redirect (https://www.google.com/url?q=...)

    A URL qualifies when ``get_tld`` reports ``google`` as the domain of its hostname
    and its path starts with ``/url``.

    :param url: The URL to check
    :type url: string
    :returns: True if the URL is a redirect, False otherwise
    :rtype: bool
    """
    parts = urlparse(url)
    # fail_silently makes get_tld return a falsy value instead of raising on unknown hosts.
    host_info = get_tld(parts.hostname, as_object=True, fix_protocol=True, fail_silently=True)
    if not host_info:
        return False
    if host_info.domain != 'google':
        return False
    return parts.path.startswith('/url')
54 |
55 |
def follow_google_redirect(url):
    """
    Extract the real link from a Google redirect (https://www.google.com/url?q=...)

    The target is read from the ``q`` query parameter; no request is sent.

    :param url: The URL to check
    :type url: string
    :returns: The first ``q`` value if the URL is a redirect, or None if it is not a redirect
        or has no ``q`` parameter
    :rtype: str or None
    """
    if not is_google_redirect(url):
        return None
    query_params = parse_qs(urlparse(url).query)
    targets = query_params.get('q')
    # parse_qs maps each key to a list of values; only the first one is used.
    return targets[0] if isinstance(targets, list) else targets
72 |
--------------------------------------------------------------------------------