├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── article.html
├── people_also_ask
│   ├── __init__.py
│   ├── data_collector.py
│   ├── exceptions.py
│   ├── google.py
│   ├── parser.py
│   ├── plugins
│   │   ├── __init__.py
│   │   └── article_generator
│   │       ├── __init__.py
│   │       ├── article_generators.py
│   │       └── templates
│   │           └── base.html
│   ├── request
│   │   ├── __init__.py
│   │   └── session.py
│   ├── tests
│   │   ├── fixtures
│   │   │   ├── cheetah_vs_lion.html
│   │   │   ├── gangnam_style.html
│   │   │   ├── how_to_make_a_cold_brew_coffee.html
│   │   │   ├── the_10_highest-grossing_movies_of_all_time.html
│   │   │   ├── what_are_3_basic_programming_languages.html
│   │   │   ├── what_time_is_it.html
│   │   │   ├── why_was_ho_chi_minh_a_hero.html
│   │   │   └── world_university_rankings_2019.html
│   │   ├── test_google.py
│   │   ├── test_parser.py
│   │   ├── where_is_france
│   │   └── who_is_ho_chi_minh?
│   └── tools.py
└── setup.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*__pycache__*
*.swp
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2020 LE Van Tuan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include people_also_ask/plugins/article_generator/templates/*.html
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# People-also-ask API

[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black)
[![PyPI](https://img.shields.io/pypi/v/people_also_ask.svg)](https://pypi.org/project/people-also-ask)
[![versions](https://img.shields.io/pypi/pyversions/people_also_ask.svg)](https://github.com/lagranges/people_also_ask)

People-also-ask provides APIs to easily crawl the data of Google Featured Snippets.

## ⚠ Warning

Search engines like Google do not allow any sort of automated access to their service, but from a legal point of view there is no known court case or broken law. Google does not take legal action against scraping, likely for self-protective reasons. The API has been configured not to abuse the Google search engine.
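The throttle lives in ``people_also_ask/request/session.py`` and can be tuned through environment variables (defaults: 3 retries, 25 requests per 60 seconds). A minimal sketch, with the default values shown:

```python
# set these before importing people_also_ask; the values shown are the defaults
import os

os.environ["RELATED_QUESTION_NB_TIMES_RETRY"] = "3"
os.environ["RELATED_QUESTION_NB_REQUESTS_LIMIT"] = "25"
os.environ["RELATED_QUESTION_NB_REQUESTS_DURATION_LIMIT"] = "60"  # seconds

import people_also_ask  # noqa: E402
```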
## Installation

```
pip install people_also_ask
```

## Usage

The goal of ``people_also_ask`` is to provide a simple and easy-to-use API for retrieving information from Google Featured Snippets.

### Importing

```python
import people_also_ask
```

### How to get related questions

```python
people_also_ask.get_related_questions("coffee")

['Is coffee good for your health?',
 'Why is coffee bad for you?',
 'Who invented coffee?',
 'What do u know about coffee?']
```

### How to get more questions

```python
people_also_ask.get_related_questions("coffee", 5)

['How did coffee originate?',
 'Is coffee good for your health?',
 'Who brought coffee America?',
 'Who invented coffee?',
 'Why is coffee bad for you?',
 'Why is drinking coffee bad for you?']
```

### Generate unlimited questions

```python
for question in people_also_ask.generate_related_questions("coffee"):
    print(question)

Why is coffee bad for you?
Who invented coffee?
Is coffee good for your health?
Who brought coffee America?
How did coffee originate?
Why is drinking coffee bad for you?
....
```

### Get answer for a question

```python
people_also_ask.get_answer("Why is coffee bad for you?")

{'has_answer': True,
 'question': 'Why is coffee bad for you?',
 'related_questions': ['Why is drinking coffee bad for you?',
  'Is coffee good for your health?',
  'Is coffee toxic to your body?',
  'What does coffee do to your body?'],
 'response': 'Consuming too much caffeine can lead to jitteriness, anxiety, heart palpitations and even exacerbated panic attacks (34). If you are sensitive to caffeine and tend to become overstimulated, you may want to avoid coffee altogether. Another unwanted side effect is that it can disrupt sleep ( 35 ).Aug 30, 2018',
 'heading': 'Consuming too much caffeine can lead to jitteriness, anxiety, heart palpitations and even exacerbated panic attacks (34). If you are sensitive to caffeine and tend to become overstimulated, you may want to avoid coffee altogether. Another unwanted side effect is that it can disrupt sleep ( 35 ).Aug 30, 2018',
 'title': 'Coffee — Good or Bad? - Healthline',
 'link': 'https://www.healthline.com/nutrition/coffee-good-or-bad#:~:text=Consuming%20too%20much%20caffeine%20can,can%20disrupt%20sleep%20(%2035%20).',
 'displayed_link': 'www.healthline.com › nutrition › coffee-good-or-bad',
 'snippet_str': 'Consuming too much caffeine can lead to jitteriness, anxiety, heart palpitations and even exacerbated panic attacks (34). If you are sensitive to caffeine and tend to become overstimulated, you may want to avoid coffee altogether. Another unwanted side effect is that it can disrupt sleep ( 35 ).Aug 30, 2018\nwww.healthline.com › nutrition › coffee-good-or-bad\nhttps://www.healthline.com/nutrition/coffee-good-or-bad#:~:text=Consuming%20too%20much%20caffeine%20can,can%20disrupt%20sleep%20(%2035%20).\nCoffee — Good or Bad? - Healthline',
 'snippet_data': None,
 'date': None,
 'snippet_type': 'Definition Featured Snippet',
 'snippet_str_body': '',
 'raw_text': 'Featured snippet from the web\nConsuming too much caffeine can lead to jitteriness, anxiety, heart palpitations and even exacerbated panic attacks (34). If \nyou\n are sensitive to caffeine and tend to become overstimulated, \n may want to avoid \ncoffee\n altogether. Another unwanted side effect is that it can disrupt sleep ( 35 ).\nAug 30, 2018\nCoffee — Good or Bad? - Healthline\nwww.healthline.com\n › nutrition › coffee-good-or-bad'}
```

### Get Simple Answer for a question

```python
people_also_ask.get_simple_answer("Why is coffee bad for you?")

'Consuming too much caffeine can lead to jitteriness, anxiety, heart palpitations and even exacerbated panic attacks (34). If you are sensitive to caffeine and tend to become overstimulated, you may want to avoid coffee altogether. Another unwanted side effect is that it can disrupt sleep ( 35 ).Aug 30, 2018'
```

### Generate questions and answers related to a subject

``generate_answer`` returns a generator of answer dictionaries, so iterate over it:

```python
for answer in people_also_ask.generate_answer("coffee"):
    print(answer["question"])
```

### Using proxies

```python
import people_also_ask.request.session

people_also_ask.request.session.set_proxies(
    (
        "http://1234.5.6.7:8080",
        "http://1237.5.6.7:8080",
    )
)
```

### Using a Google domain other than the global one

The default domain is ".com", but it does not always return good PAA results for keywords in languages other than English.

Example of usage:

```python
import people_also_ask

keyword = "kawa"
paa = people_also_ask.get_related_questions(text=keyword, max_nb_questions=10, domain="pl")
print(paa)
```

Output:

```python
['Na co dobra jest kawa?', 'Jakie są wady picia kawy?', 'Kiedy kawa jest zdrowa?', 'Jakie są minusy picia kawy?', 'Na co wpływa kawa?', 'Na jakie choroby pomaga kawa?', 'Jaka kawa jest najlepsza?', 'Jakie skutki uboczne ma kawa?', 'Jaka kawa ma najlepszy smak?', 'Jaka kawa smakuje dobrze?', 'Na co pomaga picie kawy?']
```
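### Generating a whole article

The package also ships an article-generator plugin (``people_also_ask/plugins/article_generator``) that collects up to ten related questions with their answers and renders them into an ``article.html`` file in the current directory; the ``article.html`` at the repository root is sample output. A minimal sketch of its use:

```python
from people_also_ask.plugins.article_generator import generate_article

# collects up to 10 related questions and their answers for the title,
# then renders templates/base.html to ./article.html
generate_article("coffee")
```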

--------------------------------------------------------------------------------
/article.html:
--------------------------------------------------------------------------------
<html>
<head>
<title>The role of ORM (Online Review Management) in digital marketing for your business</title>
</head>
<body>
<h1>The role of ORM (Online Review Management) in digital marketing for your business</h1>
Online reputation management (ORM) is about monitoring and improving how your business is viewed online. It means looking and analyzing what a potential customer, reporter, or partner will discover about your brand, your people, or your product/service when they perform a Google search.
<!-- table-of-contents markup lost in extraction -->
<div>
<h2 id="What_is_the_role_of_ORM_in_digital_marketing?">What is the role of ORM in digital marketing?</h2>
<div>
Online reputation management (ORM) is about monitoring and improving how your business is viewed online. It means looking and analyzing what a potential customer, reporter, or partner will discover about your brand, your people, or your product/service when they perform a Google search.
</div>
</div>
<div>
<h2 id="What_is_the_role_of_ORM?">What is the role of ORM?</h2>
<div>
Online reputation management (ORM) is about monitoring and improving how your business is viewed online. It means looking and analyzing what a potential customer, reporter, or partner will discover about your brand, your people, or your product/service when they perform a Google search.
</div>
</div>
<div>
<h2 id="Is_ORM_part_of_digital_marketing?">Is ORM part of digital marketing?</h2>
<div>
ORM in digital marketing assists businesses in establishing and maintaining a positive brand image on the internet.
</div>
</div>
<div>
<h2 id="What_is_the_role_of_online_reputation_management?">What is the role of online reputation management?</h2>
<div>
Online reputation management includes having accurate business listings and relevant advertisements, staying on top of engagement with consumers on your company's marketplace and social channels, understanding their pain points and responding to reviews and surveys.Feb 12, 2021
</div>
</div>
</body>
</html>
--------------------------------------------------------------------------------
/people_also_ask/__init__.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python3
from people_also_ask.google import (
    get_answer,
    generate_answer,
    get_simple_answer,
    get_related_questions,
    generate_related_questions,
)
--------------------------------------------------------------------------------
/people_also_ask/data_collector.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python3
import time
import json
import argparse
import traceback
from collections import OrderedDict
from people_also_ask.google import get_simple_answer
from people_also_ask.exceptions import (
    InvalidQuestionInputFileError,
    FailedToWriteOutputFileError,
)


def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument("--input-file", "-i", help="input file, a txt file containing a list of questions", required=True)
    parser.add_argument("--output-file", "-o", help="output file, a .json file containing a dictionary of question: answer", required=True)

    return parser.parse_args()


def read_questions(input_file):
    try:
        with open(input_file, "r") as fd:
            text = fd.read()
            return OrderedDict.fromkeys(text.strip().split("\n")).keys()
    except Exception:
        message = traceback.format_exc()
        raise InvalidQuestionInputFileError(input_file, message)


def write_question_answers(output_file, data):
    try:
        with open(output_file, "w") as fd:
            fd.write(json.dumps(data))
    except Exception:
        message = traceback.format_exc()
        raise FailedToWriteOutputFileError(output_file, message)


def collect_one_question(question):
    try:
        answer = get_simple_answer(question)
        print(f"{question}: {answer}")
    except Exception:
        traceback.print_exc()
        answer = ""
    return {question: answer}


def collect_data(input_file, output_file):
    questions = read_questions(input_file)
    data = {}

    counter = 0

    start_time = time.time()
    for question in questions:
        counter += 1
        print(f"COLLECTING {counter}/{len(questions)}")
        data.update(collect_one_question(question))
    collect_time = (time.time() - start_time) / 60  # minutes

    print(f"Collected answers for {len(questions)} questions in {collect_time} minutes")
    write_question_answers(output_file, data)


def main():
    args = parse_args()
    collect_data(args.input_file, args.output_file)


if __name__ == "__main__":
    main()
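The collector above can be driven from the command line or as a library call. A minimal sketch (``questions.txt`` and ``answers.json`` are hypothetical paths):

```python
# equivalent CLI: python -m people_also_ask.data_collector -i questions.txt -o answers.json
from people_also_ask.data_collector import collect_data

collect_data("questions.txt", "answers.json")
```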
--------------------------------------------------------------------------------
/people_also_ask/exceptions.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python3
"""
Global related-questions exception and warning classes.
"""


GITHUB_LINK = "https://github.com/lagranges/people_also_ask"


class RelatedQuestionError(Exception):
    """Base Related-Questions exception class."""

    def __init__(self, error):
        self.error = error

    def __repr__(self):
        return (
            f'An unknown error occurred: {self.error}.'
            f' Please report it on {GITHUB_LINK}.'
        )


class FeaturedSnippetParserError(RelatedQuestionError):
    """
    Exception raised when the answer cannot be extracted from
    the search result page
    """

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        return (
            f"Cannot parse the result page of '{self.text}'."
            f" This may be due to a format change of the result page."
            f' Please report it on {GITHUB_LINK}.'
        )


class RelatedQuestionParserError(RelatedQuestionError):
    """
    Exception raised when the related questions cannot be extracted
    from the search result page
    """

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        return (
            f"Cannot parse the result page of '{self.text}'."
            f" This may be due to a format change of the result page."
            f' Please report it on {GITHUB_LINK}.'
        )


class GoogleSearchRequestFailedError(RelatedQuestionError):
    """Exception raised when a Google search request fails"""

    def __init__(self, url, keyword, message):
        self.url = url
        self.keyword = keyword
        self.message = message

    def __repr__(self):
        return (
            f"Failed to request {self.url}/{self.keyword}"
            f"\n{self.message}"
        )


class InvalidQuestionInputFileError(RelatedQuestionError):
    """Exception raised when the user passes an invalid question
    input file to the data collector"""

    def __init__(self, input_file, message):
        self.input_file = input_file
        self.message = message

    def __repr__(self):
        return (
            f"Invalid input file: {self.input_file}\n{self.message}"
        )


class FailedToWriteOutputFileError(RelatedQuestionError):
    """Exception raised when the program fails to write data to
    the output file of the data collector"""

    def __init__(self, output_file, message):
        self.output_file = output_file
        self.message = message

    def __repr__(self):
        return (
            f"Cannot write to {self.output_file}\n{self.message}"
        )


class RequestError(RelatedQuestionError):
    """Exception raised when an HTTP request fails"""

    def __init__(self, url, params, proxies, message):
        self.url = url
        self.params = params
        self.proxies = proxies
        self.message = message

    def __repr__(self):
        return (
            f"Failed to request {self.url}"
            f"\nParams = {self.params}"
            f"\nProxy = {self.proxies}"
            f"\nResp = {self.message}"
        )
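Every exception above derives from ``RelatedQuestionError``, so callers can catch the whole family with one handler. A minimal sketch:

```python
import people_also_ask
from people_also_ask.exceptions import RelatedQuestionError

try:
    answer = people_also_ask.get_answer("Why is coffee bad for you?")
except RelatedQuestionError as error:
    print(repr(error))  # each subclass formats its own report message
```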
--------------------------------------------------------------------------------
/people_also_ask/google.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python3
import sys
from collections import OrderedDict
from bs4 import BeautifulSoup
from typing import List, Dict, Any, Optional, Generator

from people_also_ask.parser import (
    extract_related_questions,
    get_featured_snippet_parser,
)
from people_also_ask.exceptions import (
    RelatedQuestionParserError,
    FeaturedSnippetParserError
)
from people_also_ask.request import get
from people_also_ask.request.session import user_agent


URL = "https://www.google.com/search"


def search(keyword: str, url: str = URL) -> Optional[BeautifulSoup]:
    """return html parser of google search result"""
    browser = user_agent['browser']
    params = {"client": browser,
              "q": keyword,
              "sourceid": browser,
              "ie": "UTF-8",
              "oe": "UTF-8"}

    response = get(url, params=params)

    return BeautifulSoup(response.text, "html.parser")


def _get_related_questions(text: str, domain: str = "com") -> List[str]:
    """
    return a list of questions related to text.
    These questions come from the search result page of text

    :param str text: text to search
    :param str domain: Google domain to use, to improve searches in a native language
    """

    url = f"https://www.google.{domain}/search"
    document = search(text, url=url)
    if not document:
        return []
    try:
        return extract_related_questions(document)
    except Exception:
        raise RelatedQuestionParserError(text)


def generate_related_questions(text: str, domain: str = "com") -> Generator[str, None, None]:
    """
    generate the questions related to text;
    these questions are found recursively

    :param str text: text to search
    :param str domain: Google domain to use, to improve searches in a native language
    """
    questions = set(_get_related_questions(text, domain=domain))
    searched_text = {text}
    while questions:
        text = questions.pop()
        yield text
        searched_text.add(text)
        questions |= set(_get_related_questions(text, domain=domain))
        questions -= searched_text


def get_related_questions(text: str, max_nb_questions: Optional[int] = None, domain: str = "com"):
    """
    return a number of questions related to text.
    These questions are found recursively.

    :param str text: text to search
    :param int max_nb_questions: maximum number of questions to return
    :param str domain: Google domain to use, to improve searches in a native language
    """
    if max_nb_questions is None:
        return _get_related_questions(text, domain=domain)
    questions = []
    for question in generate_related_questions(text, domain=domain):
        if len(set(questions)) >= max_nb_questions:
            break
        questions.append(question)

    # deduplicate while preserving generation order
    return list(OrderedDict.fromkeys(questions))


def get_answer(question: str, domain: str = "com") -> Dict[str, Any]:
    """
    return a dictionary as the answer for a question.

    :param str question: asked question
    :param str domain: Google domain to use, to improve searches in a native language
    """

    url = f"https://www.google.{domain}/search"
    document = search(question, url=url)
    related_questions = extract_related_questions(document)
    featured_snippet = get_featured_snippet_parser(
        question, document)
    if not featured_snippet:
        res = dict(
            has_answer=False,
            question=question,
            related_questions=related_questions,
        )
    else:
        res = dict(
            has_answer=True,
            question=question,
            related_questions=related_questions,
        )
        try:
            res.update(featured_snippet.to_dict())
        except Exception:
            raise FeaturedSnippetParserError(question)
    return res


def generate_answer(text: str, domain: str = "com", enhance_search: bool = True) -> Generator[dict, None, None]:
    """
    generate answers to questions related to text

    :param str text: text to search
    :param str domain: Google domain to use, to improve searches in a native language
    :param bool enhance_search: retry the first search (up to 4 times)
        until an answer with a link is found
    """
    if enhance_search:
        tries = 0
        answer = {"link": False}

        while not answer.get("link") and tries < 4:
            answer = get_answer(text, domain)
            tries += 1
    else:
        answer = get_answer(text, domain)

    questions = set(answer["related_questions"])
    searched_text = {text}
    if answer["has_answer"]:
        yield answer
    while questions:
        text = questions.pop()
        answer = get_answer(text, domain)
        if answer["has_answer"]:
            yield answer
        searched_text.add(text)
        questions |= set(get_answer(text, domain)["related_questions"])
        questions -= searched_text


def get_simple_answer(question: str, depth: bool = False, domain: str = "com") -> str:
    """
    return a text as a summary answer for the question

    :param str question: asked question
    :param bool depth: return the answer to the first related question
        if no answer is found for the question
    :param str domain: Google domain to use, to improve searches in a native language
    """

    url = f"https://www.google.{domain}/search"
    document = search(question, url=url)
    featured_snippet = get_featured_snippet_parser(
        question, document)
    if featured_snippet:
        return featured_snippet.response
    if depth:
        related_questions = get_related_questions(question)
        if not related_questions:
            return ""
        return get_simple_answer(related_questions[0], domain=domain)
    return ""


if __name__ == "__main__":
    from pprint import pprint as print
    print(get_answer(sys.argv[1]))
--------------------------------------------------------------------------------
/people_also_ask/parser.py:
--------------------------------------------------------------------------------
1 | #! 
/usr/bin/env python3 2 | from bs4.element import Tag 3 | from bs4 import BeautifulSoup 4 | from operator import attrgetter 5 | from typing import List, Optional 6 | from people_also_ask.tools import itemize, tabulate, remove_redundant 7 | 8 | 9 | FEATURED_SNIPPET_ATTRIBUTES = [ 10 | "response", "heading", "title", "link", "displayed_link", 11 | "snippet_str", "snippet_data", "date", "snippet_data", 12 | "snippet_type", "snippet_str_body", "raw_text" 13 | ] 14 | 15 | 16 | def extract_related_questions(document: BeautifulSoup) -> List[str]: 17 | related_questions = document.find_all("div", class_="related-question-pair") 18 | if not related_questions: 19 | return [] 20 | extract_question = lambda a: a.text.split('Search for:')[0] 21 | return list(map(extract_question, related_questions)) 22 | 23 | 24 | def is_ol_but_not_a_menu(tag): 25 | return ( 26 | tag.name == "ol" 27 | and ( 28 | not tag.has_attr("role") 29 | or (tag.has_attr("role") and tag["role"] != "menu") 30 | ) 31 | ) 32 | 33 | 34 | def get_tag_heading(tag): 35 | return ( 36 | tag.find("div", {"role": "heading", "aria-level": "3"}) 37 | or tag.find("div", {"role": "heading"}) 38 | ) 39 | 40 | 41 | def has_youtube_link(tag): 42 | youtube_links = tag.findAll( 43 | lambda x: x.name == "a" and "youtube" in x.get("href", "") 44 | ) 45 | return bool(youtube_links) 46 | 47 | 48 | def get_raw_text(tag): 49 | return "\n".join(remove_redundant(tag.strings)) 50 | 51 | 52 | def get_span_text(tag): 53 | return "\n".join( 54 | remove_redundant( 55 | [e.text for e in tag.findAll("span") if e.text] 56 | ) 57 | ) 58 | 59 | 60 | class FeaturedSnippetParser(object): 61 | 62 | def __init__(self, text: str, tag: Tag): 63 | self.text = text 64 | self.tag = tag 65 | 66 | def __getattr__(self, attr): 67 | if attr in FEATURED_SNIPPET_ATTRIBUTES: 68 | return None 69 | raise AttributeError(f'{self.__class__.__name__}.{attr} is invalid.') 70 | 71 | @property 72 | def raw_text(self): 73 | return get_raw_text(self.tag) 74 | 75 | def to_dict(self): 76 | return { 77 | attr: getattr(self, attr) for attr in FEATURED_SNIPPET_ATTRIBUTES 78 | } 79 | 80 | 81 | class SimpleFeaturedSnippetParser(FeaturedSnippetParser): 82 | 83 | @classmethod 84 | def get_instance(self, text, tag): 85 | if tag.table is not None: 86 | return TableFeaturedSnippetParser(text, tag) 87 | if tag.findAll(is_ol_but_not_a_menu): 88 | return OrderedFeaturedSnippetParser(text, tag) 89 | if tag.ul is not None: 90 | return UnorderedFeaturedSnippetParser(text, tag) 91 | if get_tag_heading(tag): 92 | return DefinitionFeaturedSnippetParser(text, tag) 93 | if has_youtube_link(tag): 94 | return YoutubeFeaturedSnippetParser(text, tag) 95 | 96 | @property 97 | def tag_link(self): 98 | if hasattr(self, "_tag_link"): 99 | return self._tag_link 100 | self._tag_link = self.tag.find( 101 | lambda tag: ( 102 | tag.name == "a" 103 | and tag.has_attr("href") 104 | and tag["href"].startswith("http") 105 | and (tag.h3 or tag.h2) is not None 106 | ) 107 | ) 108 | return self._tag_link 109 | 110 | @property 111 | def link(self): 112 | return self.tag_link["href"] if self.tag_link else None 113 | 114 | @property 115 | def displayed_link(self): 116 | return self.tag.cite.text if self.tag.cite else None 117 | 118 | @property 119 | def title(self): 120 | if self.tag_link is None: 121 | return None 122 | tag_title = self.tag_link.h3 or self.tag_link.h2 123 | return tag_title.text 124 | 125 | @property 126 | def heading(self): 127 | tag_heading = get_tag_heading(self.tag) 128 | return tag_heading.text 129 | 130 | @property 
131 | def snippet_str(self): 132 | lines = [] 133 | for field in ( 134 | "heading", "snippet_str_body", 135 | "displayed_link", "link", "title" 136 | ): 137 | if getattr(self, field): 138 | lines.append(getattr(self, field)) 139 | return "\n".join(lines) 140 | 141 | @property 142 | def date(self): 143 | return None 144 | 145 | @property 146 | def snippet_data(self): 147 | return None 148 | 149 | @property 150 | def snippet_type(self): 151 | return "Unknown Featured Snippet" 152 | 153 | @property 154 | def snippet_str_body(self): 155 | return "" 156 | 157 | 158 | class TableFeaturedSnippetParser(SimpleFeaturedSnippetParser): 159 | """Example: world university rankings 2019""" 160 | 161 | @property 162 | def snippet_type(self): 163 | return "Table Featured Snippet" 164 | 165 | @property 166 | def snippet_str_body(self): 167 | header = self.snippet_data["columns"] 168 | table = self.snippet_data["values"] 169 | return tabulate(header=header, table=table) 170 | 171 | @property 172 | def response(self): 173 | return self.snippet_str_body 174 | 175 | @property 176 | def snippet_data(self): 177 | table_tag = self.tag.find("table") 178 | tr_tags = table_tag.findAll("tr") 179 | if tr_tags[0].find("th"): 180 | columns = [ 181 | th_tag.text for th_tag in tr_tags[0].findAll("th") 182 | ] 183 | body_table_tags = tr_tags[1:] 184 | else: 185 | columns = None 186 | body_table_tags = tr_tags 187 | values = [ 188 | [td_tag.text for td_tag in tr_tag.findAll("td")] 189 | for tr_tag in body_table_tags 190 | ] 191 | if columns is None: 192 | columns = list(range(len(values[0]))) 193 | return { 194 | "columns": columns, 195 | "values": values 196 | } 197 | 198 | 199 | class OrderedFeaturedSnippetParser(SimpleFeaturedSnippetParser): 200 | """Example: top grossing movies""" 201 | 202 | @property 203 | def snippet_type(self): 204 | return "Ordered Featured Snippet" 205 | 206 | @property 207 | def response(self): 208 | return self.snippet_str_body 209 | 210 | @property 211 | def snippet_str_body(self): 212 | return "\n".join(itemize(self.snippet_data)) 213 | 214 | @property 215 | def snippet_data(self): 216 | ol_tags = self.tag.find("ol") 217 | li_tags = ol_tags.findAll("li") 218 | return [tag.text for tag in li_tags] 219 | 220 | 221 | class UnorderedFeaturedSnippetParser(SimpleFeaturedSnippetParser): 222 | """ What are 3 basic programming languages? 
""" 223 | 224 | @property 225 | def snippet_type(self): 226 | return "Unordered Featured Snippet" 227 | 228 | @property 229 | def snippet_str_body(self): 230 | return "\n".join(itemize(self.snippet_data)) 231 | 232 | @property 233 | def response(self): 234 | return self.snippet_str_body 235 | 236 | @property 237 | def snippet_data(self): 238 | ul_tag = self.tag.find("ul") 239 | li_tags = ul_tag.findAll("li") 240 | return [tag.text for tag in li_tags] 241 | 242 | 243 | class DefinitionFeaturedSnippetParser(SimpleFeaturedSnippetParser): 244 | """Why was ho chi minh a hero""" 245 | 246 | @property 247 | def snippet_type(self): 248 | return "Definition Featured Snippet" 249 | 250 | @property 251 | def response(self): 252 | return self.heading 253 | 254 | 255 | class YoutubeFeaturedSnippetParser(SimpleFeaturedSnippetParser): 256 | """Ex: cheetah vs lion""" 257 | 258 | @property 259 | def snippet_type(self): 260 | return "Youtube Featured Snippet" 261 | 262 | @property 263 | def heading(self): 264 | return "" 265 | 266 | @property 267 | def response(self): 268 | return self.link 269 | 270 | 271 | class MultipleCardsFeaturedSnippetTag(FeaturedSnippetParser): 272 | """How to make a cold brew coffee""" 273 | 274 | @property 275 | def heading(self): 276 | tag_heading = ( 277 | self.tag.find("h3", {"role": "heading"}) 278 | or self.tag.find("h2", {"role": "heading"}) 279 | ) 280 | return tag_heading.text 281 | 282 | @property 283 | def snippet_type(self): 284 | return "Multiple Cards Featured Snippet Tag" 285 | 286 | def parse_card(self, tag_card): 287 | return { 288 | "heading": tag_card.find("div", {"role": "heading"}).text, 289 | "title": tag_card.cite.text, 290 | "link": tag_card.find('a', attrs={'data-jsarwt': True})['href'], 291 | "raw_text": get_raw_text(tag_card), 292 | } 293 | 294 | def str_card(self, card_data): 295 | lines = [card_data["raw_text"]] 296 | lines.append(f"Link: {card_data['link']}") 297 | return "\n".join(lines) 298 | 299 | @property 300 | def snippet_str(self): 301 | if not self.snippet_data: 302 | return "" 303 | return "\n-------------\n".join(map(self.str_card, self.snippet_data)) 304 | 305 | @property 306 | def snippet_data(self): 307 | return list(map(self.parse_card, self.tag.findAll("g-inner-card"))) 308 | 309 | @property 310 | def response(self): 311 | return self.snippet_str 312 | 313 | 314 | class SingleCardFeaturedSnippetParser(FeaturedSnippetParser): 315 | """What time is it""" 316 | 317 | @property 318 | def snippet_type(self): 319 | return "Single Card FeaturedSnippet" 320 | 321 | @property 322 | def heading(self): 323 | tag_heading = get_tag_heading(self.tag) 324 | return get_raw_text(tag_heading) 325 | 326 | @property 327 | def response(self): 328 | heading = self.heading 329 | if heading: 330 | return heading 331 | return self.raw_text 332 | 333 | @property 334 | def raw_text(self): 335 | return get_span_text(self.tag) 336 | 337 | 338 | class WholePageTabContainer(FeaturedSnippetParser): 339 | """Gangnam Style""" 340 | 341 | @property 342 | def snippet_type(self): 343 | return "Whole Page Tab Container" 344 | 345 | @property 346 | def tag_link(self): 347 | if hasattr(self, "_tag_link"): 348 | return self._tag_link 349 | self._tag_link = self.tag.find( 350 | lambda tag: ( 351 | tag.name == "a" 352 | and tag.has_attr("href") 353 | and tag["href"].startswith("http") 354 | and (tag.h3 or tag.h2) is not None 355 | ) 356 | ) 357 | return self._tag_link 358 | 359 | @property 360 | def link(self): 361 | return self.tag_link["href"] if self.tag_link else None 362 | 
363 | @property 364 | def displayed_link(self): 365 | return self.tag.cite.text if self.tag.cite else None 366 | 367 | @property 368 | def title(self): 369 | if self.tag_link is None: 370 | return None 371 | tag_title = self.tag_link.h3 or self.tag_link.h2 372 | return tag_title.text 373 | 374 | @property 375 | def response(self): 376 | return self.raw_text 377 | 378 | @property 379 | def raw_text(self): 380 | return get_span_text(self.tag) 381 | 382 | 383 | def is_simple_featured_snippet_tag(tag): 384 | class_tuple = tuple(tag.get("class", "")) 385 | is_xpdopen = (tag.name == "div" and class_tuple == ("xpdopen",)) 386 | if not is_xpdopen: 387 | return False 388 | is_xpdopen_of_related_questions = ( 389 | tag.h2 is not None and tag.h2.text == "People also ask" 390 | ) 391 | return not is_xpdopen_of_related_questions 392 | 393 | 394 | def is_single_card_featured_snippet_tag(tag): 395 | is_card_section = ( 396 | tag.name == "div" and "card-section" in tag.get("class", []) 397 | ) 398 | if not is_card_section: 399 | return False 400 | is_card_section_of_tip = tag.text.startswith("Tip:") 401 | return not is_card_section_of_tip 402 | 403 | 404 | def is_multiple_card_snippet_tag(tag): 405 | return (tag.name == "g-section-with-header") 406 | 407 | 408 | def is_whole_page_tabs_container(tag): 409 | return (tag.get("id") == "wp-tabs-container") 410 | 411 | 412 | def is_web_results(tag): 413 | return (tag.name == "h2" and tag.text == "Web results") 414 | 415 | 416 | def get_featured_snippet_tag(document): 417 | 418 | def lookup_featured_snippet_tag(tag): 419 | return ( 420 | is_simple_featured_snippet_tag(tag) 421 | or is_single_card_featured_snippet_tag(tag) 422 | or is_multiple_card_snippet_tag(tag) 423 | or is_web_results(tag) 424 | ) 425 | whole_page_tag = document.find(is_whole_page_tabs_container) 426 | tag = document.find(lookup_featured_snippet_tag) 427 | if tag and is_simple_featured_snippet_tag(tag): 428 | return tag 429 | if whole_page_tag: 430 | return whole_page_tag 431 | if not tag or tag.name == "h2": 432 | return None 433 | return tag 434 | 435 | 436 | def get_featured_snippet_parser(question, document: BeautifulSoup): 437 | tag = get_featured_snippet_tag(document) 438 | if tag is None: 439 | return 440 | if is_simple_featured_snippet_tag(tag): 441 | return SimpleFeaturedSnippetParser.get_instance(question, tag) 442 | if is_multiple_card_snippet_tag(tag): 443 | return MultipleCardsFeaturedSnippetTag(question, tag) 444 | if is_single_card_featured_snippet_tag(tag): 445 | return SingleCardFeaturedSnippetParser(question, tag) 446 | if is_whole_page_tabs_container(tag): 447 | return WholePageTabContainer(question, tag) 448 | -------------------------------------------------------------------------------- /people_also_ask/plugins/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lagranges/people_also_ask/4e14287125ff3593b0814db718af7b97afe73d89/people_also_ask/plugins/__init__.py -------------------------------------------------------------------------------- /people_also_ask/plugins/article_generator/__init__.py: -------------------------------------------------------------------------------- 1 | from .article_generators import generate_article 2 | 3 | 4 | __all__ = [ 5 | "generate_article" 6 | ] -------------------------------------------------------------------------------- /people_also_ask/plugins/article_generator/article_generators.py: 
--------------------------------------------------------------------------------
import people_also_ask as paa
from pathlib import Path
from jinja2 import Environment, FileSystemLoader


NB_QUESTION = 10


def generate_article(title: str):
    questions = paa.get_related_questions(title, max_nb_questions=NB_QUESTION)

    introduction = paa.get_simple_answer(title)

    contents = {}
    for question in questions:
        contents[question] = paa.get_simple_answer(question)

    # resolve the template relative to this file, not the working directory
    cur_dir = Path(__file__).parent
    file_loader = FileSystemLoader(cur_dir / "templates")
    env = Environment(loader=file_loader)

    template_path = cur_dir / "templates" / "base.html"
    template = env.from_string(template_path.read_text())

    output = template.render(
        title=title,
        introduction=introduction,
        contents=contents,
        get_question_id=lambda x: x.replace(" ", "_")
    )
    with open("article.html", "w") as fd:
        fd.write(output)
--------------------------------------------------------------------------------
/people_also_ask/plugins/article_generator/templates/base.html:
--------------------------------------------------------------------------------
<html>
<head>
<title>{{ title }}</title>
</head>
<body>
<h1>{{ title }}</h1>
{{ introduction }}
<!-- table-of-contents markup lost in extraction -->
{% for question, answer in contents.items() -%}
<div>
<h2 id="{{ get_question_id(question) }}">{{ question }}</h2>
<div>
{{ answer }}
</div>
</div>
{% endfor %}
</body>
</html>
--------------------------------------------------------------------------------
/people_also_ask/request/__init__.py:
--------------------------------------------------------------------------------
from .session import get, user_agent


__all__ = ["get", "user_agent"]
--------------------------------------------------------------------------------
/people_also_ask/request/session.py:
--------------------------------------------------------------------------------
import os
import logging
import requests
import traceback
from fake_useragent import UserAgent

from people_also_ask.tools import retryable
from itertools import cycle
from typing import Optional
from people_also_ask.tools import CallingSemaphore
from people_also_ask.exceptions import RequestError

from requests import Session as _Session


SESSION = _Session()
# environment overrides arrive as strings, so cast them before use
NB_TIMES_RETRY = int(os.environ.get(
    "RELATED_QUESTION_NB_TIMES_RETRY", 3
))
NB_REQUESTS_LIMIT = int(os.environ.get(
    "RELATED_QUESTION_NB_REQUESTS_LIMIT", 25
))
NB_REQUESTS_DURATION_LIMIT = int(os.environ.get(
    "RELATED_QUESTION_NB_REQUESTS_DURATION_LIMIT", 60
))  # seconds
logging.basicConfig()
semaphore = CallingSemaphore(
    NB_REQUESTS_LIMIT, NB_REQUESTS_DURATION_LIMIT
)

ua = UserAgent()
user_agent = ua.getRandom

HEADERS = {
    'User-Agent': user_agent['useragent']
}

logger = logging.getLogger(__name__)


class ProxyGenerator:

    def __init__(self, proxies: Optional[tuple]):
        self.proxies = proxies

    @property
    def iter_proxy(self):
        if not self.proxies:
            raise ValueError("No proxy found")
        if getattr(self, "_iter_proxy", None) is None:
            self._iter_proxy = cycle(self.proxies)
        return self._iter_proxy

    def get(self) -> dict:
        if not self.proxies:
            return {}
        proxy = next(self.iter_proxy)
        if not proxy.startswith("https"):
            proxy = f"http://{proxy}"
        return {
            "https": proxy
        }


def _load_proxies() -> Optional[tuple]:
    filepath = os.getenv("PAA_PROXY_FILE")
    if filepath:
        with open(filepath, "r") as fd:
            proxies = [e.strip() for e in fd.read().splitlines() if e.strip()]
    else:
        proxies = None
    return proxies


def set_proxies(proxies: Optional[tuple]) -> None:
    global PROXY_GENERATORS
    PROXY_GENERATORS = ProxyGenerator(proxies=proxies)


set_proxies(proxies=_load_proxies())


@retryable(NB_TIMES_RETRY)
def get(url: str, params: dict) -> requests.Response:
    proxies = PROXY_GENERATORS.get()
    try:
        with semaphore:
            response = SESSION.get(
                url,
                params=params,
                headers=HEADERS,
                proxies=proxies,
            )
    except Exception:
        raise RequestError(
            url, params, proxies, traceback.format_exc()
        )
    if response.status_code != 200:
        raise RequestError(
            url, params, proxies, response.text
        )
    return response
--------------------------------------------------------------------------------
/people_also_ask/tests/test_google.py:
--------------------------------------------------------------------------------
import pytest
from people_also_ask import google


config = dict(
    test_get_answer=dict(
        text="Who is Ho Chi Minh?"
    ),
    test_get_related_questions=dict(
        text="where is france"
    )
)


def test_get_answer():
    answer = google.get_answer(config["test_get_answer"]["text"])
    assert "response" in answer


def test_get_related_questions():
    related_questions = google.get_related_questions(
        config["test_get_related_questions"]["text"]
    )
    assert len(related_questions) > 0
--------------------------------------------------------------------------------
/people_also_ask/tests/test_parser.py:
--------------------------------------------------------------------------------
import os
import unittest
from bs4 import BeautifulSoup
from people_also_ask.parser import (
    get_featured_snippet_parser,
    WholePageTabContainer,
    TableFeaturedSnippetParser,
    YoutubeFeaturedSnippetParser,
    OrderedFeaturedSnippetParser,
    UnorderedFeaturedSnippetParser,
    DefinitionFeaturedSnippetParser,
    MultipleCardsFeaturedSnippetTag,
    SingleCardFeaturedSnippetParser,
)


HTMLS_PARSER = {
    "cheetah_vs_lion.html": YoutubeFeaturedSnippetParser,
    "gangnam_style.html": WholePageTabContainer,
    "how_to_make_a_cold_brew_coffee.html": MultipleCardsFeaturedSnippetTag,
    "the_10_highest-grossing_movies_of_all_time.html": (
        OrderedFeaturedSnippetParser
    ),
    "what_are_3_basic_programming_languages.html": (
        UnorderedFeaturedSnippetParser
    ),
    "what_time_is_it.html": SingleCardFeaturedSnippetParser,
    "why_was_ho_chi_minh_a_hero.html": DefinitionFeaturedSnippetParser,
    "world_university_rankings_2019.html": TableFeaturedSnippetParser
}
FIXTURES_DIR = os.path.join(
    os.path.dirname(__file__),
    "fixtures"
)


class TestParser(unittest.TestCase):

    def test_parsers(self):
        for html_filename, Parser in HTMLS_PARSER.items():
            html_file = os.path.join(FIXTURES_DIR, html_filename)
            with open(html_file, "r") as fd:
                document = BeautifulSoup(fd.read(), "html.parser")
            question, _ = html_filename.split(".")
            question = question.replace("_", " ")
            parser = get_featured_snippet_parser(question, document)
            self.assertIsInstance(parser, Parser)
            self.assertIsNotNone(parser.response)


if __name__ == "__main__":
    unittest.main()
--------------------------------------------------------------------------------
/people_also_ask/tools.py:
--------------------------------------------------------------------------------
1 | #! 
/usr/bin/env python3 2 | import time 3 | import random 4 | import traceback 5 | from contextlib import ContextDecorator 6 | from typing import Callable, List 7 | from people_also_ask.exceptions import FeaturedSnippetParserError 8 | 9 | 10 | def raise_featuredsnippetparsererror_if_failed(func): 11 | def wrapper(self: "SimpleFeaturedSnippetParser", *args, **kwargs): 12 | try: 13 | return func(self, *args, **kwargs) 14 | except Exception: 15 | traceback.print_exc() 16 | raise FeaturedSnippetParserError(self.text) 17 | return wrapper 18 | 19 | 20 | def retryable(nb_times_retry): 21 | 22 | def decorator(func: Callable): 23 | 24 | def wrapper(*args, **kwargs): 25 | for _ in range(nb_times_retry-1): 26 | try: 27 | return func(*args, **kwargs) 28 | except Exception: 29 | pass 30 | return func(*args, **kwargs) 31 | 32 | return wrapper 33 | return decorator 34 | 35 | 36 | def itemize(lines: List[str]) -> List[str]: 37 | return ["\t- " + line for line in lines] 38 | 39 | 40 | def tabulate(header, table): 41 | length_columns = [] 42 | if header: 43 | table = [header] + table 44 | length_columns = [len(str(e)) for e in header] 45 | for row in table: 46 | current_lengh = [len(str(e)) for e in row] 47 | length_columns = [ 48 | max(i, j) for i, j in zip(length_columns, current_lengh) 49 | ] 50 | tabulated_rows = [] 51 | for row in table: 52 | tabulated_rows.append("\t".join([ 53 | str(e).rjust(length, " ") for e, length in zip(row, length_columns) 54 | ])) 55 | if header: 56 | tabulated_rows.insert( 57 | 1, 58 | "\t".join(["-"*length for length in length_columns]) 59 | ) 60 | return "\n".join(tabulated_rows) 61 | 62 | 63 | def remove_redundant(elements): return list(dict.fromkeys(elements)) 64 | 65 | 66 | class CallingSemaphore(ContextDecorator): 67 | 68 | def __init__(self, nb_call_times_limit, expired_time): 69 | self.nb_call_times_limit = nb_call_times_limit 70 | self.expired_time = expired_time 71 | self.called_timestamps = list() 72 | 73 | def __enter__(self): 74 | while len(self.called_timestamps) > self.nb_call_times_limit: 75 | now = time.time() 76 | self.called_timestamps = list(filter( 77 | lambda x: now - x < self.expired_time, 78 | self.called_timestamps 79 | )) 80 | time.sleep(random.random() * 2) 81 | self.called_timestamps.append(time.time()) 82 | 83 | def __exit__(self, *exc): 84 | pass 85 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import codecs 3 | import os 4 | import setuptools 5 | 6 | 7 | def local_file(file): 8 | return codecs.open( 9 | os.path.join(os.path.dirname(__file__), file), 'r', 'utf-8' 10 | ) 11 | 12 | setuptools.setup( 13 | name="people_also_ask", 14 | version="1.1.0", 15 | author="LE Van Tuan", 16 | author_email="leavantuan2312@gmail.com", 17 | packages=setuptools.find_packages(), 18 | long_description=local_file('README.md').read(), 19 | long_description_content_type="text/markdown", 20 | url="https://github.com/lagranges/people_also_ask", 21 | classifiers=[ 22 | "Topic :: Software Development :: Libraries :: Python Modules", 23 | "Topic :: Utilities", 24 | "Development Status :: 5 - Production/Stable", 25 | "Operating System :: MacOS", 26 | "Operating System :: Microsoft", 27 | "Programming Language :: Python :: 3.6", 28 | "Programming Language :: Python :: 3.7", 29 | "Programming Language :: Python :: 3.8", 30 | "Typing :: Typed", 31 | ], 32 | install_requires=[ 33 | "beautifulsoup4", 34 | "requests", 35 | 
"jinja2", 36 | "fake-useragent" 37 | ], 38 | python_requires=">=3.6" 39 | ) 40 | --------------------------------------------------------------------------------