├── requirements.txt ├── requirements-dev.txt ├── .gitignore ├── src └── trendspy │ ├── constants.py │ ├── __init__.py │ ├── trend_list.py │ ├── news_article.py │ ├── utils.py │ ├── hierarchical_search.py │ ├── converter.py │ ├── timeframe_utils.py │ ├── trend_keyword.py │ └── client.py ├── LICENSE ├── pyproject.toml ├── tests └── timeframe_utils_test.py └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.25.0 2 | pandas>=1.2.0 3 | numpy>=1.19.0 4 | python-dateutil>=2.8.0 -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # Install base requirements 2 | -r requirements.txt 3 | 4 | # Development tools 5 | build>=0.10.0 6 | twine>=4.0.0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Environments 24 | .env 25 | .venv 26 | env/ 27 | venv/ 28 | ENV/ 29 | 30 | # IDE 31 | .idea/ 32 | .vscode/ 33 | *.swp 34 | *.swo 35 | 36 | # Jupyter Notebook 37 | .ipynb_checkpoints 38 | 39 | # Testing 40 | .coverage 41 | .pytest_cache/ 42 | htmlcov/ -------------------------------------------------------------------------------- /src/trendspy/constants.py: -------------------------------------------------------------------------------- 1 | TREND_TOPICS = { 2 | 1: "Autos and Vehicles", 3 | 2: "Beauty and Fashion", 4 | 3: "Business and Finance", 5 | 20: "Climate", 6 | 4: "Entertainment", 7 | 5: "Food and Drink", 8 | 6: "Games", 9 | 7: "Health", 10 | 8: "Hobbies and Leisure", 11 | 9: "Jobs and Education", 12 | 10: "Law and Government", 13 | 11: "Other", 14 | 13: "Pets and Animals", 15 | 14: "Politics", 16 | 15: "Science", 17 | 16: "Shopping", 18 | 17: "Sports", 19 | 18: "Technology", 20 | 19: "Travel and Transportation" 21 | } -------------------------------------------------------------------------------- /src/trendspy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | TrendsPy - A Python library for working with Google Trends. 3 | 4 | This library provides a simple and convenient interface for accessing Google Trends data, 5 | allowing you to analyze search trends, get real-time trending topics, and track interest 6 | over time and regions. 
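A minimal quick-start sketch (taken from the README below; see the Trends client for the full API):

    from trendspy import Trends

    tr = Trends()
    df = tr.interest_over_time(['python', 'javascript'])  # one interest column per keyword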
7 | 8 | Main components: 9 | - Trends: Main client class for accessing Google Trends data 10 | - BatchPeriod: Enum for specifying time periods in batch operations 11 | - TrendKeyword: Class representing a trending search term with metadata 12 | - NewsArticle: Class representing news articles related to trends 13 | - TrendList: List-like container of trends with topic filtering helpers 14 | 15 | Project links: 16 | Homepage: https://github.com/sdil87/trendspy 17 | Repository: https://github.com/sdil87/trendspy.git 18 | Issues: https://github.com/sdil87/trendspy/issues 19 | """ 20 | 21 | from .client import Trends, BatchPeriod 22 | from .trend_keyword import TrendKeyword, TrendKeywordLite 23 | from .news_article import NewsArticle 24 | from .trend_list import TrendList 25 | 26 | __version__ = "0.1.6" 27 | __all__ = ['Trends', 'BatchPeriod', 'TrendKeyword', 'TrendKeywordLite', 'NewsArticle', 'TrendList'] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 SDil 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "trendspy" 7 | version = "0.1.6" 8 | description = "A Python library for accessing Google Trends data" 9 | readme = "README.md" 10 | requires-python = ">=3.7" 11 | license = {file = "LICENSE"} 12 | authors = [ 13 | {name = "SDil"}, 14 | ] 15 | dependencies = [ 16 | "requests>=2.25.0", 17 | "pandas>=1.2.0", 18 | "numpy>=1.19.0", 19 | "python-dateutil>=2.8.0" 20 | ] 21 | classifiers = [ 22 | "Development Status :: 4 - Beta", 23 | "Intended Audience :: Developers", 24 | "Intended Audience :: Science/Research", 25 | "License :: OSI Approved :: MIT License", 26 | "Operating System :: OS Independent", 27 | "Programming Language :: Python :: 3", 28 | "Programming Language :: Python :: 3.7", 29 | "Programming Language :: Python :: 3.8", 30 | "Programming Language :: Python :: 3.9", 31 | "Programming Language :: Python :: 3.10", 32 | "Programming Language :: Python :: 3.11", 33 | "Topic :: Internet", 34 | "Topic :: Scientific/Engineering :: Information Analysis", 35 | "Topic :: Software Development :: Libraries :: Python Modules" 36 | ] 37 | keywords = ["google-trends", "trends", "analytics", "data-analysis"] 38 | 39 | [project.urls] 40 | Homepage = "https://github.com/sdil87/trendspy" 41 | Repository = "https://github.com/sdil87/trendspy.git" 42 | Issues = "https://github.com/sdil87/trendspy/issues" 43 | 44 | [tool.hatch.build.targets.wheel] 45 | packages = ["src/trendspy"] -------------------------------------------------------------------------------- /tests/timeframe_utils_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from datetime import datetime, timedelta 3 | from trendspy.timeframe_utils import * 4 | from trendspy.timeframe_utils import _is_valid_date, _is_valid_format, _extract_time_parts, _decode_trend_datetime 5 | # Tests 6 | def test_is_valid_date(): 7 | assert _is_valid_date('2024-09-13') is True 8 | assert _is_valid_date('2024-09-13T22') is True 9 | assert _is_valid_date('2024/09/13') is False 10 | assert _is_valid_date('invalid') is False 11 | 12 | def test_is_valid_format(): 13 | assert _is_valid_format('1-H') is True 14 | assert _is_valid_format('5-y') is True 15 | assert _is_valid_format('10-m') is True 16 | assert _is_valid_format('invalid') is False 17 | 18 | def test_extract_time_parts(): 19 | assert _extract_time_parts('5-H') == (5, 'H') 20 | assert _extract_time_parts('10-d') == (10, 'd') 21 | assert _extract_time_parts('invalid') is None 22 | 23 | def test_decode_trend_datetime(): 24 | assert _decode_trend_datetime('2024-09-13T22') == datetime(2024, 9, 13, 22) 25 | assert _decode_trend_datetime('2024-09-13') == datetime(2024, 9, 13) 26 | 27 | def test_convert_timeframe(): 28 | assert convert_timeframe('now 1-H') == 'now 1-H' 29 | assert convert_timeframe('2024-09-12T23 5-H') == '2024-09-12T18 2024-09-12T23' 30 | assert convert_timeframe('2024-09-12T23 1-d') == '2024-09-11T23 2024-09-12T23' 31 | assert convert_timeframe('2024-09-12 1-y') == '2023-09-12 2024-09-12' 32 | assert convert_timeframe('2024-09-12T23 2024-09-13') == '2024-09-12T23 2024-09-14T00' 33 | assert convert_timeframe('2024-09-12 2024-09-13T12') == '2024-09-12T00 2024-09-13T12' 34 | with pytest.raises(ValueError): 35 | convert_timeframe('2024-09-12T23 
invalid') 36 | with pytest.raises(ValueError): 37 | convert_timeframe('2024-09-12T23 8-d') 38 | with pytest.raises(ValueError): 39 | convert_timeframe('2024-09-12T23 all') 40 | 41 | def test_month_diff(): 42 | assert convert_timeframe('2024-09-12 1-m') == '2024-08-13 2024-09-12' 43 | 44 | 45 | def test_convert_timeframe_range(): 46 | assert timeframe_to_timedelta('now 1-H') == timedelta(seconds=60*60) 47 | assert timeframe_to_timedelta('now 5-H') == timedelta(seconds=5*60*60) 48 | 49 | if __name__ == "__main__": 50 | pytest.main() -------------------------------------------------------------------------------- /src/trendspy/trend_list.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Optional 2 | from .constants import TREND_TOPICS 3 | from .trend_keyword import TrendKeyword 4 | 5 | class TrendList(list): 6 | """ 7 | A list-like container for trending topics with additional filtering capabilities. 8 | Inherits from list to maintain all standard list functionality. 9 | """ 10 | 11 | def __init__(self, trends: List[TrendKeyword]): 12 | super().__init__(trends) 13 | 14 | def filter_by_topic(self, topic: Union[int, str, List[Union[int, str]]]) -> 'TrendList': 15 | """ 16 | Filter trends by topic ID or name. 17 | 18 | Args: 19 | topic: Topic identifier. Can be: 20 | - int: Topic ID (e.g., 18 for Technology) 21 | - str: Topic name (e.g., 'Technology') 22 | - list of int/str: Multiple topics (matches any) 23 | 24 | Returns: 25 | TrendList: New TrendList containing only trends matching the specified topic(s) 26 | """ 27 | topics = [topic] if not isinstance(topic, list) else topic 28 | 29 | name_to_id = {name.lower(): id_ for id_, name in TREND_TOPICS.items()} 30 | 31 | topic_ids = set() 32 | for t in topics: 33 | if isinstance(t, int): 34 | topic_ids.add(t) 35 | elif isinstance(t, str): 36 | topic_id = name_to_id.get(t.lower()) 37 | if topic_id: 38 | topic_ids.add(topic_id) 39 | 40 | filtered = [ 41 | trend for trend in self 42 | if any(topic_id in trend.topics for topic_id in topic_ids) 43 | ] 44 | 45 | return TrendList(filtered) 46 | 47 | def get_topics_summary(self) -> dict: 48 | """ 49 | Get a summary of topics present in the trends. 50 | 51 | Returns: 52 | dict: Mapping of topic names to count of trends 53 | """ 54 | topic_counts = {} 55 | for trend in self: 56 | for topic_id in trend.topics: 57 | topic_name = TREND_TOPICS.get(topic_id, f"Unknown ({topic_id})") 58 | topic_counts[topic_name] = topic_counts.get(topic_name, 0) + 1 59 | return dict(sorted(topic_counts.items(), key=lambda x: (-x[1], x[0]))) 60 | 61 | def __str__(self) -> str: 62 | """Return string representation of the trends.""" 63 | if not self: 64 | return "[]" 65 | return "[\n " + ",\n ".join(trend.brief_summary() for trend in self) + "\n]" -------------------------------------------------------------------------------- /src/trendspy/news_article.py: -------------------------------------------------------------------------------- 1 | from .utils import parse_time_ago, flatten_dict 2 | from datetime import datetime 3 | 4 | class NewsArticle: 5 | """ 6 | Represents a news article related to a trending topic. 7 | 8 | This class handles both dictionary and list-based article data from 9 | various Google Trends API endpoints. 
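For illustration, both payload shapes below decode to the same article (field layout taken from ``from_api``; the values are made up):

    NewsArticle.from_api({'title': 'Example', 'url': 'https://example.com', 'source': 'Example News'})
    NewsArticle.from_api(['Example', 'https://example.com', 'Example News', [1700000000]])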
10 | 11 | Parameters: 12 | title (str): Article title 13 | url (str): Article URL 14 | source (str): News source name 15 | picture (str): URL to article image 16 | time (str or int): Publication time or timestamp 17 | snippet (str): Article preview text 18 | 19 | Note: 20 | If time is provided as a string with 'ago' format (e.g., '2 hours ago'), 21 | it will be automatically converted to a timestamp. 22 | """ 23 | def __init__(self, title=None, url=None, source=None, picture=None, time=None, snippet=None, article_ids=None): 24 | self.title = title 25 | self.url = url 26 | self.source = source 27 | self.picture = picture 28 | self.time = time 29 | if isinstance(self.time, str) and ('ago' in self.time): 30 | self.time = parse_time_ago(self.time) 31 | self.snippet = snippet 32 | 33 | @classmethod 34 | def from_api(cls, data): 35 | if isinstance(data, dict): 36 | return cls( 37 | title=data.get('title') or data.get('articleTitle'), 38 | url=data.get('url'), 39 | source=data.get('source'), 40 | picture=data.get('picture') or data.get('image', {}).get('imageUrl'), 41 | time=data.get('time') or data.get('timeAgo'), 42 | snippet=data.get('snippet') 43 | ) 44 | elif isinstance(data, list): 45 | return cls( 46 | title=data[0], 47 | url=data[1], 48 | source=data[2], 49 | time=data[3][0] if data[3] else None, 50 | picture=data[4] if len(data) > 4 else None 51 | ) 52 | else: 53 | raise ValueError("Unsupported data format: must be dict or list") 54 | 55 | def __repr__(self): 56 | return f"NewsArticle(title={self.title!r}, url={self.url!r}, source={self.source!r}, " \ 57 | f"picture={self.picture!r}, time={self.time!r}, snippet={self.snippet!r})" 58 | 59 | def __str__(self): 60 | s = 'Title : {}'.format(self.title) 61 | s += '\nURL : {}'.format(self.url) if self.url else '' 62 | s += '\nSource : {}'.format(self.source) if self.source else '' 63 | s += '\nPicture : {}'.format(self.picture) if self.picture else '' 64 | s += '\nTime : {}'.format(datetime.fromtimestamp(self.time).strftime('%Y-%m-%d %H:%M:%S')) if self.time else '' 65 | s += '\nSnippet : {}'.format(self.snippet) if self.snippet else '' 66 | return s -------------------------------------------------------------------------------- /src/trendspy/utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from typing import Any 3 | import re 4 | import json 5 | from enum import Enum 6 | from datetime import datetime, timedelta, timezone 7 | import time 8 | 9 | _HEX_TO_CHAR_DICT = { 10 | r'\x7b':'{', 11 | r'\x7d':'}', 12 | r'\x22':'"', 13 | r'\x5d':']', 14 | r'\x5b':'[', 15 | '\\\\':'\\' 16 | } 17 | _tag_pattern = re.compile(r'<([\w:]+)>(.*?)</\1>', re.DOTALL)  # a paired tag and its inner content, e.g. <ht:title>...</ht:title> 18 | 19 | class EnumEncoder(json.JSONEncoder): 20 | def default(self, obj): 21 | if isinstance(obj, Enum): 22 | return obj.value 23 | return super().default(obj) 24 | 25 | class LRUCache(OrderedDict): 26 | def __init__(self, maxsize=128): 27 | super().__init__() 28 | self.maxsize = maxsize 29 | 30 | def __getitem__(self, key): 31 | value = super().__getitem__(key) 32 | self.move_to_end(key) 33 | return value 34 | 35 | def __setitem__(self, key, value): 36 | if key in self: 37 | self.move_to_end(key) 38 | super().__setitem__(key, value) 39 | if len(self) > self.maxsize: 40 | oldest = next(iter(self)) 41 | del self[oldest] 42 | 43 | def ensure_list(item): 44 | return list(item) if hasattr(item, '__iter__') and not isinstance(item, str) and not isinstance(item, dict) else [item] 45 | 46 | def extract_column(data, column, 
default: Any = None, f=None): 47 | if f is None: 48 | return [item.get(column, default) for item in data] 49 | return [f(item.get(column, default)) for item in data] 50 | 51 | def flatten_data(data, columns): 52 | return [{**{kk: vv for k in columns if k in d for kk, vv in d[k].items()}, 53 | **{k: v for k, v in d.items() if k not in columns}} 54 | for d in data] 55 | 56 | def flatten_dict(d, parent_key='', sep='_'): 57 | items = [] 58 | for k, v in d.items(): 59 | new_key = f"{parent_key}{sep}{k}" if parent_key else k 60 | if isinstance(v, dict): 61 | items.extend(flatten_dict(v, new_key, sep=sep).items()) 62 | else: 63 | items.append((new_key, v)) 64 | return dict(items) 65 | 66 | def filter_data(data, desired_columns): 67 | desired_columns = set(desired_columns) 68 | return [{k: v for k, v in item.items() if k in desired_columns} for item in data] 69 | 70 | def decode_escape_text(text): 71 | for k,v in _HEX_TO_CHAR_DICT.items(): 72 | text = text.replace(k, v) 73 | 74 | if r'\x' in text: 75 | text = re.sub(r'\\x[0-9a-fA-F]{2}', lambda match:chr(int(match.group(0)[2:], 16)), text) 76 | return text 77 | 78 | def parse_xml_to_dict(text, prefix=''): 79 | item_dict = {} 80 | for tag, content in _tag_pattern.findall(text): 81 | content = parse_xml_to_dict(content.strip(), tag+'_') 82 | tag = tag.replace(prefix, '') 83 | if tag in item_dict: 84 | if not isinstance(item_dict[tag], list): 85 | item_dict[tag] = [item_dict[tag]] 86 | item_dict[tag].append(content) 87 | continue 88 | item_dict[tag] = content 89 | if not item_dict: 90 | return text 91 | return item_dict 92 | 93 | def get_utc_offset_minutes(): 94 | """ 95 | Returns the local time offset from UTC in minutes. 96 | Positive values for time zones ahead of UTC (eastward), 97 | negative values for time zones behind UTC (westward). 98 | """ 99 | # Get current local time 100 | now = datetime.now() 101 | 102 | # Get offset in seconds 103 | utc_offset = -time.timezone 104 | 105 | # Account for daylight saving time if active 106 | if time.localtime().tm_isdst: 107 | utc_offset += 3600 # Add one hour in seconds 108 | 109 | # Convert seconds to minutes 110 | return utc_offset // 60 111 | 112 | def parse_time_ago(time_ago): 113 | if not time_ago: 114 | return None 115 | 116 | match = re.match(r'(\d+)\s*(\w+)', time_ago) 117 | if not match: 118 | return None 119 | 120 | value, unit = match.groups() 121 | value = int(value) 122 | 123 | if 'h' in unit: 124 | delta = timedelta(hours=value) 125 | elif 'd' in unit: 126 | delta = timedelta(days=value) 127 | elif 'm' in unit: 128 | delta = timedelta(minutes=value) 129 | else: 130 | delta = timedelta(0) 131 | 132 | now = datetime.now(timezone.utc) 133 | timestamp = int((now - delta).replace(microsecond=0).timestamp()) 134 | return timestamp 135 | 136 | def truncate_string(s, max_length): 137 | if len(s) > max_length: 138 | return s[:max_length - 3] + '...' 139 | return s -------------------------------------------------------------------------------- /src/trendspy/hierarchical_search.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional 2 | import re 3 | 4 | def flatten_tree(node, parent_id='', result=None, join_ids=True): 5 | """ 6 | Recursively transforms a tree structure into a flat list. 
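A small worked example with a geo-style node and the default ``join_ids=True``:

    >>> flatten_tree({'name': 'US', 'id': 'US', 'children': [{'name': 'California', 'id': 'CA'}]})
    [{'name': 'US', 'id': 'US'}, {'name': 'California', 'id': 'US-CA'}]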
7 | 8 | Args: 9 | node (dict): Tree node with 'name', 'id' and optional 'children' keys 10 | parent_id (str): Parent node ID 11 | result (list): Accumulated result 12 | join_ids (bool): Whether to join IDs with parent (True for geo, False for categories) 13 | 14 | Returns: 15 | list: List of dictionaries with name and id 16 | """ 17 | if result is None: 18 | result = [] 19 | 20 | current_id = node['id'] 21 | # Join IDs only for geographical data 22 | if join_ids and parent_id: 23 | full_id = f"{parent_id}-{current_id}" 24 | else: 25 | full_id = current_id 26 | 27 | result.append({ 28 | 'name': node['name'], 29 | 'id': full_id 30 | }) 31 | 32 | if 'children' in node: 33 | for child in node['children']: 34 | flatten_tree(child, full_id if join_ids else '', result, join_ids) 35 | 36 | return result 37 | 38 | class HierarchicalIndex: 39 | """ 40 | An index for efficient searches in hierarchical Google Trends data structures. 41 | 42 | This class provides fast lookups for hierarchical data like locations and categories, 43 | supporting both exact and partial matching of names. 44 | 45 | Examples: 46 | - Geographical hierarchies (Country -> Region -> City) 47 | - Category hierarchies (Main category -> Subcategory) 48 | 49 | Methods: 50 | add_item(item): Add an item to the index 51 | exact_search(name): Find exact match for name 52 | partial_search(query): Find items containing the query 53 | id_search(id_query): Find by ID (supports both exact and partial matching) 54 | """ 55 | 56 | def __init__(self, items: List[dict], partial_id_search: bool = True): 57 | """ 58 | Initialize the search index. 59 | 60 | Args: 61 | items (List[dict]): List of dictionaries with 'name' and 'id' 62 | partial_id_search (bool): Whether to allow partial ID matches 63 | (True for geo locations, False for categories) 64 | """ 65 | # Main storage: dict with lowercase name as key 66 | self.name_to_item: Dict[str, dict] = {} 67 | 68 | # Inverted index for partial matching 69 | self.word_index: Dict[str, List[str]] = {} 70 | 71 | # Store search mode 72 | self.partial_id_search = partial_id_search 73 | 74 | # Build indexes 75 | for item in items: 76 | self.add_item(item) 77 | 78 | def add_item(self, item: dict) -> None: 79 | """ 80 | Add a single item to the index. 81 | 82 | Args: 83 | item (dict): Dictionary with 'name' and 'id' 84 | """ 85 | name = item['name'].lower() 86 | 87 | # Add to main storage 88 | self.name_to_item[name] = item 89 | 90 | # Split name into words and add to inverted index 91 | words = set(re.split(r'\W+', name)) 92 | for word in words: 93 | if word: 94 | if word not in self.word_index: 95 | self.word_index[word] = [] 96 | self.word_index[word].append(name) 97 | 98 | def exact_search(self, name: str) -> Optional[dict]: 99 | """ 100 | Perform exact name search (case-insensitive). 101 | 102 | Args: 103 | name (str): Name to search for 104 | 105 | Returns: 106 | Optional[dict]: Item dictionary if found, None otherwise 107 | """ 108 | return self.name_to_item.get(name.lower()) 109 | 110 | def partial_search(self, query: str) -> List[dict]: 111 | """ 112 | Perform partial name search (case-insensitive). 
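Any indexed name containing the query matches; for example (the IDs here are illustrative):

    >>> idx = HierarchicalIndex([{'name': 'New York', 'id': 'US-NY'}, {'name': 'Yorkshire', 'id': 'GB-YKS'}])
    >>> sorted(item['id'] for item in idx.partial_search('york'))
    ['GB-YKS', 'US-NY']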
113 | 114 | Args: 115 | query (str): Search query string 116 | 117 | Returns: 118 | List[dict]: List of matching item dictionaries 119 | """ 120 | query = query.lower() 121 | results = set() 122 | 123 | # Search for partial matches in word index 124 | for word, items in self.word_index.items(): 125 | if query in word: 126 | results.update(items) 127 | 128 | # Also check if query matches any part of full names 129 | for name in self.name_to_item: 130 | if query in name: 131 | results.add(name) 132 | 133 | # Return found items 134 | return [self.name_to_item[name] for name in results] 135 | 136 | def id_search(self, id_query: str) -> List[dict]: 137 | """ 138 | Search by ID. 139 | 140 | Args: 141 | id_query (str): ID or partial ID to search for 142 | 143 | Returns: 144 | List[dict]: List of matching item dictionaries 145 | """ 146 | if self.partial_id_search: 147 | # For geo data - allow partial matches 148 | return [item for item in self.name_to_item.values() 149 | if id_query in item['id']] 150 | else: 151 | # For categories - only exact matches 152 | return [item for item in self.name_to_item.values() 153 | if item['id'] == id_query] 154 | 155 | def create_hierarchical_index(tree_data: dict, join_ids: bool = True) -> HierarchicalIndex: 156 | """ 157 | Create a complete search system from a hierarchical tree structure. 158 | 159 | Args: 160 | tree_data (dict): Original tree structure 161 | join_ids (bool): Whether to join IDs with parent 162 | (True for geo locations, False for categories) 163 | 164 | Returns: 165 | HierarchicalIndex: Initialized search system 166 | """ 167 | # First flatten the tree 168 | flat_items = flatten_tree(tree_data, join_ids=join_ids) 169 | # Then create and return the search index 170 | return HierarchicalIndex(flat_items, partial_id_search=join_ids) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TrendsPy 2 | 3 | Python library for accessing Google Trends data. 
4 | 5 | ## Key Features 6 | 7 | **Explore** 8 | - Track popularity over time (`interest_over_time`) 9 | - Analyze geographic distribution (`interest_by_region`) 10 | - Compare interest across different timeframes and regions (multirange support) 11 | - Get related queries and topics (`related_queries`, `related_topics`) 12 | 13 | **Trending Now** 14 | - Access current trending searches (`trending_now`, `trending_now_by_rss`) 15 | - Get related news articles (`trending_now_news_by_ids`) 16 | - Retrieve historical data for 500+ trending keywords with independent normalization (`trending_now_showcase_timeline`) 17 | 18 | **Search Utilities** 19 | - Find category IDs (`categories`) 20 | - Search for location codes (`geo`) 21 | 22 | **Flexible Time Formats** 23 | - Custom intervals: `'now 123-H'`, `'today 45-d'` 24 | - Date-based offsets: `'2024-02-01 10-d'` 25 | - Standard ranges: `'2024-01-01 2024-12-31'` 26 | 27 | ## Installation 28 | 29 | ```bash 30 | pip install trendspy 31 | ``` 32 | 33 | ## Basic Usage 34 | 35 | ```python 36 | from trendspy import Trends 37 | tr = Trends() 38 | df = tr.interest_over_time(['python', 'javascript']) 39 | df.plot(title='Python vs JavaScript Interest Over Time', 40 | figsize=(12, 6)) 41 | ``` 42 | 43 | ```python 44 | # Analyze geographic distribution 45 | geo_df = tr.interest_by_region('python') 46 | ``` 47 | ```python 48 | # Get related queries 49 | related = tr.related_queries('python') 50 | ``` 51 | 52 | ## Advanced Features 53 | 54 | ### Search Categories and Locations 55 | 56 | ```python 57 | # Find technology-related categories 58 | categories = tr.categories(find='technology') 59 | # Output: [{'name': 'Computers & Electronics', 'id': '13'}, ...] 60 | 61 | # Search for locations 62 | locations = tr.geo(find='york') 63 | # Output: [{'name': 'New York', 'id': 'US-NY'}, ...] 
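# Both helpers do case-insensitive substring matching (see
# hierarchical_search.py), so find='YORK' returns the same locations.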
64 | 65 | # Use in queries 66 | df = tr.interest_over_time( 67 | 'python', 68 | geo='US-NY', # Found location ID 69 | cat='13' # Found category ID 70 | ) 71 | ``` 72 | 73 | ### Real-time Trending Searches and News 74 | 75 | ```python 76 | # Get current trending searches in the US 77 | trends = tr.trending_now(geo='US') 78 | 79 | # Get trending searches with news articles 80 | trends_with_news = tr.trending_now_by_rss(geo='US') 81 | print(trends_with_news[0]) # First trending topic 82 | print(trends_with_news[0].news[0]) # Associated news article 83 | 84 | # Get news articles for specific trending topics 85 | news = tr.trending_now_news_by_ids( 86 | trends[0].news_tokens, # News tokens from trending topic 87 | max_news=3 # Number of articles to retrieve 88 | ) 89 | for article in news: 90 | print(f"Title: {article.title}") 91 | print(f"Source: {article.source}") 92 | print(f"URL: {article.url}\n") 93 | ``` 94 | 95 | ### Independent Historical Data for Multiple Keywords 96 | 97 | ```python 98 | from trendspy import BatchPeriod 99 | 100 | # Unlike standard interest_over_time where data is normalized across all keywords, 101 | # trending_now_showcase_timeline provides independent data for each keyword 102 | # (up to 500+ keywords in a single request) 103 | 104 | keywords = ['keyword1', 'keyword2', ..., 'keyword500'] 105 | 106 | # Get independent historical data 107 | df_24h = tr.trending_now_showcase_timeline( 108 | keywords, 109 | timeframe=BatchPeriod.Past24H # 16-minute intervals 110 | ) 111 | 112 | # Each keyword's data is normalized only to itself 113 | df_24h.plot( 114 | subplots=True, 115 | layout=(5, 2), 116 | figsize=(15, 20), 117 | title="Independent Trend Lines" 118 | ) 119 | 120 | # Available time windows: 121 | # - Past4H: ~30 points (8-minute intervals) 122 | # - Past24H: ~90 points (16-minute intervals) 123 | # - Past48H: ~180 points (16-minute intervals) 124 | # - Past7D: ~42 points (4-hour intervals) 125 | ``` 126 | 127 | ### Geographic Analysis 128 | 129 | ```python 130 | # Country-level data 131 | country_df = tr.interest_by_region('python') 132 | 133 | # State-level data for the US 134 | state_df = tr.interest_by_region( 135 | 'python', 136 | geo='US', 137 | resolution='REGION' 138 | ) 139 | 140 | # City-level data for California 141 | city_df = tr.interest_by_region( 142 | 'python', 143 | geo='US-CA', 144 | resolution='CITY' 145 | ) 146 | ``` 147 | 148 | ### Timeframe Formats 149 | 150 | - Standard API timeframes: `'now 1-H'`, `'now 4-H'`, `'today 1-m'`, `'today 3-m'`, `'today 12-m'` 151 | - Custom intervals: 152 | - Short-term (< 8 days): `'now 123-H'`, `'now 72-H'` 153 | - Long-term: `'today 45-d'`, `'today 90-d'`, `'today 18-m'` 154 | - Date-based: `'2024-02-01 10-d'`, `'2024-03-15 3-m'` 155 | - Date ranges: `'2024-01-01 2024-12-31'` 156 | - Hourly precision: `'2024-03-25T12 2024-03-25T15'` (for periods < 8 days) 157 | - All available data: `'all'` 158 | 159 | ### Multirange Interest Over Time 160 | 161 | Compare search interest across different time periods and regions: 162 | 163 | ```python 164 | # Compare different time periods 165 | timeframes = [ 166 | '2024-01-25 12-d', # 12-day period 167 | '2024-06-20 23-d' # 23-day period 168 | ] 169 | geo = ['US', 'GB'] # Compare US and UK 170 | 171 | df = tr.interest_over_time( 172 | 'python', 173 | timeframe=timeframes, 174 | geo=geo 175 | ) 176 | ``` 177 | 178 | Note: When using multiple timeframes, they must maintain consistent resolution and the maximum timeframe cannot be more than twice the length of the minimum 
timeframe. 179 | 180 | ### Proxy Support 181 | 182 | TrendsPy supports the same proxy configuration as the `requests` library: 183 | 184 | ```python 185 | # Initialize with proxy 186 | tr = Trends(proxy="http://user:pass@10.10.1.10:3128") 187 | # or 188 | tr = Trends(proxy={ 189 | "http": "http://10.10.1.10:3128", 190 | "https": "http://10.10.1.10:1080" 191 | }) 192 | 193 | # Configure proxy after initialization 194 | tr.set_proxy("http://10.10.1.10:3128") 195 | ``` 196 | 197 | ## Documentation 198 | 199 | For more examples and detailed API documentation, check out the Jupyter notebook in the repository: `basic_usage.ipynb` 200 | 201 | ## License 202 | 203 | MIT License - see the [LICENSE](LICENSE) file for details. 204 | 205 | ## Disclaimer 206 | 207 | This library is not affiliated with Google. Please ensure compliance with Google's terms of service when using this library. 208 | -------------------------------------------------------------------------------- /src/trendspy/converter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from .utils import * 4 | 5 | _RELATED_QUERIES_DESIRED_COLUMNS = ['query','topic','title','type','mid','value'] 6 | 7 | class TrendsDataConverter: 8 | """ 9 | Converts raw Google Trends API responses to pandas DataFrames. 10 | 11 | This class provides static methods for converting various types of 12 | Google Trends data into more usable formats. 13 | 14 | Methods: 15 | interest_over_time: Converts timeline data 16 | related_queries: Converts related queries data 17 | geo_data: Converts geographic data 18 | suggestions: Converts search suggestions 19 | rss_items: Parses RSS feed items 20 | """ 21 | @staticmethod 22 | def token_to_bullets(token_data): 23 | items = token_data.get('request', {}).get('comparisonItem', []) 24 | bullets = [item.get('complexKeywordsRestriction', {}).get('keyword', [''])[0].get('value','') for item in items] 25 | metadata = [next(iter(item.get('geo', {'':'unk'}).values()), 'unk') for item in items] 26 | if len(set(metadata))>1: 27 | bullets = [b+' | '+m for b,m in zip(bullets, metadata)] 28 | metadata = [item.get('time', '').replace('\\', '') for item in items] 29 | if len(set(metadata))>1: 30 | bullets = [b+' | '+m for b,m in zip(bullets, metadata)] 31 | 32 | return bullets 33 | 34 | @staticmethod 35 | def interest_over_time(widget_data, keywords, time_as_index=True): 36 | """ 37 | Converts interest over time data to a pandas DataFrame. 
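The output has one column per keyword, indexed by UTC time, plus an
``isPartial`` flag when the API marks trailing buckets as incomplete.
Roughly (values illustrative):

    time [UTC]            python  javascript  isPartial
    2024-01-07                75          60      False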
38 | 39 | Parameters: 40 | widget_data (dict): Raw API response data 41 | keywords (list): List of keywords for column names 42 | time_as_index (bool): Use time as DataFrame index 43 | 44 | Returns: 45 | pandas.DataFrame: Processed interest over time data 46 | """ 47 | timeline_data = widget_data 48 | timeline_data = timeline_data.get('default', timeline_data) 49 | timeline_data = timeline_data.get('timelineData', timeline_data) 50 | if not timeline_data: 51 | return pd.DataFrame(columns=keywords) 52 | 53 | 54 | df_data = np.array(extract_column(timeline_data, 'value')).reshape(len(timeline_data), -1) 55 | df_data = dict(zip(keywords, df_data.T)) 56 | if ('isPartial' in timeline_data[-1]) or any('isPartial' in row for row in timeline_data): 57 | df_data['isPartial'] = extract_column(timeline_data, 'isPartial', False) 58 | 59 | 60 | timestamps = extract_column(timeline_data, 'time', f=lambda x:int(x) if x else None) 61 | timestamps = np.array(timestamps, dtype='datetime64[s]').astype('datetime64[ns]') 62 | # timestamps += np.timedelta64(get_utc_offset_minutes(), 'm') 63 | if time_as_index: 64 | return pd.DataFrame(df_data, index=pd.DatetimeIndex(timestamps, name='time [UTC]')) 65 | return pd.DataFrame({'time':timestamps, **df_data}) 66 | 67 | @staticmethod 68 | def multirange_interest_over_time(data, bullets=None): 69 | data = data.get('default', {}).get('timelineData', [{}]) 70 | if not 'columnData' in data[0]: 71 | return pd.DataFrame() 72 | 73 | num_parts = len(data[0]['columnData']) 74 | if bullets is None: 75 | bullets = ['keyword_'+str(i) for i in range(num_parts)] 76 | 77 | df_data = {} 78 | for i in range(num_parts): 79 | timeline_data = [item['columnData'][i] for item in data] 80 | df_data[bullets[i]] = extract_column(timeline_data, 'value', f=lambda x:x if x!=-1 else None) 81 | 82 | if ('isPartial' in timeline_data[-1]) or any('isPartial' in row for row in timeline_data): 83 | df_data['isPartial_'+str(i)] = extract_column(timeline_data, 'isPartial', False) 84 | 85 | timestamps = extract_column(timeline_data, 'time', f=lambda ts:int(ts) if ts else None) 86 | timestamps = np.array(timestamps, dtype='datetime64[s]').astype('datetime64[ns]') 87 | df_data['index_'+str(i)] = timestamps 88 | return pd.DataFrame(df_data) 89 | 90 | @staticmethod 91 | def related_queries(widget_data): 92 | ranked_data = widget_data.get('default',{}).get('rankedList') 93 | if not ranked_data: 94 | return {'top':pd.DataFrame(), 'rising':pd.DataFrame()} 95 | 96 | result = {} 97 | result['top'] = pd.DataFrame(flatten_data(filter_data(ranked_data[0]['rankedKeyword'], _RELATED_QUERIES_DESIRED_COLUMNS), ['topic'])) 98 | result['rising'] = pd.DataFrame(flatten_data(filter_data(ranked_data[1]['rankedKeyword'], _RELATED_QUERIES_DESIRED_COLUMNS), ['topic'])) 99 | return result 100 | 101 | @staticmethod 102 | def geo_data(widget_data, bullets=None): 103 | data = widget_data.get('default', {}).get('geoMapData', []) 104 | filtered_data = list(filter(lambda item:item['hasData'][0], data)) 105 | if not filtered_data: 106 | return pd.DataFrame() 107 | 108 | num_keywords = len(filtered_data[0]['value']) 109 | if not bullets: 110 | bullets = ['keyword_'+str(i) for i in range(num_keywords)] 111 | 112 | found_cols = set(filtered_data[0].keys()) & {'coordinates', 'geoCode', 'geoName', 'value'} 113 | df_data = {} 114 | df_data['geoName'] = extract_column(filtered_data, 'geoName') 115 | if 'geoCode' in found_cols: 116 | df_data['geoCode'] = extract_column(filtered_data, 'geoCode') 117 | if 'coordinates' in found_cols: 118 | 
df_data['lat'] = extract_column(filtered_data, 'coordinates', f=lambda x:x['lat']) 119 | df_data['lng'] = extract_column(filtered_data, 'coordinates', f=lambda x:x['lng']) 120 | 121 | values = np.array(extract_column(filtered_data, 'value')).reshape(len(filtered_data), -1) 122 | for keyword,values_row in zip(bullets, values.T): 123 | df_data[keyword] = values_row 124 | return pd.DataFrame(df_data) 125 | 126 | @staticmethod 127 | def suggestions(data): 128 | return pd.DataFrame(data['default']['topics']) 129 | 130 | @staticmethod 131 | def rss_items(data): 132 | item_pattern = re.compile(r'<item>(.*?)</item>', re.DOTALL)  # one match per RSS <item> entry 133 | items = list(map(lambda item:parse_xml_to_dict(item, 'ht:'), item_pattern.findall(data))) 134 | return items 135 | 136 | @staticmethod 137 | def trending_now_showcase_timeline(data, request_timestamp=None): 138 | lens = [len(item[1]) for item in data] 139 | min_len, max_len = min(lens), max(lens) 140 | if min_len in {30,90,180,42}: 141 | max_len = min_len + 1 142 | 143 | time_offset = 480 if max_len < 32 else 14400 if max_len < 45 else 960 144 | 145 | timestamp = int(request_timestamp or datetime.now(timezone.utc).timestamp()) 146 | timestamps = [timestamp // time_offset * time_offset - time_offset * i for i in range(max_len+2)][::-1] 147 | timestamps = np.array(timestamps, dtype='datetime64[s]').astype('datetime64[ns]') 148 | if (timestamp%time_offset) <= 60: # Time delay determined empirically 149 | df_data = {item[0]:item[1][-min_len:] for item in data} 150 | df = pd.DataFrame(df_data, index=timestamps[:-1][-min_len:]) 151 | return df 152 | 153 | res = {} 154 | for item in data: 155 | res[item[0]] = np.pad(item[1], (0, max_len - len(item[1])), mode='constant', constant_values=0) 156 | df = pd.DataFrame(res, index=timestamps[-max_len:]) 157 | return df -------------------------------------------------------------------------------- /src/trendspy/timeframe_utils.py: -------------------------------------------------------------------------------- 1 | __all__ = ['convert_timeframe', 'timeframe_to_timedelta', 'verify_consistent_timeframes'] 2 | 3 | import re 4 | from datetime import datetime, timedelta, timezone 5 | from dateutil.relativedelta import relativedelta 6 | from typing import Any 7 | from .utils import ensure_list 8 | # Regular expression pattern to validate date strings in the format 'YYYY-MM-DD' or 'YYYY-MM-DDTHH' 9 | VALID_DATE_PATTERN = r'^\d{4}-\d{2}-\d{2}(T\d{2})?$' 10 | 11 | # Set of fixed timeframes supported by an external API 12 | FIXED_TIMEFRAMES = {'now 1-H', 'now 4-H', 'now 1-d', 'now 7-d', 'today 1-m', 'today 3-m', 'today 5-y', 'today 12-m', 'all'} 13 | 14 | # Date format strings for standard and datetime with hour formats 15 | DATE_FORMAT = "%Y-%m-%d" 16 | DATE_T_FORMAT = "%Y-%m-%dT%H" 17 | 18 | # Regular expression pattern to validate offset strings like '10-d', '5-H', etc. 
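# e.g. '10-d' (ten days) or '36-H' (thirty-six hours); the dash is optional, so '10d' validates too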
19 | OFFSET_PATTERN = r'\d+[-]?[Hdmy]$' 20 | 21 | # Mapping of units (H, d, m, y) to relativedelta arguments 22 | UNIT_MAP = {'H': 'hours', 'd': 'days', 'm': 'months', 'y': 'years'} 23 | 24 | 25 | def _is_valid_date(date_str): 26 | # Checks if the given string matches the valid date pattern 27 | return bool(re.match(VALID_DATE_PATTERN, date_str)) 28 | 29 | 30 | def _is_valid_format(offset_str): 31 | # Checks if the given string matches the valid offset pattern 32 | return bool(re.match(OFFSET_PATTERN, offset_str)) 33 | 34 | 35 | def _extract_time_parts(offset_str): 36 | # Extracts numerical value and unit (H, d, m, y) from the offset string 37 | match = re.search(r'(\d+)[-]?([Hdmy]+)', offset_str) 38 | if match: 39 | return int(match.group(1)), match.group(2) 40 | return None 41 | 42 | 43 | def _decode_trend_datetime(date_str): 44 | # Parses the date string into a datetime object based on whether it includes time ('T' character) 45 | return datetime.strptime(date_str, DATE_T_FORMAT) if 'T' in date_str else datetime.strptime(date_str, DATE_FORMAT) 46 | 47 | 48 | def _process_two_dates(date_part_1, date_part_2): 49 | isT1 = 'T' in date_part_1 50 | isT2 = 'T' in date_part_2 51 | if (not isT1) and (not isT2): 52 | return f'{date_part_1} {date_part_2}' 53 | 54 | # Processes two date parts and returns the formatted result 55 | date_1 = _decode_trend_datetime(date_part_1) 56 | date_2 = _decode_trend_datetime(date_part_2) 57 | 58 | # Adjust date formatting if only one of the dates includes hour information 59 | if (isT1) and (not isT2): 60 | date_2 += timedelta(days=1) 61 | date_2 = date_2.replace(hour=0) 62 | elif (not isT1) and (isT2): 63 | date_1 = date_1.replace(hour=0) 64 | 65 | # Ensure the difference between dates does not exceed 7 days when time information is included 66 | if ('T' in date_part_1 or 'T' in date_part_2) and abs((date_1 - date_2).days) > 7: 67 | raise ValueError(f'Date difference cannot exceed 7 days for format with hours: {date_part_1} {date_part_2}') 68 | 69 | # Return the formatted result with both dates including hours 70 | return f'{date_1.strftime(DATE_T_FORMAT)} {date_2.strftime(DATE_T_FORMAT)}' 71 | 72 | 73 | def _process_date_with_offset(date_part_1, offset_part): 74 | # Processes a date part with an offset to calculate the resulting timeframe 75 | date_1 = _decode_trend_datetime(date_part_1) 76 | count, unit = _extract_time_parts(offset_part) 77 | 78 | # Calculate the offset using relativedelta 79 | raw_diff = relativedelta(**{UNIT_MAP[unit]: count}) 80 | if unit in {'m', 'y'}: 81 | # Special handling for months and years: adjust based on the current UTC date 82 | now = datetime.now(timezone.utc) 83 | end_date = now - raw_diff 84 | raw_diff = now - end_date 85 | 86 | # Raise an error if the offset exceeds 7 days for formats that include time 87 | if 'T' in date_part_1 and ((unit == 'd' and count > 7) or (unit == 'H' and count > 7 * 24)): 88 | raise ValueError(f'Offset cannot exceed 7 days for format with time: {date_part_1} {offset_part}. Use YYYY-MM-DD format or "today".') 89 | 90 | # Determine the appropriate date format based on the unit (hours/days or months/years) 91 | date_format = DATE_T_FORMAT if 'T' in date_part_1 else DATE_FORMAT 92 | return f'{(date_1 - raw_diff).strftime(date_format)} {date_1.strftime(date_format)}' 93 | 94 | 95 | def convert_timeframe(timeframe, convert_fixed_timeframes_to_dates=False): 96 | """ 97 | Converts timeframe strings to Google Trends format. 98 | 99 | Supports multiple formats: 100 | 1. 
Fixed timeframes ('now 1-H', 'today 12-m', etc.) 101 | 2. Date ranges ('2024-01-01 2024-12-31') 102 | 3. Date with offset ('2024-03-25 5-m') 103 | 4. Hour-specific ranges ('2024-03-25T12 2024-03-25T15') 104 | 105 | Parameters: 106 | timeframe (str): Input timeframe string 107 | convert_fixed_timeframes_to_dates (bool): Convert fixed timeframes to dates 108 | 109 | Returns: 110 | str: Converted timeframe string in Google Trends format 111 | 112 | Raises: 113 | ValueError: If timeframe format is invalid 114 | """ 115 | # If the timeframe is in the fixed set and conversion is not requested, return as is 116 | if (timeframe in FIXED_TIMEFRAMES) and (not convert_fixed_timeframes_to_dates): 117 | return timeframe 118 | 119 | # Replace 'now' and 'today' with the current datetime in the appropriate format 120 | utc_now = datetime.now(timezone.utc) 121 | if convert_fixed_timeframes_to_dates and timeframe=='all': 122 | return '2024-01-01 {}'.format(utc_now.strftime(DATE_FORMAT)) 123 | 124 | timeframe = timeframe.replace('now', utc_now.strftime(DATE_T_FORMAT)).replace('today', utc_now.strftime(DATE_FORMAT)) 125 | 126 | # Split the timeframe into two parts 127 | parts = timeframe.split() 128 | if len(parts) != 2: 129 | raise ValueError(f"Invalid timeframe format: {timeframe}. Expected format: '<date> <date>' or '<date> <offset>'.") 130 | 131 | date_part_1, date_part_2 = parts 132 | 133 | # Process the timeframe based on its parts 134 | if _is_valid_date(date_part_1): 135 | if _is_valid_date(date_part_2): 136 | # Process if both parts are valid dates 137 | return _process_two_dates(date_part_1, date_part_2) 138 | elif _is_valid_format(date_part_2): 139 | # Process if the second part is a valid offset 140 | return _process_date_with_offset(date_part_1, date_part_2) 141 | 142 | raise ValueError(f'Could not process timeframe: {timeframe}') 143 | 144 | def timeframe_to_timedelta(timeframe): 145 | result = convert_timeframe(timeframe, convert_fixed_timeframes_to_dates=True) 146 | date_1, date_2 = result.split() 147 | datetime_1 = _decode_trend_datetime(date_1) 148 | datetime_2 = _decode_trend_datetime(date_2) 149 | return (datetime_2 - datetime_1) 150 | 151 | def verify_consistent_timeframes(timeframes): 152 | """ 153 | Verifies that all timeframes have consistent resolution. 154 | 155 | Google Trends requires all timeframes in a request to have the same 156 | data resolution (e.g., hourly, daily, weekly). 
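For instance (span lengths computed via ``timeframe_to_timedelta``):

    >>> verify_consistent_timeframes(['2024-01-01 10-d', '2024-03-01 10-d'])
    True

Mixing a ``10-d`` span with a ``5-d`` span raises ``ValueError`` instead.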
157 | 158 | Parameters: 159 | timeframes (list): List of timeframe strings 160 | 161 | Returns: 162 | bool: True if timeframes are consistent 163 | 164 | Raises: 165 | ValueError: If timeframes have different resolutions 166 | """ 167 | if isinstance(timeframes, str): 168 | return True 169 | 170 | timedeltas = list(map(timeframe_to_timedelta, timeframes)) 171 | if all(td == timedeltas[0] for td in timedeltas): 172 | return True 173 | else: 174 | raise ValueError(f"Inconsistent timeframes detected: {[str(td) for td in timedeltas]}") 175 | 176 | # Define the mapping between time range, resolution, and its range 177 | def get_resolution_and_range(timeframe): 178 | delta = timeframe_to_timedelta(timeframe) 179 | if delta < timedelta(hours=5): 180 | return "1 minute", "delta < 5 hours" 181 | elif delta < timedelta(hours=36): 182 | return "8 minutes", "5 hours <= delta < 36 hours" 183 | elif delta < timedelta(hours=72): 184 | return "16 minutes", "36 hours <= delta < 72 hours" 185 | elif delta < timedelta(days=8): 186 | return "1 hour", "72 hours <= delta < 8 days" 187 | elif delta < timedelta(days=270): 188 | return "1 day", "8 days <= delta < 270 days" 189 | elif delta < timedelta(days=1900): 190 | return "1 week", "270 days <= delta < 1900 days" 191 | else: 192 | return "1 month", "delta >= 1900 days" 193 | 194 | # Function to check if all timeframes have the same resolution 195 | def check_timeframe_resolution(timeframes): 196 | timeframes = ensure_list(timeframes) 197 | resolutions = list(map(get_resolution_and_range, timeframes)) 198 | 199 | # Extract only resolutions (without ranges) to check if they are the same 200 | resolution_values = [r[0] for r in resolutions] 201 | 202 | # Check if all resolutions are the same 203 | deltas = [timeframe_to_timedelta(timeframe) for timeframe in timeframes] 204 | if len(set(resolution_values)) > 1: 205 | # If there are differences, output an error message with details 206 | error_message = "Error: Different resolutions detected for the timeframes:\n" 207 | for timeframe, delta, (resolution, time_range) in zip(timeframes, deltas, resolutions): 208 | error_message += ( 209 | f"Timeframe: {timeframe}, Delta: {delta}, " 210 | f"Resolution: {resolution} (based on range: {time_range})\n" 211 | ) 212 | raise ValueError(error_message) 213 | 214 | min_delta, min_timeframe = min(zip(deltas, timeframes)) 215 | max_delta, max_timeframe = max(zip(deltas, timeframes)) 216 | 217 | if max_delta >= min_delta * 2: 218 | raise ValueError( 219 | f"Error: The maximum delta {max_delta} (from timeframe {max_timeframe}) " 220 | f"should be less than twice the minimum delta {min_delta} (from timeframe {min_timeframe})." 221 | ) -------------------------------------------------------------------------------- /src/trendspy/trend_keyword.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | from .news_article import NewsArticle 3 | from .utils import ensure_list, truncate_string 4 | from .constants import TREND_TOPICS 5 | 6 | class TrendKeyword: 7 | """ 8 | Represents a trending search term with associated metadata. 9 | 10 | This class encapsulates information about a trending keyword, including 11 | its search volume, related news, geographic information, and timing data. 
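Printing an instance renders the ``brief_summary`` one-liner, for example (values illustrative):

    [US] python: 50,000 searches, 5 related keywords, topics: Technology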
12 | 13 | Attributes: 14 | keyword (str): The trending search term 15 | news (list): Related news articles 16 | geo (str): Geographic location code 17 | started_timestamp (tuple): When the trend started 18 | ended_timestamp (tuple): When the trend ended (if finished) 19 | volume (int): Search volume 20 | volume_growth_pct (float): Percentage growth in search volume 21 | trend_keywords (list): Related keywords 22 | topics (list): Related topics 23 | news_tokens (list): Associated news tokens 24 | normalized_keyword (str): Normalized form of the keyword 25 | """ 26 | def __init__(self, item: list): 27 | ( 28 | self.keyword, 29 | self.news, # news! 30 | self.geo, 31 | self.started_timestamp, 32 | self.ended_timestamp, 33 | self._unk2, 34 | self.volume, 35 | self._unk3, 36 | self.volume_growth_pct, 37 | self.trend_keywords, 38 | self.topics, 39 | self.news_tokens, 40 | self.normalized_keyword 41 | ) = item 42 | if self.news: 43 | self.news = list(map(NewsArticle.from_api, self.news)) 44 | 45 | @property 46 | def topic_names(self): 47 | """Returns a list of topic names for the trend's topic IDs.""" 48 | return [TREND_TOPICS.get(topic_id, f"Unknown Topic ({topic_id})") for topic_id in self.topics] 49 | 50 | def _convert_to_datetime(self, raw_time): 51 | """Converts time in seconds to a datetime object with UTC timezone, if it exists.""" 52 | return datetime.fromtimestamp(raw_time, tz=timezone.utc) if raw_time else None 53 | 54 | @property 55 | def is_trend_finished(self) -> bool: 56 | """Checks if the trend is finished.""" 57 | return self.ended_timestamp is not None 58 | 59 | def hours_since_started(self) -> float: 60 | """Returns the number of hours elapsed since the trend started.""" 61 | if not self.started_timestamp: 62 | return 0 63 | delta = datetime.now(tz=timezone.utc) - datetime.fromtimestamp(self.started_timestamp[0], tz=timezone.utc) 64 | return delta.total_seconds() / 3600 65 | 66 | def __repr__(self): 67 | """Returns a complete string representation for object reconstruction.""" 68 | # Convert NewsArticle objects back to their original form 69 | news_data = self.news 70 | if self.news: 71 | news_data = [ 72 | { 73 | 'title': article.title, 74 | 'url': article.url, 75 | 'source': article.source, 76 | 'time': article.time, 77 | 'picture': article.picture, 78 | 'snippet': article.snippet 79 | } for article in self.news 80 | ] 81 | 82 | # Create list of all components in initialization order 83 | components = [ 84 | self.keyword, 85 | news_data, 86 | self.geo, 87 | self.started_timestamp, 88 | self.ended_timestamp, 89 | self._unk2, 90 | self.volume, 91 | self._unk3, 92 | self.volume_growth_pct, 93 | self.trend_keywords, 94 | self.topics, 95 | self.news_tokens, 96 | self.normalized_keyword 97 | ] 98 | 99 | return f"{self.__class__.__name__}({components!r})" 100 | 101 | def __str__(self): 102 | """Returns a human-readable string representation.""" 103 | timeframe = datetime.fromtimestamp(self.started_timestamp[0]).strftime('%Y-%m-%d %H:%M:%S') 104 | if self.is_trend_finished: 105 | timeframe += ' - ' + datetime.fromtimestamp(self.ended_timestamp[0]).strftime('%Y-%m-%d %H:%M:%S') 106 | else: 107 | timeframe += ' - now' 108 | 109 | s = f'Keyword : {self.keyword}' 110 | s += f'\nGeo : {self.geo}' 111 | s += f'\nVolume : {self.volume} ({self.volume_growth_pct}%)' 112 | s += f'\nTimeframe : {timeframe}' 113 | s += f'\nTrend keywords : {len(self.trend_keywords)} keywords ({truncate_string(",".join(self.trend_keywords), 50)})' 114 | s += f'\nNews tokens : {len(self.news_tokens)} tokens' 115 | 
return s 116 | 117 | def brief_summary(self): 118 | """Returns an informative summary of the trend.""" 119 | # Start with the geo code in square brackets 120 | parts = [f"[{self.geo}] {self.keyword}: {self.volume:,} searches"] 121 | 122 | # Append any additional details that are available 123 | if self.trend_keywords: 124 | parts.append(f"{len(self.trend_keywords)} related keywords") 125 | if self.topics: 126 | topic_list = ", ".join(self.topic_names) 127 | parts.append(f"topics: {topic_list}") 128 | if self.news: 129 | parts.append(f"{len(self.news)} news articles") 130 | 131 | return ", ".join(parts) 132 | 133 | def _repr_pretty_(self, p, cycle): 134 | """Integration with IPython's pretty printer.""" 135 | if cycle: 136 | p.text("[...]") 137 | else: 138 | p.text(self.brief_summary()) 139 | 140 | def __format__(self, format_spec): 141 | """Implements formatting for f-strings and format() method.""" 142 | return self.brief_summary() 143 | 144 | # Override __str__ to use brief_summary (this supersedes the earlier definition above) 145 | def __str__(self): 146 | return self.brief_summary() 147 | 148 | class TrendKeywordLite: 149 | """ 150 | A lightweight version of TrendKeyword for simple trend representation. 151 | 152 | This class provides a simplified view of trending keywords, primarily used 153 | for RSS feeds and basic trending data. 154 | 155 | Attributes: 156 | keyword (str): The trending search term 157 | volume (str): Approximate search volume 158 | trend_keywords (list): Related keywords 159 | link (str): URL to more information 160 | started (int): Unix timestamp when the trend started 161 | picture (str): URL to related image 162 | picture_source (str): Source of the picture 163 | news (list): Related news articles 164 | """ 165 | def __init__(self, keyword, volume, trend_keywords, link, started, picture, picture_source, news): 166 | self.keyword = keyword 167 | self.volume = volume 168 | self.trend_keywords = trend_keywords 169 | self.link = link 170 | self.started = None 171 | self.picture = picture 172 | self.picture_source = picture_source 173 | self.news = news 174 | if started: 175 | self.started = self._parse_pub_date(started) 176 | elif news: 177 | self.started = min([item.time for item in news]) 178 | 179 | @staticmethod 180 | def _parse_pub_date(pub_date): 181 | return int(datetime.strptime(pub_date, '%a, %d %b %Y %H:%M:%S %z').timestamp()) 182 | 183 | @classmethod 184 | def from_api(cls, data): 185 | title = data.get('title') 186 | if isinstance(title, dict): 187 | title = title.get('query') 188 | volume = data.get('formattedTraffic') or data.get('approx_traffic') 189 | trend_keywords = ([item.get('query') for item in data.get('relatedQueries', [])]) 190 | trend_keywords = trend_keywords or (data.get('description', '').split(', ') if 'description' in data else None) 191 | trend_keywords = trend_keywords or list(set([word for item in data.get('idsForDedup', '') for word in item.split(' ')])) 192 | link = data.get('shareUrl') or data.get('link') 193 | started = data.get('pubDate') 194 | picture = data.get('picture') or data.get('image', {}).get('imageUrl') 195 | picture_source = data.get('picture_source') or data.get('image', {}).get('source') 196 | articles = data.get('articles') or data.get('news_item') or [] 197 | 198 | return cls( 199 | keyword = title, 200 | volume = volume, 201 | trend_keywords = trend_keywords, 202 | link = link, 203 | started = started, 204 | picture = picture, 205 | picture_source = picture_source, 206 | news = [NewsArticle.from_api(item) for item in ensure_list(articles)] 207 | ) 208 | 
209 | def __repr__(self): 210 | return f"TrendKeywordLite(title={self.keyword}, traffic={self.volume}, started={self.started})" 211 | 212 | def __str__(self): 213 | s = 'Keyword : {}'.format(self.keyword) 214 | s += '\nVolume : {}'.format(self.volume) if self.volume else '' 215 | s += '\nStarted : {}'.format(datetime.fromtimestamp(self.started).strftime('%Y-%m-%d %H:%M:%S')) if self.started else '' 216 | s += '\nTrend keywords : {} keywords ({})'.format(len(self.trend_keywords), truncate_string(','.join(self.trend_keywords), 50)) if self.trend_keywords else '' 217 | s += '\nNews : {} news'.format(len(self.news)) if self.news else '' 218 | return s -------------------------------------------------------------------------------- /src/trendspy/client.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import requests 4 | import pandas as pd 5 | import numpy as np 6 | from enum import Enum 7 | from typing import Dict, List, Optional 8 | from urllib.parse import quote, quote_plus 9 | from .utils import * 10 | from .converter import TrendsDataConverter 11 | from .trend_keyword import * 12 | from .news_article import * 13 | from .timeframe_utils import convert_timeframe, check_timeframe_resolution 14 | from .hierarchical_search import create_hierarchical_index 15 | from .trend_list import TrendList 16 | from time import sleep,time 17 | 18 | class TrendsQuotaExceededError(Exception): 19 | """Raised when the Google Trends API quota is exceeded for related queries/topics.""" 20 | def __init__(self): 21 | super().__init__( 22 | "API quota exceeded for related queries/topics. " 23 | "To resolve this, you can try:\n" 24 | "1. Use a different referer in request headers:\n" 25 | " tr.related_queries(keyword, headers={'referer': 'https://www.google.com/'})\n" 26 | "2. Use a different IP address by configuring a proxy:\n" 27 | " tr.set_proxy('http://proxy:port')\n" 28 | " # or\n" 29 | " tr = Trends(proxy={'http': 'http://proxy:port', 'https': 'https://proxy:port'})\n" 30 | "3. Wait before making additional requests" 31 | ) 32 | 33 | class BatchPeriod(Enum): # update every 2 min 34 | ''' 35 | Time periods for batch operations. 
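Used as the ``timeframe`` argument of ``Trends.trending_now_showcase_timeline``, e.g. ``tr.trending_now_showcase_timeline(keywords, timeframe=BatchPeriod.Past24H)``.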
36 | ''' 37 | Past4H = 2 #31 points (new points every 8 min) 38 | Past24H = 3 #91 points (every 16 min) 39 | Past48H = 5 #181 points (every 16 min) 40 | Past7D = 4 #43 points (every 4 hours) 41 | 42 | BATCH_URL = f'https://trends.google.com/_/TrendsUi/data/batchexecute' 43 | HOT_TRENDS_URL = f'https://trends.google.com/trends/hottrends/visualize/internal/data' 44 | 45 | # ----------- API LINKS ------------- 46 | API_URL = f'https://trends.google.com/trends/api' 47 | API_EXPLORE_URL = f'{API_URL}/explore' 48 | API_GEO_DATA_URL = f'{API_URL}/explore/pickers/geo' 49 | API_CATEGORY_URL = f'{API_URL}/explore/pickers/category' 50 | API_TOPCHARTS_URL = f'{API_URL}/topcharts' 51 | API_AUTOCOMPLETE = f'{API_URL}/autocomplete/' 52 | DAILY_SEARCHES_URL = f'{API_URL}/dailytrends' 53 | REALTIME_SEARCHES_URL = f'{API_URL}/realtimetrends' 54 | 55 | API_TOKEN_URL = f'https://trends.google.com/trends/api/widgetdata' 56 | API_TIMELINE_URL = f'{API_TOKEN_URL}/multiline' 57 | API_MULTIRANGE_URL = f'{API_TOKEN_URL}/multirange' 58 | API_GEO_URL = f'{API_TOKEN_URL}/comparedgeo' 59 | API_RELATED_QUERIES_URL = f'{API_TOKEN_URL}/relatedsearches' 60 | 61 | # ----------- EMBED LINKS ------------- 62 | EMBED_URL = f'https://trends.google.com/trends/embed/explore' 63 | EMBED_GEO_URL = f'{EMBED_URL}/GEO_MAP' 64 | EMBED_TOPICS_URL = f'{EMBED_URL}/RELATED_TOPICS' 65 | EMBED_QUERIES_URL = f'{EMBED_URL}/RELATED_QUERIES' 66 | EMBED_TIMESERIES_URL = f'{EMBED_URL}/TIMESERIES' 67 | 68 | # --------------- RSS ----------------- 69 | DAILY_RSS = f'https://trends.google.com/trends/trendingsearches/daily/rss' 70 | REALTIME_RSS = f'https://trends.google.com/trending/rss' 71 | 72 | class Trends: 73 | """ 74 | A client for accessing Google Trends data. 75 | 76 | This class provides methods to analyze search trends, get real-time trending topics, 77 | and track interest over time and regions. 78 | 79 | Parameters: 80 | language (str): Language code (e.g., 'en', 'es', 'fr'). Defaults to 'en'. 81 | tzs (int): Timezone offset in minutes. Defaults to current system timezone. 82 | use_enitity_names (bool): Whether to use entity names instead of keywords. 83 | Defaults to False. 84 | proxy (str or dict): Proxy configuration. Can be a string URL or a dictionary 85 | with protocol-specific proxies. Examples: 86 | - "http://user:pass@10.10.1.10:3128" 87 | - {"http": "http://10.10.1.10:3128", "https": "http://10.10.1.10:1080"} 88 | """ 89 | 90 | def __init__(self, language='en', tzs=360, request_delay=1., max_retries=3, use_enitity_names=False, proxy=None, **kwargs): 91 | """ 92 | Initialize the Trends client. 93 | 94 | Args: 95 | language (str): Language code (e.g., 'en', 'es', 'fr'). 96 | tzs (int): Timezone offset in minutes. Defaults to 360. 97 | request_delay (float): Minimum time interval between requests in seconds. Helps avoid hitting rate limits and behaving like a bot. Set to 0 to disable. 98 | max_retries (int): Maximum number of retry attempts for failed requests. Each retry includes exponential backoff delay of 2^(max_retries-retries) seconds for rate limit errors (429, 302). 99 | use_enitity_names (bool): Whether to use entity names instead of keywords. 100 | proxy (str or dict): Proxy configuration. 101 | **kwargs: Additional arguments for backwards compatibility. 102 | - hl (str, deprecated): Old-style language code (e.g., 'en' or 'en-US'). 103 | If provided, will be used as fallback when language is invalid. 
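Example:
    A conservative setup for shared or rate-limited IPs (a sketch; tune the delay to your quota):

        tr = Trends(language='en', request_delay=2.0, max_retries=5)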
104 | """ 105 | if isinstance(language, str) and len(language) >= 2: 106 | self.language = language[:2].lower() 107 | elif 'hl' in kwargs and isinstance(kwargs['hl'], str) and len(kwargs['hl']) >= 2: 108 | self.language = kwargs['hl'][:2].lower() 109 | else: 110 | self.language = 'en' 111 | 112 | # self.hl = hl 113 | self.tzs = tzs or -int(datetime.now().astimezone().utcoffset().total_seconds()/60) 114 | self._default_params = {'hl': self.language, 'tz': tzs} 115 | self.use_enitity_names = use_enitity_names 116 | self.session = requests.session() 117 | self._headers = {'accept-language': self.language} 118 | self._geo_cache = {} 119 | self._category_cache = {} # Add category cache 120 | self.request_delay = request_delay 121 | self.max_retires = max_retries 122 | self.last_request_times = {0,1} 123 | # Initialize proxy configuration 124 | self.set_proxy(proxy) 125 | 126 | def set_proxy(self, proxy=None): 127 | """ 128 | Set or update proxy configuration for the session. 129 | 130 | Args: 131 | proxy (str or dict, optional): Proxy configuration. Can be: 132 | - None: Remove proxy configuration 133 | - str: URL for all protocols (e.g., "http://10.10.1.10:3128") 134 | - dict: Protocol-specific proxies (e.g., {"http": "...", "https": "..."}) 135 | """ 136 | if isinstance(proxy, str): 137 | # Convert string URL to dictionary format 138 | proxy = { 139 | 'http': proxy, 140 | 'https': proxy 141 | } 142 | 143 | # Update session's proxy configuration 144 | self.session.proxies.clear() 145 | if proxy: 146 | self.session.proxies.update(proxy) 147 | 148 | def _extract_keywords_from_token(self, token): 149 | if self.use_enitity_names: 150 | return [item['text'] for item in token['bullets']] 151 | else : 152 | return [item['complexKeywordsRestriction']['keyword'][0]['value'] for item in token['request']['comparisonItem']] 153 | 154 | @staticmethod 155 | def _parse_protected_json(response: requests.models.Response): 156 | """ 157 | Parses JSON data from a protected API response. 
148 |     def _extract_keywords_from_token(self, token):
149 |         if self.use_entity_names:
150 |             return [item['text'] for item in token['bullets']]
151 |         else:
152 |             return [item['complexKeywordsRestriction']['keyword'][0]['value'] for item in token['request']['comparisonItem']]
153 | 
154 |     @staticmethod
155 |     def _parse_protected_json(response: requests.models.Response):
156 |         """
157 |         Parses JSON data from a protected API response.
158 | 
159 |         Args:
160 |             response (requests.models.Response): Response object from requests
161 | 
162 |         Returns:
163 |             dict: Parsed JSON data
164 | 
165 |         Raises:
166 |             ValueError: If response status is not 200, content type is invalid,
167 |                 or JSON parsing fails
168 |         """
169 |         valid_content_types = {'application/json', 'application/javascript', 'text/javascript'}
170 |         content_type = response.headers.get('Content-Type', '').split(';')[0].strip().lower()
171 | 
172 |         if (response.status_code != 200) or (content_type not in valid_content_types):
173 |             raise ValueError(f"Invalid response: status {response.status_code}, content type '{content_type}'")
174 | 
175 |         try:
176 |             json_data = response.text.split('\n')[-1]  # the JSON payload sits on the last line, after an anti-scraping prefix
177 |             return json.loads(json_data)
178 |         except json.JSONDecodeError:
179 |             raise ValueError("Failed to parse JSON data")
180 | 
181 |     def _encode_items(self, keywords, timeframe="today 12-m", geo=''):
182 |         data = list(map(ensure_list, [keywords, timeframe, geo]))
183 |         lengths = list(map(len, data))
184 |         max_len = max(lengths)
185 |         if not all(max_len % length == 0 for length in lengths):
186 |             raise ValueError(f"Ambiguous input sizes: unable to determine how to combine inputs of lengths {lengths}")
187 |         data = [item * (max_len // len(item)) for item in data]  # broadcast shorter inputs up to max_len
188 |         items = [dict(zip(['keyword', 'time', 'geo'], values)) for values in zip(*data)]
189 |         return items
190 | 
191 |     def _encode_request(self, params):
192 |         if 'keyword' in params:
193 |             keywords = ensure_list(params.pop('keyword'))
194 |             if len(keywords) != 1:
195 |                 raise ValueError("This endpoint only supports a single keyword")
196 |             params['keywords'] = keywords
197 | 
198 |         items = self._encode_items(
199 |             keywords = params['keywords'],
200 |             timeframe = params.get('timeframe', "today 12-m"),
201 |             geo = params.get('geo', '')
202 |         )
203 | 
204 |         req = {'req': json.dumps({
205 |             'comparisonItem': items,
206 |             'category': params.get('cat', 0),
207 |             'property': params.get('gprop', '')
208 |         })}
209 | 
210 |         req.update(self._default_params)
211 |         return req
212 | 
213 |     def _get(self, url, params=None, headers=None):
214 |         """
215 |         Make HTTP GET request with retry logic and proxy support.
216 | 
217 |         Args:
218 |             url (str): URL to request
219 |             params (dict, optional): Query parameters
220 |             headers (dict, optional): Request headers
221 | 
222 |         Returns:
223 |             requests.Response: Response object
224 | 
225 |         Raises:
226 |             ValueError: If response status code is not 200
227 |             requests.exceptions.RequestException: For network-related errors
228 |         """
229 |         retries = self.max_retries
230 |         response_codes = []
231 |         last_response = None
232 | 
233 |         while retries > 0:
234 |             try:
235 | 
236 |                 if self.request_delay:
237 |                     min_time = min(self.last_request_times)
238 |                     sleep_time = max(0, self.request_delay - (time() - min_time))
239 |                     sleep(sleep_time)
240 |                     self.last_request_times = (self.last_request_times - {min_time}) | {time()}  # replace the oldest timestamp with "now"
241 | 
242 |                 req = self.session.get(url, params=params, headers=headers)
243 |                 last_response = req
244 |                 response_code = req.status_code
245 |                 response_codes.append(response_code)
246 | 
247 |                 if response_code == 200:
248 |                     return req
249 |                 else:
250 |                     if response_code in {429, 302}:
251 |                         sleep(2**(self.max_retries - retries))  # exponential backoff: 1s, 2s, 4s, ...
252 |                     retries -= 1
253 | 
254 |             except Exception:
255 |                 retries -= 1
256 |                 if retries == 0:
257 |                     raise
258 | 
259 |         if response_codes.count(429) > len(response_codes) / 2:
260 |             current_delay = self.request_delay or 1
261 |             print(f"\nWarning: Too many rate limit errors (429). Consider increasing request_delay "
262 |                   f"to Trends(request_delay={current_delay*2}) before Google implements a long-term "
263 |                   f"rate limit!")
264 |         last_response.raise_for_status()
265 | 
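# --- Editor's note: illustrative backoff arithmetic for _get above (not part of
# --- the library). With max_retries=3, rate-limited attempts sleep 2**(3 - retries):
# >>> [2**(3 - r) for r in (3, 2, 1)]
# [1, 2, 4]   # seconds slept after the 1st, 2nd and 3rd rate-limited attempt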
266 |     @classmethod
267 |     def _extract_embedded_data(cls, text):
268 |         pattern = re.compile(r"JSON\.parse\('([^']+)'\)")
269 |         matches = pattern.findall(text)
270 |         # If matches found, decode and return result
271 |         if matches:
272 |             return json.loads(decode_escape_text(matches[0]))  # Take first match
273 |         raise ValueError("Failed to extract embedded JSON data")
274 | 
275 |     def _token_to_data(self, token):
276 |         URL = {
277 |             'fe_line_chart': API_TIMELINE_URL,
278 |             'fe_multi_range_chart': API_MULTIRANGE_URL,
279 |             'fe_multi_heat_map': API_GEO_URL,
280 |             'fe_geo_chart_explore': API_GEO_URL,
281 |             'fe_related_searches': API_RELATED_QUERIES_URL
282 |         }[token['type']]
283 | 
284 |         params = {'req': json.dumps(token['request']), 'token': token['token']}
285 |         params.update(self._default_params)
286 | 
287 |         req = self._get(URL, params=params)
288 |         data = Trends._parse_protected_json(req)
289 |         return data
290 | 
291 |     def _get_token_data(self, url, params=None, request_fix=None, headers=None, raise_quota_error=False):
292 |         """
293 |         Internal method to get token data from Google Trends API.
294 | 
295 |         Handles both 'keyword' and 'keywords' parameters for backward compatibility
296 |         and convenience.
297 |         """
298 | 
299 |         params = self._encode_request(params)
300 |         req = self._get(url, params=params, headers=headers)
301 |         token = self._extract_embedded_data(req.text)
302 | 
303 |         if request_fix is not None:
304 |             token = {**token, 'request': {**token['request'], **request_fix}}
305 | 
306 |         if raise_quota_error:
307 |             user_type = token.get('request', {}).get('userConfig', {}).get('userType', '')
308 |             if user_type == "USER_TYPE_EMBED_OVER_QUOTA":
309 |                 raise TrendsQuotaExceededError()
310 | 
311 |         data = self._token_to_data(token)
312 |         return token, data
313 | 
314 |     def _get_batch(self, req_id, data):
315 |         req_data = json.dumps([[[req_id, json.dumps(data), None, "generic"]]])
316 |         post_data = f'f.req={req_data}'
317 |         headers = {
318 |             "content-type": "application/x-www-form-urlencoded;charset=UTF-8"
319 |         }
320 |         req = self.session.post(BATCH_URL, post_data, headers=headers)
321 |         return req
322 | 
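# --- Editor's note: sketch of the payload _get_batch builds for the batchexecute
# --- endpoint ('i0OFE' is the RPC id trending_now uses below; values illustrative).
# >>> inner = json.dumps([None, None, 'US', 0, 'en', 24, 1])
# >>> json.dumps([[['i0OFE', inner, None, 'generic']]])  # becomes the f.req form field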
323 |     def interest_over_time(self, keywords, timeframe="today 12-m", geo='', cat=0, gprop='', return_raw=False, headers=None):
324 |         """
325 |         Retrieves interest over time data for specified keywords.
326 | 
327 |         Parameters:
328 |             keywords (str or list): Keywords to analyze.
329 |             timeframe : str or list
330 |                 Defines the time range for querying interest over time. It can be specified as a single string or a list.
331 |                 Supported formats include:
332 | 
333 |                 - 'now 1-H', 'now 4-H', 'now 1-d', 'now 7-d'
334 |                 - 'today 1-m', 'today 3-m', 'today 12-m', 'today 5-y'
335 |                 - 'all' for all available data
336 |                 - 'YYYY-MM-DD YYYY-MM-DD' for specific date ranges
337 |                 - 'YYYY-MM-DDTHH YYYY-MM-DDTHH' for hourly data (if less than 8 days)
338 | 
339 |                 Additional flexible formats:
340 | 
341 |                 1. **'now {offset}'**: Timeframes less than 8 days (e.g., 'now 72-H' for the last 72 hours).
342 |                 2. **'today {offset}'**: Larger periods starting from today (e.g., 'today 5-m' for the last 5 months).
343 |                 3. **'date {offset}'**: Specific date with offset (e.g., '2024-03-25 5-m' for 5 months back from March 25, 2024).
344 | 
345 |                 **Note:** Offsets always go backward in time.
346 | 
347 |                 Resolutions based on timeframe length:
348 | 
349 |                 - `< 5 hours`: 1 minute
350 |                 - `5 hours <= delta < 36 hours`: 8 minutes
351 |                 - `36 hours <= delta < 72 hours`: 16 minutes
352 |                 - `72 hours <= delta < 8 days`: 1 hour
353 |                 - `8 days <= delta < 270 days`: 1 day
354 |                 - `270 days <= delta < 1900 days`: 1 week
355 |                 - `>= 1900 days`: 1 month
356 | 
357 |             Restrictions (see the sketch after this method):
358 |                 - **Same resolution**: All timeframes must resolve to the same resolution.
359 |                 - **Timeframe length**: The longest timeframe may be at most twice the length of the shortest one.
360 |             geo (str): Geographic location code (e.g., 'US' for United States).
361 |             cat (int): Category ID. Defaults to 0 (all categories).
362 |             gprop (str): Google property filter.
363 |             return_raw (bool): If True, returns raw API response.
364 | 
365 |         Returns:
366 |             pandas.DataFrame or raw API response
367 |                 Processed interest-over-time data, or raw API data if `return_raw=True`
368 |         """
369 |         check_timeframe_resolution(timeframe)
370 |         timeframe = list(map(convert_timeframe, ensure_list(timeframe)))
371 | 
372 |         token, data = self._get_token_data(EMBED_TIMESERIES_URL, locals(), headers=headers)
373 |         if return_raw:
374 |             return token, data
375 | 
376 |         if token['type'] == 'fe_line_chart':
377 |             keywords = self._extract_keywords_from_token(token)
378 |             return TrendsDataConverter.interest_over_time(data, keywords=keywords)
379 |         if token['type'] == 'fe_multi_range_chart':
380 |             bullets = TrendsDataConverter.token_to_bullets(token)
381 |             return TrendsDataConverter.multirange_interest_over_time(data, bullets=bullets)
382 |         return data
383 | 
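# --- Editor's note: usage sketch for the timeframe formats documented above
# --- (keywords and dates are illustrative).
# >>> tr = Trends()
# >>> tr.interest_over_time('python', timeframe='today 12-m', geo='US')
# >>> tr.interest_over_time(['python', 'golang'], timeframe='2024-01-01 2024-06-30')
# >>> # Multiple timeframes must share one resolution, and the longest may be at
# >>> # most twice the shortest; 'today 3-m' and 'today 5-m' both resolve to daily:
# >>> tr.interest_over_time(['python', 'golang'], timeframe=['today 3-m', 'today 5-m'])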
384 |     def related_queries(self, keyword, timeframe="today 12-m", geo='', cat=0, gprop='', return_raw=False, headers=None):
385 |         """
386 |         Retrieves related queries for a single search term.
387 | 
388 |         Args:
389 |             keyword (str): A single keyword to analyze
390 |             timeframe (str): Time range for analysis
391 |             geo (str): Geographic location code
392 |             cat (int): Category ID
393 |             gprop (str): Google property filter
394 |             return_raw (bool): If True, returns raw API response
395 |             headers (dict, optional): Custom request headers. Can be used to set a different referer
396 |                 to help bypass quota limits
397 | 
398 |         Raises:
399 |             TrendsQuotaExceededError: When API quota is exceeded
400 | 
401 |         Returns:
402 |             dict: Two DataFrames containing 'top' and 'rising' related queries
403 | 
404 |         Example:
405 |             >>> tr = Trends()
406 |             >>> related = tr.related_queries('python')
407 |             >>> print("Top queries:")
408 |             >>> print(related['top'])
409 |             >>> print("\nRising queries:")
410 |             >>> print(related['rising'])
411 |         """
412 |         headers = headers or {"referer": "https://trends.google.com/trends/explore"}
413 |         token, data = self._get_token_data(EMBED_QUERIES_URL, locals(), headers=headers, raise_quota_error=True)
414 |         if return_raw:
415 |             return token, data
416 |         return TrendsDataConverter.related_queries(data)
417 | 
418 |     def related_topics(self, keyword, timeframe="today 12-m", geo='', cat=0, gprop='', return_raw=False, headers=None):
419 |         """
420 |         Retrieves related topics for a single search term.
421 | 
422 |         Parameters:
423 |             keyword (str): A single keyword to analyze
424 |             timeframe (str): Time range for analysis
425 |             geo (str): Geographic location code
426 |             cat (int): Category ID
427 |             gprop (str): Google property filter
428 |             return_raw (bool): If True, returns raw API response
429 |             headers (dict, optional): Custom request headers. Can be used to set a different referer
430 |                 to help bypass quota limits
431 | 
432 |         Raises:
433 |             TrendsQuotaExceededError: When API quota is exceeded
434 | 
435 |         Example:
436 |             >>> tr = Trends()
437 |             >>> related = tr.related_topics('python')
438 |             >>> print("Top topics:")
439 |             >>> print(related['top'])
440 |             >>> print("\nRising topics:")
441 |             >>> print(related['rising'])
442 |         """
443 |         headers = headers or {"referer": "https://trends.google.com/trends/explore"}
444 |         token, data = self._get_token_data(EMBED_TOPICS_URL, locals(), headers=headers, raise_quota_error=True)
445 |         if return_raw:
446 |             return token, data
447 |         return TrendsDataConverter.related_queries(data)
448 | 
449 | 
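# --- Editor's note: both related_* methods above return a dict of two DataFrames
# --- keyed 'top' and 'rising', per the Returns section (keyword illustrative).
# >>> topics = tr.related_topics('python')
# >>> topics['top'].head()     # established related topics
# >>> topics['rising'].head()  # topics with growing interest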
450 |     def interest_by_region(self, keywords, timeframe="today 12-m", geo='', cat=0, gprop='', resolution=None, inc_low_vol=False, return_raw=False):
451 |         """
452 |         Retrieves geographical interest data based on keywords and other parameters.
453 | 
454 |         Parameters:
455 |             keywords (str or list): Search keywords to analyze.
456 |             timeframe (str): Time range for analysis (e.g., "today 12-m", "2022-01-01 2022-12-31")
457 |             geo (str): Geographic region code (e.g., "US" for United States)
458 |             cat (int): Category ID (default: 0 for all categories)
459 |             gprop (str): Google property filter
460 |             resolution (str): Geographic resolution level:
461 |                 - 'COUNTRY' (default when geo is empty)
462 |                 - 'REGION' (states/provinces)
463 |                 - 'CITY' (cities)
464 |                 - 'DMA' (Designated Market Areas)
465 |             inc_low_vol (bool): Include regions with low search volume
466 |             return_raw (bool): Return unprocessed API response data
467 | 
468 |         Returns:
469 |             pandas.DataFrame or dict: Processed geographic interest data, or raw API response if return_raw=True
470 |         """
471 |         if not resolution:
472 |             resolution = 'COUNTRY' if not geo else 'REGION'  # worldwide queries default to country level
473 | 
474 |         data_injection = {'resolution': resolution, 'includeLowSearchVolumeGeos': inc_low_vol}
475 |         token, data = self._get_token_data(EMBED_GEO_URL, locals(), request_fix=data_injection)
476 |         if return_raw:
477 |             return token, data
478 | 
479 |         bullets = TrendsDataConverter.token_to_bullets(token)
480 |         return TrendsDataConverter.geo_data(data, bullets)
481 | 
482 |     def suggestions(self, keyword, language=None, return_raw=False):
483 |         params = {'hl': language, 'tz': self.tzs} if language else self._default_params
484 |         encoded_keyword = keyword.replace("'", "")
485 |         encoded_keyword = quote(encoded_keyword, safe='-')
486 |         req = self._get(API_AUTOCOMPLETE + encoded_keyword, params)
487 |         data = self._parse_protected_json(req)
488 |         if return_raw:
489 |             return data
490 |         return TrendsDataConverter.suggestions(data)
491 | 
492 |     def hot_trends(self):
493 |         req = self.session.get(HOT_TRENDS_URL)
494 |         return json.loads(req.text)
495 | 
496 |     def top_year_charts(self, year='2023', geo='GLOBAL'):
497 |         """
498 |         Retrieves "Year in Search" top charts, as shown at https://trends.google.com/trends/yis/2023/GLOBAL/
499 |         """
500 |         params = {'date': year, 'geo': geo, 'isMobile': False}
501 |         params.update(self._default_params)
502 |         req = self._get(API_TOPCHARTS_URL, params)
503 |         data = self._parse_protected_json(req)
504 |         return data
505 | 
506 |     def trending_stories(self, geo='US', category='all', max_stories=200, return_raw=False):
507 |         '''
508 |         Old API.
509 |         category: all: "all", business: "b", entertainment: "e", health: "m", sciTech: "t", sports: "s", top: "h"
510 |         '''
511 |         forms = {'ns': 15, 'geo': geo, 'tz': self.tzs, 'hl': 'en', 'cat': category, 'fi': '0', 'fs': '0', 'ri': max_stories, 'rs': max_stories, 'sort': 0}
512 |         url = REALTIME_SEARCHES_URL
513 |         req = self._get(url, forms)
514 |         data = self._parse_protected_json(req)
515 |         if return_raw:
516 |             return data
517 | 
518 |         data = data.get('storySummaries', {}).get('trendingStories', [])
519 |         data = [TrendKeywordLite.from_api(item) for item in data]
520 |         return data
521 | 
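# --- Editor's note: resolution sketch for interest_by_region above (values illustrative).
# >>> tr.interest_by_region('python')                               # worldwide -> COUNTRY resolution
# >>> tr.interest_by_region('python', geo='US', resolution='CITY')  # cities within the US
# >>> tr.interest_by_region('python', geo='US', inc_low_vol=True)   # include low-volume regions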
594 | """ 595 | params = {'geo':geo} 596 | req = self._get(REALTIME_RSS, params) 597 | if return_raw: 598 | return req.text 599 | data = TrendsDataConverter.rss_items(req.text) 600 | data = list(map(TrendKeywordLite.from_api, data)) 601 | return data 602 | 603 | def trending_now_news_by_ids(self, news_ids, max_news=3, return_raw=False): 604 | req = self._get_batch('w4opAf', [news_ids, max_news]) 605 | data = self._parse_protected_json(req) 606 | if return_raw: 607 | return data 608 | 609 | data = json.loads(data[0][2]) 610 | data = list(map(NewsArticle.from_api, data[0])) 611 | return data 612 | 613 | def trending_now_showcase_timeline(self, keywords, geo='US', timeframe=BatchPeriod.Past24H, return_raw=False): 614 | req_data = [None,None,[[geo, keyword, timeframe.value, 0, 3] for keyword in keywords]] 615 | request_timestamp = int(datetime.now(timezone.utc).timestamp()) 616 | req = self._get_batch('jpdkv', req_data) 617 | data = self._parse_protected_json(req) 618 | if return_raw: 619 | return data 620 | 621 | data = json.loads(data[0][2])[0] 622 | data = TrendsDataConverter.trending_now_showcase_timeline(data, request_timestamp) 623 | return data 624 | 625 | def categories(self, find: str = None, language: str = None) -> List[dict]: 626 | """ 627 | Search for categories in Google Trends data. 628 | 629 | This function retrieves and caches category data from Google Trends API, then performs 630 | a partial search on the categories. The results are cached by language to minimize API calls. 631 | 632 | Args: 633 | find (str, optional): Search query for categories. If None or empty string, 634 | returns all available categories. Defaults to None. 635 | language (str, optional): Language code for the response (e.g., 'en', 'es'). 636 | If None, uses the instance's default language. Defaults to None. 637 | 638 | Returns: 639 | List[dict]: List of matching categories. Each category is a dictionary containing: 640 | - name (str): Category name in the specified language 641 | - id (str): Category identifier 642 | 643 | Examples: 644 | >>> trends = Trends() 645 | >>> # Find all categories containing "computer" 646 | >>> computer_cats = trends.categories(find="computer") 647 | >>> # Find all categories in Spanish 648 | >>> spanish_cats = trends.categories(language="es") 649 | >>> # Find specific category in German 650 | >>> tech_cats = trends.categories(find="Technologie", language="de") 651 | """ 652 | cur_language = language or self.language 653 | 654 | if cur_language not in self._category_cache: 655 | req = self._get(API_CATEGORY_URL, {'hl': cur_language, 'tz': self.tzs}) 656 | data = self._parse_protected_json(req) 657 | self._category_cache[cur_language] = create_hierarchical_index(data, join_ids=False) 658 | 659 | if not find: 660 | return list(self._category_cache[cur_language].name_to_item.values()) 661 | 662 | return self._category_cache[cur_language].partial_search(find) 663 | 664 | def geo(self, find: str = None, language: str = None) -> List[dict]: 665 | """ 666 | Search for geographical locations in Google Trends data. 667 | 668 | This function retrieves and caches geographical data from Google Trends API, then performs 669 | a partial search on the locations. The results are cached by language to minimize API calls. 670 | 671 | Args: 672 | find (str, optional): Search query for locations. If None or empty string, 673 | returns all available locations. Defaults to None. 674 | language (str, optional): Language code for the response (e.g., 'en', 'es'). 
625 |     def categories(self, find: str = None, language: str = None) -> List[dict]:
626 |         """
627 |         Search for categories in Google Trends data.
628 | 
629 |         This function retrieves and caches category data from the Google Trends API, then performs
630 |         a partial search on the categories. The results are cached by language to minimize API calls.
631 | 
632 |         Args:
633 |             find (str, optional): Search query for categories. If None or empty string,
634 |                 returns all available categories. Defaults to None.
635 |             language (str, optional): Language code for the response (e.g., 'en', 'es').
636 |                 If None, uses the instance's default language. Defaults to None.
637 | 
638 |         Returns:
639 |             List[dict]: List of matching categories. Each category is a dictionary containing:
640 |                 - name (str): Category name in the specified language
641 |                 - id (str): Category identifier
642 | 
643 |         Examples:
644 |             >>> trends = Trends()
645 |             >>> # Find all categories containing "computer"
646 |             >>> computer_cats = trends.categories(find="computer")
647 |             >>> # Find all categories in Spanish
648 |             >>> spanish_cats = trends.categories(language="es")
649 |             >>> # Find specific category in German
650 |             >>> tech_cats = trends.categories(find="Technologie", language="de")
651 |         """
652 |         cur_language = language or self.language
653 | 
654 |         if cur_language not in self._category_cache:
655 |             req = self._get(API_CATEGORY_URL, {'hl': cur_language, 'tz': self.tzs})
656 |             data = self._parse_protected_json(req)
657 |             self._category_cache[cur_language] = create_hierarchical_index(data, join_ids=False)
658 | 
659 |         if not find:
660 |             return list(self._category_cache[cur_language].name_to_item.values())
661 | 
662 |         return self._category_cache[cur_language].partial_search(find)
663 | 
664 |     def geo(self, find: str = None, language: str = None) -> List[dict]:
665 |         """
666 |         Search for geographical locations in Google Trends data.
667 | 
668 |         This function retrieves and caches geographical data from the Google Trends API, then performs
669 |         a partial search on the locations. The results are cached by language to minimize API calls.
670 | 
671 |         Args:
672 |             find (str, optional): Search query for locations. If None or empty string,
673 |                 returns all available locations. Defaults to None.
674 |             language (str, optional): Language code for the response (e.g., 'en', 'es').
675 |                 If None, uses the instance's default language. Defaults to None.
676 | 
677 |         Returns:
678 |             List[dict]: List of matching locations. Each location is a dictionary containing:
679 |                 - name (str): Location name in the specified language
680 |                 - id (str): Location identifier (e.g., 'US-NY' for New York, United States)
681 | 
682 |         Examples:
683 |             >>> trends = Trends()
684 |             >>> # Find all locations containing "York"
685 |             >>> locations = trends.geo(find="York")
686 |             >>> # Find all locations in Spanish
687 |             >>> spanish_locations = trends.geo(language="es")
688 |             >>> # Find specific location in German
689 |             >>> berlin = trends.geo(find="Berlin", language="de")
690 | 
691 |         Note:
692 |             - Results are cached by language to improve performance
693 |             - API response is parsed and structured for efficient searching
694 |             - Case-insensitive partial matching is used for searches
695 |         """
696 |         # Use provided language or fall back to instance default
697 |         cur_language = language or self.language
698 | 
699 |         # Check if we need to fetch and cache data for this language
700 |         if cur_language not in self._geo_cache:
701 |             # Fetch geographical data from Google Trends API
702 |             data = self._get(API_GEO_DATA_URL,
703 |                              {'hl': cur_language, 'tz': self.tzs})
704 |             data = self._parse_protected_json(data)
705 |             # Create and cache search system for this language
706 |             self._geo_cache[cur_language] = create_hierarchical_index(data)
707 | 
708 |         # Perform partial search (empty string returns all locations)
709 |         if not find:
710 |             return list(self._geo_cache[cur_language].name_to_location.values())
711 | 
712 |         return self._geo_cache[cur_language].partial_search(find)
--------------------------------------------------------------------------------