├── requirements.txt
├── requirements-dev.txt
├── .gitignore
├── src
│   └── trendspy
│       ├── constants.py
│       ├── __init__.py
│       ├── trend_list.py
│       ├── news_article.py
│       ├── utils.py
│       ├── hierarchical_search.py
│       ├── converter.py
│       ├── timeframe_utils.py
│       ├── trend_keyword.py
│       └── client.py
├── LICENSE
├── pyproject.toml
├── tests
│   └── timeframe_utils_test.py
└── README.md
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests>=2.25.0
2 | pandas>=1.2.0
3 | numpy>=1.19.0
4 | python-dateutil>=2.8.0
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | # Install base requirements
2 | -r requirements.txt
3 |
4 | # Development tools
5 | build>=0.10.0
6 | twine>=4.0.0
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.so
6 | .Python
7 | build/
8 | develop-eggs/
9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 |
23 | # Environments
24 | .env
25 | .venv
26 | env/
27 | venv/
28 | ENV/
29 |
30 | # IDE
31 | .idea/
32 | .vscode/
33 | *.swp
34 | *.swo
35 |
36 | # Jupyter Notebook
37 | .ipynb_checkpoints
38 |
39 | # Testing
40 | .coverage
41 | .pytest_cache/
42 | htmlcov/
--------------------------------------------------------------------------------
/src/trendspy/constants.py:
--------------------------------------------------------------------------------
1 | TREND_TOPICS = {
2 | 1: "Autos and Vehicles",
3 | 2: "Beauty and Fashion",
4 | 3: "Business and Finance",
5 | 20: "Climate",
6 | 4: "Entertainment",
7 | 5: "Food and Drink",
8 | 6: "Games",
9 | 7: "Health",
10 | 8: "Hobbies and Leisure",
11 | 9: "Jobs and Education",
12 | 10: "Law and Government",
13 | 11: "Other",
14 | 13: "Pets and Animals",
15 | 14: "Politics",
16 | 15: "Science",
17 | 16: "Shopping",
18 | 17: "Sports",
19 | 18: "Technology",
20 | 19: "Travel and Transportation"
21 | }
--------------------------------------------------------------------------------
/src/trendspy/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | TrendsPy - A Python library for working with Google Trends.
3 |
4 | This library provides a simple and convenient interface for accessing Google Trends data,
5 | allowing you to analyze search trends, get real-time trending topics, and track interest
6 | over time and regions.
7 |
8 | Main components:
9 | - Trends: Main client class for accessing Google Trends data
10 | - BatchPeriod: Enum for specifying time periods in batch operations
11 | - TrendKeyword: Class representing a trending search term with metadata
12 | - NewsArticle: Class representing news articles related to trends
13 |
14 | Project links:
15 | Homepage: https://github.com/sdil87/trendspy
16 | Repository: https://github.com/sdil87/trendspy.git
17 | Issues: https://github.com/sdil87/trendspy/issues
18 | """
19 |
20 | from .client import Trends, BatchPeriod
21 | from .trend_keyword import TrendKeyword, TrendKeywordLite
22 | from .news_article import NewsArticle
23 | from .trend_list import TrendList
24 |
25 | __version__ = "0.1.6"
26 | __all__ = ['Trends', 'BatchPeriod', 'TrendKeyword', 'TrendKeywordLite', 'NewsArticle', 'TrendList']
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 SDil
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "trendspy"
7 | version = "0.1.6"
8 | description = "A Python library for accessing Google Trends data"
9 | readme = "README.md"
10 | requires-python = ">=3.7"
11 | license = {file = "LICENSE"}
12 | authors = [
13 | {name = "SDil"},
14 | ]
15 | dependencies = [
16 | "requests>=2.25.0",
17 | "pandas>=1.2.0",
18 | "numpy>=1.19.0",
19 | "python-dateutil>=2.8.0"
20 | ]
21 | classifiers = [
22 | "Development Status :: 4 - Beta",
23 | "Intended Audience :: Developers",
24 | "Intended Audience :: Science/Research",
25 | "License :: OSI Approved :: MIT License",
26 | "Operating System :: OS Independent",
27 | "Programming Language :: Python :: 3",
28 | "Programming Language :: Python :: 3.7",
29 | "Programming Language :: Python :: 3.8",
30 | "Programming Language :: Python :: 3.9",
31 | "Programming Language :: Python :: 3.10",
32 | "Programming Language :: Python :: 3.11",
33 | "Topic :: Internet",
34 | "Topic :: Scientific/Engineering :: Information Analysis",
35 | "Topic :: Software Development :: Libraries :: Python Modules"
36 | ]
37 | keywords = ["google-trends", "trends", "analytics", "data-analysis"]
38 |
39 | [project.urls]
40 | Homepage = "https://github.com/sdil87/trendspy"
41 | Repository = "https://github.com/sdil87/trendspy.git"
42 | Issues = "https://github.com/sdil87/trendspy/issues"
43 |
44 | [tool.hatch.build.targets.wheel]
45 | packages = ["src/trendspy"]
--------------------------------------------------------------------------------
/tests/timeframe_utils_test.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from datetime import datetime, timedelta
3 | from trendspy.timeframe_utils import *
4 | from trendspy.timeframe_utils import _is_valid_date, _is_valid_format, _extract_time_parts, _decode_trend_datetime
5 | # Tests
6 | def test_is_valid_date():
7 | assert _is_valid_date('2024-09-13') is True
8 | assert _is_valid_date('2024-09-13T22') is True
9 | assert _is_valid_date('2024/09/13') is False
10 | assert _is_valid_date('invalid') is False
11 |
12 | def test_is_valid_format():
13 | assert _is_valid_format('1-H') is True
14 | assert _is_valid_format('5-y') is True
15 | assert _is_valid_format('10-m') is True
16 | assert _is_valid_format('invalid') is False
17 |
18 | def test_extract_time_parts():
19 | assert _extract_time_parts('5-H') == (5, 'H')
20 | assert _extract_time_parts('10-d') == (10, 'd')
21 | assert _extract_time_parts('invalid') is None
22 |
23 | def test_decode_trend_datetime():
24 | assert _decode_trend_datetime('2024-09-13T22') == datetime(2024, 9, 13, 22)
25 | assert _decode_trend_datetime('2024-09-13') == datetime(2024, 9, 13)
26 |
27 | def test_convert_timeframe():
28 | assert convert_timeframe('now 1-H') == 'now 1-H'
29 | assert convert_timeframe('2024-09-12T23 5-H') == '2024-09-12T18 2024-09-12T23'
30 | assert convert_timeframe('2024-09-12T23 1-d') == '2024-09-11T23 2024-09-12T23'
31 | assert convert_timeframe('2024-09-12 1-y') == '2023-09-12 2024-09-12'
32 | assert convert_timeframe('2024-09-12T23 2024-09-13') == '2024-09-12T23 2024-09-14T00'
33 | assert convert_timeframe('2024-09-12 2024-09-13T12') == '2024-09-12T00 2024-09-13T12'
34 | with pytest.raises(ValueError):
35 | convert_timeframe('2024-09-12T23 invalid')
36 | with pytest.raises(ValueError):
37 | convert_timeframe('2024-09-12T23 8-d')
38 | with pytest.raises(ValueError):
39 | convert_timeframe('2024-09-12T23 all')
40 |
41 | def test_month_diff():
42 | assert convert_timeframe('2024-09-12 1-m') == '2024-08-13 2024-09-12'
43 |
44 |
45 | def test_convert_timeframe_range():
46 | assert timeframe_to_timedelta('now 1-H') == timedelta(seconds=60*60)
47 | assert timeframe_to_timedelta('now 5-H') == timedelta(seconds=5*60*60)
48 |
49 | if __name__ == "__main__":
50 | pytest.main()
--------------------------------------------------------------------------------
/src/trendspy/trend_list.py:
--------------------------------------------------------------------------------
1 | from typing import List, Union
2 | from .constants import TREND_TOPICS
3 | from .trend_keyword import TrendKeyword
4 |
5 | class TrendList(list):
6 | """
7 | A list-like container for trending topics with additional filtering capabilities.
8 | Inherits from list to maintain all standard list functionality.
9 | """
10 |
11 | def __init__(self, trends: List[TrendKeyword]):
12 | super().__init__(trends)
13 |
14 | def filter_by_topic(self, topic: Union[int, str, List[Union[int, str]]]) -> 'TrendList':
15 | """
16 | Filter trends by topic ID or name.
17 |
18 | Args:
19 | topic: Topic identifier. Can be:
20 | - int: Topic ID (e.g., 18 for Technology)
21 | - str: Topic name (e.g., 'Technology')
22 | - list of int/str: Multiple topics (matches any)
23 |
24 | Returns:
25 | TrendList: New TrendList containing only trends matching the specified topic(s)
26 | """
27 | topics = [topic] if not isinstance(topic, list) else topic
28 |
29 | name_to_id = {name.lower(): id_ for id_, name in TREND_TOPICS.items()}
30 |
31 | topic_ids = set()
32 | for t in topics:
33 | if isinstance(t, int):
34 | topic_ids.add(t)
35 | elif isinstance(t, str):
36 | topic_id = name_to_id.get(t.lower())
37 | if topic_id:
38 | topic_ids.add(topic_id)
39 |
40 | filtered = [
41 | trend for trend in self
42 | if any(topic_id in trend.topics for topic_id in topic_ids)
43 | ]
44 |
45 | return TrendList(filtered)
46 |
47 | def get_topics_summary(self) -> dict:
48 | """
49 | Get a summary of topics present in the trends.
50 |
51 | Returns:
52 | dict: Mapping of topic names to count of trends
53 | """
54 | topic_counts = {}
55 | for trend in self:
56 | for topic_id in trend.topics:
57 | topic_name = TREND_TOPICS.get(topic_id, f"Unknown ({topic_id})")
58 | topic_counts[topic_name] = topic_counts.get(topic_name, 0) + 1
59 | return dict(sorted(topic_counts.items(), key=lambda x: (-x[1], x[0])))
60 |
61 | def __str__(self) -> str:
62 | """Return string representation of the trends."""
63 | if not self:
64 | return "[]"
65 | return "[\n " + ",\n ".join(trend.brief_summary() for trend in self) + "\n]"
--------------------------------------------------------------------------------
/src/trendspy/news_article.py:
--------------------------------------------------------------------------------
1 | from .utils import parse_time_ago
2 | from datetime import datetime
3 |
4 | class NewsArticle:
5 | """
6 | Represents a news article related to a trending topic.
7 |
8 | This class handles both dictionary and list-based article data from
9 | various Google Trends API endpoints.
10 |
11 | Parameters:
12 | title (str): Article title
13 | url (str): Article URL
14 | source (str): News source name
15 | picture (str): URL to article image
16 | time (str or int): Publication time or timestamp
17 | snippet (str): Article preview text
18 |
19 | Note:
20 | If time is provided as a string with 'ago' format (e.g., '2 hours ago'),
21 | it will be automatically converted to a timestamp.
22 | """
23 | def __init__(self, title=None, url=None, source=None, picture=None, time=None, snippet=None, article_ids=None):
24 | self.title = title
25 | self.url = url
26 | self.source = source
27 | self.picture = picture
28 | self.time = time
29 | if isinstance(self.time, str) and ('ago' in self.time):
30 | self.time = parse_time_ago(self.time)
31 | self.snippet = snippet
32 |
33 | @classmethod
34 | def from_api(cls, data):
35 | if isinstance(data, dict):
36 | return cls(
37 | title=data.get('title') or data.get('articleTitle'),
38 | url=data.get('url'),
39 | source=data.get('source'),
40 | picture=data.get('picture') or data.get('image', {}).get('imageUrl'),
41 | time=data.get('time') or data.get('timeAgo'),
42 | snippet=data.get('snippet')
43 | )
44 | elif isinstance(data, list):
45 | return cls(
46 | title=data[0],
47 | url=data[1],
48 | source=data[2],
49 | time=data[3][0] if data[3] else None,
50 | picture=data[4] if len(data) > 4 else None
51 | )
52 | else:
53 | raise ValueError("Unsupported data format: must be dict or list")
54 |
55 | def __repr__(self):
56 | return f"NewsArticle(title={self.title!r}, url={self.url!r}, source={self.source!r}, " \
57 | f"picture={self.picture!r}, time={self.time!r}, snippet={self.snippet!r})"
58 |
59 | def __str__(self):
60 | s = 'Title : {}'.format(self.title)
61 | s += '\nURL : {}'.format(self.url) if self.url else ''
62 | s += '\nSource : {}'.format(self.source) if self.source else ''
63 | s += '\nPicture : {}'.format(self.picture) if self.picture else ''
64 | s += '\nTime : {}'.format(datetime.fromtimestamp(self.time).strftime('%Y-%m-%d %H:%M:%S')) if self.time else ''
65 | s += '\nSnippet : {}'.format(self.snippet) if self.snippet else ''
66 | return s
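67 |
68 | # Illustrative sketch (hypothetical payload): dict-style items convert 'ago'
69 | # strings to Unix timestamps via parse_time_ago.
70 | #   art = NewsArticle.from_api({'title': 'Example', 'url': 'https://example.com',
71 | #                               'source': 'Example News', 'timeAgo': '2 hours ago'})
72 | #   art.time  # -> int timestamp roughly two hours in the past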
--------------------------------------------------------------------------------
/src/trendspy/utils.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | from typing import Any
3 | import re
4 | import json
5 | from enum import Enum
6 | from datetime import datetime, timedelta, timezone
7 | import time
8 |
9 | _HEX_TO_CHAR_DICT = {
10 | r'\x7b':'{',
11 | r'\x7d':'}',
12 | r'\x22':'"',
13 | r'\x5d':']',
14 | r'\x5b':'[',
15 | '\\\\':'\\'
16 | }
17 | _tag_pattern = re.compile(r'<([\w:]+)>(.*?)</\1>', re.DOTALL)
18 |
19 | class EnumEncoder(json.JSONEncoder):
20 | def default(self, obj):
21 | if isinstance(obj, Enum):
22 | return obj.value
23 | return super().default(obj)
24 |
25 | class LRUCache(OrderedDict):
26 | def __init__(self, maxsize=128):
27 | super().__init__()
28 | self.maxsize = maxsize
29 |
30 | def __getitem__(self, key):
31 | value = super().__getitem__(key)
32 | self.move_to_end(key)
33 | return value
34 |
35 | def __setitem__(self, key, value):
36 | if key in self:
37 | self.move_to_end(key)
38 | super().__setitem__(key, value)
39 | if len(self) > self.maxsize:
40 | oldest = next(iter(self))
41 | del self[oldest]
42 |
43 | def ensure_list(item):
44 | return list(item) if hasattr(item, '__iter__') and not isinstance(item, str) and not isinstance(item, dict) else [item]
45 |
46 | def extract_column(data, column, default: Any = None, f=None):
47 | if f is None:
48 | return [item.get(column, default) for item in data]
49 | return [f(item.get(column, default)) for item in data]
50 |
51 | def flatten_data(data, columns):
52 | return [{**{kk: vv for k in columns if k in d for kk, vv in d[k].items()},
53 | **{k: v for k, v in d.items() if k not in columns}}
54 | for d in data]
55 |
56 | def flatten_dict(d, parent_key='', sep='_'):
57 | items = []
58 | for k, v in d.items():
59 | new_key = f"{parent_key}{sep}{k}" if parent_key else k
60 | if isinstance(v, dict):
61 | items.extend(flatten_dict(v, new_key, sep=sep).items())
62 | else:
63 | items.append((new_key, v))
64 | return dict(items)
65 |
66 | def filter_data(data, desired_columns):
67 | desired_columns = set(desired_columns)
68 | return [{k: v for k, v in item.items() if k in desired_columns} for item in data]
69 |
70 | def decode_escape_text(text):
71 | for k,v in _HEX_TO_CHAR_DICT.items():
72 | text = text.replace(k, v)
73 |
74 | if r'\x' in text:
75 | text = re.sub(r'\\x[0-9a-fA-F]{2}', lambda match:chr(int(match.group(0)[2:], 16)), text)
76 | return text
77 |
78 | def parse_xml_to_dict(text, prefix=''):
79 | item_dict = {}
80 | for tag, content in _tag_pattern.findall(text):
81 | content = parse_xml_to_dict(content.strip(), tag+'_')
82 | tag = tag.replace(prefix, '')
83 | if tag in item_dict:
84 | if not isinstance(item_dict[tag], list):
85 | item_dict[tag] = [item_dict[tag]]
86 | item_dict[tag].append(content)
87 | continue
88 | item_dict[tag] = content
89 | if not item_dict:
90 | return text
91 | return item_dict
92 |
93 | def get_utc_offset_minutes():
94 | """
95 | Returns the local time offset from UTC in minutes.
96 | Positive values for time zones ahead of UTC (eastward),
97 | negative values for time zones behind UTC (westward).
98 | """
99 | # Get current local time
100 | now = datetime.now()
101 |
102 | # Get offset in seconds
103 | utc_offset = -time.timezone
104 |
105 | # Account for daylight saving time if active
106 | if time.localtime().tm_isdst:
107 | utc_offset += 3600 # Add one hour in seconds
108 |
109 | # Convert seconds to minutes
110 | return utc_offset // 60
111 |
112 | def parse_time_ago(time_ago):
113 | if not time_ago:
114 | return None
115 |
116 | match = re.match(r'(\d+)\s*(\w+)', time_ago)
117 | if not match:
118 | return None
119 |
120 | value, unit = match.groups()
121 | value = int(value)
122 |
123 | if 'h' in unit:
124 | delta = timedelta(hours=value)
125 | elif 'd' in unit:
126 | delta = timedelta(days=value)
127 | elif 'm' in unit:
128 | delta = timedelta(minutes=value)
129 | else:
130 | delta = timedelta(0)
131 |
132 | now = datetime.now(timezone.utc)
133 | timestamp = int((now - delta).replace(microsecond=0).timestamp())
134 | return timestamp
135 |
136 | def truncate_string(s, max_length):
137 | if len(s) > max_length:
138 | return s[:max_length - 3] + '...'
139 | return s
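140 |
141 | # Illustrative sketch: LRUCache evicts the least recently used entry once
142 | # maxsize is exceeded.
143 | #   cache = LRUCache(maxsize=2)
144 | #   cache['a'] = 1; cache['b'] = 2
145 | #   cache['a']        # access refreshes 'a'
146 | #   cache['c'] = 3    # evicts 'b', the least recently used key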
--------------------------------------------------------------------------------
/src/trendspy/hierarchical_search.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List, Optional
2 | import re
3 |
4 | def flatten_tree(node, parent_id='', result=None, join_ids=True):
5 | """
6 | Recursively transforms a tree structure into a flat list.
7 |
8 | Args:
9 | node (dict): Tree node with 'name', 'id' and optional 'children' keys
10 | parent_id (str): Parent node ID
11 | result (list): Accumulated result
12 | join_ids (bool): Whether to join IDs with parent (True for geo, False for categories)
13 |
14 | Returns:
15 | list: List of dictionaries with name and id
16 | """
17 | if result is None:
18 | result = []
19 |
20 | current_id = node['id']
21 | # Join IDs only for geographical data
22 | if join_ids and parent_id:
23 | full_id = f"{parent_id}-{current_id}"
24 | else:
25 | full_id = current_id
26 |
27 | result.append({
28 | 'name': node['name'],
29 | 'id': full_id
30 | })
31 |
32 | if 'children' in node:
33 | for child in node['children']:
34 | flatten_tree(child, full_id if join_ids else '', result, join_ids)
35 |
36 | return result
37 |
38 | class HierarchicalIndex:
39 | """
40 | An index for efficient searches in hierarchical Google Trends data structures.
41 |
42 | This class provides fast lookups for hierarchical data like locations and categories,
43 | supporting both exact and partial matching of names.
44 |
45 | Examples:
46 | - Geographical hierarchies (Country -> Region -> City)
47 | - Category hierarchies (Main category -> Subcategory)
48 |
49 | Methods:
50 | add_item(item): Add an item to the index
51 | exact_search(name): Find exact match for name
52 | partial_search(query): Find items containing the query
53 | id_search(id_query): Find by ID (supports both exact and partial matching)
54 | """
55 |
56 | def __init__(self, items: List[dict], partial_id_search: bool = True):
57 | """
58 | Initialize the search index.
59 |
60 | Args:
61 | items (List[dict]): List of dictionaries with 'name' and 'id'
62 | partial_id_search (bool): Whether to allow partial ID matches
63 | (True for geo locations, False for categories)
64 | """
65 | # Main storage: dict with lowercase name as key
66 | self.name_to_item: Dict[str, dict] = {}
67 |
68 | # Inverted index for partial matching
69 | self.word_index: Dict[str, List[str]] = {}
70 |
71 | # Store search mode
72 | self.partial_id_search = partial_id_search
73 |
74 | # Build indexes
75 | for item in items:
76 | self.add_item(item)
77 |
78 | def add_item(self, item: dict) -> None:
79 | """
80 | Add a single item to the index.
81 |
82 | Args:
83 | item (dict): Dictionary with 'name' and 'id'
84 | """
85 | name = item['name'].lower()
86 |
87 | # Add to main storage
88 | self.name_to_item[name] = item
89 |
90 | # Split name into words and add to inverted index
91 | words = set(re.split(r'\W+', name))
92 | for word in words:
93 | if word:
94 | if word not in self.word_index:
95 | self.word_index[word] = []
96 | self.word_index[word].append(name)
97 |
98 | def exact_search(self, name: str) -> Optional[dict]:
99 | """
100 | Perform exact name search (case-insensitive).
101 |
102 | Args:
103 | name (str): Name to search for
104 |
105 | Returns:
106 | Optional[dict]: Item dictionary if found, None otherwise
107 | """
108 | return self.name_to_item.get(name.lower())
109 |
110 | def partial_search(self, query: str) -> List[dict]:
111 | """
112 | Perform partial name search (case-insensitive).
113 |
114 | Args:
115 | query (str): Search query string
116 |
117 | Returns:
118 | List[dict]: List of matching item dictionaries
119 | """
120 | query = query.lower()
121 | results = set()
122 |
123 | # Search for partial matches in word index
124 | for word, items in self.word_index.items():
125 | if query in word:
126 | results.update(items)
127 |
128 | # Also check if query matches any part of full names
129 | for name in self.name_to_item:
130 | if query in name:
131 | results.add(name)
132 |
133 | # Return found items
134 | return [self.name_to_item[name] for name in results]
135 |
136 | def id_search(self, id_query: str) -> List[dict]:
137 | """
138 | Search by ID.
139 |
140 | Args:
141 | id_query (str): ID or partial ID to search for
142 |
143 | Returns:
144 | List[dict]: List of matching item dictionaries
145 | """
146 | if self.partial_id_search:
147 | # For geo data - allow partial matches
148 | return [item for item in self.name_to_item.values()
149 | if id_query in item['id']]
150 | else:
151 | # For categories - only exact matches
152 | return [item for item in self.name_to_item.values()
153 | if item['id'] == id_query]
154 |
155 | def create_hierarchical_index(tree_data: dict, join_ids: bool = True) -> HierarchicalIndex:
156 | """
157 | Create a complete search system from a hierarchical tree structure.
158 |
159 | Args:
160 | tree_data (dict): Original tree structure
161 | join_ids (bool): Whether to join IDs with parent
162 | (True for geo locations, False for categories)
163 |
164 | Returns:
165 | HierarchicalIndex: Initialized search system
166 | """
167 | # First flatten the tree
168 | flat_items = flatten_tree(tree_data, join_ids=join_ids)
169 | # Then create and return the search index
170 | return HierarchicalIndex(flat_items, partial_id_search=join_ids)
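171 |
172 | # Illustrative sketch (hypothetical mini-tree in the geo format used above):
173 | #   index = create_hierarchical_index({
174 | #       'id': 'US', 'name': 'United States',
175 | #       'children': [{'id': 'NY', 'name': 'New York'}],
176 | #   })
177 | #   index.exact_search('new york')  # -> {'name': 'New York', 'id': 'US-NY'}
178 | #   index.id_search('NY')           # partial ID match -> same item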
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TrendsPy
2 |
3 | Python library for accessing Google Trends data.
4 |
5 | ## Key Features
6 |
7 | **Explore**
8 | - Track popularity over time (`interest_over_time`)
9 | - Analyze geographic distribution (`interest_by_region`)
10 | - Compare interest across different timeframes and regions (multirange support)
11 | - Get related queries and topics (`related_queries`, `related_topics`)
12 |
13 | **Trending Now**
14 | - Access current trending searches (`trending_now`, `trending_now_by_rss`)
15 | - Get related news articles (`trending_now_news_by_ids`)
16 | - Retrieve historical data for 500+ trending keywords with independent normalization (`trending_now_showcase_timeline`)
17 |
18 | **Search Utilities**
19 | - Find category IDs (`categories`)
20 | - Search for location codes (`geo`)
21 |
22 | **Flexible Time Formats**
23 | - Custom intervals: `'now 123-H'`, `'today 45-d'`
24 | - Date-based offsets: `'2024-02-01 10-d'`
25 | - Standard ranges: `'2024-01-01 2024-12-31'`
26 |
27 | ## Installation
28 |
29 | ```bash
30 | pip install trendspy
31 | ```
32 |
33 | ## Basic Usage
34 |
35 | ```python
36 | from trendspy import Trends
37 | tr = Trends()
38 | df = tr.interest_over_time(['python', 'javascript'])
39 | df.plot(title='Python vs JavaScript Interest Over Time',
40 | figsize=(12, 6))
41 | ```
42 |
43 | ```python
44 | # Analyze geographic distribution
45 | geo_df = tr.interest_by_region('python')
46 | ```
47 | ```python
48 | # Get related queries
49 | related = tr.related_queries('python')
50 | ```
51 |
52 | ## Advanced Features
53 |
54 | ### Search Categories and Locations
55 |
56 | ```python
57 | # Find technology-related categories
58 | categories = tr.categories(find='technology')
59 | # Output: [{'name': 'Computers & Electronics', 'id': '13'}, ...]
60 |
61 | # Search for locations
62 | locations = tr.geo(find='york')
63 | # Output: [{'name': 'New York', 'id': 'US-NY'}, ...]
64 |
65 | # Use in queries
66 | df = tr.interest_over_time(
67 | 'python',
68 | geo='US-NY', # Found location ID
69 | cat='13' # Found category ID
70 | )
71 | ```
72 |
73 | ### Real-time Trending Searches and News
74 |
75 | ```python
76 | # Get current trending searches in the US
77 | trends = tr.trending_now(geo='US')
78 |
79 | # Get trending searches with news articles
80 | trends_with_news = tr.trending_now_by_rss(geo='US')
81 | print(trends_with_news[0]) # First trending topic
82 | print(trends_with_news[0].news[0]) # Associated news article
83 |
84 | # Get news articles for specific trending topics
85 | news = tr.trending_now_news_by_ids(
86 | trends[0].news_tokens, # News tokens from trending topic
87 | max_news=3 # Number of articles to retrieve
88 | )
89 | for article in news:
90 | print(f"Title: {article.title}")
91 | print(f"Source: {article.source}")
92 | print(f"URL: {article.url}\n")
93 | ```
94 |
95 | ### Independent Historical Data for Multiple Keywords
96 |
97 | ```python
98 | from trendspy import BatchPeriod
99 |
100 | # Unlike standard interest_over_time where data is normalized across all keywords,
101 | # trending_now_showcase_timeline provides independent data for each keyword
102 | # (500+ keywords in a single request)
103 |
104 | keywords = ['keyword1', 'keyword2', ..., 'keyword500']
105 |
106 | # Get independent historical data
107 | df_24h = tr.trending_now_showcase_timeline(
108 | keywords,
109 | timeframe=BatchPeriod.Past24H # 16-minute intervals
110 | )
111 |
112 | # Each keyword's data is normalized only to itself
113 | df_24h.plot(
114 | subplots=True,
115 | layout=(5, 2),
116 | figsize=(15, 20),
117 | title="Independent Trend Lines"
118 | )
119 |
120 | # Available time windows:
121 | # - Past4H: ~30 points (8-minute intervals)
122 | # - Past24H: ~90 points (16-minute intervals)
123 | # - Past48H: ~180 points (16-minute intervals)
124 | # - Past7D: ~42 points (4-hour intervals)
125 | ```
126 |
127 | ### Geographic Analysis
128 |
129 | ```python
130 | # Country-level data
131 | country_df = tr.interest_by_region('python')
132 |
133 | # State-level data for the US
134 | state_df = tr.interest_by_region(
135 | 'python',
136 | geo='US',
137 | resolution='REGION'
138 | )
139 |
140 | # City-level data for California
141 | city_df = tr.interest_by_region(
142 | 'python',
143 | geo='US-CA',
144 | resolution='CITY'
145 | )
146 | ```
147 |
148 | ### Timeframe Formats
149 |
150 | - Standard API timeframes: `'now 1-H'`, `'now 4-H'`, `'today 1-m'`, `'today 3-m'`, `'today 12-m'`
151 | - Custom intervals:
152 | - Short-term (< 8 days): `'now 123-H'`, `'now 72-H'`
153 | - Long-term: `'today 45-d'`, `'today 90-d'`, `'today 18-m'`
154 | - Date-based: `'2024-02-01 10-d'`, `'2024-03-15 3-m'`
155 | - Date ranges: `'2024-01-01 2024-12-31'`
156 | - Hourly precision: `'2024-03-25T12 2024-03-25T15'` (for periods < 8 days)
157 | - All available data: `'all'`
158 |
159 | ### Multirange Interest Over Time
160 |
161 | Compare search interest across different time periods and regions:
162 |
163 | ```python
164 | # Compare different time periods
165 | timeframes = [
166 | '2024-01-25 12-d', # 12-day period
167 | '2024-06-20 23-d' # 23-day period
168 | ]
169 | geo = ['US', 'GB'] # Compare US and UK
170 |
171 | df = tr.interest_over_time(
172 | 'python',
173 | timeframe=timeframes,
174 | geo=geo
175 | )
176 | ```
177 |
178 | Note: When using multiple timeframes, all of them must resolve to the same data resolution, and the longest timeframe must be less than twice the length of the shortest.
179 |
180 | ### Proxy Support
181 |
182 | TrendsPy supports the same proxy configuration as the `requests` library:
183 |
184 | ```python
185 | # Initialize with proxy
186 | tr = Trends(proxy="http://user:pass@10.10.1.10:3128")
187 | # or
188 | tr = Trends(proxy={
189 | "http": "http://10.10.1.10:3128",
190 | "https": "http://10.10.1.10:1080"
191 | })
192 |
193 | # Configure proxy after initialization
194 | tr.set_proxy("http://10.10.1.10:3128")
195 | ```
196 |
197 | ## Documentation
198 |
199 | For more examples and detailed API documentation, check out the Jupyter notebook in the repository: `basic_usage.ipynb`
200 |
201 | ## License
202 |
203 | MIT License - see the [LICENSE](LICENSE) file for details.
204 |
205 | ## Disclaimer
206 |
207 | This library is not affiliated with Google. Please ensure compliance with Google's terms of service when using this library.
208 |
--------------------------------------------------------------------------------
/src/trendspy/converter.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from .utils import *  # also brings in re, datetime and timezone, used below
4 |
5 | _RELATED_QUERIES_DESIRED_COLUMNS = ['query','topic','title','type','mid','value']
6 |
7 | class TrendsDataConverter:
8 | """
9 | Converts raw Google Trends API responses to pandas DataFrames.
10 |
11 | This class provides static methods for converting various types of
12 | Google Trends data into more usable formats.
13 |
14 | Methods:
15 | interest_over_time: Converts timeline data
16 | related_queries: Converts related queries data
17 | geo_data: Converts geographic data
18 | suggestions: Converts search suggestions
19 | rss_items: Parses RSS feed items
20 | """
21 | @staticmethod
22 | def token_to_bullets(token_data):
23 | items = token_data.get('request', {}).get('comparisonItem', [])
24 | bullets = [item.get('complexKeywordsRestriction', {}).get('keyword', [''])[0].get('value','') for item in items]
25 | metadata = [next(iter(item.get('geo', {'':'unk'}).values()), 'unk') for item in items]
26 | if len(set(metadata))>1:
27 | bullets = [b+' | '+m for b,m in zip(bullets, metadata)]
28 | metadata = [item.get('time', '').replace('\\', '') for item in items]
29 | if len(set(metadata))>1:
30 | bullets = [b+' | '+m for b,m in zip(bullets, metadata)]
31 |
32 | return bullets
33 |
34 | @staticmethod
35 | def interest_over_time(widget_data, keywords, time_as_index=True):
36 | """
37 | Converts interest over time data to a pandas DataFrame.
38 |
39 | Parameters:
40 | widget_data (dict): Raw API response data
41 | keywords (list): List of keywords for column names
42 | time_as_index (bool): Use time as DataFrame index
43 |
44 | Returns:
45 | pandas.DataFrame: Processed interest over time data
46 | """
47 | timeline_data = widget_data
48 | timeline_data = timeline_data.get('default', timeline_data)
49 | timeline_data = timeline_data.get('timelineData', timeline_data)
50 | if not timeline_data:
51 | return pd.DataFrame(columns=keywords)
52 |
53 |
54 | df_data = np.array(extract_column(timeline_data, 'value')).reshape(len(timeline_data), -1)
55 | df_data = dict(zip(keywords, df_data.T))
56 | if ('isPartial' in timeline_data[-1]) or any('isPartial' in row for row in timeline_data):
57 | df_data['isPartial'] = extract_column(timeline_data, 'isPartial', False)
58 |
59 |
60 | timestamps = extract_column(timeline_data, 'time', f=lambda x:int(x) if x else None)
61 | timestamps = np.array(timestamps, dtype='datetime64[s]').astype('datetime64[ns]')
62 | # timestamps += np.timedelta64(get_utc_offset_minutes(), 'm')
63 | if time_as_index:
64 | return pd.DataFrame(df_data, index=pd.DatetimeIndex(timestamps, name='time [UTC]'))
65 | return pd.DataFrame({'time':timestamps, **df_data})
66 |
67 | @staticmethod
68 | def multirange_interest_over_time(data, bullets=None):
69 | data = data.get('default', {}).get('timelineData', [{}])
70 |         if 'columnData' not in data[0]:
71 | return pd.DataFrame()
72 |
73 | num_parts = len(data[0]['columnData'])
74 | if bullets is None:
75 | bullets = ['keyword_'+str(i) for i in range(num_parts)]
76 |
77 | df_data = {}
78 | for i in range(num_parts):
79 | timeline_data = [item['columnData'][i] for item in data]
80 | df_data[bullets[i]] = extract_column(timeline_data, 'value', f=lambda x:x if x!=-1 else None)
81 |
82 | if ('isPartial' in timeline_data[-1]) or any('isPartial' in row for row in timeline_data):
83 | df_data['isPartial_'+str(i)] = extract_column(timeline_data, 'isPartial', False)
84 |
85 | timestamps = extract_column(timeline_data, 'time', f=lambda ts:int(ts) if ts else None)
86 | timestamps = np.array(timestamps, dtype='datetime64[s]').astype('datetime64[ns]')
87 | df_data['index_'+str(i)] = timestamps
88 | return pd.DataFrame(df_data)
89 |
90 | @staticmethod
91 | def related_queries(widget_data):
92 | ranked_data = widget_data.get('default',{}).get('rankedList')
93 | if not ranked_data:
94 | return {'top':pd.DataFrame(), 'rising':pd.DataFrame()}
95 |
96 | result = {}
97 | result['top'] = pd.DataFrame(flatten_data(filter_data(ranked_data[0]['rankedKeyword'], _RELATED_QUERIES_DESIRED_COLUMNS), ['topic']))
98 | result['rising'] = pd.DataFrame(flatten_data(filter_data(ranked_data[1]['rankedKeyword'], _RELATED_QUERIES_DESIRED_COLUMNS), ['topic']))
99 | return result
100 |
101 | @staticmethod
102 | def geo_data(widget_data, bullets=None):
103 | data = widget_data.get('default', {}).get('geoMapData', [])
104 | filtered_data = list(filter(lambda item:item['hasData'][0], data))
105 | if not filtered_data:
106 | return pd.DataFrame()
107 |
108 | num_keywords = len(filtered_data[0]['value'])
109 | if not bullets:
110 | bullets = ['keyword_'+str(i) for i in range(num_keywords)]
111 |
112 | found_cols = set(filtered_data[0].keys()) & {'coordinates', 'geoCode', 'geoName', 'value'}
113 | df_data = {}
114 | df_data['geoName'] = extract_column(filtered_data, 'geoName')
115 | if 'geoCode' in found_cols:
116 | df_data['geoCode'] = extract_column(filtered_data, 'geoCode')
117 | if 'coordinates' in found_cols:
118 | df_data['lat'] = extract_column(filtered_data, 'coordinates', f=lambda x:x['lat'])
119 | df_data['lng'] = extract_column(filtered_data, 'coordinates', f=lambda x:x['lng'])
120 |
121 | values = np.array(extract_column(filtered_data, 'value')).reshape(len(filtered_data), -1)
122 | for keyword,values_row in zip(bullets, values.T):
123 | df_data[keyword] = values_row
124 | return pd.DataFrame(df_data)
125 |
126 | @staticmethod
127 | def suggestions(data):
128 | return pd.DataFrame(data['default']['topics'])
129 |
130 | @staticmethod
131 | def rss_items(data):
132 |         item_pattern = re.compile(r'<item>(.*?)</item>', re.DOTALL)
133 | items = list(map(lambda item:parse_xml_to_dict(item, 'ht:'), item_pattern.findall(data)))
134 | return items
135 |
136 | @staticmethod
137 | def trending_now_showcase_timeline(data, request_timestamp=None):
138 | lens = [len(item[1]) for item in data]
139 | min_len, max_len = min(lens), max(lens)
140 | if min_len in {30,90,180,42}:
141 | max_len = min_len + 1
142 |
143 | time_offset = 480 if max_len < 32 else 14400 if max_len < 45 else 960
144 |
145 | timestamp = int(request_timestamp or datetime.now(timezone.utc).timestamp())
146 | timestamps = [timestamp // time_offset * time_offset - time_offset * i for i in range(max_len+2)][::-1]
147 | timestamps = np.array(timestamps, dtype='datetime64[s]').astype('datetime64[ns]')
148 | if (timestamp%time_offset) <= 60: # Time delay determined empirically
149 | df_data = {item[0]:item[1][-min_len:] for item in data}
150 | df = pd.DataFrame(df_data, index=timestamps[:-1][-min_len:])
151 | return df
152 |
153 | res = {}
154 | for item in data:
155 | res[item[0]] = np.pad(item[1], (0, max_len - len(item[1])), mode='constant', constant_values=0)
156 | df = pd.DataFrame(res, index=timestamps[-max_len:])
157 | return df
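158 |
159 | # Illustrative sketch (hypothetical payload): a minimal timeline response,
160 | # converted to a DataFrame indexed by UTC time.
161 | #   raw = {'default': {'timelineData': [{'time': '1704067200', 'value': [42]}]}}
162 | #   TrendsDataConverter.interest_over_time(raw, ['python'])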
--------------------------------------------------------------------------------
/src/trendspy/timeframe_utils.py:
--------------------------------------------------------------------------------
1 | __all__ = ['convert_timeframe', 'timeframe_to_timedelta', 'verify_consistent_timeframes']
2 |
3 | import re
4 | from datetime import datetime, timedelta, timezone
5 | from dateutil.relativedelta import relativedelta
6 | from typing import Any
7 | from .utils import ensure_list
8 | # Regular expression pattern to validate date strings in the format 'YYYY-MM-DD' or 'YYYY-MM-DDTHH'
9 | VALID_DATE_PATTERN = r'^\d{4}-\d{2}-\d{2}(T\d{2})?$'
10 |
11 | # Set of fixed timeframes supported by an external API
12 | FIXED_TIMEFRAMES = {'now 1-H', 'now 4-H', 'now 1-d', 'now 7-d', 'today 1-m', 'today 3-m', 'today 5-y', 'today 12-m', 'all'}
13 |
14 | # Date format strings for standard and datetime with hour formats
15 | DATE_FORMAT = "%Y-%m-%d"
16 | DATE_T_FORMAT = "%Y-%m-%dT%H"
17 |
18 | # Regular expression pattern to validate offset strings like '10-d', '5-H', etc.
19 | OFFSET_PATTERN = r'\d+[-]?[Hdmy]$'
20 |
21 | # Mapping of units (H, d, m, y) to relativedelta arguments
22 | UNIT_MAP = {'H': 'hours', 'd': 'days', 'm': 'months', 'y': 'years'}
23 |
24 |
25 | def _is_valid_date(date_str):
26 | # Checks if the given string matches the valid date pattern
27 | return bool(re.match(VALID_DATE_PATTERN, date_str))
28 |
29 |
30 | def _is_valid_format(offset_str):
31 | # Checks if the given string matches the valid offset pattern
32 | return bool(re.match(OFFSET_PATTERN, offset_str))
33 |
34 |
35 | def _extract_time_parts(offset_str):
36 | # Extracts numerical value and unit (H, d, m, y) from the offset string
37 | match = re.search(r'(\d+)[-]?([Hdmy]+)', offset_str)
38 | if match:
39 | return int(match.group(1)), match.group(2)
40 | return None
41 |
42 |
43 | def _decode_trend_datetime(date_str):
44 | # Parses the date string into a datetime object based on whether it includes time ('T' character)
45 | return datetime.strptime(date_str, DATE_T_FORMAT) if 'T' in date_str else datetime.strptime(date_str, DATE_FORMAT)
46 |
47 |
48 | def _process_two_dates(date_part_1, date_part_2):
49 |     # Processes two date parts and returns the formatted timeframe string
50 |     isT1 = 'T' in date_part_1
51 |     isT2 = 'T' in date_part_2
52 |     if (not isT1) and (not isT2):
53 |         return f'{date_part_1} {date_part_2}'
54 |
55 |     date_1 = _decode_trend_datetime(date_part_1)
56 | date_2 = _decode_trend_datetime(date_part_2)
57 |
58 | # Adjust date formatting if only one of the dates includes hour information
59 | if (isT1) and (not isT2):
60 | date_2 += timedelta(days=1)
61 | date_2 = date_2.replace(hour=0)
62 | elif (not isT1) and (isT2):
63 | date_1 = date_1.replace(hour=0)
64 |
65 | # Ensure the difference between dates does not exceed 7 days when time information is included
66 | if ('T' in date_part_1 or 'T' in date_part_2) and abs((date_1 - date_2).days) > 7:
67 | raise ValueError(f'Date difference cannot exceed 7 days for format with hours: {date_part_1} {date_part_2}')
68 |
69 | # Return the formatted result with both dates including hours
70 | return f'{date_1.strftime(DATE_T_FORMAT)} {date_2.strftime(DATE_T_FORMAT)}'
71 |
72 |
73 | def _process_date_with_offset(date_part_1, offset_part):
74 | # Processes a date part with an offset to calculate the resulting timeframe
75 | date_1 = _decode_trend_datetime(date_part_1)
76 | count, unit = _extract_time_parts(offset_part)
77 |
78 | # Calculate the offset using relativedelta
79 | raw_diff = relativedelta(**{UNIT_MAP[unit]: count})
80 | if unit in {'m', 'y'}:
81 | # Special handling for months and years: adjust based on the current UTC date
82 | now = datetime.now(timezone.utc)
83 | end_date = now - raw_diff
84 | raw_diff = now - end_date
85 |
86 | # Raise an error if the offset exceeds 7 days for formats that include time
87 | if 'T' in date_part_1 and ((unit == 'd' and count > 7) or (unit == 'H' and count > 7 * 24)):
88 | raise ValueError(f'Offset cannot exceed 7 days for format with time: {date_part_1} {offset_part}. Use YYYY-MM-DD format or "today".')
89 |
90 | # Determine the appropriate date format based on the unit (hours/days or months/years)
91 | date_format = DATE_T_FORMAT if 'T' in date_part_1 else DATE_FORMAT
92 | return f'{(date_1 - raw_diff).strftime(date_format)} {date_1.strftime(date_format)}'
93 |
94 |
95 | def convert_timeframe(timeframe, convert_fixed_timeframes_to_dates=False):
96 | """
97 | Converts timeframe strings to Google Trends format.
98 |
99 | Supports multiple formats:
100 | 1. Fixed timeframes ('now 1-H', 'today 12-m', etc.)
101 | 2. Date ranges ('2024-01-01 2024-12-31')
102 | 3. Date with offset ('2024-03-25 5-m')
103 | 4. Hour-specific ranges ('2024-03-25T12 2024-03-25T15')
104 |
105 | Parameters:
106 | timeframe (str): Input timeframe string
107 | convert_fixed_timeframes_to_dates (bool): Convert fixed timeframes to dates
108 |
109 | Returns:
110 | str: Converted timeframe string in Google Trends format
111 |
112 | Raises:
113 | ValueError: If timeframe format is invalid
114 | """
115 | # If the timeframe is in the fixed set and conversion is not requested, return as is
116 | if (timeframe in FIXED_TIMEFRAMES) and (not convert_fixed_timeframes_to_dates):
117 | return timeframe
118 |
119 | # Replace 'now' and 'today' with the current datetime in the appropriate format
120 | utc_now = datetime.now(timezone.utc)
121 | if convert_fixed_timeframes_to_dates and timeframe=='all':
122 |         return '2004-01-01 {}'.format(utc_now.strftime(DATE_FORMAT))
123 |
124 | timeframe = timeframe.replace('now', utc_now.strftime(DATE_T_FORMAT)).replace('today', utc_now.strftime(DATE_FORMAT))
125 |
126 | # Split the timeframe into two parts
127 | parts = timeframe.split()
128 | if len(parts) != 2:
129 |         raise ValueError(f"Invalid timeframe format: {timeframe}. Expected format: '<date> <date>' or '<date> <offset>'.")
130 |
131 | date_part_1, date_part_2 = parts
132 |
133 | # Process the timeframe based on its parts
134 | if _is_valid_date(date_part_1):
135 | if _is_valid_date(date_part_2):
136 | # Process if both parts are valid dates
137 | return _process_two_dates(date_part_1, date_part_2)
138 | elif _is_valid_format(date_part_2):
139 | # Process if the second part is a valid offset
140 | return _process_date_with_offset(date_part_1, date_part_2)
141 |
142 | raise ValueError(f'Could not process timeframe: {timeframe}')
143 |
144 | def timeframe_to_timedelta(timeframe):
145 | result = convert_timeframe(timeframe, convert_fixed_timeframes_to_dates=True)
146 | date_1, date_2 = result.split()
147 | datetime_1 = _decode_trend_datetime(date_1)
148 | datetime_2 = _decode_trend_datetime(date_2)
149 | return (datetime_2 - datetime_1)
150 |
151 | def verify_consistent_timeframes(timeframes):
152 | """
153 | Verifies that all timeframes have consistent resolution.
154 |
155 | Google Trends requires all timeframes in a request to have the same
156 | data resolution (e.g., hourly, daily, weekly).
157 |
158 | Parameters:
159 | timeframes (list): List of timeframe strings
160 |
161 | Returns:
162 | bool: True if timeframes are consistent
163 |
164 | Raises:
165 | ValueError: If timeframes have different resolutions
166 | """
167 | if isinstance(timeframes, str):
168 | return True
169 |
170 | timedeltas = list(map(timeframe_to_timedelta, timeframes))
171 | if all(td == timedeltas[0] for td in timedeltas):
172 | return True
173 | else:
174 | raise ValueError(f"Inconsistent timeframes detected: {[str(td) for td in timedeltas]}")
175 |
176 | # Define the mapping between time range, resolution, and its range
177 | def get_resolution_and_range(timeframe):
178 | delta = timeframe_to_timedelta(timeframe)
179 | if delta < timedelta(hours=5):
180 | return "1 minute", "delta < 5 hours"
181 | elif delta < timedelta(hours=36):
182 | return "8 minutes", "5 hours <= delta < 36 hours"
183 | elif delta < timedelta(hours=72):
184 | return "16 minutes", "36 hours <= delta < 72 hours"
185 | elif delta < timedelta(days=8):
186 | return "1 hour", "72 hours <= delta < 8 days"
187 | elif delta < timedelta(days=270):
188 | return "1 day", "8 days <= delta < 270 days"
189 | elif delta < timedelta(days=1900):
190 | return "1 week", "270 days <= delta < 1900 days"
191 | else:
192 | return "1 month", "delta >= 1900 days"
193 |
194 | # Function to check if all timeframes have the same resolution
195 | def check_timeframe_resolution(timeframes):
196 | timeframes = ensure_list(timeframes)
197 | resolutions = list(map(get_resolution_and_range, timeframes))
198 |
199 | # Extract only resolutions (without ranges) to check if they are the same
200 | resolution_values = [r[0] for r in resolutions]
201 |
202 | # Check if all resolutions are the same
203 | deltas = [timeframe_to_timedelta(timeframe) for timeframe in timeframes]
204 | if len(set(resolution_values)) > 1:
205 | # If there are differences, output an error message with details
206 | error_message = "Error: Different resolutions detected for the timeframes:\n"
207 | for timeframe, delta, (resolution, time_range) in zip(timeframes, deltas, resolutions):
208 | error_message += (
209 | f"Timeframe: {timeframe}, Delta: {delta}, "
210 | f"Resolution: {resolution} (based on range: {time_range})\n"
211 | )
212 | raise ValueError(error_message)
213 |
214 | min_delta, min_timeframe = min(zip(deltas, timeframes))
215 | max_delta, max_timeframe = max(zip(deltas, timeframes))
216 |
217 | if max_delta >= min_delta * 2:
218 | raise ValueError(
219 | f"Error: The maximum delta {max_delta} (from timeframe {max_timeframe}) "
220 | f"should be less than twice the minimum delta {min_delta} (from timeframe {min_timeframe})."
221 | )
--------------------------------------------------------------------------------
/src/trendspy/trend_keyword.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime, timezone
2 | from .news_article import NewsArticle
3 | from .utils import ensure_list, truncate_string
4 | from .constants import TREND_TOPICS
5 |
6 | class TrendKeyword:
7 | """
8 | Represents a trending search term with associated metadata.
9 |
10 | This class encapsulates information about a trending keyword, including
11 | its search volume, related news, geographic information, and timing data.
12 |
13 | Attributes:
14 | keyword (str): The trending search term
15 | news (list): Related news articles
16 | geo (str): Geographic location code
17 | started_timestamp (tuple): When the trend started
18 | ended_timestamp (tuple): When the trend ended (if finished)
19 | volume (int): Search volume
20 | volume_growth_pct (float): Percentage growth in search volume
21 | trend_keywords (list): Related keywords
22 | topics (list): Related topics
23 | news_tokens (list): Associated news tokens
24 | normalized_keyword (str): Normalized form of the keyword
25 | """
26 | def __init__(self, item: list):
27 | (
28 | self.keyword,
29 | self.news, # news!
30 | self.geo,
31 | self.started_timestamp,
32 | self.ended_timestamp,
33 | self._unk2,
34 | self.volume,
35 | self._unk3,
36 | self.volume_growth_pct,
37 | self.trend_keywords,
38 | self.topics,
39 | self.news_tokens,
40 | self.normalized_keyword
41 | ) = item
42 | if self.news:
43 | self.news = list(map(NewsArticle.from_api, self.news))
44 |
45 | @property
46 | def topic_names(self):
47 | """Returns a list of topic names for the trend's topic IDs."""
48 | return [TREND_TOPICS.get(topic_id, f"Unknown Topic ({topic_id})") for topic_id in self.topics]
49 |
50 | def _convert_to_datetime(self, raw_time):
51 | """Converts time in seconds to a datetime object with UTC timezone, if it exists."""
52 | return datetime.fromtimestamp(raw_time, tz=timezone.utc) if raw_time else None
53 |
54 | @property
55 | def is_trend_finished(self) -> bool:
56 | """Checks if the trend is finished."""
57 | return self.ended_timestamp is not None
58 |
59 | def hours_since_started(self) -> float:
60 | """Returns the number of hours elapsed since the trend started."""
61 | if not self.started_timestamp:
62 | return 0
63 | delta = datetime.now(tz=timezone.utc) - datetime.fromtimestamp(self.started_timestamp[0], tz=timezone.utc)
64 | return delta.total_seconds() / 3600
65 |
66 | def __repr__(self):
67 | """Returns a complete string representation for object reconstruction."""
68 | # Convert NewsArticle objects back to their original form
69 | news_data = self.news
70 | if self.news:
71 | news_data = [
72 | {
73 | 'title': article.title,
74 | 'url': article.url,
75 | 'source': article.source,
76 | 'time': article.time,
77 | 'picture': article.picture,
78 | 'snippet': article.snippet
79 | } for article in self.news
80 | ]
81 |
82 | # Create list of all components in initialization order
83 | components = [
84 | self.keyword,
85 | news_data,
86 | self.geo,
87 | self.started_timestamp,
88 | self.ended_timestamp,
89 | self._unk2,
90 | self.volume,
91 | self._unk3,
92 | self.volume_growth_pct,
93 | self.trend_keywords,
94 | self.topics,
95 | self.news_tokens,
96 | self.normalized_keyword
97 | ]
98 |
99 | return f"{self.__class__.__name__}({components!r})"
100 |
101 |     def detailed_summary(self):
102 |         """Returns a detailed, multi-line summary of the trend."""
103 | timeframe = datetime.fromtimestamp(self.started_timestamp[0]).strftime('%Y-%m-%d %H:%M:%S')
104 | if self.is_trend_finished:
105 | timeframe += ' - ' + datetime.fromtimestamp(self.ended_timestamp[0]).strftime('%Y-%m-%d %H:%M:%S')
106 | else:
107 | timeframe += ' - now'
108 |
109 | s = f'Keyword : {self.keyword}'
110 | s += f'\nGeo : {self.geo}'
111 | s += f'\nVolume : {self.volume} ({self.volume_growth_pct}%)'
112 | s += f'\nTimeframe : {timeframe}'
113 | s += f'\nTrend keywords : {len(self.trend_keywords)} keywords ({truncate_string(",".join(self.trend_keywords), 50)})'
114 | s += f'\nNews tokens : {len(self.news_tokens)} tokens'
115 | return s
116 |
117 | def brief_summary(self):
118 | """Returns an informative summary of the trend."""
119 |         # Start with the geo code in square brackets
120 | parts = [f"[{self.geo}] {self.keyword}: {self.volume:,} searches"]
121 |
122 |         # Append additional details
123 | if self.trend_keywords:
124 | parts.append(f"{len(self.trend_keywords)} related keywords")
125 | if self.topics:
126 | topic_list = ", ".join(self.topic_names)
127 | parts.append(f"topics: {topic_list}")
128 | if self.news:
129 | parts.append(f"{len(self.news)} news articles")
130 |
131 | return ", ".join(parts)
132 |
133 | def _repr_pretty_(self, p, cycle):
134 | """Integration with IPython's pretty printer."""
135 | if cycle:
136 | p.text("[...]")
137 | else:
138 | p.text(self.brief_summary())
139 |
140 | def __format__(self, format_spec):
141 | """Implements formatting for f-strings and format() method."""
142 | return self.brief_summary()
143 |
144 |     # Use brief_summary for the compact string representation
145 | def __str__(self):
146 | return self.brief_summary()
147 |
148 | class TrendKeywordLite:
149 | """
150 | A lightweight version of TrendKeyword for simple trend representation.
151 |
152 | This class provides a simplified view of trending keywords, primarily used
153 | for RSS feeds and basic trending data.
154 |
155 | Attributes:
156 | keyword (str): The trending search term
157 | volume (str): Approximate search volume
158 | trend_keywords (list): Related keywords
159 | link (str): URL to more information
160 | started (int): Unix timestamp when the trend started
161 | picture (str): URL to related image
162 | picture_source (str): Source of the picture
163 | news (list): Related news articles
164 | """
165 | def __init__(self, keyword, volume, trend_keywords, link, started, picture, picture_source, news):
166 | self.keyword = keyword
167 | self.volume = volume
168 | self.trend_keywords = trend_keywords
169 | self.link = link
170 | self.started = None
171 | self.picture = picture
172 | self.picture_source = picture_source
173 | self.news = news
174 | if started:
175 | self.started = self._parse_pub_date(started)
176 | elif news:
177 | self.started = min([item.time for item in news])
178 |
179 | @staticmethod
180 | def _parse_pub_date(pub_date):
181 | return int(datetime.strptime(pub_date, '%a, %d %b %Y %H:%M:%S %z').timestamp())
182 |
183 | @classmethod
184 | def from_api(cls, data):
185 | title = data.get('title')
186 | if isinstance(title, dict):
187 | title = title.get('query')
188 | volume = data.get('formattedTraffic') or data.get('approx_traffic')
189 | trend_keywords = ([item.get('query') for item in data.get('relatedQueries', [])])
190 | trend_keywords = trend_keywords or (data.get('description', '').split(', ') if 'description' in data else None)
191 | trend_keywords = trend_keywords or list(set([word for item in data.get('idsForDedup', '') for word in item.split(' ')]))
192 | link = data.get('shareUrl') or data.get('link')
193 | started = data.get('pubDate')
194 | picture = data.get('picture') or data.get('image', {}).get('imageUrl')
195 | picture_source = data.get('picture_source') or data.get('image', {}).get('source')
196 | articles = data.get('articles') or data.get('news_item') or []
197 |
198 | return cls(
199 | keyword = title,
200 | volume = volume,
201 | trend_keywords = trend_keywords,
202 | link = link,
203 | started = started,
204 | picture = picture,
205 | picture_source = picture_source,
206 | news = [NewsArticle.from_api(item) for item in ensure_list(articles)]
207 | )
208 |
209 | def __repr__(self):
210 | return f"TrendKeywordLite(title={self.keyword}, traffic={self.volume}, started={self.started})"
211 |
212 | def __str__(self):
213 | s = 'Keyword : {}'.format(self.keyword)
214 | s += '\nVolume : {}'.format(self.volume) if self.volume else ''
215 | s += '\nStarted : {}'.format(datetime.fromtimestamp(self.started).strftime('%Y-%m-%d %H:%M:%S')) if self.started else ''
216 | s += '\nTrend keywords : {} keywords ({})'.format(len(self.trend_keywords), truncate_string(','.join(self.trend_keywords), 50)) if self.trend_keywords else ''
217 | s += '\nNews : {} news'.format(len(self.news)) if self.news else ''
218 | return s
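219 |
220 | # Illustrative sketch (hypothetical RSS-style payload):
221 | #   kw = TrendKeywordLite.from_api({'title': 'solar eclipse',
222 | #                                   'approx_traffic': '200K+',
223 | #                                   'link': 'https://trends.google.com/...'})
224 | #   str(kw)  # 'Keyword : solar eclipse\nVolume : 200K+'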
--------------------------------------------------------------------------------
/src/trendspy/client.py:
--------------------------------------------------------------------------------
1 | import re
2 | import json
3 | import requests
4 | import pandas as pd
5 | import numpy as np
6 | from enum import Enum
7 | from typing import Dict, List, Optional
8 | from urllib.parse import quote, quote_plus
9 | from .utils import *
10 | from .converter import TrendsDataConverter
11 | from .trend_keyword import *
12 | from .news_article import *
13 | from .timeframe_utils import convert_timeframe, check_timeframe_resolution
14 | from .hierarchical_search import create_hierarchical_index
15 | from .trend_list import TrendList
16 | from time import sleep,time
17 |
18 | class TrendsQuotaExceededError(Exception):
19 | """Raised when the Google Trends API quota is exceeded for related queries/topics."""
20 | def __init__(self):
21 | super().__init__(
22 | "API quota exceeded for related queries/topics. "
23 | "To resolve this, you can try:\n"
24 | "1. Use a different referer in request headers:\n"
25 | " tr.related_queries(keyword, headers={'referer': 'https://www.google.com/'})\n"
26 | "2. Use a different IP address by configuring a proxy:\n"
27 | " tr.set_proxy('http://proxy:port')\n"
28 | " # or\n"
29 | " tr = Trends(proxy={'http': 'http://proxy:port', 'https': 'https://proxy:port'})\n"
30 | "3. Wait before making additional requests"
31 | )
32 |
33 | class BatchPeriod(Enum): # update every 2 min
34 | '''
35 | Time periods for batch operations.
36 | '''
37 | Past4H = 2 #31 points (new points every 8 min)
38 | Past24H = 3 #91 points (every 16 min)
39 | Past48H = 5 #181 points (every 16 min)
40 | Past7D = 4 #43 points (every 4 hours)
41 |
42 | BATCH_URL = 'https://trends.google.com/_/TrendsUi/data/batchexecute'
43 | HOT_TRENDS_URL = 'https://trends.google.com/trends/hottrends/visualize/internal/data'
44 |
45 | # ----------- API LINKS -------------
46 | API_URL = 'https://trends.google.com/trends/api'
47 | API_EXPLORE_URL = f'{API_URL}/explore'
48 | API_GEO_DATA_URL = f'{API_URL}/explore/pickers/geo'
49 | API_CATEGORY_URL = f'{API_URL}/explore/pickers/category'
50 | API_TOPCHARTS_URL = f'{API_URL}/topcharts'
51 | API_AUTOCOMPLETE = f'{API_URL}/autocomplete/'
52 | DAILY_SEARCHES_URL = f'{API_URL}/dailytrends'
53 | REALTIME_SEARCHES_URL = f'{API_URL}/realtimetrends'
54 |
55 | API_TOKEN_URL = 'https://trends.google.com/trends/api/widgetdata'
56 | API_TIMELINE_URL = f'{API_TOKEN_URL}/multiline'
57 | API_MULTIRANGE_URL = f'{API_TOKEN_URL}/multirange'
58 | API_GEO_URL = f'{API_TOKEN_URL}/comparedgeo'
59 | API_RELATED_QUERIES_URL = f'{API_TOKEN_URL}/relatedsearches'
60 |
61 | # ----------- EMBED LINKS -------------
62 | EMBED_URL = 'https://trends.google.com/trends/embed/explore'
63 | EMBED_GEO_URL = f'{EMBED_URL}/GEO_MAP'
64 | EMBED_TOPICS_URL = f'{EMBED_URL}/RELATED_TOPICS'
65 | EMBED_QUERIES_URL = f'{EMBED_URL}/RELATED_QUERIES'
66 | EMBED_TIMESERIES_URL = f'{EMBED_URL}/TIMESERIES'
67 |
68 | # --------------- RSS -----------------
69 | DAILY_RSS = 'https://trends.google.com/trends/trendingsearches/daily/rss'
70 | REALTIME_RSS = 'https://trends.google.com/trending/rss'
71 |
72 | class Trends:
73 | """
74 | A client for accessing Google Trends data.
75 |
76 | This class provides methods to analyze search trends, get real-time trending topics,
77 | and track interest over time and regions.
78 |
79 | Parameters:
80 | language (str): Two-letter language code (e.g., 'en'). Defaults to 'en'.
81 | tzs (int): Timezone offset in minutes. Defaults to 360.
82 | use_entity_names (bool): Whether to use entity names instead of keywords.
83 | Defaults to False.
84 | proxy (str or dict): Proxy configuration. Can be a string URL or a dictionary
85 | with protocol-specific proxies. Examples:
86 | - "http://user:pass@10.10.1.10:3128"
87 | - {"http": "http://10.10.1.10:3128", "https": "http://10.10.1.10:1080"}
88 | """
89 |
90 | def __init__(self, language='en', tzs=360, request_delay=1., max_retries=3, use_entity_names=False, proxy=None, **kwargs):
91 | """
92 | Initialize the Trends client.
93 |
94 | Args:
95 | language (str): Language code (e.g., 'en', 'es', 'fr').
96 | tzs (int): Timezone offset in minutes. Defaults to 360.
97 | request_delay (float): Minimum time interval between requests in seconds. Helps avoid hitting rate limits and behaving like a bot. Set to 0 to disable.
98 | max_retries (int): Maximum number of retry attempts for failed requests. Each retry includes exponential backoff delay of 2^(max_retries-retries) seconds for rate limit errors (429, 302).
99 | use_entity_names (bool): Whether to use entity names instead of keywords.
100 | proxy (str or dict): Proxy configuration.
101 | **kwargs: Additional arguments for backwards compatibility.
102 | - hl (str, deprecated): Old-style language code (e.g., 'en' or 'en-US').
103 | If provided, will be used as fallback when language is invalid.
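
Example (illustrative):
>>> tr = Trends(language='en', request_delay=2.0)
>>> tr_proxied = Trends(proxy='http://user:pass@10.10.1.10:3128')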
104 | """
105 | if isinstance(language, str) and len(language) >= 2:
106 | self.language = language[:2].lower()
107 | elif 'hl' in kwargs and isinstance(kwargs['hl'], str) and len(kwargs['hl']) >= 2:
108 | self.language = kwargs['hl'][:2].lower()
109 | else:
110 | self.language = 'en'
111 |
113 | self.tzs = tzs or -int(datetime.now().astimezone().utcoffset().total_seconds()/60)
114 | self._default_params = {'hl': self.language, 'tz': tzs}
115 | self.use_entity_names = use_entity_names
116 | self.session = requests.session()
117 | self._headers = {'accept-language': self.language}
118 | self._geo_cache = {}  # hierarchical geo indexes, keyed by language
119 | self._category_cache = {}  # hierarchical category indexes, keyed by language
120 | self.request_delay = request_delay
121 | self.max_retries = max_retries
122 | self.last_request_times = {0, 1}  # two most recent request timestamps (seeded so the first requests are not delayed)
123 | # Initialize proxy configuration
124 | self.set_proxy(proxy)
125 |
126 | def set_proxy(self, proxy=None):
127 | """
128 | Set or update proxy configuration for the session.
129 |
130 | Args:
131 | proxy (str or dict, optional): Proxy configuration. Can be:
132 | - None: Remove proxy configuration
133 | - str: URL for all protocols (e.g., "http://10.10.1.10:3128")
134 | - dict: Protocol-specific proxies (e.g., {"http": "...", "https": "..."})
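
Example (illustrative):
>>> tr = Trends()
>>> tr.set_proxy('http://10.10.1.10:3128')  # same proxy for http and https
>>> tr.set_proxy()  # remove proxy configuration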
135 | """
136 | if isinstance(proxy, str):
137 | # Convert string URL to dictionary format
138 | proxy = {
139 | 'http': proxy,
140 | 'https': proxy
141 | }
142 |
143 | # Update session's proxy configuration
144 | self.session.proxies.clear()
145 | if proxy:
146 | self.session.proxies.update(proxy)
147 |
148 | def _extract_keywords_from_token(self, token):
149 | if self.use_entity_names:
150 | return [item['text'] for item in token['bullets']]
151 | else:
152 | return [item['complexKeywordsRestriction']['keyword'][0]['value'] for item in token['request']['comparisonItem']]
153 |
154 | @staticmethod
155 | def _parse_protected_json(response: requests.models.Response):
156 | """
157 | Parses JSON data from a protected API response.
158 |
159 | Args:
160 | response (requests.models.Response): Response object from requests
161 |
162 | Returns:
163 | dict: Parsed JSON data
164 |
165 | Raises:
166 | ValueError: If response status is not 200, content type is invalid,
167 | or JSON parsing fails
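
Note:
Google prefixes these responses with an anti-XSSI guard line (e.g. ")]}'"),
so the JSON payload is recovered from the last line of the response body.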
168 | """
169 | valid_content_types = {'application/json', 'application/javascript', 'text/javascript'}
170 | content_type = response.headers.get('Content-Type', '').split(';')[0].strip().lower()
171 |
172 | if (response.status_code != 200) or (content_type not in valid_content_types):
173 | raise ValueError(f"Invalid response: status {response.status_code}, content type '{content_type}'")
174 |
175 | try:
176 | json_data = response.text.split('\n')[-1]
177 | return json.loads(json_data)
178 | except json.JSONDecodeError:
179 | raise ValueError("Failed to parse JSON data")
180 |
181 | def _encode_items(self, keywords, timeframe="today 12-m", geo=''):
182 | data = list(map(ensure_list, [keywords, timeframe, geo]))
183 | lengths = list(map(len, data))
184 | max_len = max(lengths)
185 | if not all(max_len % length == 0 for length in lengths):
186 | raise ValueError(f"Ambiguous input sizes: unable to determine how to combine inputs of lengths {lengths}")
187 | data = [item * (max_len // len(item)) for item in data]
188 | items = [dict(zip(['keyword', 'time', 'geo'], values)) for values in zip(*data)]
189 | return items
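
# Example (illustrative): shorter inputs are repeated to match the longest one,
# so two keywords with a single timeframe and geo become two request items:
#   _encode_items(['python', 'rust'], 'today 12-m', 'US')
#   -> [{'keyword': 'python', 'time': 'today 12-m', 'geo': 'US'},
#       {'keyword': 'rust', 'time': 'today 12-m', 'geo': 'US'}]
# Lengths that do not divide evenly (e.g. 2 keywords, 3 timeframes) raise ValueError.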
190 |
191 | def _encode_request(self, params):
192 | if 'keyword' in params:
193 | keywords = ensure_list(params.pop('keyword'))
194 | if len(keywords) != 1:
195 | raise ValueError("This endpoint only supports a single keyword")
196 | params['keywords'] = keywords
197 |
198 | items = self._encode_items(
199 | keywords = params['keywords'],
200 | timeframe = params.get('timeframe', "today 12-m"),
201 | geo = params.get('geo', '')
202 | )
203 |
204 | req = {'req': json.dumps({
205 | 'comparisonItem': items,
206 | 'category': params.get('cat', 0),
207 | 'property': params.get('gprop', '')
208 | })}
209 |
210 | req.update(self._default_params)
211 | return req
212 |
213 | def _get(self, url, params=None, headers=None):
214 | """
215 | Make HTTP GET request with retry logic and proxy support.
216 |
217 | Args:
218 | url (str): URL to request
219 | params (dict, optional): Query parameters
220 | headers (dict, optional): Extra request headers

221 | Returns:
222 | requests.Response: Response object
223 |
224 | Raises:
225 | ValueError: If response status code is not 200
226 | requests.exceptions.RequestException: For network-related errors
227 | """
228 | retries = self.max_retries
229 | response_codes = []
230 | last_response = None
231 | while retries > 0:
232 | try:
233 | # Throttle: keep at least `request_delay` seconds since the oldest tracked request
234 | if self.request_delay:
235 | min_time = min(self.last_request_times)
236 | sleep_time = max(0, self.request_delay - (time() - min_time))
237 | sleep(sleep_time)
238 | self.last_request_times = (self.last_request_times - {min_time}) | {time()}
239 |
240 | req = self.session.get(url, params=params, headers=headers)
241 | last_response = req
242 | response_codes.append(req.status_code)
243 |
244 | if req.status_code == 200:
245 | return req
246 | if req.status_code in {429, 302}:
247 | # Exponential backoff for rate-limit responses: 1s, 2s, 4s, ...
248 | sleep(2 ** (self.max_retries - retries))
249 | retries -= 1
250 | except Exception:
251 | retries -= 1
252 | if retries == 0:
253 | raise
254 |
255 | if response_codes.count(429) > len(response_codes) / 2:
256 | current_delay = self.request_delay or 1
257 | print(f"\nWarning: Too many rate limit errors (429). Consider increasing request_delay "
258 | f"to Trends(request_delay={current_delay*2}) before Google implements a long-term "
259 | f"rate limit!")
260 | if last_response is not None:
261 | last_response.raise_for_status()
262 | raise ValueError(f"Request to {url} failed after {self.max_retries} attempts (status codes: {response_codes})")
265 |
266 | @classmethod
267 | def _extract_embedded_data(cls, text):
268 | pattern = re.compile(r"JSON\.parse\('([^']+)'\)")
269 | matches = pattern.findall(text)
270 | # If matches found, decode and return result
271 | if matches:
272 | return json.loads(decode_escape_text(matches[0])) # Take first match
273 | print("Failed to extract JSON data")
274 |
275 | def _token_to_data(self, token):
276 | URL = {
277 | 'fe_line_chart': API_TIMELINE_URL,
278 | 'fe_multi_range_chart': API_MULTIRANGE_URL,
279 | 'fe_multi_heat_map': API_GEO_URL,
280 | 'fe_geo_chart_explore': API_GEO_URL,
281 | 'fe_related_searches': API_RELATED_QUERIES_URL
282 | }[token['type']]
283 |
284 | params = {'req': json.dumps(token['request']), 'token': token['token']}
285 | params.update(self._default_params)
287 | req = self._get(URL, params=params)
288 | data = Trends._parse_protected_json(req)
289 | return data
290 |
291 | def _get_token_data(self, url, params=None, request_fix=None, headers=None, raise_quota_error=False):
292 | """
293 | Internal method to get token data from Google Trends API.
294 |
295 | Handles both 'keyword' and 'keywords' parameters for backward compatibility
296 | and convenience.
297 | """
298 |
299 | params = self._encode_request(params)
300 | req = self._get(url, params=params, headers=headers)
301 | token = self._extract_embedded_data(req.text)
302 |
303 | if request_fix is not None:
304 | token = {**token, 'request':{**token['request'], **request_fix}}
305 |
306 | if raise_quota_error:
307 | user_type = token.get('request', {}).get('userConfig', {}).get('userType', '')
308 | if user_type == "USER_TYPE_EMBED_OVER_QUOTA":
309 | raise TrendsQuotaExceededError()
310 |
311 | data = self._token_to_data(token)
312 | return token, data
313 |
314 | def _get_batch(self, req_id, data):
315 | req_data = json.dumps([[[req_id, json.dumps(data), None, "generic"]]])
316 | post_data = f'f.req={req_data}'
317 | headers = {
318 | "content-type": "application/x-www-form-urlencoded;charset=UTF-8"
319 | }
320 | req = self.session.post(BATCH_URL, post_data, headers=headers)
321 | return req
322 |
323 | def interest_over_time(self, keywords, timeframe="today 12-m", geo='', cat=0, gprop='', return_raw = False, headers=None):
324 | """
325 | Retrieves interest over time data for specified keywords.
326 |
327 | Parameters:
328 | keywords (str or list): Keywords to analyze.
329 | timeframe (str or list): Defines the time range for querying interest over time.
330 | It can be specified as a single string or a list.
331 | Supported formats include:
332 |
333 | - 'now 1-H', 'now 4-H', 'now 1-d', 'now 7-d'
334 | - 'today 1-m', 'today 3-m', 'today 12-m', 'today 5-y'
335 | - 'all' for all available data
336 | - 'YYYY-MM-DD YYYY-MM-DD' for specific date ranges
337 | - 'YYYY-MM-DDTHH YYYY-MM-DDTHH' for hourly data (if less than 8 days)
338 |
339 | Additional flexible formats:
340 |
341 | 1. **'now {offset}'**: Timeframes less than 8 days (e.g., 'now 72-H' for the last 72 hours).
342 | 2. **'today {offset}'**: Larger periods starting from today (e.g., 'today 5-m' for the last 5 months).
343 | 3. **'date {offset}'**: Specific date with offset (e.g., '2024-03-25 5-m' for 5 months back from March 25, 2024).
344 |
345 | **Note:** Offsets always go backward in time.
346 |
347 | Resolutions based on timeframe length:
348 |
349 | - `< 5 hours`: 1 minute
350 | - `5 hours <= delta < 36 hours`: 8 minutes
351 | - `36 hours <= delta < 72 hours`: 16 minutes
352 | - `72 hours <= delta < 8 days`: 1 hour
353 | - `8 days <= delta < 270 days`: 1 day
354 | - `270 days <= delta < 1900 days`: 1 week
355 | - `>= 1900 days`: 1 month
356 |
357 | Restrictions:
358 | - **Same resolution**: All timeframes must have the same resolution.
359 | - **Timeframe length**: Maximum timeframe cannot be more than twice the length of the minimum timeframe.
360 | geo (str): Geographic location code (e.g., 'US' for United States).
361 | cat (int): Category ID. Defaults to 0 (all categories).
362 | gprop (str): Google property filter.
363 | return_raw (bool): If True, returns raw API response.
364 |
365 | Returns:
366 | pandas.DataFrame or tuple
367 | Processed interest-over-time data, or the raw (token, data) pair if `return_raw=True`
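
Example (illustrative; values depend on the live API):
>>> tr = Trends()
>>> df = tr.interest_over_time(['python', 'javascript'], timeframe='today 3-m', geo='US')
>>> df.head()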
368 | """
369 | check_timeframe_resolution(timeframe)
370 | timeframe = list(map(convert_timeframe, ensure_list(timeframe)))
371 |
372 | token, data = self._get_token_data(EMBED_TIMESERIES_URL, locals(), headers=headers)
373 | if return_raw:
374 | return token, data
375 |
376 | if token['type']=='fe_line_chart':
377 | keywords = self._extract_keywords_from_token(token)
378 | return TrendsDataConverter.interest_over_time(data, keywords=keywords)
379 | if token['type']=='fe_multi_range_chart':
380 | bullets = TrendsDataConverter.token_to_bullets(token)
381 | return TrendsDataConverter.multirange_interest_over_time(data, bullets=bullets)
382 | return data
383 |
384 | def related_queries(self, keyword, timeframe="today 12-m", geo='', cat=0, gprop='', return_raw = False, headers=None):
385 | """
386 | Retrieves related queries for a single search term.
387 |
388 | Args:
389 | keyword (str): A single keyword to analyze
390 | timeframe (str): Time range for analysis
391 | geo (str): Geographic location code
392 | cat (int): Category ID
393 | gprop (str): Google property filter
394 | return_raw (bool): If True, returns raw API response
395 | headers (dict, optional): Custom request headers. Can be used to set different referer
396 | to help bypass quota limits
397 |
398 | Raises:
399 | TrendsQuotaExceededError: When API quota is exceeded
400 |
401 | Returns:
402 | dict: Two DataFrames containing 'top' and 'rising' related queries
403 |
404 | Example:
405 | >>> tr = Trends()
406 | >>> related = tr.related_queries('python')
407 | >>> print("Top queries:")
408 | >>> print(related['top'])
409 | >>> print("\nRising queries:")
410 | >>> print(related['rising'])
411 | """
412 | headers = headers or {"referer": "https://trends.google.com/trends/explore"}
413 | token, data = self._get_token_data(EMBED_QUERIES_URL, locals(), headers=headers, raise_quota_error=True)
414 | if return_raw:
415 | return token, data
416 | return TrendsDataConverter.related_queries(data)
417 |
418 | def related_topics(self, keyword, timeframe="today 12-m", geo='', cat=0, gprop='', return_raw = False, headers=None):
419 | """
420 | Retrieves related topics for a single search term.
421 |
422 | Parameters:
423 | keyword (str): A single keyword to analyze
424 | timeframe (str): Time range for analysis
425 | geo (str): Geographic location code
426 | cat (int): Category ID
427 | gprop (str): Google property filter
428 | return_raw (bool): If True, returns raw API response
429 | headers (dict, optional): Custom request headers. Can be used to set different referer
430 | to help bypass quota limits
431 |
432 | Raises:
433 | TrendsQuotaExceededError: When API quota is exceeded
434 |
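Returns:
dict: Two DataFrames containing 'top' and 'rising' related topics
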
435 | Example:
436 | >>> tr = Trends()
437 | >>> related = tr.related_topics('python')
438 | >>> print("Top topics:")
439 | >>> print(related['top'])
440 | >>> print("\nRising topics:")
441 | >>> print(related['rising'])
442 | """
443 | headers = headers or {"referer": "https://trends.google.com/trends/explore"}
444 | token, data = self._get_token_data(EMBED_TOPICS_URL, locals(), headers=headers, raise_quota_error=True)
445 | if return_raw:
446 | return token, data
447 | return TrendsDataConverter.related_queries(data)
448 |
449 |
450 | def interest_by_region(self, keywords, timeframe="today 12-m", geo='', cat=0, gprop='', resolution=None, inc_low_vol=False, return_raw=False):
451 | """
452 | Retrieves geographical interest data based on keywords and other parameters.
453 |
454 | Parameters:
455 | keywords (str or list): Search keywords to analyze.
456 | timeframe (str): Time range for analysis (e.g., "today 12-m", "2022-01-01 2022-12-31")
457 | geo (str): Geographic region code (e.g., "US" for United States)
458 | cat (int): Category ID (default: 0 for all categories)
459 | gprop (str): Google property filter
460 | resolution (str): Geographic resolution level:
461 | - 'COUNTRY' (default when geo is empty)
462 | - 'REGION' (states/provinces)
463 | - 'CITY' (cities)
464 | - 'DMA' (Designated Market Areas)
465 | inc_low_vol (bool): Include regions with low search volume
466 | return_raw (bool): Return unprocessed API response data
467 |
468 | Returns:
469 | pandas.DataFrame or dict: Processed geographic interest data, or raw API response if return_raw=True
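
Example (illustrative):
>>> tr = Trends()
>>> df = tr.interest_by_region('python', geo='US', resolution='REGION')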
470 | """
471 | if not resolution:
472 | resolution = 'COUNTRY' if not geo else 'REGION'
473 |
474 | data_injection = {'resolution': resolution, 'includeLowSearchVolumeGeos': inc_low_vol}
475 | token, data = self._get_token_data(EMBED_GEO_URL, locals(), request_fix=data_injection)
476 | if return_raw:
477 | return token, data
478 |
479 | bullets = TrendsDataConverter.token_to_bullets(token)
480 | return TrendsDataConverter.geo_data(data, bullets)
481 |
482 | def suggestions(self, keyword, language=None, return_raw=False):
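"""Fetches keyword suggestions (entities/topics) from the Trends autocomplete endpoint."""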
483 | params = {'hl': language, 'tz': self.tzs} if language else self._default_params
484 | encoded_keyword = keyword.replace("'", "")
485 | encoded_keyword = quote(encoded_keyword, safe='-')
486 | req = self._get(API_AUTOCOMPLETE+encoded_keyword, params)
487 | data = self._parse_protected_json(req)
488 | if return_raw:
489 | return data
490 | return TrendsDataConverter.suggestions(data)
491 |
492 | def hot_trends(self):
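"""Returns raw 'hot trends' data from the legacy visualization endpoint."""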
493 | req = self.session.get(HOT_TRENDS_URL)
494 | return json.loads(req.text)
495 |
496 | def top_year_charts(self, year='2023', geo='GLOBAL'):
497 | """
498 | https://trends.google.com/trends/yis/2023/GLOBAL/
499 | """
500 | params = {'date':year, 'geo':geo, 'isMobile':False}
501 | params.update(self._default_params)
502 | req = self._get(API_TOPCHARTS_URL, params)
503 | data = self._parse_protected_json(req)
504 | return data
505 |
506 | def trending_stories(self, geo='US', category='all', max_stories=200, return_raw=False):
507 | '''
508 | Trending stories from the old realtime API.
509 | category: all: "all", business: "b", entertainment: "e", health: "m", sciTech: "t", sports: "s", top: "h"
510 | '''
511 | forms = {'ns': 15, 'geo': geo, 'tz': self.tzs, 'hl': 'en', 'cat': category, 'fi': '0', 'fs': '0', 'ri': max_stories, 'rs': max_stories, 'sort': 0}
512 | req = self._get(REALTIME_SEARCHES_URL, forms)
514 | data = self._parse_protected_json(req)
515 | if return_raw:
516 | return data
517 |
518 | data = data.get('storySummaries', {}).get('trendingStories', [])
519 | data = [TrendKeywordLite.from_api(item) for item in data]
520 | return data
521 |
522 | def daily_trends_deprecated(self, geo='US', return_raw=False):
523 | params = {'ns': 15, 'geo': geo, 'hl':'en'}
524 | params.update(self._default_params)
525 | req = self._get(DAILY_SEARCHES_URL, params)
526 | data = self._parse_protected_json(req)
527 | if return_raw:
528 | return data
529 | data = data.get('default', {}).get('trendingSearchesDays', [])
530 | data = [TrendKeywordLite.from_api(item) for day in data for item in day['trendingSearches']]
531 | return data
532 |
533 | def daily_trends_deprecated_by_rss(self, geo='US', safe=True, return_raw=False):
534 | '''
535 | Returns only the most recent ~20 daily trends from the RSS feed.
536 | '''
537 |
538 | params = {'geo':geo, 'safe':safe}
539 | req = self._get(DAILY_RSS, params)
540 | if return_raw:
541 | return req.text
542 | data = TrendsDataConverter.rss_items(req.text)
543 | data = list(map(TrendKeywordLite.from_api, data))
544 | return data
545 |
546 | def trending_now(self, geo='US', language='en', hours=24, num_news=0, return_raw=False):
547 | """
548 | Retrieves trending keywords that have seen significant growth in popularity within the last specified number of hours.
549 |
550 | Parameters:
551 | -----------
552 | geo : str, optional
553 | The geographical region for the trends, default is 'US' (United States).
554 | language : str, optional
555 | The language of the trends, default is 'en' (English).
556 | hours : int, optional
557 | The time window (in hours) for detecting trending keywords. Minimum value is 1, and the maximum is 191. Default is 24.
558 | num_news : int, optional
559 | NOT RECOMMENDED to use as it significantly slows down the function. The feature for fetching news associated with the trends is rarely used on the platform.
560 | If you want trending keywords with news, consider using `trending_now_by_rss` instead. Default is 0.
561 | return_raw : bool, optional
562 | If set to True, the function returns the raw data directly from the API. Default is False, meaning processed data will be returned.
563 |
564 | Returns:
565 | --------
566 | TrendList or raw API response
567 | A `TrendList` of `TrendKeyword` objects, or raw API data if `return_raw=True`.
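
Example (illustrative):
>>> tr = Trends()
>>> trends = tr.trending_now(geo='US', hours=4)
>>> print(trends[:3])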
568 | """
569 | req_data = [None, None, geo, num_news, language, hours, 1]
570 | req = self._get_batch('i0OFE', req_data)
571 | data = self._parse_protected_json(req)
572 | if return_raw:
573 | return data
574 |
575 | data = json.loads(data[0][2])
576 | data = TrendList(map(TrendKeyword, data[1]))
577 | return data
578 |
579 | def trending_now_by_rss(self, geo='US', return_raw=False):
580 | """
581 | Retrieves trending keywords from the RSS feed for a specified geographical region.
582 |
583 | Parameters:
584 | -----------
585 | geo : str, optional
586 | The geographical region for the trends, default is 'US' (United States).
587 | return_raw : bool, optional
588 | If set to True, the function returns the raw data directly from the API. Default is False, meaning processed data will be returned.
589 |
590 | Returns:
591 | --------
592 | Union[str, List[TrendKeywordLite]]
593 | Raw RSS XML text if `return_raw=True`, or a list of `TrendKeywordLite` objects otherwise.
594 | """
595 | params = {'geo':geo}
596 | req = self._get(REALTIME_RSS, params)
597 | if return_raw:
598 | return req.text
599 | data = TrendsDataConverter.rss_items(req.text)
600 | data = list(map(TrendKeywordLite.from_api, data))
601 | return data
602 |
603 | def trending_now_news_by_ids(self, news_ids, max_news=3, return_raw=False):
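"""
Fetches news articles by their IDs (as returned alongside trends from `trending_now`).
Returns a list of `NewsArticle` objects, or the raw batch response if `return_raw=True`.
"""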
604 | req = self._get_batch('w4opAf', [news_ids, max_news])
605 | data = self._parse_protected_json(req)
606 | if return_raw:
607 | return data
608 |
609 | data = json.loads(data[0][2])
610 | data = list(map(NewsArticle.from_api, data[0]))
611 | return data
612 |
613 | def trending_now_showcase_timeline(self, keywords, geo='US', timeframe=BatchPeriod.Past24H, return_raw=False):
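"""
Retrieves interest timelines for a batch of keywords from the trending-now showcase endpoint.

Parameters:
-----------
keywords : list
Keywords to fetch timelines for.
geo : str, optional
The geographical region, default is 'US'.
timeframe : BatchPeriod, optional
Sampling window; one of the `BatchPeriod` values. Default is `BatchPeriod.Past24H`.
return_raw : bool, optional
If set to True, returns the raw batch response.

Example (illustrative):
>>> tr = Trends()
>>> timelines = tr.trending_now_showcase_timeline(['bitcoin', 'ethereum'], timeframe=BatchPeriod.Past4H)
"""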
614 | req_data = [None, None, [[geo, keyword, timeframe.value, 0, 3] for keyword in keywords]]
615 | request_timestamp = int(datetime.now(timezone.utc).timestamp())
616 | req = self._get_batch('jpdkv', req_data)
617 | data = self._parse_protected_json(req)
618 | if return_raw:
619 | return data
620 |
621 | data = json.loads(data[0][2])[0]
622 | data = TrendsDataConverter.trending_now_showcase_timeline(data, request_timestamp)
623 | return data
624 |
625 | def categories(self, find: str = None, language: str = None) -> List[dict]:
626 | """
627 | Search for categories in Google Trends data.
628 |
629 | This function retrieves and caches category data from Google Trends API, then performs
630 | a partial search on the categories. The results are cached by language to minimize API calls.
631 |
632 | Args:
633 | find (str, optional): Search query for categories. If None or empty string,
634 | returns all available categories. Defaults to None.
635 | language (str, optional): Language code for the response (e.g., 'en', 'es').
636 | If None, uses the instance's default language. Defaults to None.
637 |
638 | Returns:
639 | List[dict]: List of matching categories. Each category is a dictionary containing:
640 | - name (str): Category name in the specified language
641 | - id (str): Category identifier
642 |
643 | Examples:
644 | >>> trends = Trends()
645 | >>> # Find all categories containing "computer"
646 | >>> computer_cats = trends.categories(find="computer")
647 | >>> # Find all categories in Spanish
648 | >>> spanish_cats = trends.categories(language="es")
649 | >>> # Find specific category in German
650 | >>> tech_cats = trends.categories(find="Technologie", language="de")
651 | """
652 | cur_language = language or self.language
653 |
654 | if cur_language not in self._category_cache:
655 | req = self._get(API_CATEGORY_URL, {'hl': cur_language, 'tz': self.tzs})
656 | data = self._parse_protected_json(req)
657 | self._category_cache[cur_language] = create_hierarchical_index(data, join_ids=False)
658 |
659 | if not find:
660 | return list(self._category_cache[cur_language].name_to_item.values())
661 |
662 | return self._category_cache[cur_language].partial_search(find)
663 |
664 | def geo(self, find: str = None, language: str = None) -> List[dict]:
665 | """
666 | Search for geographical locations in Google Trends data.
667 |
668 | This function retrieves and caches geographical data from Google Trends API, then performs
669 | a partial search on the locations. The results are cached by language to minimize API calls.
670 |
671 | Args:
672 | find (str, optional): Search query for locations. If None or empty string,
673 | returns all available locations. Defaults to None.
674 | language (str, optional): Language code for the response (e.g., 'en', 'es').
675 | If None, uses the instance's default language. Defaults to None.
676 |
677 | Returns:
678 | List[dict]: List of matching locations. Each location is a dictionary containing:
679 | - name (str): Location name in the specified language
680 | - id (str): Location identifier (e.g., 'US-NY' for New York, United States)
681 |
682 | Examples:
683 | >>> trends = Trends()
684 | >>> # Find all locations containing "York"
685 | >>> locations = trends.geo(find="York")
686 | >>> # Find all locations in Spanish
687 | >>> spanish_locations = trends.geo(language="es")
688 | >>> # Find specific location in German
689 | >>> berlin = trends.geo(find="Berlin", language="de")
690 |
691 | Note:
692 | - Results are cached by language to improve performance
693 | - API response is parsed and structured for efficient searching
694 | - Case-insensitive partial matching is used for searches
695 | """
696 | # Use provided language or fall back to instance default
697 | cur_language = language or self.language
698 |
699 | # Check if we need to fetch and cache data for this language
700 | if cur_language not in self._geo_cache:
701 | # Fetch geographical data from Google Trends API
702 | data = self._get(API_GEO_DATA_URL,
703 | {'hl': cur_language, 'tz': self.tzs})
704 | data = self._parse_protected_json(data)
705 | # Create and cache search system for this language
706 | self._geo_cache[cur_language] = create_hierarchical_index(data)
707 |
708 | # Perform partial search (empty string returns all locations)
709 | if not find:
710 | return list(self._geo_cache[cur_language].name_to_location.values())
711 |
712 | return self._geo_cache[cur_language].partial_search(find)
--------------------------------------------------------------------------------