├── .gitattributes
├── requirements.txt
├── .gitignore
├── docs
│   ├── members
│   │   └── funcs
│   │       └── top_users.md
│   ├── films
│   │   └── funcs
│   │       ├── get_upcoming_movies.md
│   │       ├── get_movies_by_year.md
│   │       ├── print_movies.md
│   │       ├── get_movies_by_theme.md
│   │       ├── get_movies_by_decade.md
│   │       ├── get_movies_by_similar.md
│   │       ├── get_movies_by_mini_theme.md
│   │       ├── get_movies_by_nanogenre.md
│   │       ├── get_movies_by_service.md
│   │       └── get_movies_by_genre.md
│   ├── user
│   │   └── funcs
│   │       ├── user_films_liked.md
│   │       ├── user_films_rated.md
│   │       ├── extract_user_films.md
│   │       ├── user_network.md
│   │       ├── user_followers.md
│   │       ├── user_following.md
│   │       ├── user_lists.md
│   │       ├── user_genre_info.md
│   │       ├── user_tags.md
│   │       ├── user_films.md
│   │       ├── user_watchlist.md
│   │       ├── user_diary.md
│   │       ├── user_activity.md
│   │       ├── user_reviews.md
│   │       ├── user_liked_reviews.md
│   │       └── user_wrapped.md
│   ├── movie
│   │   └── funcs
│   │       ├── movie_watchers.md
│   │       └── movie_details.md
│   ├── search
│   │   └── funcs
│   │       └── get_film_slug_from_title.md
│   └── check_docs.py
├── tests
│   ├── run.tests.sh
│   ├── test_user.py
│   ├── test_search.py
│   ├── test_scraper.py
│   └── test_movie.py
├── examples
│   ├── requirements.txt
│   ├── exports
│   │   └── users
│   │       └── nmcassa
│   │           ├── genre_info.json
│   │           ├── lists.json
│   │           ├── user_tags.json
│   │           ├── followers.json
│   │           ├── following.json
│   │           ├── user.json
│   │           ├── films_by_rating.json
│   │           └── activity_following.json
│   ├── README.md
│   ├── follow_stats.py
│   ├── search_and_export_lists.py
│   ├── export_user_data.py
│   ├── export_user_diary_posters.py
│   ├── user_plot_statistics.py
│   └── user_rating_plot.py
├── CONTRIBUTING.md
├── letterboxdpy
│   ├── pages
│   │   ├── movie_lists.py
│   │   ├── user_lists.py
│   │   ├── movie_similar.py
│   │   ├── movie_details.py
│   │   ├── movie_reviews.py
│   │   ├── movie_members.py
│   │   ├── user_tags.py
│   │   ├── user_activity.py
│   │   ├── user_reviews.py
│   │   ├── user_watchlist.py
│   │   ├── user_network.py
│   │   ├── user_films.py
│   │   └── user_list.py
│   ├── utils
│   │   ├── utils_transform.py
│   │   ├── utils_string.py
│   │   ├── utils_validators.py
│   │   ├── utils_url.py
│   │   ├── utils_terminal.py
│   │   ├── utils_file.py
│   │   ├── movies_extractor.py
│   │   ├── date_utils.py
│   │   └── lists_extractor.py
│   ├── core
│   │   ├── exceptions.py
│   │   ├── decorators.py
│   │   ├── encoder.py
│   │   └── scraper.py
│   ├── constants
│   │   ├── project.py
│   │   └── selectors.py
│   ├── url.py
│   ├── avatar.py
│   ├── members.py
│   ├── watchlist.py
│   ├── list.py
│   ├── movie.py
│   ├── films.py
│   └── user.py
├── LICENSE
├── pyproject.toml
└── README.md
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests>=2.31.0
2 | beautifulsoup4>=4.12.3
3 | lxml>=5.1.0
4 | validators
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Python cache and bytecode
2 | __pycache__/
3 | *.pyc
4 | *.pyo
5 | *~
6 |
7 | # Build artifacts
8 | build/
9 | dist/
10 | *.egg-info/
11 |
12 | # macOS
13 | .DS_Store
14 |
15 | # Windows
16 | desktop.ini
17 |
--------------------------------------------------------------------------------
/docs/members/funcs/top_users.md:
--------------------------------------------------------------------------------
1 |
18 | LIST: Selector = ('div', {'class': 'js-list-entries'})
19 | #
20 | HEADLINE: Selector = ('h2', {'class': 'name'})
21 | #
22 | YEAR: Selector = ('span', {'class': 'releasedate'})
23 |
24 | @dataclass
25 | class MetaSelectors:
26 | """Selectors for meta elements"""
27 | DESCRIPTION: Selector = ('meta', {'name': 'description'})
28 |
29 | @dataclass
30 | class PageSelectors:
31 | """Selectors for page elements"""
32 | ERROR_BODY: Selector = ('body', {'class': 'error'})
33 | ERROR_MESSAGE: Selector = ('section', {'class': 'message'})
34 | LAST_PAGE: Selector = ('div', {'class': 'paginate-pages'})
35 | ARTICLES: Selector = ('ul', {'class': 'poster-list -p70 film-list clear film-details-list'})
--------------------------------------------------------------------------------
/letterboxdpy/utils/utils_validators.py:
--------------------------------------------------------------------------------
1 | import re
2 | import validators
3 |
 4 | def is_url(url) -> bool:
 5 |     """
 6 |     Check whether the given URL is valid and
 7 |     return the result as a boolean.
 8 |     """
 9 |     return bool(validators.url(url))
10 |
11 | def is_null_or_empty(value):
12 | """Check if the given string is null or empty."""
13 | if value is None or value == "":
14 | return True
15 | return False
16 |
17 | def is_whitespace_or_empty(value):
18 | """Check if the given string is whitespace or empty."""
19 | if not isinstance(value, str):
20 | return False
21 | return not value.strip()
22 |
23 | def is_non_negative_integer(value):
24 | """Check if the given value is a non-negative integer."""
25 | return isinstance(value, int) and value >= 0
26 |
27 | def is_valid_email(value):
28 | """Check if the given string is a valid email address."""
29 | if not isinstance(value, str):
30 | return False
31 | email_pattern = r"^[\w\.-]+@[\w\.-]+\.\w+$"
32 | return bool(re.match(email_pattern, value))
33 |
34 | def is_positive_float(value):
35 | """Check if the given value is a positive float."""
36 | try:
37 | number = float(value)
38 | return number > 0
39 | except (ValueError, TypeError):
40 | return False
41 |
42 | def is_boolean(value):
43 | """Check if the given value is a boolean."""
44 | return isinstance(value, bool)
--------------------------------------------------------------------------------
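A minimal usage sketch for the validators above (the values are illustrative only, and the snippet assumes the package is importable as laid out in the tree):

```python
from letterboxdpy.utils.utils_validators import (
    is_url, is_null_or_empty, is_non_negative_integer, is_valid_email
)

# Each helper answers a single yes/no question about its input.
print(is_url("https://letterboxd.com/nmcassa/"))  # True
print(is_null_or_empty(""))                       # True
print(is_non_negative_integer(-1))                # False
print(is_valid_email("user@example.com"))         # True
```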
/letterboxdpy/core/decorators.py:
--------------------------------------------------------------------------------
1 | from functools import wraps
2 |
3 | # -- DECORATORS --
4 |
5 | def assert_instance(expected_class: type) -> callable:
6 | """Ensures the argument passed is an instance of a specified class."""
7 |
8 | def decorator(func):
9 | @wraps(func)
10 | def wrapper(instance, *args, **kwargs):
11 | """
12 | Verifies if the argument is an instance of the expected class.
13 |
14 | Args:
15 | instance: Object to check if it's an instance of the expected class.
16 | *args: Additional positional arguments.
17 | **kwargs: Additional keyword arguments.
18 |
19 | Returns:
20 | The result of the decorated function.
21 |
22 | Raises:
23 | AssertionError: If the instance is not of the expected class.
24 | """
25 | if not isinstance(instance, expected_class):
26 | raise AssertionError(f"Argument {instance} is not an instance of {expected_class.__name__}")
27 | return func(instance, *args, **kwargs)
28 |
29 | return wrapper
30 |
31 | return decorator
32 |
33 |
34 | if __name__ == "__main__":
35 |
36 | @assert_instance(int)
37 | def printint(arg: int):
38 | print(arg)
39 |
40 | try:
41 | printint(1)
42 | printint("2")
43 | except AssertionError as e:
44 | print(e)
45 |
--------------------------------------------------------------------------------
/letterboxdpy/utils/utils_url.py:
--------------------------------------------------------------------------------
1 | import re
2 | from letterboxdpy.constants.project import DOMAIN_SHORT, URL_PROTOCOLS, DOMAIN
3 |
4 |
5 | def get_list_slug(url) -> str:
6 | """
7 | extract the slug from a URL containing '/list/'.
8 | example: 'https://letterboxd.com/fastfingertips/list/list_name/' -> 'list_name'
9 | """
10 | return url[url.index('/list/') + len('/list/'):].replace('/', '')
11 |
12 | def check_url_match(base_url, target_url) -> bool:
13 | """
14 |     Check whether two URLs match, treating a trailing
15 |     slash on the target URL as equivalent.
16 | """
17 | return base_url == target_url or f'{base_url}/' == target_url
18 |
19 | def is_short_url(url) -> bool:
20 | """
21 |     Check whether the URL uses the Letterboxd short domain
22 |     with any of the supported protocols.
23 | """
24 | return any(prot+DOMAIN_SHORT in url for prot in URL_PROTOCOLS)
25 |
26 | def parse_list_url(url: str) -> tuple:
27 | """Parse list URL to extract username and slug."""
28 | # URL format: https://letterboxd.com/username/list/slug/
29 | pattern = r'letterboxd\.com/([^/]+)/list/([^/]+)'
30 | match = re.search(pattern, url)
31 | if match:
32 | return match.group(1), match.group(2)
33 | raise ValueError(f"Invalid list URL format: {url}")
34 |
35 |
36 | def build_list_url(username: str, slug: str) -> str:
37 | """Build list URL from username and slug."""
38 | return f"{DOMAIN}/{username}/list/{slug}/"
--------------------------------------------------------------------------------
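A small round-trip sketch for the list URL helpers above; the example URL comes from the `get_list_slug` docstring, and the exact output of `build_list_url` depends on the `DOMAIN` constant:

```python
from letterboxdpy.utils.utils_url import parse_list_url, build_list_url, get_list_slug

url = "https://letterboxd.com/fastfingertips/list/list_name/"
username, slug = parse_list_url(url)   # ('fastfingertips', 'list_name')

print(get_list_slug(url))              # 'list_name'
print(build_list_url(username, slug))  # DOMAIN + '/fastfingertips/list/list_name/'
```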
/letterboxdpy/pages/movie_details.py:
--------------------------------------------------------------------------------
1 | from letterboxdpy.core.scraper import parse_url
2 | from letterboxdpy.constants.project import DOMAIN
3 |
4 |
5 | class MovieDetails:
6 | """Movie details page operations - production information from /details page."""
7 |
8 | def __init__(self, slug: str):
9 | """Initialize MovieDetails with a movie slug."""
10 | self.slug = slug
11 | self.url = f"{DOMAIN}/film/{slug}/details"
12 | self.dom = parse_url(self.url)
13 |
14 | def get_extended_details(self) -> dict:
15 | """Get extended details (country, studio, language) from details page."""
16 | return extract_movie_extended_details(self.dom)
17 |
18 | def extract_movie_extended_details(dom) -> dict:
19 | """Extract detailed movie information from details page."""
20 | dom_details = dom.find("div", {"id": ["tab-details"]})
21 |
22 | data = {
23 | 'country': [],
24 | 'studio': [],
25 | 'language': []
26 | }
27 |
28 | if dom_details:
29 | for a in dom_details.find_all("a"):
30 | text = a.text.strip()
31 | if a['href'][1:7] == 'studio':
32 | data['studio'].append(text)
33 | elif a['href'][7:14] == 'country':
34 | data['country'].append(text)
35 | elif a['href'][7:15] == 'language':
36 | data['language'].append(text)
37 |
38 | return data
39 |
40 | if __name__ == "__main__":
41 | details = MovieDetails("v-for-vendetta")
42 | print(details.get_extended_details())
--------------------------------------------------------------------------------
/letterboxdpy/pages/movie_reviews.py:
--------------------------------------------------------------------------------
1 | from letterboxdpy.constants.project import DOMAIN
2 |
3 |
4 | class MovieReviews:
5 | """Movie reviews page operations - user reviews for this movie."""
6 |
7 | def __init__(self, slug: str):
8 | """Initialize MovieReviews with a movie slug."""
9 | self.slug = slug
10 | self.url = f"{DOMAIN}/film/{slug}/reviews"
11 |
12 | def get_reviews(self) -> dict:
13 | """Get all reviews for this movie."""
14 | return extract_movie_reviews(self.url)
15 |
16 | def get_reviews_by_rating(self, rating: float) -> dict:
17 | """Get reviews filtered by rating."""
18 | return extract_movie_reviews_by_rating(self.url, rating)
19 |
20 |
21 | def extract_movie_reviews(url: str) -> dict:
22 | """Extract all reviews for a movie."""
23 |
24 | # TODO: Implement movie reviews extraction
25 | # This would parse /film/slug/reviews/ page
26 | # Similar to user_reviews.py but for movie reviews
27 |
28 | return {
29 | 'available': False,
30 | 'count': 0,
31 | 'reviews': []
32 | }
33 |
34 |
35 | def extract_movie_reviews_by_rating(url: str, rating: float) -> dict:
36 | """Extract reviews filtered by specific rating."""
37 | by_rating_url = f"{url}/by/rating/{rating}"
38 |
39 | # TODO: Implement movie reviews by rating extraction
40 | # This would parse /film/slug/reviews/by/rating/X/ page
41 |
42 | return {
43 | 'available': False,
44 | 'rating': rating,
45 | 'count': 0,
46 | 'reviews': []
47 | }
--------------------------------------------------------------------------------
/docs/user/funcs/user_diary.md:
--------------------------------------------------------------------------------
 1 | user_diary(user object)
 2 |
3 | ```python
4 | from letterboxdpy import user
5 | user_instance = user.User("nmcassa")
6 | print(user.user_diary(user_instance))
7 | ```
8 |
9 |
10 | Click to expand the demo response for user_diary method or view the full response
11 |
12 | ```json
13 | {
14 | "entries": {
15 | "513520182": {
16 | "name": "Black Swan",
17 | "slug": "black-swan",
18 | "id": "20956",
19 | "release": 2010,
20 | "runtime": 108,
21 | "rewatched": false,
22 | "rating": 9,
23 | "liked": true,
24 | "reviewed": false,
25 | "date": {
26 | "year": 2024,
27 | "month": 1,
28 | "day": 15
29 | },
30 | "page": 1
31 | },...
32 | ...},
33 | "129707465": {
34 | "name": "mid90s",
35 | "slug": "mid90s",
36 | "id": "370451",
37 | "release": 2018,
38 | "runtime": 86,
39 | "rewatched": false,
40 | "rating": 8,
41 | "liked": false,
42 | "reviewed": false,
43 | "date": {
44 | "year": 2020,
45 | "month": 10,
46 | "day": 20
47 | },
48 | "page": 7
49 | }
50 | },
51 | "count": 337,
52 | "last_page": 7
53 | }
54 | ```
55 |
--------------------------------------------------------------------------------
/letterboxdpy/core/encoder.py:
--------------------------------------------------------------------------------
1 | from json import JSONEncoder
2 | from letterboxdpy.core.exceptions import CustomEncoderError
3 |
4 |
5 | class Encoder(JSONEncoder):
6 | """
7 | Encoder class provides a way to serialize custom class
8 | .. instances to JSON by overriding the default serialization
9 | .. logic to return the object's namespace dictionary.
10 | """
11 | def default(self, o):
12 | if not hasattr(o, '__dict__'):
13 | raise CustomEncoderError(f"Object of type {type(o).__name__} has no __dict__ attribute")
14 |
15 | try:
16 | return o.__dict__
17 | except Exception as e:
18 | raise CustomEncoderError("An error occurred during encoding") from e
19 |
20 | class SecretsEncoder(JSONEncoder):
21 | """JSON encoder that excludes specified attributes from the output."""
22 |
23 | def __init__(self, secrets: list = ['secrets'], **kwargs):
24 | if not isinstance(secrets, list):
25 | raise TypeError("secrets must be a list")
26 | if not secrets:
27 | raise ValueError("secrets must not be empty")
28 | if not all(isinstance(attr, str) for attr in secrets):
29 | raise TypeError("All elements in secrets must be strings")
30 |
31 | self.secrets = set(secrets)
32 | super().__init__(**kwargs)
33 |
34 | def default(self, o):
35 | """Encodes the object to JSON format excluding specified attributes."""
36 | if not hasattr(o, '__dict__'):
37 | raise CustomEncoderError(f"Object of type {type(o).__name__} has no __dict__ attribute")
38 | return {k: v for k, v in o.__dict__.items() if k not in self.secrets}
--------------------------------------------------------------------------------
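A short sketch of how the two encoders above plug into `json.dumps`; `Profile` is a hypothetical class invented for illustration, and the `secrets=[...]` keyword mirrors the pattern used later in `watchlist.py`:

```python
from json import dumps
from letterboxdpy.core.encoder import Encoder, SecretsEncoder

class Profile:
    """Hypothetical object with a __dict__ to serialize."""
    def __init__(self):
        self.username = "nmcassa"
        self.secrets = {"token": "hidden"}

profile = Profile()
# Encoder falls back to the instance's __dict__.
print(dumps(profile, indent=2, cls=Encoder))
# SecretsEncoder drops the attributes listed in `secrets`.
print(dumps(profile, indent=2, cls=SecretsEncoder, secrets=["secrets"]))
```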
/letterboxdpy/pages/movie_members.py:
--------------------------------------------------------------------------------
1 | from letterboxdpy.core.scraper import parse_url
2 | from letterboxdpy.constants.project import DOMAIN
3 | from letterboxdpy.utils.utils_parser import extract_numeric_text
4 |
5 |
6 | class MovieMembers:
7 | """Movie members page operations - watchers statistics."""
8 |
9 | def __init__(self, slug: str):
10 | """Initialize MovieMembers with a movie slug."""
11 | self.slug = slug
12 | self.url = f"{DOMAIN}/film/{slug}/members"
13 | self.dom = parse_url(self.url)
14 |
15 | def get_watchers_stats(self) -> dict:
16 | """Get movie watchers' statistics."""
17 | return extract_movie_watchers_stats(self.dom)
18 |
19 | # TODO: /fans, /likes, /reviews, /lists
20 |
21 | def extract_movie_watchers_stats(dom) -> dict:
22 | """Extract movie watchers' statistics from members page."""
23 | try:
24 | # Extract watchers data from DOM.
25 | stats = {}
26 | content_nav = dom.find("div", {"id": "content-nav"})
27 | if content_nav:
28 | for a in content_nav.find_all("a", title=True):
29 | a_text = a.text.strip().lower()
30 | a_title = a['title']
31 | count = extract_numeric_text(a_title)
32 | stats[a_text] = count
33 | return stats
34 | except Exception as e:
35 | raise RuntimeError("Failed to retrieve movie watchers' statistics") from e
36 |
37 | if __name__ == "__main__":
38 | members_instance = MovieMembers("v-for-vendetta")
39 |
40 | print(f"Movie: {members_instance.slug}")
41 | for key, value in members_instance.get_watchers_stats().items():
42 | print(f"{key}: {value}")
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Letterboxd Examples
2 |
3 | Example scripts demonstrating `letterboxdpy` library features.
4 |
5 | ## Installation
6 |
7 | ```bash
8 | pip install -e .
9 | pip install -r examples/requirements.txt
10 | ```
11 |
12 | ## Examples
13 |
14 | **`user_rating_plot.py`**
15 | Creates a rating distribution histogram with Letterboxd styling.
16 | ```bash
17 | python examples/user_rating_plot.py --user <username>
18 | ```
19 |
20 | **`user_plot_statistics.py`**
21 | Visualizes movie watching patterns over time with monthly and daily statistics.
22 | ```bash
23 | python examples/user_plot_statistics.py --user <username> --start-year 2020 --end-year 2024
24 | ```
25 |
26 | **`follow_stats.py`**
27 | Analyzes follow relationships, followback ratios, and mutual follows.
28 | ```bash
29 | echo <username> | python examples/follow_stats.py
30 | ```
31 |
32 | **`export_user_data.py`**
33 | Exports all user data (films, reviews, lists, followers, etc.) to JSON files.
34 | ```bash
35 | echo <username> | python examples/export_user_data.py
36 | ```
37 |
38 | **`export_user_diary_posters.py`**
39 | Downloads movie posters from diary entries and organizes them by year.
40 | ```bash
41 | echo <username> | python examples/export_user_diary_posters.py
42 | ```
43 |
44 | **`search_and_export_lists.py`**
45 | Searches for lists by query and exports them to CSV format.
46 | ```bash
47 | echo -e "query\n3" > input.txt
48 | Get-Content input.txt | python examples/search_and_export_lists.py
49 | ```
50 |
51 | ## Requirements
52 |
53 | - **Core**: requests, beautifulsoup4, lxml, validators
54 | - **Visualization**: matplotlib, numpy, pillow
55 | - **Data Processing**: pandas
56 |
57 | See `requirements.txt` for details.
58 |
--------------------------------------------------------------------------------
/letterboxdpy/utils/utils_terminal.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 |
4 |
5 | def get_input(prompt: str, *, index: int = None, expected_type: type = str) -> any:
6 | """"Retrieve value from command-line argument or prompt user for input."""
7 | def convert(value):
8 | return expected_type(value)
9 |
10 | if index:
11 | try:
12 | return convert(sys.argv[index])
13 | except (IndexError, ValueError):
14 | pass
15 |
16 | while True:
17 | try:
18 | value = input(prompt).strip()
19 | if value:
20 | return convert(value)
21 | except ValueError:
22 | pass
23 | except KeyboardInterrupt:
24 | print("\nKeyboard interrupt detected. Exiting...")
25 | sys.exit(0)
26 |
27 | def args_exists() -> bool:
28 | """Check if command-line arguments exist."""
29 | return len(sys.argv) > 1
30 |
31 | # CORE
32 |
33 | def get_arg(index: int, default: str = None) -> str:
34 | """Retrieve command-line argument at a given index."""
35 | if index < 0:
36 | raise ValueError("Index cannot be negative")
37 | if len(sys.argv) > index:
38 | return sys.argv[index]
39 | return default
40 |
41 | def ask_confirmation(prompt: str = "Do you want to continue? (y/n): ") -> bool:
42 | """Prompt the user for confirmation and return boolean response."""
43 | response = input(prompt).lower()
44 | return response in ['y', 'yes']
45 |
46 | def clear_screen() -> None:
47 | """Clear the terminal screen based on the operating system."""
48 | os_name = os.name
49 | if os_name == 'nt':
50 | os.system('cls')
51 | elif os_name == 'posix':
52 | os.system('clear')
53 | else:
54 | raise NotImplementedError("Unsupported operating system")
--------------------------------------------------------------------------------
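A brief sketch combining the terminal helpers above to resolve a username either from `sys.argv` or interactively; the fallback value is just the demo account used throughout these docs:

```python
from letterboxdpy.utils.utils_terminal import args_exists, get_arg, get_input

# Prefer the first CLI argument, otherwise prompt.
if args_exists():
    username = get_arg(1, default="nmcassa")
else:
    username = get_input("Enter username: ")
print(f"Selected user: {username}")
```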
/docs/user/funcs/user_activity.md:
--------------------------------------------------------------------------------
1 | get_activity()
2 |
3 | ```python
4 | from letterboxdpy.user import User
5 | user_instance = User("nmcassa")
6 | print(user_instance.get_activity())
7 | ```
8 |
9 |
10 | Click to expand the demo response for get_activity method
11 |
12 | ```json
13 | {
14 | "metadata": {
15 | "export_timestamp": "2025-08-28T17:31:22.001861",
16 | "source_url": "https://letterboxd.com/ajax/activity-pagination/nmcassa",
17 | "total_activities": 3
18 | },
19 | "activities": {
20 | "9659817024": {
21 | "activity_type": "review",
22 | "timestamp": "2025-08-24T14:40:23.000000Z",
23 | "content": {
24 | "action": "watched",
25 | "description": "nmcassa watched and rated The Matrix ★★★★★",
26 | "movie": {
27 | "title": "The Matrix",
28 | "year": 1999,
29 | "slug": "the-matrix",
30 | "url": "https://letterboxd.com/film/the-matrix/"
31 | },
32 | "rating": 5.0
33 | }
34 | },
35 | "9624102431": {
36 | "activity_type": "basic",
37 | "timestamp": "2025-08-19T16:49:13.000000Z",
38 | "content": {
39 | "action": "liked",
40 | "description": "nmcassa liked Ben Wold's review of Superman",
41 | "movie": {
42 | "title": "Superman",
43 | "slug": "superman",
44 | "url": "https://letterboxd.com/film/superman/"
45 | }
46 | }
47 | },
48 | "9624100380": {
49 | "activity_type": "basic",
50 | "timestamp": "2025-08-19T16:48:50.000000Z",
51 | "content": {
52 | "action": "added",
53 | "description": "nmcassa added The Substance to their watchlist",
54 | "movie": {
55 | "title": "The Substance",
56 | "slug": "the-substance",
57 | "url": "https://letterboxd.com/film/the-substance/"
58 | }
59 | }
60 | }
61 | }
62 | }
63 | ```
64 |
--------------------------------------------------------------------------------
/letterboxdpy/url.py:
--------------------------------------------------------------------------------
1 | def get_live_feed_url() -> str:
2 | # total watches and last reviews
3 | return "https://letterboxd.com/csi/films-live-feed/"
4 |
5 | def get_metadata_url() -> str:
6 | return "https://letterboxd.com/ajax/letterboxd-metadata/"
7 |
8 | # -- FILM --
9 |
10 | def get_popular_lists_url(film_slug: str) -> str:
11 | # top lists
12 | return f"https://letterboxd.com/csi/film/{film_slug}/popular-lists/"
13 |
14 | def get_recent_reviews_url(film_slug: str) -> str:
15 | # last reviews
16 | return f"https://letterboxd.com/csi/film/{film_slug}/recent-reviews/"
17 |
18 | def get_rating_histogram_url(film_slug: str) -> str:
19 | # fan count and ratings
20 | return f"https://letterboxd.com/csi/film/{film_slug}/rating-histogram/"
21 |
22 | def get_user_actions_url(film_slug: str) -> str:
23 | return f"https://letterboxd.com/csi/film/{film_slug}/sidebar-user-actions/"
24 |
25 | def get_stats_url(film_slug: str) -> str:
26 | # watches, lists and likes
27 | return f"https://letterboxd.com/csi/film/{film_slug}/stats/"
28 |
29 | def get_news_url(film_slug: str) -> str:
30 | # posts: journal, video, etc.
31 | return f"https://letterboxd.com/csi/film/{film_slug}/news/"
32 |
33 | def get_availability_url(film_slug: str) -> str:
34 | # trailer and services
35 | return f"https://letterboxd.com/csi/film/{film_slug}/availability/"
36 |
37 | """
38 | # -- USER --
39 |
40 | def get_user_homepage_url() -> str:
41 | return "https://letterboxd.com/ajax/user-homepage/"
42 |
43 | def get_friend_reviews_url(film_slug: str) -> str:
44 | return f"https://letterboxd.com/csi/film/{film_slug}/friend-reviews/"
45 |
46 | def get_friend_activity_url(film_slug: str) -> str:
47 | return f"https://letterboxd.com/csi/film/{film_slug}/friend-activity/"
48 |
49 | def get_own_reviews_url(film_slug: str) -> str:
50 | return f"https://letterboxd.com/csi/film/{film_slug}/own-reviews/"
51 |
52 | def get_likes_reviews_url(film_slug: str) -> str:
53 | return "https://letterboxd.com/csi/film/{film_slug}/liked-reviews/"
54 | """
--------------------------------------------------------------------------------
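These builders only format URL strings; fetching them is left to the caller. A quick sketch using the film slug that appears elsewhere in this repository:

```python
from letterboxdpy.url import get_rating_histogram_url, get_stats_url, get_availability_url

slug = "v-for-vendetta"
print(get_rating_histogram_url(slug))  # .../csi/film/v-for-vendetta/rating-histogram/
print(get_stats_url(slug))             # .../csi/film/v-for-vendetta/stats/
print(get_availability_url(slug))      # .../csi/film/v-for-vendetta/availability/
```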
/letterboxdpy/utils/utils_file.py:
--------------------------------------------------------------------------------
1 | import os
2 | from json import dump as json_dump
3 | from typing import Union
4 |
5 |
6 | def save_data(path: str, data: dict, format: str = 'json') -> None:
7 | """Save data to a file in the specified format."""
8 | if format == 'json':
9 | save_json(path, data)
10 | else:
11 | raise ValueError(f"Unsupported format '{format}'. Only 'json' is currently supported.")
12 |
13 | def check_and_create_dirs(directories: Union[list, str]) -> None:
14 | """Checks if directories exist, creates them if not."""
15 | if isinstance(directories, str):
16 | directories = [directories]
17 |
18 | print('\nChecking directories...')
19 | for directory in directories:
20 | create_directory(directory)
21 | print('\tAll directories checked, continuing...', end='\n\n')
22 |
23 | def save_json(path: str, data: dict) -> None:
24 | """Save data to a file as JSON."""
25 | with open(f'{path}.json', 'w') as f:
26 | json_dump(data, f, indent=2)
27 |
28 | def create_directory(directory: str) -> None:
29 | """Creates a directory if it does not exist."""
30 | try:
31 | if not os.path.exists(directory):
32 | print(f'\tCreating {directory}')
33 | os.makedirs(directory, exist_ok=True)
34 | else:
35 | print(f'\tFound {directory}')
36 | except OSError as e:
37 | print(f"\tError creating {directory}: {e}")
38 |
39 | def build_path(*segments: str, normalize: bool = True) -> str:
40 | """Build and format file paths from the given segments."""
41 | path = os.path.join(*segments)
42 | if normalize:
43 | return os.path.normpath(path)
44 | return path
45 |
46 | def build_click_url(file_path: str, protocol: str = 'file') -> str:
47 | """Build a clickable file URL with the specified protocol."""
48 | if protocol == 'file':
49 | return f"file:///{build_path(os.getcwd(), file_path).replace(os.sep, '/')}"
50 | elif protocol in ['http', 'https']:
51 | return f"{protocol}://{file_path}"
52 | else:
53 | raise ValueError(f"Unsupported protocol '{protocol}'")
--------------------------------------------------------------------------------
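A small sketch of the file helpers above; the `exports/demo` directory and the sample payload are hypothetical, and `save_json` appends the `.json` extension itself:

```python
from letterboxdpy.utils.utils_file import (
    build_path, check_and_create_dirs, save_json, build_click_url
)

export_dir = build_path("exports", "demo")    # hypothetical output directory
check_and_create_dirs(export_dir)

file_path = build_path(export_dir, "sample")  # extension added by save_json
save_json(file_path, {"hello": "letterboxd"})
print(build_click_url(f"{file_path}.json"))   # clickable file:/// URL
```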
/docs/user/funcs/user_reviews.md:
--------------------------------------------------------------------------------
1 |
2 | user_reviews(user object)
3 |
4 | ```python
5 | from letterboxdpy import user
6 | user_instance = user.User("nmcassa")
7 | print(user.user_reviews(user_instance))
8 | ```
9 |
10 |
11 | Click to expand user_reviews method response
12 |
13 | ```json
14 | {
15 | "reviews": {
16 | "495592379": {
17 | "movie": {
18 | "name": "Poor Things",
19 | "slug": "poor-things-2023",
20 | "id": "710352",
21 | "release": 2023,
22 | "link": "https://letterboxd.com/film/poor-things-2023/"
23 | },
24 | "type": "Watched",
25 | "no": 0,
26 | "link": "https://letterboxd.com/nmcassa/film/poor-things-2023/",
27 | "rating": 6,
28 | "review": {
29 | "content": "It looks like AI art and weird movie",
30 | "spoiler": false
31 | },
32 | "date": {
33 | "year": 2023,
34 | "month": 12,
35 | "day": 26
36 | },
37 | "page": 1
38 | },
39 | "152420824": {
40 | "movie": {
41 | "name": "I'm Thinking of Ending Things",
42 | "slug": "im-thinking-of-ending-things",
43 | "id": "430806",
44 | "release": 2020,
45 | "link": "https://letterboxd.com/film/im-thinking-of-ending-things/"
46 | },
47 | "type": "Watched",
48 | "no": 0,
49 | "link": "https://letterboxd.com/nmcassa/film/im-thinking-of-ending-things/",
50 | "rating": 8,
51 | "review": {
52 | "content": "yeah i dont get it",
53 | "spoiler": false
54 | },
55 | "date": {
56 | "year": 2021,
57 | "month": 2,
58 | "day": 14
59 | },
60 | "page": 1
61 | }
62 | },
63 | "count": 7,
64 | "last_page": 1
65 | }
66 | ```
67 |
--------------------------------------------------------------------------------
/letterboxdpy/avatar.py:
--------------------------------------------------------------------------------
1 | class Avatar:
2 | """Class to manage avatar URLs and upscale them if necessary."""
3 |
4 | # Default upscale size
5 | UPSCALE_SIZE = (1000, 1000)
6 | # List of default sizes to check against
7 | DEFAULT_SIZES = [(80, 80), (220, 220)]
8 |
9 | def __init__(self, url: str) -> None:
10 | """Initialize Avatar with the provided URL."""
11 | self.top_level = url.split('.')[0].split('//')[1]
12 | # Top levels: avatar:a, statics:s, secure
13 | self.avatar_exists = self.top_level == 'a'
14 | # Storing the URL without query parameters if the avatar exists
15 | self.url = url.split('?')[0] if self.avatar_exists else url
16 | # Initializing data dictionary with the initial state
17 | self.data = {
18 | 'exists': self.avatar_exists,
19 | 'upscaled': False,
20 | 'url': self.url
21 | }
22 | # Storing a copy of data for internal use
23 | self._upscaled_data = self.data.copy()
24 |
25 | @property
26 | def upscaled_data(self) -> dict:
27 | """Return upscaled avatar data if applicable."""
28 | if self.avatar_exists:
29 | for default_size in self.DEFAULT_SIZES:
30 | pattern_default = '-0-'.join(map(str, default_size))
31 | # If a match is found, update the data with upscaled information
32 | if pattern_default in self.url:
33 | pattern_upscale = '-0-'.join(map(str, self.UPSCALE_SIZE))
34 | self._upscaled_data.update({
35 | 'upscaled': True,
36 | 'url': self.url.replace(pattern_default, pattern_upscale)
37 | })
38 | return self._upscaled_data
39 |
40 |
41 | if __name__ == '__main__':
42 | try:
43 | print(Avatar('https://unknown.example.com/test.png').upscaled_data)
44 | print(Avatar('https://s.example.com/a/0-220-0-220.png').upscaled_data)
45 | print(Avatar('https://a.example.com/a/0-220-0-220.png').upscaled_data)
46 | print(Avatar('https://a.example.com/a/0-80-0-80.png').upscaled_data)
47 | except Exception as e:
48 | raise RuntimeError(f"An error occurred: {e}")
49 |
--------------------------------------------------------------------------------
/letterboxdpy/members.py:
--------------------------------------------------------------------------------
1 | if __loader__.name == '__main__':
2 | import sys
3 | sys.path.append(sys.path[0] + '/..')
4 |
5 | from json import (
6 | dump as json_dump,
7 | dumps as json_dumps,
8 | loads as json_loads
9 | )
10 | import re
11 | from typing import List
12 | from letterboxdpy.core.encoder import Encoder
13 | from letterboxdpy.core.scraper import parse_url
14 |
15 |
16 | class Members:
17 | """Class for handling member data from Letterboxd."""
18 |
19 | MEMBERS_YEAR_TOP = "https://letterboxd.com/members/popular/this/year/"
20 | MEMBERS_PER_PAGE = 30
21 |
22 | def __init__(self, url: str = ""):
23 | """Initialize Members with the base URL."""
24 | self.url = url
25 |
26 | def self_check_value(self, value: str) -> None:
27 | """Check if the value contains only valid characters."""
28 | if not re.match("^[A-Za-z0-9_]+$", value):
29 | raise ValueError(f"Invalid {self.__class__.__name__}")
30 |
31 | def __str__(self) -> str:
32 | """Return a JSON string representation of the instance."""
33 | return json_dumps(self, indent=2, cls=Encoder)
34 |
35 | def jsonify(self) -> dict:
36 | """Convert the instance to a JSON dictionary."""
37 | return json_loads(self.__str__())
38 |
39 | # -- FUNCTIONS --
40 |
41 | def top_users(max:int = 100) -> List:
42 | """Fetch the top n members from the Letterboxd popular members page."""
43 | # max 256 page?
44 | members_instance = Members()
45 |
46 | data = []
47 | page = 1
48 | while True:
49 | url = f"{members_instance.MEMBERS_YEAR_TOP}page/{page}/"
50 | dom = parse_url(url)
51 |
52 | table = dom.find_all('table', {"class": ["member-table"]})[0]
53 | avatars = table.find_all("a", {"class": ["avatar -a40"]})
54 |
55 | for avatar in avatars:
56 | user_url = avatar['href']
57 | user_name = user_url.replace('/', '')
58 | data.append(user_name)
59 |
60 | if len(data) >= max:
61 | return data
62 |
63 | if len(avatars) < members_instance.MEMBERS_PER_PAGE:
64 | break
65 |
66 | page += 1
67 |
68 | return data
69 |
70 | if __name__=="__main__":
71 | data = top_users(max=200)
72 | with open(f'top_members_{len(data)}.json', 'w') as f:
73 | json_dump(data, f, indent=2)
--------------------------------------------------------------------------------
/letterboxdpy/pages/user_tags.py:
--------------------------------------------------------------------------------
1 | from letterboxdpy.core.scraper import parse_url
2 | from letterboxdpy.constants.project import DOMAIN
3 |
4 |
5 | class UserTags:
6 |
7 | def __init__(self, username: str) -> None:
8 | self.username = username
9 | self.url = f"{DOMAIN}/{self.username}/tags"
10 | self.films_url = f"{DOMAIN}/{self.username}/tags/films"
11 | self.diary_url = f"{DOMAIN}/{self.username}/tags/diary"
12 | self.reviews_url = f"{DOMAIN}/{self.username}/tags/reviews"
13 | self.lists_url = f"{DOMAIN}/{self.username}/tags/lists"
14 |
15 | def get_user_tags(self) -> dict: return extract_user_tags(self.url)
16 |
17 | def extract_user_tags(url: str) -> dict:
18 | BASE_URL = url
19 | PAGES = ['films', 'diary', 'reviews', 'lists']
20 |
21 | def extract_tags(page: str) -> dict:
22 | """Extract tags from the page."""
23 |
24 | def fetch_dom() -> any:
25 | """Fetch and return the DOM for the page."""
26 | return parse_url(f"{BASE_URL}/{page}")
27 |
28 | def parse_tag(tag) -> dict:
29 | """Extract tag information from a single tag element."""
30 | name = tag.a.text.strip()
31 | title = tag.a['title']
32 | link = tag.a['href']
33 | slug = link.split('/')[-3]
34 | count = int(tag.span.text.strip() or 1)
35 | return {
36 | 'name': name,
37 | 'title': title,
38 | 'slug': slug,
39 | 'link': DOMAIN + link,
40 | 'count': count,
41 | }
42 |
43 | dom = fetch_dom()
44 | tags_ul = dom.find("ul", {"class": "tags-columns"})
45 | data = {}
46 |
47 | if not tags_ul:
48 | return data
49 |
50 | tags = tags_ul.find_all("li")
51 | index = 1
52 | for tag in tags:
53 | if 'href' in tag.a.attrs:
54 | tag_info = parse_tag(tag)
55 | tag_info['no'] = index
56 | data[tag_info['slug']] = tag_info
57 | index += 1
58 |
59 | return data
60 |
61 | data = {}
62 | for page in PAGES:
63 | tags = extract_tags(page)
64 | data[page] = {
65 | 'tags': tags,
66 | 'count': len(tags)
67 | }
68 |
69 | data['total_count'] = sum(data[page]['count'] for page in PAGES)
70 |
71 | return data
72 |
--------------------------------------------------------------------------------
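A usage sketch for `UserTags`; it scrapes letterboxd.com, so it needs network access, and `nmcassa` is simply the demo account used in these docs:

```python
from letterboxdpy.pages.user_tags import UserTags

tags = UserTags("nmcassa").get_user_tags()
print("total tags:", tags["total_count"])
for page in ("films", "diary", "reviews", "lists"):
    print(page, tags[page]["count"])
```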
/examples/follow_stats.py:
--------------------------------------------------------------------------------
1 | """
2 | Letterboxd Follow Statistics Analyzer
3 |
4 | Analyzes follow relationships and statistics for Letterboxd users.
5 | - Calculate followback ratios and mutual follows
6 | - Identify fans and non-followback accounts
7 | - Generate detailed follow statistics
8 | - JSON output format
9 | """
10 |
11 | import sys
12 | from json import dumps as json_dumps
13 |
14 | from letterboxdpy import user
15 | from letterboxdpy.utils.utils_terminal import get_input, args_exists
16 |
17 | class FollowStatsAnalyzer:
18 | """Analyze follow statistics for Letterboxd users."""
19 |
20 | def __init__(self, username: str):
21 | self.username = username
22 | self.user_instance = user.User(username)
23 |
24 | def analyze(self) -> dict:
25 | """Analyze follow statistics for the user."""
26 | followers = self.user_instance.get_followers()
27 | following = self.user_instance.get_following()
28 | return self._calculate_stats(following, followers)
29 |
30 | def _calculate_stats(self, following: dict, followers: dict) -> dict:
31 | """Calculate follow statistics from followers and following data."""
32 | following_set = set(following.keys())
33 | followers_set = set(followers.keys())
34 |
35 | not_followback = list(following_set - followers_set)
36 | followback = list(following_set & followers_set)
37 | fans = list(followers_set - following_set)
38 |
39 | return {
40 | 'summary': {
41 | 'total_following': len(following_set),
42 | 'total_followers': len(followers_set),
43 | 'mutual_follows': len(followback),
44 | 'not_followback_count': len(not_followback),
45 | 'fans_count': len(fans),
46 | 'followback_ratio': round(len(followback) / len(following_set) * 100, 2) if following_set else 0
47 | },
48 | 'details': {
49 | 'not_followback': not_followback,
50 | 'followback': followback,
51 | 'fans': fans
52 | }
53 | }
54 |
55 | if __name__ == "__main__":
56 | if not args_exists():
57 |         print(f'Quick usage: python {sys.argv[0]} <username>')
58 |
59 | username = get_input("Enter username: ", index=1)
60 | analyzer = FollowStatsAnalyzer(username)
61 | stats = analyzer.analyze()
62 | print(json_dumps(stats, indent=4))
--------------------------------------------------------------------------------
/examples/search_and_export_lists.py:
--------------------------------------------------------------------------------
1 | """
2 | Letterboxd List Search and Export Tool
3 |
4 | Searches for Letterboxd lists and exports them to CSV files.
5 | - Search lists by query
6 | - Export multiple lists to CSV format
7 | - Automatic directory creation
8 | - Batch processing support
9 | """
10 |
11 | import sys
12 | import os
13 | import csv
14 |
15 | from letterboxdpy.search import Search
16 | from letterboxdpy.list import List
17 | from letterboxdpy.utils.utils_terminal import get_input, args_exists
18 |
19 | def save_results_to_csv(list_instance: List, csv_file: str) -> None:
20 | """Saves movie list results to a CSV file."""
21 | directory = os.path.join(os.getcwd(), 'exports', 'lists')
22 | if not os.path.exists(directory):
23 | os.makedirs(directory)
24 |
25 | file_name = os.path.join(directory, csv_file)
26 |
27 | with open(file_name, mode='w', newline='', encoding='utf-8') as file:
28 | writer = csv.writer(file)
29 | writer.writerow(['LetterboxdURI', 'Title'])
30 | movies = list_instance.movies
31 | for movie_data in movies.values():
32 | writer.writerow([movie_data['url'], movie_data['name']])
33 | print(f"Data successfully saved to {csv_file}. Movies: {len(movies)}")
34 |
35 | if __name__ == "__main__":
36 | if not args_exists():
37 |         print(f'Quick usage: python {sys.argv[0]} <query>')
38 |
39 | search_query = get_input("Enter your search query for lists: ", index=1)
40 |
41 | search_instance = Search(search_query, "lists")
42 | search_data = search_instance.results
43 |
44 | if search_data['available']:
45 | results = search_data['results']
46 | search_count = search_data['count']
47 |
48 | print(f'Found {search_count} lists. ')
49 | max_lists = get_input('How many to export? (0 for all): ', index=2, expected_type=int)
50 |
51 | if max_lists == 0:
52 | max_lists = search_count
53 |
54 | print(f'Exporting first {max_lists} lists...')
55 | results = results[:max_lists]
56 |
57 | for result in results:
58 | list_slug = result['slug']
59 | list_owner_username = result['owner']['username']
60 |
61 | list_instance = List(list_owner_username, list_slug)
62 | csv_filename = f"{list_owner_username}_{list_slug}.csv"
63 | save_results_to_csv(list_instance, csv_filename)
64 | else:
65 | print(f'No lists found for "{search_query}".')
--------------------------------------------------------------------------------
/letterboxdpy/watchlist.py:
--------------------------------------------------------------------------------
1 | if __loader__.name == '__main__':
2 | import sys
3 | sys.path.append(sys.path[0] + '/..')
4 |
5 | import re
6 | from json import (
7 | dumps as json_dumps,
8 | loads as json_loads
9 | )
10 |
11 | from letterboxdpy.core.encoder import SecretsEncoder
12 | from letterboxdpy.pages import user_watchlist
13 | from letterboxdpy.core.exceptions import PrivateRouteError
14 |
15 |
16 | class Watchlist:
17 |
18 | class WatchlistPages:
19 |
20 | def __init__(self, username: str) -> None:
21 | self.watchlist = user_watchlist.UserWatchlist(username)
22 |
23 | def __init__(self, username: str) -> None:
24 | assert re.match("^[A-Za-z0-9_]+$", username), "Invalid author"
25 |
26 | self.username = username
27 | self.pages = self.WatchlistPages(self.username)
28 |
29 | self.url = self.get_url()
30 | self.count = self.get_count()
31 |
32 | self._movies = None
33 |
34 | # Properties
35 | @property
36 | def movies(self) -> dict:
37 | if self._movies is None:
38 | self._movies = self.get_movies()
39 | return self._movies
40 |
41 | # Magic Methods
42 | def __len__(self) -> int:
43 | return self.count
44 |
45 | def __str__(self) -> str:
46 | return json_dumps(self, indent=2, cls=SecretsEncoder, secrets=['pages'])
47 |
48 | def jsonify(self) -> dict:
49 | return json_loads(self.__str__())
50 |
51 | # Data Retrieval Methods
52 | def get_owner(self): ...
53 | def get_url(self) -> str: return self.pages.watchlist.url
54 | def get_count(self) -> int: return self.pages.watchlist.get_count()
55 | def get_movies(self) -> dict: return self.pages.watchlist.get_movies()
56 |
57 |
58 | if __name__ == "__main__":
59 | import argparse
60 | import sys
61 |
62 | sys.stdout.reconfigure(encoding='utf-8')
63 |
64 | parser = argparse.ArgumentParser(description="Fetch a user's watchlist.")
65 | parser.add_argument('--user', '-u', help="Username to fetch watchlist for", required=False)
66 | args = parser.parse_args()
67 |
68 | username = args.user or input('Enter username: ').strip()
69 |
70 | while not username:
71 | username = input('Please enter a valid username: ').strip()
72 |
73 | print(f"Fetching watchlist for username: {username}")
74 |
75 | # Watchlist usage:
76 | watchlist_instance = Watchlist(username)
77 | print(watchlist_instance)
78 | try:
79 | print('URL:', watchlist_instance.url)
80 | print('Count:', watchlist_instance.count)
81 | print('Movies:', watchlist_instance.movies)
82 | except PrivateRouteError:
83 | print(f"Error: User's watchlist is private.")
84 |
--------------------------------------------------------------------------------
/docs/user/funcs/user_liked_reviews.md:
--------------------------------------------------------------------------------
1 | user_liked_reviews(user object)
2 |
3 | ```python
4 | from letterboxdpy import user
5 | user_instance = user.User("nmcassa")
6 | print(user.user_liked_reviews(user_instance))
7 | ```
8 |
9 |
10 | Click to expand user_liked_reviews method response
11 |
12 | ```json
13 | {
14 | "reviews": {
15 | "666730921": {
16 | "type": "Rewatched",
17 | "no": 0,
18 | "url": "https://letterboxd.com/ppark/film/mean-girls/",
19 | "rating": 8,
20 | "review": {
21 | "content": "Refreshing",
22 | "spoiler": false,
23 | "date": {
24 | "year": 2024,
25 | "month": 9,
26 | "day": 7
27 | }
28 | },
29 | "user": {
30 | "username": "ppark",
31 | "display_name": "ppark",
32 | "url": "https://letterboxd.com/ppark/"
33 | },
34 | "movie": {
35 | "name": "Mean Girls",
36 | "slug": "mean-girls",
37 | "id": "46049",
38 | "release": 2004,
39 | "url": "https://letterboxd.com/film/mean-girls/"
40 | },
41 | "page": 1
42 | },
43 | ...
44 | "80658991": {
45 | "type": "Added",
46 | "no": 0,
47 | "url": "https://letterboxd.com/kurstboy/film/the-departed/",
48 | "rating": 9,
49 | "review": {
50 | "content": "Great way to end my Scorsese binge!That final shot is perfect and the whole third act feels tight as hell. The entire film is rich with interesting approaches to the subject matter which is fitting for a plot that grabs your attention within the first 5 minutes. Scorsese is just spitballing here and throwing every idea at the wall, his love for filmmaking shines brighter here than in something like Hugo. Don't know what to add to the table\u2026",
51 | "spoiler": false,
52 | "date": {
53 | "year": 2019,
54 | "month": 11,
55 | "day": 24
56 | }
57 | },
58 | "user": {
59 | "username": "Kurstboy",
60 | "display_name": "Karsten",
61 | "url": "https://letterboxd.com/kurstboy/"
62 | },
63 | "movie": {
64 | "name": "The Departed",
65 | "slug": "the-departed",
66 | "id": "51042",
67 | "release": 2006,
68 | "url": "https://letterboxd.com/film/the-departed/"
69 | },
70 | "page": 2
71 | }
72 | }
73 | }
74 | ```
75 |
--------------------------------------------------------------------------------
/letterboxdpy/pages/user_activity.py:
--------------------------------------------------------------------------------
1 | from letterboxdpy.core.scraper import parse_url
2 | from letterboxdpy.constants.project import DOMAIN
3 | from letterboxdpy.utils.date_utils import DateUtils
4 | from letterboxdpy.utils.activity_extractor import (
5 | parse_activity_datetime, build_time_data, get_event_type, get_log_title,
6 | get_log_type, process_review_activity, process_basic_activity,
7 | process_newlist_activity, get_log_item_slug
8 | )
9 |
10 |
11 | class UserActivity:
12 |
13 | def __init__(self, username: str) -> None:
14 | self.username = username
15 | self._base_url = f"{DOMAIN}/ajax/activity-pagination/{self.username}"
16 |
17 | # Activity endpoints
18 | self.activity_url = self._base_url
19 | self.activity_following_url = f"{self._base_url}/following"
20 |
21 | def get_activity(self) -> dict: return extract_activity(self.activity_url)
22 | def get_activity_following(self) -> dict: return extract_activity(self.activity_following_url)
23 |
24 | def extract_activity(ajax_url: str) -> dict:
25 |
26 | def _process_log(section, event_type) -> dict:
27 | """Process activity log and extract data."""
28 | log_id = section["data-activity-id"]
29 | date = parse_activity_datetime(section.find("time")['datetime'])
30 | log_title = get_log_title(section)
31 | log_type = get_log_type(event_type, section)
32 | log_item_slug = get_log_item_slug(event_type, section)
33 |
34 | # Build activity data structure
35 | log_data = {
36 | 'activity_type': event_type,
37 | 'timestamp': build_time_data(date),
38 | 'content': {}
39 | }
40 |
41 | # Process content by activity type
42 | if event_type == 'review':
43 | content_data = process_review_activity(section, log_type, log_item_slug)
44 | log_data['content'] = content_data
45 | elif event_type == 'basic':
46 | content_data = process_basic_activity(section, log_title, log_type, log_item_slug)
47 | log_data['content'] = content_data
48 | elif event_type == 'newlist':
49 | content_data = process_newlist_activity(section, log_title, log_type)
50 | log_data['content'] = content_data
51 |
52 | return {log_id: log_data}
53 |
54 | from datetime import datetime
55 |
56 | data = {
57 | 'metadata': {
58 | 'export_timestamp': DateUtils.format_to_iso(datetime.now()),
59 | 'source_url': ajax_url,
60 | 'total_activities': 0
61 | },
62 | 'activities': {}
63 | }
64 |
65 | dom = parse_url(ajax_url)
66 | sections = dom.find_all("section")
67 |
68 | if not sections:
69 | return data
70 |
71 | for section in sections:
72 | event_type = get_event_type(section)
73 | if event_type in ('review', 'basic', 'newlist'):
74 | log_data = _process_log(section, event_type)
75 | data['activities'].update(log_data)
76 | data['metadata']['total_activities'] = len(data['activities'])
77 | elif 'no-activity-message' in section['class']:
78 | break
79 |
80 | return data
--------------------------------------------------------------------------------
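A usage sketch for `UserActivity`; it hits the activity AJAX endpoint, so it requires network access, and the `description` field may be absent for some activity types:

```python
from letterboxdpy.pages.user_activity import UserActivity

activity = UserActivity("nmcassa").get_activity()
print("activities:", activity["metadata"]["total_activities"])
for log_id, log in activity["activities"].items():
    print(log_id, log["activity_type"], log["content"].get("description"))
```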
/docs/check_docs.py:
--------------------------------------------------------------------------------
1 | if __loader__.name == '__main__':
2 | import sys
3 | sys.path.append(sys.path[0] + '/..')
4 |
5 | import os
6 | import inspect
7 | from letterboxdpy import user, movie, films, members, search, list
8 |
9 | def get_defined_functions(module):
10 | """Returns a list of function names defined in the given module."""
11 | functions = []
12 | for name, obj in inspect.getmembers(module):
13 | if inspect.isfunction(obj) and inspect.getmodule(obj) == module:
14 | functions.append(name)
15 | return functions
16 |
17 | def get_existing_md_files(directory):
18 | """Returns a list of .md files in the given directory without extension."""
19 | md_files = [f[:-3] for f in os.listdir(directory) if f.endswith('.md')]
20 | return md_files
21 |
22 | def check_missing_md_files(functions, md_files):
23 | """Compares functions and .md files, returning functions without corresponding .md files."""
24 | missing_md = [func for func in functions if func not in md_files]
25 | return missing_md
26 |
27 | def create_md_file_for_missing_function(func_name, module, directory):
28 | """Creates a .md file for a missing function with its signature."""
29 | file_path = os.path.join(directory, f"{func_name}.md")
30 | func = getattr(module, func_name)
31 |
32 | signature = str(inspect.signature(func))
33 | docstring = inspect.getdoc(func) or "No documentation provided."
34 |
35 | with open(file_path, 'w') as file:
36 |         file.write(f'{func_name}{signature}\n\n')
37 | file.write(f'**Documentation:**\n\n{docstring}\n\n')
38 | file.write(f'[To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+{func_name})\n')
39 |
40 | def check_modules_for_missing_md(modules):
41 | """Checks each module for missing .md files and prints the results."""
42 | base_directory = "."
43 | for module_name, module in modules.items():
44 | print(f"{module_name}:")
45 | function_names = get_defined_functions(module)
46 | md_directory = os.path.join(base_directory, module_name, 'funcs')
47 |
48 | if not os.path.exists(md_directory):
49 | print(f"Directory {md_directory} does not exist. Creating...")
50 | os.makedirs(md_directory, exist_ok=True)
51 |
52 | md_files = get_existing_md_files(md_directory)
53 | missing_md_files = check_missing_md_files(function_names, md_files)
54 |
55 | for func in missing_md_files:
56 | create_md_file_for_missing_function(func, module, md_directory)
57 | print(f"✗ {func}.md missing and created.")
58 |
59 | for func in function_names:
60 | if func in md_files:
61 | print(f"✓ {func}.md exists")
62 |
63 | if not missing_md_files:
64 | print("All functions have corresponding .md files.")
65 | print()
66 |
67 | if __name__ == "__main__":
68 | modules = {
69 | 'user': user,
70 | 'movie': movie,
71 | 'films': films,
72 | 'members': members,
73 | 'search': search,
74 | 'list': list
75 | }
76 | check_modules_for_missing_md(modules)
77 |
--------------------------------------------------------------------------------
/examples/export_user_data.py:
--------------------------------------------------------------------------------
1 | """
2 | Letterboxd User Data Exporter
3 |
4 | Exports comprehensive user data from Letterboxd profiles.
5 | - Export all user data (films, reviews, lists, followers, etc.)
6 | - Automatic JSON file generation
7 | - Organized directory structure
8 | - Progress tracking and timing
9 | """
10 |
11 | import time
12 | import sys
13 | import os
14 |
15 | from letterboxdpy import user
16 | from letterboxdpy.utils.utils_string import strip_prefix
17 | from letterboxdpy.utils.utils_terminal import get_input
18 | from letterboxdpy.utils.utils_file import build_path, check_and_create_dirs, save_json, build_click_url
19 |
20 | # -- MAIN --
21 |
22 | username = get_input('Enter username: ', index=1)
23 | user_instance = user.User(username)
24 |
25 | current_directory = os.getcwd()
26 |
27 | # Export directories
28 | EXAMPLES_DIR = build_path(current_directory, 'examples')
29 | EXPORTS_DIR = build_path(EXAMPLES_DIR, 'exports')
30 | USERS_FOLDER = build_path(EXPORTS_DIR, 'users')
31 | USER_FOLDER = build_path(USERS_FOLDER, user_instance.username)
32 | directories = [EXAMPLES_DIR, EXPORTS_DIR, USERS_FOLDER, USER_FOLDER]
33 | check_and_create_dirs(directories)
34 |
35 | start_time = time.time()
36 |
37 | # Save user instance data
38 | user_data_path = build_path(USER_FOLDER, 'user')
39 | save_json(user_data_path, user_instance.jsonify())
40 |
41 | # Export data for each method
42 | # If you want to add a new method, add it here
43 | # With arg: [user.User.user_watchlist, {'filters': {'genre': ['action', '-drama']}}],
44 | methods = [
45 | user.User.get_activity,
46 | user.User.get_activity_following,
47 | user.User.get_diary,
48 | user.User.get_wrapped,
49 | user.User.get_films,
50 | [user.User.get_films_by_rating, {'rating':5}],
51 | user.User.get_films_not_rated,
52 | user.User.get_genre_info,
53 | user.User.get_liked_films,
54 | user.User.get_liked_reviews,
55 | user.User.get_lists,
56 | user.User.get_following,
57 | user.User.get_followers,
58 | user.User.get_reviews,
59 | user.User.get_user_tags,
60 | user.User.get_watchlist_movies,
61 | user.User.get_watchlist,
62 | ]
63 | methods_str_length = len(str(len(methods)))
64 |
65 | print('\nExporting data...')
66 | for no, method in enumerate(methods, 1):
67 | method_start_time = time.time()
68 |
69 | args = {}
70 | if isinstance(method, list):
71 | method, args = method
72 |
73 | method_name = method.__name__
74 | method_name_without_prefix = strip_prefix(method_name)
75 |
76 | os.system(f'title [{len(methods)}/{no:0>{methods_str_length}}] Exporting {method_name}...')
77 | print(f'[{len(methods)}/{no:0>{methods_str_length}}]: Processing "{method_name}" method',
78 | end=f' with args: {args}...\r' if args else '...\r')
79 |
80 | data = method(user_instance, **args) if args else method(user_instance)
81 |
82 | file_path = build_path(USER_FOLDER, method_name_without_prefix)
83 | save_json(file_path, data)
84 |
85 | print(f'{time.time() - method_start_time:<7.2f} seconds - {method_name:<22} - {build_click_url(file_path)}.json')
86 |
87 | os.system('title Completed!')
88 | print('\nProcessing complete!')
89 | print(f'\tTotal time: {time.time() - start_time:.2f} seconds')
90 |
91 | print('\tAt', build_click_url(USER_FOLDER), end='\n\n')
92 | os.system('pause')
--------------------------------------------------------------------------------
/letterboxdpy/utils/movies_extractor.py:
--------------------------------------------------------------------------------
1 | """
2 | Movie extraction utilities for different Letterboxd page layouts.
3 |
4 | This module provides generic functions to extract movie data from various
5 | Letterboxd page types that display movies in different layouts.
6 | """
7 |
8 | def extract_movies_from_horizontal_list(dom, max_items=12*6) -> dict:
9 | """
10 | Extract movies from horizontal movie lists.
11 |
12 | Used in:
13 | - /films/popular/, /films/genre/action/, etc.
14 | - Film discovery pages
15 | - Similar movies sections
16 |
17 | Args:
18 | dom: BeautifulSoup DOM object
19 | max_items: Maximum number of items to extract
20 |
21 | Returns:
22 | dict: Movie data with film IDs as keys
23 | """
24 | items = dom.find_all("li")
25 |
26 | rating_key = "data-average-rating"
27 | movies = {}
28 | for item in items:
29 | if len(movies) >= max_items:
30 | break
31 |
32 | movie_rating = float(item[rating_key]) if rating_key in item.attrs else None
33 | movie_id = item.div['data-film-id']
34 | movie_slug = item.div['data-item-slug']
35 | movie_name = item.img['alt']
36 |
37 | movies[movie_id] = {
38 | "slug": movie_slug,
39 | "name": movie_name,
40 | "rating": movie_rating,
41 | 'url': f'https://letterboxd.com/film/{movie_slug}/'
42 | }
43 |
44 | return movies
45 |
46 |
47 | def extract_movies_from_vertical_list(dom, max_items=20*5) -> dict:
48 | """
49 | Extract movies from vertical movie lists.
50 |
51 | Used in:
52 | - User watchlists (/user/username/watchlist/)
53 | - User lists (/user/username/list/list-name/)
54 | - User films pages
55 | - Search results
56 |
57 | Args:
58 | dom: BeautifulSoup DOM object
59 | max_items: Maximum number of items to extract
60 |
61 | Returns:
62 | dict: Movie data with film IDs as keys
63 | """
64 | def get_movie_data(item):
65 | """Extract movie ID, slug, and name from container element."""
66 | from letterboxdpy.utils.utils_string import extract_year_from_movie_name, clean_movie_name
67 |
68 | react_component = item.find("div", {"class": "react-component"}) if item.name == "li" else item
69 | if not react_component or 'data-film-id' not in react_component.attrs:
70 | return None
71 |
72 | movie_id = react_component['data-film-id']
73 | movie_slug = react_component.get('data-item-slug') or react_component.get('data-film-slug')
74 | raw_name = react_component.get('data-item-name') or react_component.img['alt']
75 | movie_name = clean_movie_name(raw_name)
76 | year = extract_year_from_movie_name(raw_name)
77 |
78 | return movie_id, {
79 | "slug": movie_slug,
80 | "name": movie_name,
81 | "year": year,
82 | 'url': f'https://letterboxd.com/film/{movie_slug}/'
83 | }
84 |
85 | items = dom.find_all("li", {"class": "posteritem"}) or dom.find_all("li", {"class": "griditem"})
86 | movies = {}
87 | for item in items:
88 | if len(movies) >= max_items:
89 | break
90 |
91 | movie_data = get_movie_data(item)
92 | if movie_data:
93 | movie_id, data = movie_data
94 | movies[movie_id] = data
95 |
96 | return movies
97 |
--------------------------------------------------------------------------------
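A sketch of the horizontal-list extractor against one of the pages named in its docstring; whether a given page still matches this layout depends on Letterboxd's current markup, and the request needs network access:

```python
from letterboxdpy.core.scraper import parse_url
from letterboxdpy.utils.movies_extractor import extract_movies_from_horizontal_list

dom = parse_url("https://letterboxd.com/films/popular/")
movies = extract_movies_from_horizontal_list(dom, max_items=12)
for film_id, film in movies.items():
    print(film_id, film["name"], film["rating"], film["url"])
```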
/docs/user/funcs/user_wrapped.md:
--------------------------------------------------------------------------------
1 | user_wrapped(user object)
2 |
3 | ```python
4 | from letterboxdpy import user
5 | user_instance = user.User("nmcassa")
6 | print(user.user_wrapped(user_instance, 2023))
7 | ```
8 |
9 |
10 | Click to expand the demo response for user_wrapped method or view the full response
11 |
12 | ```json
13 | {
14 | "year": 2023,
15 | "logged": 120,
16 | "total_review": 2,
17 | "hours_watched": 223,
18 | "total_runtime": 13427,
19 | "first_watched": {
20 | "332289592": {
21 | "name": "The Gift",
22 | "slug": "the-gift-2015-1",
23 | "id": "255927",
24 | "release": 2015,
25 | "runtime": 108,
26 | "actions": {
27 | "rewatched": false,
28 | "rating": 6,
29 | "liked": false,
30 | "reviewed": false
31 | },
32 | "date": {
33 | "year": 2023,
34 | "month": 1,
35 | "day": 1
36 | },
37 | "page": {
38 | "url": "https://letterboxd.com/nmcassa/films/diary/for/2023/page/3/",
39 | "no": 3
40 | }
41 | }
42 | },
43 | "last_watched": {
44 | "495592379": {...}
45 | },
46 | "movies": {
47 | "495592379": {
48 | "name": "Poor Things",
49 | "slug": "poor-things-2023",
50 | "id": "710352",
51 | "release": 2023,
52 | "runtime": 141,
53 | "actions": {
54 | "rewatched": false,
55 | "rating": 6,
56 | "liked": false,
57 | "reviewed": true
58 | },
59 | "date": {
60 | "year": 2023,
61 | "month": 12,
62 | "day": 26
63 | },
64 | "page": {
65 | "url": "https://letterboxd.com/nmcassa/films/diary/for/2023/page/1/",
66 | "no": 1
67 | }
68 | },...
69 | },
70 | "months": {
71 | "1": 21,
72 | "2": 7,
73 | "3": 7,
74 | "4": 6,
75 | "5": 11,
76 | "6": 9,
77 | "7": 15,
78 | "8": 11,
79 | "9": 5,
80 | "10": 9,
81 | "11": 7,
82 | "12": 12
83 | },
84 | "days": {
85 | "1": 18,
86 | "2": 14,
87 | "3": 9,
88 | "4": 17,
89 | "5": 14,
90 | "6": 27,
91 | "7": 21
92 | },
93 | "milestones": {
94 | "50": {
95 | "413604382": {
96 | "name": "Richard Pryor: Live in Concert",
97 | "slug": "richard-pryor-live-in-concert",
98 | "id": "37594",
99 | "release": 1979,
100 | "runtime": 78,
101 | "actions": {
102 | "rewatched": false,
103 | "rating": 7,
104 | "liked": false,
105 | "reviewed": false
106 | },
107 | "date": {
108 | "year": 2023,
109 | "month": 7,
110 | "day": 13
111 | },
112 | "page": {
113 | "url": "https://letterboxd.com/nmcassa/films/diary/for/2023/page/1/",
114 | "no": 1
115 | }
116 | }
117 | },
118 | "100": {
119 | "347318246": {...}
120 | }
121 | }
122 | }
123 | ```
124 |
--------------------------------------------------------------------------------
/letterboxdpy/utils/date_utils.py:
--------------------------------------------------------------------------------
1 | """Date utilities for consistent ISO 8601 format across letterboxdpy."""
2 |
3 | from datetime import datetime
4 |
5 |
6 | class InvalidDateFormatError(Exception):
7 | """Raised when date format is not recognized."""
8 | pass
9 |
10 |
11 | class DateUtils:
12 | """Centralized date utilities for consistent date handling."""
13 |
14 | ISO_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
15 | ISO_FORMAT_NO_MICROSECONDS = "%Y-%m-%dT%H:%M:%SZ"
16 |
17 | @staticmethod
18 | def parse_letterboxd_date(date_input) -> datetime | None:
19 | """Parse various date formats into datetime object."""
20 | if date_input is None:
21 | return None
22 | if isinstance(date_input, datetime):
23 | return date_input
24 | if isinstance(date_input, dict):
25 | return DateUtils._parse_date_dict(date_input)
26 | if isinstance(date_input, str):
27 | return DateUtils._parse_date_string(date_input)
28 | raise InvalidDateFormatError(f"Unsupported date format: {type(date_input)}")
29 |
30 | @staticmethod
31 | def _parse_date_dict(date_dict: dict) -> datetime:
32 | """Parse date dictionary format."""
33 | year = date_dict.get('year')
34 | month = date_dict.get('month')
35 | day = date_dict.get('day')
36 |
37 | if not all(isinstance(x, int) and x is not None for x in [year, month, day]):
38 | raise InvalidDateFormatError("Invalid date dictionary")
39 | if not (1 <= month <= 12) or not (1 <= day <= 31):
40 | raise InvalidDateFormatError("Invalid date values")
41 |
42 | return datetime(year, month, day)
43 |
44 | @staticmethod
45 | def _parse_date_string(date_string: str) -> datetime:
46 | """Parse ISO date string format."""
47 | try:
48 | return datetime.strptime(date_string, DateUtils.ISO_FORMAT)
49 | except ValueError:
50 | try:
51 | return datetime.strptime(date_string, DateUtils.ISO_FORMAT_NO_MICROSECONDS)
52 | except ValueError:
53 | raise InvalidDateFormatError(f"Invalid ISO date string: {date_string}")
54 |
55 | @staticmethod
56 | def format_to_iso(date_obj: datetime | None) -> str | None:
57 | """Format datetime object to ISO 8601 string."""
58 | if date_obj is None:
59 | return None
60 | if not isinstance(date_obj, datetime):
61 | raise InvalidDateFormatError(f"Expected datetime object, got {type(date_obj)}")
62 | return date_obj.strftime(DateUtils.ISO_FORMAT)
63 |
64 | @staticmethod
65 | def dict_to_iso(date_dict: dict) -> str:
66 | """Convert date dictionary to ISO format string."""
67 | date_obj = DateUtils._parse_date_dict(date_dict)
68 | return DateUtils.format_to_iso(date_obj)
69 |
70 | @staticmethod
71 | def iso_to_dict(iso_string: str) -> dict:
72 | """Convert ISO string to date dictionary."""
73 | date_obj = DateUtils._parse_date_string(iso_string)
74 | return {'year': date_obj.year, 'month': date_obj.month, 'day': date_obj.day}
75 |
76 | @staticmethod
77 | def to_iso(date_input) -> str | None:
78 | """Convert any date format to ISO string."""
79 | date_obj = DateUtils.parse_letterboxd_date(date_input)
80 | return DateUtils.format_to_iso(date_obj)
81 |
82 |
83 | # Backward compatibility functions
84 | def parse_activity_datetime(date_string: str) -> datetime:
85 | """Parse datetime string (backward compatibility)."""
86 | return DateUtils._parse_date_string(date_string)
87 |
88 |
89 | def build_time_data(date_obj: datetime) -> str:
90 | """Build ISO timestamp string (backward compatibility)."""
91 | return DateUtils.format_to_iso(date_obj)
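A minimal round-trip sketch of the utilities above; the date values are illustrative.

```python
# Round-trip: diary-style date dict -> ISO 8601 string -> back to a dict.
from letterboxdpy.utils.date_utils import DateUtils, InvalidDateFormatError

iso = DateUtils.to_iso({'year': 2023, 'month': 12, 'day': 26})  # "2023-12-26T00:00:00.000000Z"
parts = DateUtils.iso_to_dict(iso)                              # {'year': 2023, 'month': 12, 'day': 26}
print(iso, parts)

try:
    DateUtils.to_iso({'year': 2023, 'month': 13, 'day': 1})     # month out of range
except InvalidDateFormatError as error:
    print("rejected:", error)
```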
--------------------------------------------------------------------------------
/examples/exports/users/nmcassa/followers.json:
--------------------------------------------------------------------------------
1 | {
2 | "ramenfeedgg": {
3 | "username": "ramenfeedgg",
4 | "name": "ramenfeedgg",
5 | "url": "https://letterboxd.com/ramenfeedgg",
6 | "avatar": {
7 | "exists": false,
8 | "upscaled": false,
9 | "url": "https://s.ltrbxd.com/static/img/avatar80-CTtJ8HSs.png"
10 | },
11 | "followers": 5,
12 | "following": 6,
13 | "watched": 171,
14 | "lists": 0,
15 | "likes": 19
16 | },
17 | "ben24wold": {
18 | "username": "ben24wold",
19 | "name": "Ben Wold",
20 | "url": "https://letterboxd.com/ben24wold",
21 | "avatar": {
22 | "exists": true,
23 | "upscaled": true,
24 | "url": "https://a.ltrbxd.com/resized/avatar/twitter/1/7/9/3/7/5/9/shard/http___pbs.twimg.com_profile_images_1258867765965963269_SckczIvD-0-1000-0-1000-crop.jpg"
25 | },
26 | "followers": 37,
27 | "following": 15,
28 | "watched": 662,
29 | "lists": 37,
30 | "likes": 467
31 | },
32 | "ppark": {
33 | "username": "ppark",
34 | "name": "ppark",
35 | "url": "https://letterboxd.com/ppark",
36 | "avatar": {
37 | "exists": true,
38 | "upscaled": true,
39 | "url": "https://a.ltrbxd.com/resized/avatar/upload/5/9/0/5/5/1/0/shard/avtr-0-1000-0-1000-crop.jpg"
40 | },
41 | "followers": 7,
42 | "following": 6,
43 | "watched": 876,
44 | "lists": 1,
45 | "likes": 372
46 | },
47 | "ryanshubert": {
48 | "username": "ryanshubert",
49 | "name": "ryanshubert",
50 | "url": "https://letterboxd.com/ryanshubert",
51 | "avatar": {
52 | "exists": true,
53 | "upscaled": true,
54 | "url": "https://a.ltrbxd.com/resized/avatar/upload/2/3/6/9/1/6/5/shard/avtr-0-1000-0-1000-crop.jpg"
55 | },
56 | "followers": 22,
57 | "following": 32,
58 | "watched": 1,
59 | "lists": 7,
60 | "likes": 791
61 | },
62 | "crescendohouse": {
63 | "username": "crescendohouse",
64 | "name": "Crescendo House",
65 | "url": "https://letterboxd.com/crescendohouse",
66 | "avatar": {
67 | "exists": true,
68 | "upscaled": true,
69 | "url": "https://a.ltrbxd.com/resized/avatar/twitter/4/7/4/8/0/8/9/shard/http___pbs.twimg.com_profile_images_1373370791618830336_1qVBFR8N-0-1000-0-1000-crop.jpg"
70 | },
71 | "followers": 322,
72 | "following": 859,
73 | "watched": 5,
74 | "lists": 1,
75 | "likes": 142
76 | },
77 | "brendonyu668": {
78 | "username": "brendonyu668",
79 | "name": "Brendonyu668",
80 | "url": "https://letterboxd.com/brendonyu668",
81 | "avatar": {
82 | "exists": true,
83 | "upscaled": true,
84 | "url": "https://a.ltrbxd.com/resized/avatar/upload/2/3/4/2/2/3/2/shard/avtr-0-1000-0-1000-crop.jpg"
85 | },
86 | "followers": 36,
87 | "following": 206,
88 | "watched": 1,
89 | "lists": 130,
90 | "likes": 67
91 | },
92 | "pdrew1211": {
93 | "username": "pdrew1211",
94 | "name": "Parker Bobbitt",
95 | "url": "https://letterboxd.com/pdrew1211",
96 | "avatar": {
97 | "exists": true,
98 | "upscaled": true,
99 | "url": "https://a.ltrbxd.com/resized/avatar/upload/1/3/2/0/6/2/8/shard/avtr-0-1000-0-1000-crop.jpg"
100 | },
101 | "followers": 777,
102 | "following": 2,
103 | "watched": 1,
104 | "lists": 27,
105 | "likes": 58
106 | },
107 | "jordynhf": {
108 | "username": "jordynhf",
109 | "name": "jordynhf",
110 | "url": "https://letterboxd.com/jordynhf",
111 | "avatar": {
112 | "exists": true,
113 | "upscaled": true,
114 | "url": "https://a.ltrbxd.com/resized/avatar/upload/2/2/9/0/1/4/8/shard/avtr-0-1000-0-1000-crop.jpg"
115 | },
116 | "followers": 22,
117 | "following": 23,
118 | "watched": 852,
119 | "lists": 2,
120 | "likes": 201
121 | }
122 | }
--------------------------------------------------------------------------------
/examples/exports/users/nmcassa/following.json:
--------------------------------------------------------------------------------
1 | {
2 | "ramenfeedgg": {
3 | "username": "ramenfeedgg",
4 | "name": "ramenfeedgg",
5 | "url": "https://letterboxd.com/ramenfeedgg",
6 | "avatar": {
7 | "exists": false,
8 | "upscaled": false,
9 | "url": "https://s.ltrbxd.com/static/img/avatar80-CTtJ8HSs.png"
10 | },
11 | "followers": 5,
12 | "following": 6,
13 | "watched": 171,
14 | "lists": 0,
15 | "likes": 19
16 | },
17 | "ben24wold": {
18 | "username": "ben24wold",
19 | "name": "Ben Wold",
20 | "url": "https://letterboxd.com/ben24wold",
21 | "avatar": {
22 | "exists": true,
23 | "upscaled": true,
24 | "url": "https://a.ltrbxd.com/resized/avatar/twitter/1/7/9/3/7/5/9/shard/http___pbs.twimg.com_profile_images_1258867765965963269_SckczIvD-0-1000-0-1000-crop.jpg"
25 | },
26 | "followers": 37,
27 | "following": 15,
28 | "watched": 662,
29 | "lists": 37,
30 | "likes": 467
31 | },
32 | "ppark": {
33 | "username": "ppark",
34 | "name": "ppark",
35 | "url": "https://letterboxd.com/ppark",
36 | "avatar": {
37 | "exists": true,
38 | "upscaled": true,
39 | "url": "https://a.ltrbxd.com/resized/avatar/upload/5/9/0/5/5/1/0/shard/avtr-0-1000-0-1000-crop.jpg"
40 | },
41 | "followers": 7,
42 | "following": 6,
43 | "watched": 876,
44 | "lists": 1,
45 | "likes": 372
46 | },
47 | "joacogarcia2023": {
48 | "username": "joacogarcia2023",
49 | "name": "joacogarcia2023",
50 | "url": "https://letterboxd.com/joacogarcia2023",
51 | "avatar": {
52 | "exists": true,
53 | "upscaled": true,
54 | "url": "https://a.ltrbxd.com/resized/avatar/upload/1/0/1/4/0/6/6/7/shard/avtr-0-1000-0-1000-crop.jpg"
55 | },
56 | "followers": 14,
57 | "following": 6,
58 | "watched": 198,
59 | "lists": 0,
60 | "likes": 1
61 | },
62 | "ryanshubert": {
63 | "username": "ryanshubert",
64 | "name": "ryanshubert",
65 | "url": "https://letterboxd.com/ryanshubert",
66 | "avatar": {
67 | "exists": true,
68 | "upscaled": true,
69 | "url": "https://a.ltrbxd.com/resized/avatar/upload/2/3/6/9/1/6/5/shard/avtr-0-1000-0-1000-crop.jpg"
70 | },
71 | "followers": 22,
72 | "following": 32,
73 | "watched": 1,
74 | "lists": 7,
75 | "likes": 791
76 | },
77 | "connoreatspants": {
78 | "username": "connoreatspants",
79 | "name": "ConnorEatsPants",
80 | "url": "https://letterboxd.com/connoreatspants",
81 | "avatar": {
82 | "exists": true,
83 | "upscaled": true,
84 | "url": "https://a.ltrbxd.com/resized/avatar/upload/3/6/8/9/5/9/8/shard/avtr-0-1000-0-1000-crop.jpg"
85 | },
86 | "followers": 44,
87 | "following": 21,
88 | "watched": 162,
89 | "lists": 0,
90 | "likes": 116
91 | },
92 | "kurstboy": {
93 | "username": "kurstboy",
94 | "name": "Karsten",
95 | "url": "https://letterboxd.com/kurstboy",
96 | "avatar": {
97 | "exists": true,
98 | "upscaled": true,
99 | "url": "https://a.ltrbxd.com/resized/avatar/twitter/4/9/0/4/5/7/shard/http___pbs.twimg.com_profile_images_1001935353740177414_9ZQ0Noe4-0-1000-0-1000-crop.jpg"
100 | },
101 | "followers": 225,
102 | "following": 187,
103 | "watched": 2,
104 | "lists": 60,
105 | "likes": 3
106 | },
107 | "jordynhf": {
108 | "username": "jordynhf",
109 | "name": "jordynhf",
110 | "url": "https://letterboxd.com/jordynhf",
111 | "avatar": {
112 | "exists": true,
113 | "upscaled": true,
114 | "url": "https://a.ltrbxd.com/resized/avatar/upload/2/2/9/0/1/4/8/shard/avtr-0-1000-0-1000-crop.jpg"
115 | },
116 | "followers": 22,
117 | "following": 23,
118 | "watched": 852,
119 | "lists": 2,
120 | "likes": 201
121 | }
122 | }
--------------------------------------------------------------------------------
/letterboxdpy/list.py:
--------------------------------------------------------------------------------
1 | if __loader__.name == '__main__':
2 | import sys
3 | sys.path.append(sys.path[0] + '/..')
4 |
5 | import re
6 | from json import (
7 | dumps as json_dumps,
8 | loads as json_loads
9 | )
10 |
11 | from letterboxdpy.core.encoder import SecretsEncoder
12 | from letterboxdpy.pages import user_list
13 | from letterboxdpy.pages.user_list import ListMetaData
14 |
15 |
16 | class List:
17 |
18 | class ListPages:
19 |
20 | def __init__(self, username: str, slug: str) -> None:
21 | self.list = user_list.UserList(username, slug)
22 |
23 | def __init__(self, username: str, slug: str = None) -> None:
24 | assert re.match("^[A-Za-z0-9_]+$", username), "Invalid author"
25 |
26 | self.username = username.lower()
27 | self.slug = slug
28 | self.pages = self.ListPages(self.username, self.slug)
29 |
30 | self._movies = None
31 |
32 | self.url = self.get_url()
33 | self.title = self.get_title()
34 | self.author = self.get_author()
35 | self.description = self.get_description()
36 | self.date_created = self.get_date_created()
37 | self.date_updated = self.get_date_updated()
38 | self.tags = self.get_tags()
39 | self.count = self.get_count()
40 | self.list_id = self.get_list_id()
41 |
42 | # Properties
43 | @property
44 | def movies(self) -> dict:
45 | if self._movies is None:
46 | self._movies = self.get_movies()
47 | return self._movies
48 |
49 | # Magic Methods
50 | def __len__(self) -> int:
51 | return self.count
52 |
53 | def __getattr__(self, name):
54 | if not object.__getattribute__(self, name):
55 | raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")
56 |
57 | method = object.__getattribute__(self, name)
58 | if callable(method):
59 | return method
60 | else:
61 | raise TypeError(f"'{self.__class__.__name__}' object attribute '{name}' is not callable")
62 |
63 | def __getitem__(self, key: str):
64 | try:
65 | return object.__getattribute__(self, key)
66 | except AttributeError:
67 | raise KeyError(f"'{self.__class__.__name__}' object has no key '{key}'")
68 |
69 | def __str__(self) -> str:
70 | return json_dumps(self, indent=2, cls=SecretsEncoder, secrets=['pages'])
71 |
72 | def jsonify(self) -> dict:
73 | return json_loads(self.__str__())
74 |
75 | # Data Retrieval Methods
76 | def get_url(self) -> str: return self.pages.list.url
77 | def get_title(self) -> str: return self.pages.list.get_title()
78 | def get_author(self) -> str: return self.pages.list.get_author()
79 | def get_description(self) -> str: return self.pages.list.get_description()
80 | def get_date_created(self) -> list: return self.pages.list.get_date_created()
81 | def get_date_updated(self) -> list: return self.pages.list.get_date_updated()
82 | def get_tags(self) -> list: return self.pages.list.get_tags()
83 | def get_movies(self) -> dict: return self.pages.list.get_movies()
84 | def get_count(self) -> int: return self.pages.list.get_count()
85 | def get_list_id(self) -> str: return self.pages.list.get_list_id()
86 | def get_list_meta(self, url: str) -> ListMetaData: return self.pages.list.get_list_meta(url)
87 |
88 | if __name__ == "__main__":
89 | # user list usage:
90 | list_instance = List("nmcassa", "def-con-movie-list")
91 | movies = list_instance.movies
92 | assert len(movies) == list_instance.count, "Count mismatch"
93 |
94 | print(list_instance)
95 | print('url:', list_instance.url)
96 | print('title:', list_instance.title)
97 | print('author:', list_instance.author)
98 | print('description:', list_instance.description)
99 | print('created:', list_instance.date_created)
100 | print('updated:', list_instance.date_updated)
101 | print('tags:', list_instance.tags)
102 | print('count:', list_instance.count)
103 | print('list_id:', list_instance.list_id)
104 | print('movies:', movies)
--------------------------------------------------------------------------------
/examples/exports/users/nmcassa/user.json:
--------------------------------------------------------------------------------
1 | {
2 | "username": "nmcassa",
3 | "url": "https://letterboxd.com/nmcassa",
4 | "id": 1500306,
5 | "is_hq": false,
6 | "display_name": "nmcassa",
7 | "bio": null,
8 | "location": null,
9 | "website": null,
10 | "watchlist_length": 78,
11 | "stats": {
12 | "films": 677,
13 | "this_year": 66,
14 | "lists": 2,
15 | "following": 8,
16 | "followers": 8
17 | },
18 | "favorites": {
19 | "51794": {
20 | "slug": "the-king-of-comedy",
21 | "name": "The King of Comedy",
22 | "url": "https://letterboxd.com/film/the-king-of-comedy/",
23 | "year": 1982,
24 | "log_url": "https://letterboxd.com/nmcassa/film/the-king-of-comedy/activity/"
25 | },
26 | "51529": {
27 | "slug": "the-conversation",
28 | "name": "The Conversation",
29 | "url": "https://letterboxd.com/film/the-conversation/",
30 | "year": 1974,
31 | "log_url": "https://letterboxd.com/nmcassa/film/the-conversation/"
32 | },
33 | "51090": {
34 | "slug": "rocky",
35 | "name": "Rocky",
36 | "url": "https://letterboxd.com/film/rocky/",
37 | "year": 1976,
38 | "log_url": "https://letterboxd.com/nmcassa/film/rocky/"
39 | },
40 | "46911": {
41 | "slug": "children-of-men",
42 | "name": "Children of Men",
43 | "url": "https://letterboxd.com/film/children-of-men/",
44 | "year": 2006,
45 | "log_url": "https://letterboxd.com/nmcassa/film/children-of-men/"
46 | }
47 | },
48 | "avatar": {
49 | "exists": true,
50 | "upscaled": true,
51 | "url": "https://a.ltrbxd.com/resized/avatar/upload/1/5/0/0/3/0/6/shard/avtr-0-1000-0-1000-crop.jpg"
52 | },
53 | "recent": {
54 | "watchlist": {
55 | "45577": {
56 | "id": "45577",
57 | "slug": "human-traffic",
58 | "name": "Human Traffic",
59 | "year": 1999
60 | },
61 | "19921": {
62 | "id": "19921",
63 | "slug": "the-fighter-2010",
64 | "name": "The Fighter",
65 | "year": 2010
66 | },
67 | "46431": {
68 | "id": "46431",
69 | "slug": "rounders",
70 | "name": "Rounders",
71 | "year": 1998
72 | },
73 | "45224": {
74 | "id": "45224",
75 | "slug": "thief",
76 | "name": "Thief",
77 | "year": 1981
78 | },
79 | "32345": {
80 | "id": "32345",
81 | "slug": "taste-of-cherry",
82 | "name": "Taste of Cherry",
83 | "year": 1997
84 | }
85 | },
86 | "diary": {
87 | "months": {
88 | "9": {
89 | "12": [
90 | {
91 | "name": "Toy Story",
92 | "slug": "toy-story"
93 | },
94 | {
95 | "name": "Cars",
96 | "slug": "cars"
97 | }
98 | ],
99 | "6": [
100 | {
101 | "name": "Full Metal Jacket",
102 | "slug": "full-metal-jacket"
103 | },
104 | {
105 | "name": "Up",
106 | "slug": "up"
107 | }
108 | ]
109 | },
110 | "8": {
111 | "30": [
112 | {
113 | "name": "WALL\u00b7E",
114 | "slug": "walle"
115 | }
116 | ],
117 | "25": [
118 | {
119 | "name": "F1",
120 | "slug": "f1"
121 | }
122 | ],
123 | "24": [
124 | {
125 | "name": "Caught Stealing",
126 | "slug": "caught-stealing"
127 | }
128 | ],
129 | "23": [
130 | {
131 | "name": "Zodiac",
132 | "slug": "zodiac"
133 | }
134 | ],
135 | "18": [
136 | {
137 | "name": "Superman",
138 | "slug": "superman-2025"
139 | }
140 | ],
141 | "16": [
142 | {
143 | "name": "The Game",
144 | "slug": "the-game"
145 | }
146 | ]
147 | }
148 | }
149 | }
150 | }
151 | }
--------------------------------------------------------------------------------
/letterboxdpy/core/scraper.py:
--------------------------------------------------------------------------------
1 | if __name__ == '__main__':
2 | import sys
3 | sys.path.append(sys.path[0] + '/..')
4 |
5 | from json import dumps as json_dumps
6 | from bs4 import BeautifulSoup
7 | import requests
8 |
9 | from letterboxdpy.constants.project import DOMAIN
10 | from letterboxdpy.core.exceptions import (
11 | PageLoadError,
12 | InvalidResponseError,
13 | PrivateRouteError
14 | )
15 |
16 | class Scraper:
17 | """A class for scraping and parsing web pages."""
18 |
19 | headers = {
20 | "referer": DOMAIN,
21 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
22 | }
23 | builder = "lxml"
24 |
25 | def __init__(self, domain: str = headers['referer'], user_agent: str = headers["user-agent"]):
26 | """Initialize the scraper with the specified domain and user-agent."""
27 | self.headers = {
28 | "referer": domain,
29 | "user-agent": user_agent
30 | }
31 |
32 | @classmethod
33 | def get_page(cls, url: str) -> BeautifulSoup:
34 | """Fetch, check, and parse the HTML content from the specified URL."""
35 | response = cls._fetch(url)
36 | cls._check_for_errors(url, response)
37 | return cls._parse_html(response)
38 |
39 | @classmethod
40 | def _fetch(cls, url: str) -> requests.Response:
41 | """Fetch the HTML content from the specified URL."""
42 | try:
43 | return requests.get(url, headers=cls.headers)
44 | except requests.RequestException as e:
45 | raise PageLoadError(url, str(e))
46 |
47 | @classmethod
48 | def _check_for_errors(cls, url: str, response: requests.Response) -> None:
49 | """Check the response for errors and raise an exception if found."""
50 | if response.status_code != 200:
51 | error_message = cls._get_error_message(response)
52 |             formatted_error_message = cls._format_error(url, response, error_message)
53 |             if response.status_code == 403:
54 |                 raise PrivateRouteError(formatted_error_message)
55 |             raise InvalidResponseError(formatted_error_message)
56 |
57 | @classmethod
58 | def _get_error_message(cls, response: requests.Response) -> str:
59 | """Extract the error message from the response, if available."""
60 | dom = BeautifulSoup(response.text, cls.builder)
61 | message_section = dom.find("section", {"class": "message"})
62 | return message_section.strong.text if message_section else "Unknown error occurred"
63 |
64 | @classmethod
65 | def _format_error(cls, url: str, response: requests.Response, message: str) -> str:
66 | """Format the error message for logging or raising exceptions."""
67 | return json_dumps({
68 | 'code': response.status_code,
69 | 'reason': str(response.reason),
70 | 'url': url,
71 | 'message': message
72 | }, indent=2)
73 |
74 | @classmethod
75 | def _parse_html(cls, response: requests.Response) -> BeautifulSoup:
76 | """Parse the HTML content from the response."""
77 | try:
78 | return BeautifulSoup(response.text, cls.builder)
79 | except Exception as e:
80 | raise Exception(f"Error parsing response: {e}")
81 |
82 | def parse_url(url: str) -> BeautifulSoup:
83 | """Fetch and parse the HTML content from the specified URL using the Scraper class."""
84 | return Scraper.get_page(url)
85 |
86 | def url_encode(query: str, safe: str = '') -> str:
87 | """URL encode the given query."""
88 | return requests.utils.quote(query, safe=safe)
89 |
90 | if __name__ == "__main__":
91 | sys.stdout.reconfigure(encoding='utf-8')
92 |
93 | input_domain = ''
94 | while not len(input_domain.strip()):
95 | input_domain = input('Enter url: ')
96 |
97 | print(f"Parsing {input_domain}...")
98 |
99 | parsed_dom_class_method = parse_url(input_domain)
100 | print(f"Title (using class method): {parsed_dom_class_method.title.string}")
101 |
102 | input("Click Enter to see the DOM...")
103 | print(f"HTML: {parsed_dom_class_method.prettify()}")
104 | print("*" * 20 + "\nDone!")
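A minimal sketch of the error-handling contract exposed by this module; the profile URL is illustrative, and only the exceptions raised above are caught.

```python
# Fetch a page and distinguish private routes (403) from other failures.
from letterboxdpy.core.scraper import parse_url
from letterboxdpy.core.exceptions import PageLoadError, InvalidResponseError, PrivateRouteError

try:
    dom = parse_url("https://letterboxd.com/nmcassa/")
    print(dom.title.string)
except PrivateRouteError:
    print("403: the route is private or requires authentication")
except (PageLoadError, InvalidResponseError) as error:
    print("request failed:", error)
```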
--------------------------------------------------------------------------------
/letterboxdpy/pages/user_reviews.py:
--------------------------------------------------------------------------------
1 | from letterboxdpy.core.scraper import parse_url
2 | from letterboxdpy.utils.utils_parser import parse_review_date, parse_review_text
3 | from letterboxdpy.constants.project import DOMAIN
4 |
5 |
6 | class UserReviews:
7 |
8 | def __init__(self, username: str) -> None:
9 | self.username = username
10 | self.url = f"{DOMAIN}/{self.username}/films/reviews"
11 |
12 | def get_reviews(self): return extract_user_reviews(self.url)
13 |
14 | def extract_user_reviews(url: str) -> dict:
15 | '''
16 | Returns a dictionary containing user reviews. The keys are unique log IDs,
17 | and each value is a dictionary with details about the review,
18 | including movie information, review type, rating, review content, date, etc.
19 | '''
20 | LOGS_PER_PAGE = 12
21 |
22 | page = 0
23 | data = {'reviews': {}}
24 | while True:
25 | page += 1
26 | dom = parse_url(f"{url}/page/{page}/")
27 |
28 | container = dom.find("div", {"class": ["viewing-list"]})
29 |
30 |         if not container:
31 |             # No container (div.viewing-list) found; treat the page as empty.
32 |             logs = []
33 |         else:
34 |             logs = container.find_all("article")
35 |
36 | if not logs:
37 | # No item (article) found in container.
38 | ...
39 |
40 | for log in logs:
41 | # Handle react structure
42 | react_component = log.parent.find("div", {"class": "react-component"}) or log.parent.div
43 |
44 | movie_name = log.a.text
45 | slug = react_component.get('data-item-slug') or react_component.get('data-film-slug')
46 | movie_id = react_component['data-film-id']
47 | # str ^^^--- movie_id: unique id of the movie.
48 | # Find release year in spans
49 | release = None
50 | spans = log.find_all('span')
51 | for span in spans:
52 | if span.text and span.text.strip().isdigit() and len(span.text.strip()) == 4:
53 | release = int(span.text.strip())
54 | break
55 | movie_link = f"{DOMAIN}/film/{slug}/"
56 | log_id = log['data-object-id'].split(':')[-1]
57 | # str ^^^--- log_id: unique id of the review.
58 | log_link = DOMAIN + log.a['href']
59 | log_no = log_link.split(slug)[-1]
60 | log_no = int(log_no.replace('/', '')) if log_no.count('/') == 2 else 0
61 | # int ^^^--- log_no: there can be multiple reviews for a movie.
62 | # counting starts from zero.
63 | # example for first review: /username/film/movie_name/
64 | # example for first review: /username/film/movie_name/0/
65 | # example for second review: /username/film/movie_name/1/
66 | # the number is specified at the end of the url ---^
67 | rating = log.find("span", {"class": ["rating"], })
68 | rating = int(rating['class'][-1].split('-')[-1]) if rating else None
69 | # int ^^^--- rating: the numerical value of the rating given in the review (1-10)
70 | review, spoiler = parse_review_text(log)
71 | # str ^^^--- review: the text content of the review.
72 | # spoiler warning is checked to include or exclude the first paragraph.
73 | date = log.find("span", {"class": ["date"], })
74 | log_type = date.find_previous_sibling().text.strip()
75 | # str ^^^--- log_type: Types of logs, such as:
76 | # 'Rewatched': (in diary) review, watched and rewatched
77 | # 'Watched': (in diary) review and watched
78 | # 'Added': (not in diary) review
79 | date = parse_review_date(log_type, date)
80 | # dict ^^^--- date: the date of the review.
81 | # example: {'year': 2024, 'month': 1, 'day': 1}
82 |
83 | data['reviews'][log_id] = {
84 | # static
85 | 'movie': {
86 | 'name': movie_name,
87 | 'slug': slug,
88 | 'id': movie_id,
89 | 'release': release,
90 | 'link': movie_link,
91 | },
92 | # dynamic
93 | 'type': log_type,
94 | 'no': log_no,
95 | 'link': log_link,
96 | 'rating': rating,
97 | 'review': {
98 | 'content': review,
99 | 'spoiler': spoiler
100 | },
101 | 'date': date,
102 | 'page': page,
103 | }
104 |
105 | if len(logs) < LOGS_PER_PAGE:
106 | data['count'] = len(data['reviews'])
107 | data['last_page'] = page
108 | break
109 |
110 | return data
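A minimal sketch of consuming the payload described in the docstring; `nmcassa` is the demo account used elsewhere in this repository.

```python
from letterboxdpy.pages.user_reviews import UserReviews

data = UserReviews("nmcassa").get_reviews()
print("reviews:", data["count"], "- pages scraped:", data["last_page"])

for log_id, log in data["reviews"].items():
    # rating is 1-10 or None; date is a {'year', 'month', 'day'} dict
    stars = f"{log['rating']}/10" if log["rating"] is not None else "unrated"
    print(f"{log['movie']['name']} ({log['movie']['release']}) - {stars} - {log['date']}")
```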
--------------------------------------------------------------------------------
/letterboxdpy/utils/lists_extractor.py:
--------------------------------------------------------------------------------
1 | """
2 | Shared list extraction utilities for Letterboxd list pages.
3 | This module provides common functionality for extracting list data
4 | from user lists, movie lists, and individual list pages.
5 | """
6 |
7 | from letterboxdpy.utils.utils_parser import extract_and_convert_shorthand, extract_numeric_text
8 | from letterboxdpy.core.scraper import parse_url
9 | from letterboxdpy.constants.project import DOMAIN
10 |
11 |
12 | class ListsExtractor:
13 | """Common lists collection extraction functionality.
14 |
15 | Extracts collections of lists from Letterboxd pages:
16 | - User's created lists
17 | - Lists containing a specific movie
18 | - Popular lists containing a movie
19 | """
20 |
21 | # Shared selectors for all list types
22 | SELECTORS = {
23 | 'list_set': ('section', {'class': 'list-set'}),
24 | 'lists': ('section', {'class': 'list'}),
25 | 'title': ('h2', {'class': 'title'}),
26 | 'description': ('div', {'class': 'body-text'}),
27 | 'value': ('span', {'class': 'value'}),
28 | 'likes': ('a', {'class': 'icon-like'}),
29 | 'comments': ('a', {'class': 'icon-comment'}),
30 | }
31 |
32 | LISTS_PER_PAGE = 12
33 |
34 | @classmethod
35 | def from_url(cls, base_url: str, max_lists: int = None) -> dict:
36 | """
37 | Extract lists collection from URL.
38 |
39 | Args:
40 | base_url: Base URL without page parameter
41 | max_lists: Maximum number of lists to return (optional limit)
42 |
43 | Returns:
44 | dict: Contains 'lists', 'count', 'last_page'
45 | """
46 | data = {'limit': max_lists, 'count': 0, 'last_page': 1, 'lists': {}}
47 | page = 1
48 |
49 | while True:
50 | lists = cls._fetch_page_data(base_url, page)
51 |
52 | if not lists:
53 | break
54 |
55 | for item in lists:
56 | list_data = cls._extract_list_data(item)
57 | data['lists'] |= list_data
58 |
59 | if max_lists and len(data['lists']) >= max_lists:
60 | # Limit reached
61 | data['limit'] = True
62 | break
63 |
64 |             if data['limit'] is True or len(lists) < cls.LISTS_PER_PAGE:
65 | # Is last page or limit reached
66 | break
67 |
68 | page += 1
69 |
70 | data['count'] = len(data['lists'])
71 | data['last_page'] = page
72 |
73 | return data
74 |
75 | @classmethod
76 | def _fetch_page_data(cls, base_url: str, page: int):
77 | """Fetch and parse page data."""
78 | dom = parse_url(f'{base_url}/page/{page}')
79 | return dom.find_all('article', {'class': 'list-summary'})
80 |
81 | @classmethod
82 | def _extract_list_data(cls, item) -> dict:
83 | """Extract data from a list item."""
84 |
85 | def get_id() -> str:
86 | return item['data-film-list-id']
87 |
88 | def get_title() -> str:
89 | title_elem = item.find('h2', {'class': 'name'})
90 | return title_elem.text.strip()
91 |
92 | def get_description() -> str:
93 | description = item.find('div', {'class': ['notes', 'body-text']})
94 | if description:
95 | paragraphs = description.find_all('p')
96 | return '\n'.join([p.text for p in paragraphs])
97 | return ""
98 |
99 | def get_url() -> str:
100 | title_elem = item.find('h2', {'class': 'name'})
101 | return DOMAIN + title_elem.a['href']
102 |
103 | def get_slug() -> str:
104 | return get_url().split('/')[-2]
105 |
106 | def get_count() -> int:
107 | value_elem = item.find(*cls.SELECTORS['value'])
108 |             if value_elem:
109 |                 count = extract_numeric_text(value_elem.text)
110 |                 return count if count is not None else 0
111 |             return 0
112 | def get_likes() -> int:
113 | likes = item.find(*cls.SELECTORS['likes'])
114 | if likes:
115 | likes = extract_and_convert_shorthand(likes)
116 | return likes
117 | return 0
118 |
119 | def get_comments() -> int:
120 | comments = item.find(*cls.SELECTORS['comments'])
121 | if comments:
122 | return extract_and_convert_shorthand(comments)
123 | return 0
124 |
125 | return {
126 | get_id(): {
127 | 'title': get_title(),
128 | 'slug': get_slug(),
129 | 'description': get_description(),
130 | 'url': get_url(),
131 | 'count': get_count(),
132 | 'likes': get_likes(),
133 | 'comments': get_comments()
134 | }
135 | }
136 |
137 |
138 |
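A minimal sketch of `ListsExtractor.from_url`; the `/username/lists` URL pattern is an assumption based on how `DOMAIN` is combined with usernames elsewhere in this package.

```python
from letterboxdpy.utils.lists_extractor import ListsExtractor
from letterboxdpy.constants.project import DOMAIN

data = ListsExtractor.from_url(f"{DOMAIN}/nmcassa/lists", max_lists=5)
print("lists:", data["count"], "- last page:", data["last_page"])

for list_id, entry in data["lists"].items():
    print(f"{entry['title']} ({entry['count']} films) -> {entry['url']}")
```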
--------------------------------------------------------------------------------
/examples/export_user_diary_posters.py:
--------------------------------------------------------------------------------
1 | """
2 | Letterboxd Diary Poster Downloader
3 |
4 | Downloads movie posters from user's diary entries.
5 | - Extract poster URLs from diary entries
6 | - Download and organize posters by year
7 | - Automatic directory structure creation
8 | - Skip existing files with size checking
9 | """
10 |
11 | import requests
12 | import sys
13 | import os
14 |
15 | from letterboxdpy import user
16 | from letterboxdpy.core.scraper import parse_url
17 | from letterboxdpy.utils.utils_terminal import get_input, args_exists
18 |
19 |
20 | class Settings:
21 | def __init__(self, foldering=True, size_check=False):
22 | self.foldering = foldering # Create folders for each day
23 | self.size_check = size_check # Check if file size already exists
24 |
25 | class Path:
26 | @staticmethod
27 | def check_path(*paths):
28 | for path in paths:
29 | if not os.path.exists(path):
30 | os.mkdir(path)
31 | print('Directory created:', path)
32 | else:
33 | print('Directory found:', path)
34 |
35 | @staticmethod
36 | def save(file_path, data):
37 | with open(file_path, 'wb') as f:
38 | f.write(data)
39 |
40 | class App:
41 | EXPORTS_DIR = "exports"
42 | EXPORTS_USERS_DIR = os.path.join(EXPORTS_DIR, "users")
43 |
44 | def __init__(self, username):
45 | self.username = username.lower()
46 | self.USER_FOLDER = os.path.join(self.EXPORTS_USERS_DIR, self.username)
47 | self.USER_POSTERS_DIR = os.path.join(self.USER_FOLDER, "posters")
48 |
49 | self.user = user.User(self.username)
50 | self.data = self.user.get_diary()
51 | self.config = Settings()
52 |
53 | self.foldering = self.config.foldering
54 | self.size_check = self.config.size_check
55 |
56 | def get_poster_url(self, slug):
57 | poster_ajax = f"https://letterboxd.com/ajax/poster/film/{slug}/std/500x750/"
58 | poster_page = parse_url(poster_ajax)
59 | return poster_page.img['srcset'].split('?')[0]
60 |
61 | def run(self):
62 | count = self.data['count']
63 | entries = self.data['entries']
64 | already_start = 0
65 |
66 | if not count:
67 | print('No entries found')
68 | return
69 |
70 | print(f'Processing {count} entries..')
71 |
72 | Path.check_path(
73 | self.EXPORTS_DIR,
74 | self.EXPORTS_USERS_DIR,
75 | self.USER_FOLDER,
76 | self.USER_POSTERS_DIR
77 | )
78 |
79 | if self.foldering:
80 | years_dir = os.path.join(self.USER_POSTERS_DIR, 'years')
81 | Path.check_path(years_dir)
82 | previous_year = None
83 |
84 | for v in entries.values():
85 | date = v["date"]
86 |
87 | file_date = "-".join(map(str, date.values()))
88 | file_dated_name = f"{file_date}_{v['slug']}.jpg"
89 |
90 | if self.foldering:
91 | current_year = str(date['year'])
92 | year_dir = os.path.join(years_dir, current_year)
93 | if previous_year != current_year:
94 | previous_year = current_year
95 | Path.check_path(year_dir)
96 | file_path = os.path.join(year_dir, file_dated_name)
97 | else:
98 | file_path = os.path.join(self.USER_POSTERS_DIR, file_dated_name)
99 |
100 | if os.path.exists(file_path):
101 | if not self.size_check:
102 | if not already_start:
103 | already_start = count
104 | count -= 1
105 | continue
106 |
107 | print(f'{count} - Poster file already exists, checking size..')
108 |
109 | if (already_start - count) > 1:
110 | print(f'Have already processed {already_start - count} entries, skipping {count}..')
111 | already_start = 0
112 |
113 | poster_url = self.get_poster_url(v['slug'])
114 | response = requests.get(poster_url)
115 |
116 | if os.path.exists(file_path):
117 | if int(os.stat(file_path).st_size) == int(response.headers['Content-Length']):
118 | print(f'{count} - File already exists and has same size as new file, skipping..')
119 | count -= 1
120 | continue
121 | print(f'Rewriting {file_path}..')
122 |
123 | Path.save(file_path, response.content)
124 | print(f'{count} - Wrote {file_path}')
125 | count -= 1
126 |
127 | print('Processing complete!')
128 | click_url = 'file:///' + os.path.join(os.getcwd(), self.USER_POSTERS_DIR).replace("\\", "/")
129 | print('At', click_url)
130 |
131 |
132 | if __name__ == '__main__':
133 | if not args_exists():
134 |         print(f'Quick usage: python {sys.argv[0]} <username>')
135 |
136 | username = get_input('Enter username: ', index=1)
137 | app = App(username)
138 | app.run()
--------------------------------------------------------------------------------
/letterboxdpy/pages/user_watchlist.py:
--------------------------------------------------------------------------------
1 | from letterboxdpy.core.scraper import parse_url
2 | from letterboxdpy.constants.project import DOMAIN
3 | from letterboxdpy.pages.user_list import extract_movies
4 |
5 | class UserWatchlist:
6 | FILMS_PER_PAGE = 7*4
7 |
8 | def __init__(self, username: str) -> None:
9 | self.username = username
10 | self.url = f"{DOMAIN}/{self.username}/watchlist"
11 |
12 | def __str__(self) -> str:
13 | return f"Not printable object of type: {self.__class__.__name__}"
14 |
15 | def get_owner(self): ...
16 | def get_count(self) -> int: return extract_count(self.url)
17 | def get_movies(self) -> dict: return extract_movies(self.url, self.FILMS_PER_PAGE)
18 | def get_watchlist(self, filters: dict=None) -> dict: return extract_watchlist(self.username, filters)
19 |
20 | def extract_count(url: str) -> int:
21 | """Extracts the number of films from the watchlist page's DOM."""
22 | dom = parse_url(url)
23 |
24 | watchlist_div = dom.find("div", class_="s-watchlist-content")
25 | if watchlist_div and "data-num-entries" in watchlist_div.attrs:
26 | return int(watchlist_div["data-num-entries"])
27 |
28 | count_span = dom.find("span", class_="js-watchlist-count")
29 |
30 | if count_span:
31 | count = count_span.text.split()[0]
32 | return int(count.replace(",", ""))
33 |
34 | raise ValueError("Watchlist count could not be extracted from DOM")
35 |
36 | def extract_watchlist(username: str, filters: dict = None) -> dict:
37 | """
38 | Extracts a user's watchlist from the platform.
39 |
40 | filter examples:
41 | - keys: decade, year, genre
42 |
43 | # positive genre & negative genre (start with '-')
44 | - {genre: ['mystery']} <- same -> {genre: 'mystery'}
45 | - {genre: ['-mystery']} <- same -> {genre: '-mystery'}
46 |
47 | # multiple genres
48 | - {genre: ['mystery', 'comedy'], decade: '1990s'}
49 | - {genre: ['mystery', '-comedy'], year: '2019'}
50 | - /decade/1990s/genre/action+-drama/
51 | ^^---> {'decade':'1990s','genre':['action','-drama']}
52 | """
53 | data = {
54 | 'available': False,
55 | 'count': 0,
56 | 'last_page': None,
57 | 'filters': filters,
58 | 'data': {}
59 | }
60 |
61 | FILMS_PER_PAGE = 28 # Total films per page (7 rows * 4 columns)
62 | BASE_URL = f"{DOMAIN}/{username}/watchlist/"
63 |
64 | # Construct the URL with filters if provided
65 | if filters and isinstance(filters, dict):
66 | f = ""
67 | for key, values in filters.items():
68 | if not isinstance(values, list):
69 | values = [values]
70 | f += f"{key}/"
71 | f += "+".join([str(v) for v in values]) + "/"
72 | BASE_URL += f
73 |
74 | def extract_movie_info(container) -> dict[str, str | int | None] | None:
75 | """Extract film ID, slug, name, and year from watchlist container.
76 |
77 | Returns:
78 | dict: {"id": str, "slug": str, "name": str, "year": int|None} or None if extraction fails
79 |
80 | Example:
81 | Input: container with "The Matrix (1999)"
82 | Output: {"id": "12345", "slug": "the-matrix", "name": "The Matrix", "year": 1999}
83 | """
84 | from letterboxdpy.utils.utils_string import extract_year_from_movie_name, clean_movie_name
85 |
86 | data = container.find("div", {"class": "react-component"}) or container.div
87 | if not data or 'data-film-id' not in data.attrs:
88 | return None
89 |
90 | raw_name = data.get('data-item-name') or data.img['alt']
91 | name = clean_movie_name(raw_name)
92 | year = extract_year_from_movie_name(raw_name)
93 |
94 | context = {
95 | "id": data['data-film-id'],
96 | "slug": data.get('data-item-slug') or data.get('data-film-slug'),
97 | "name": name,
98 | "year": year
99 | }
100 |
101 | return context
102 |
103 | page = 1
104 | no = 1
105 | while True:
106 | dom = parse_url(f'{BASE_URL}/page/{page}')
107 | containers = dom.find_all("li", {"class": "griditem"}) or dom.find_all("li", {"class": ["poster-container"]})
108 |
109 | for container in containers:
110 | movie_info = extract_movie_info(container)
111 | if movie_info:
112 | data['data'][movie_info["id"]] = {
113 | 'name': movie_info["name"],
114 | 'slug': movie_info["slug"],
115 | 'year': movie_info["year"],
116 | 'page': page,
117 | 'url': f"{DOMAIN}/film/{movie_info['slug']}/",
118 | 'no': no
119 | }
120 | no += 1
121 |
122 | if len(containers) < FILMS_PER_PAGE:
123 | break
124 | page += 1
125 |
126 | # Set the count of films and availability
127 | data['count'] = len(data['data'])
128 | data['available'] = data['count'] > 0
129 | data['last_page'] = page
130 |
131 | # Reverse numbering for films
132 | for fv in data['data'].values():
133 | fv.update({'no': data['count'] - fv['no'] + 1})
134 |
135 | return data
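A minimal sketch of the filter syntax documented in `extract_watchlist`; `nmcassa` is the demo account used elsewhere in this repository.

```python
from letterboxdpy.pages.user_watchlist import UserWatchlist

watchlist = UserWatchlist("nmcassa")
print("watchlist count:", watchlist.get_count())

# 1990s mysteries that are not comedies -> /decade/1990s/genre/mystery+-comedy/
data = watchlist.get_watchlist({'decade': '1990s', 'genre': ['mystery', '-comedy']})
for film_id, film in data['data'].items():
    print(film['no'], film['name'], film['year'])
```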
--------------------------------------------------------------------------------
/letterboxdpy/movie.py:
--------------------------------------------------------------------------------
1 | from json import (
2 | dumps as json_dumps,
3 | loads as json_loads,
4 | )
5 |
6 | from letterboxdpy.core.encoder import SecretsEncoder
7 | from letterboxdpy.pages import (
8 | movie_profile,
9 | movie_similar,
10 | movie_reviews,
11 | movie_lists,
12 | movie_details,
13 | movie_members
14 | )
15 |
16 | class Movie:
17 |
18 | class MoviePages:
19 | def __init__(self, slug: str) -> None:
20 | self.profile = movie_profile.MovieProfile(slug)
21 | self.details = movie_details.MovieDetails(slug)
22 | self.lists = movie_lists.MovieLists(slug)
23 | self.members = movie_members.MovieMembers(slug)
24 | self.reviews = movie_reviews.MovieReviews(slug)
25 | self.similar = movie_similar.MovieSimilar(slug)
26 |
27 | def __init__(self, slug: str) -> None:
28 | assert isinstance(slug, str), f"Movie slug must be a string, not {type(slug)}"
29 |
30 | self.slug = slug
31 | self.pages = self.MoviePages(self.slug)
32 |
33 | self.url = self.get_url()
34 |
35 | # one line contents
36 | self.id = self.get_id()
37 | self.title = self.get_title()
38 | self.original_title = self.get_original_title()
39 | self.runtime = self.get_runtime()
40 | self.rating = self.get_rating()
41 | self.year = self.get_year()
42 | self.tmdb_link = self.get_tmdb_link()
43 | self.imdb_link = self.get_imdb_link()
44 | self.poster = self.get_poster()
45 | self.banner = self.get_banner()
46 | self.tagline = self.get_tagline()
47 |
48 | # long contents
49 | self.description = self.get_description()
50 | self.trailer = self.get_trailer()
51 | self.alternative_titles = self.get_alternative_titles()
52 | self.details = self.get_details()
53 | self.genres = self.get_genres()
54 | self.cast = self.get_cast()
55 | self.crew = self.get_crew()
56 | self.popular_reviews = self.get_popular_reviews()
57 |
58 | def __str__(self) -> str:
59 | return json_dumps(self, indent=2, cls=SecretsEncoder, secrets=['pages'])
60 |
61 | def jsonify(self) -> dict:
62 | return json_loads(self.__str__())
63 |
64 | # PROFILE PAGE
65 | def get_url(self) -> str: return self.pages.profile.url
66 | def get_id(self) -> str: return self.pages.profile.get_id()
67 | def get_title(self) -> str: return self.pages.profile.get_title()
68 | def get_original_title(self) -> str: return self.pages.profile.get_original_title()
69 | def get_runtime(self) -> int: return self.pages.profile.get_runtime()
70 | def get_rating(self) -> float: return self.pages.profile.get_rating()
71 | def get_year(self) -> int: return self.pages.profile.get_year()
72 | def get_tmdb_link(self) -> str: return self.pages.profile.get_tmdb_link()
73 | def get_imdb_link(self) -> str: return self.pages.profile.get_imdb_link()
74 | def get_poster(self) -> str: return self.pages.profile.get_poster()
75 | def get_banner(self) -> str: return self.pages.profile.get_banner()
76 | def get_tagline(self) -> str: return self.pages.profile.get_tagline()
77 | def get_description(self) -> str: return self.pages.profile.get_description()
78 | def get_trailer(self) -> dict: return self.pages.profile.get_trailer()
79 | def get_alternative_titles(self) -> list: return self.pages.profile.get_alternative_titles()
80 | def get_details(self) -> list: return self.pages.profile.get_details()
81 | def get_genres(self) -> list: return self.pages.profile.get_genres()
82 | def get_cast(self) -> list: return self.pages.profile.get_cast()
83 | def get_crew(self) -> dict: return self.pages.profile.get_crew()
84 | def get_popular_reviews(self) -> list: return self.pages.profile.get_popular_reviews()
85 |
86 | # DETAILS PAGE
87 | def get_details_from_details(self) -> dict: return self.pages.details.get_extended_details()
88 |
89 | # LISTS PAGE
90 | def get_lists(self) -> dict: return self.pages.lists.get_lists()
91 |
92 | # MEMBERS PAGE
93 | def get_watchers_stats(self) -> dict: return self.pages.members.get_watchers_stats()
94 |
95 | # REVIEWS PAGE
96 | def get_reviews(self) -> dict: return self.pages.reviews.get_reviews()
97 | def get_reviews_by_rating(self, rating: float) -> dict: return self.pages.reviews.get_reviews_by_rating(rating)
98 |
99 | # SIMILAR MOVIES
100 | def get_similar_movies(self) -> dict: return self.pages.similar.get_similar_movies()
101 |
102 | if __name__ == "__main__":
103 | import sys
104 | sys.stdout.reconfigure(encoding='utf-8')
105 |
106 | movie_instance = Movie("v-for-vendetta") # 132 mins
107 | # movie_instance_2 = Movie("honk-2013") # 1 min
108 | # movie_instance_3 = Movie("logistics-2011") # 51420 mins
109 |
110 | # Test basic functionality
111 | print(f"Movie Title: {movie_instance.title}")
112 | print(f"Movie Year: {movie_instance.year}")
113 | print(f"Movie Runtime: {movie_instance.runtime} minutes")
114 | print(f"Movie Rating: {movie_instance.rating}")
115 |
116 | print(f"Movie Details: {movie_instance.details}")
117 |
118 | # Test individual methods without JSON serialization
119 | print("\n--- Details (from details page) ---")
120 | details_from_details = movie_instance.get_details_from_details()
121 | print(json_dumps(details_from_details, indent=2))
122 |
123 | print("\n--- Watchers Stats ---")
124 | watchers_stats = movie_instance.get_watchers_stats()
125 | print(json_dumps(watchers_stats, indent=2))
126 |
127 |
128 |
--------------------------------------------------------------------------------
/letterboxdpy/films.py:
--------------------------------------------------------------------------------
1 | if __loader__.name == '__main__':
2 | import sys
3 | sys.path.append(sys.path[0] + '/..')
4 |
5 | from letterboxdpy.utils.utils_transform import get_ajax_url
6 | from letterboxdpy.core.decorators import assert_instance
7 | from letterboxdpy.core.scraper import parse_url
8 | from letterboxdpy.utils.movies_extractor import extract_movies_from_horizontal_list, extract_movies_from_vertical_list
9 |
10 | class Films:
11 | """Fetch movies from Letterboxd based on different URLs."""
12 | VERTICAL_MAX = 20*5
13 | HORIZONTAL_MAX = 12*6
14 |
15 | def __init__(self, url: str):
16 | """Initialize Films class with a URL."""
17 | self.url = url
18 | self.ajax_url = get_ajax_url(url)
19 | self._movies = None
20 |
21 | @property
22 | def movies(self) -> dict:
23 | """Get movies from the URL."""
24 | if self._movies is None:
25 | self._movies = self.get_movies()
26 | return self._movies
27 |
28 | @property
29 | def count(self) -> int:
30 | """Return the count of movies."""
31 | return len(self.movies)
32 |
33 | # Magic Methods
34 | def __len__(self) -> int:
35 | return self.count
36 |
37 | def __getitem__(self, key: str):
38 | return self.movies[key]
39 |
40 | def get_movies(self) -> dict:
41 | """Scrape and return a dictionary of movies from Letterboxd."""
42 | page = 1
43 | movies = {}
44 |
45 | while True:
46 | page_url = self.ajax_url + f"/page/{page}"
47 | dom = parse_url(page_url)
48 |
49 | if '.com/films/' in self.url:
50 | # https://letterboxd.com/films/popular/
51 | # https://letterboxd.com/films/like/v-for-vendetta/
52 | new_movies = extract_movies_from_horizontal_list(dom)
53 | movies |= new_movies
54 | if len(new_movies) < self.HORIZONTAL_MAX:
55 | break
56 | elif '.com/film/' in self.url:
57 | # https://letterboxd.com/film/the-shawshank-redemption/similar/
58 | new_movies = extract_movies_from_vertical_list(dom)
59 | movies |= new_movies
60 | if len(new_movies) < self.VERTICAL_MAX:
61 | break
62 |
63 | page += 1
64 |
65 | return movies
66 |
67 | class Future:
68 | ARGS = ['name', 'release', 'release-earliest', 'rating',
69 | 'rating-lowest', 'shortest', 'longest']
70 |
71 | def get_movies_with_args(args: list) -> dict:
72 | # by
73 | pass
74 |
75 | def get_with_language(language: str):
76 | pass
77 |
78 | def get_with_country(country: str):
79 | pass
80 |
81 | def get_with_year(year: int):
82 | pass
83 |
84 | def get_with_actor(actor: str):
85 | pass
86 |
87 | def get_with_director(director: str):
88 | pass
89 |
90 | def get_with_writer(writer: str):
91 | pass
92 |
93 | def get_upcoming_movies() -> dict:
94 | BASE_URL = "https://letterboxd.com/films/popular/this/week/upcoming/"
95 | return Films(BASE_URL).movies
96 |
97 | @assert_instance(int)
98 | def get_movies_by_decade(decade: int) -> dict:
99 | BASE_URL = f"https://letterboxd.com/films/ajax/popular/this/week/decade/{decade}s/"
100 | return Films(BASE_URL).movies
101 |
102 | @assert_instance(int)
103 | def get_movies_by_year(year: int) -> dict:
104 | BASE_URL = f"https://letterboxd.com/films/ajax/popular/this/week/year/{year}/"
105 | return Films(BASE_URL).movies
106 |
107 | @assert_instance(str)
108 | def get_movies_by_genre(genre: str) -> dict:
109 | """
110 | action, adventure, animation, comedy, crime, documentary,
111 | drama, family, fantasy, history, horror, music, mystery,
112 | romance, science-fiction, thriller, tv-movie, war, western
113 | """
114 | BASE_URL = f"https://letterboxd.com/films/ajax/genre/{genre}"
115 | return Films(BASE_URL).movies
116 |
117 | @assert_instance(str)
118 | def get_movies_by_service(service: str) -> dict:
119 | """
120 | netflix, hulu, prime-video, disney-plus, itv-play, apple-tv,
121 | youtube-premium, amazon-prime-video, hbo-max, peacock, ...
122 | """
123 | BASE_URL = f"https://letterboxd.com/films/popular/this/week/on/{service}/"
124 | return Films(BASE_URL).movies
125 |
126 | @assert_instance(str)
127 | def get_movies_by_theme(theme: str) -> dict:
128 | BASE_URL = f"https://letterboxd.com/films/ajax/theme/{theme}"
129 | return Films(BASE_URL).movies
130 |
131 | @assert_instance(str)
132 | def get_movies_by_nanogenre(nanogenre: str) -> dict:
133 | BASE_URL = f"https://letterboxd.com/films/ajax/nanogenre/{nanogenre}/"
134 | return Films(BASE_URL).movies
135 |
136 | @assert_instance(str)
137 | def get_movies_by_mini_theme(theme: str) -> dict:
138 | BASE_URL = f"https://letterboxd.com/films/ajax/mini-theme/{theme}"
139 | return Films(BASE_URL).movies
140 |
141 | def print_movies(movies, title=None, max_count=None):
142 | """Print movies in a formatted list."""
143 | if title:
144 | print(f"\n{title} -- ({len(movies)} movies)", end=f"\n{'*'*8*2*2}\n")
145 | for movie_no, (movie_id, movie) in enumerate(movies.items(), start=1):
146 | if max_count and movie_no > max_count:
147 | break
148 | print(f"{movie_no:<8} {movie_id:<8} {movie['name']}")
149 | print(f"{'*'*8*2*2}\n")
150 |
151 | if __name__ == "__main__":
152 | sys.stdout.reconfigure(encoding='utf-8')
153 |
154 | # Movies similar to "V for Vendetta" are retrieved and printed.
155 | # https://letterboxd.com/films/like/v-for-vendetta/
156 | from letterboxdpy.movie import Movie
157 | movie_instance = Movie("v-for-vendetta")
158 | movies = movie_instance.get_similar_movies()
159 | print_movies(movies, title=f"Similar to {movie_instance.slug}")
160 |
161 | # Popular movies from the year 2027 are retrieved and displayed.
162 | # https://letterboxd.com/films/popular/this/week/year/2027/
163 | year = 2027
164 | movies = get_movies_by_year(year)
165 | print_movies(movies, title=f"Movies from {year}")
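A minimal sketch of the module-level helpers above; the genre and service slugs are taken from the docstrings and may change on Letterboxd's side.

```python
from letterboxdpy.films import get_movies_by_genre, get_movies_by_service, print_movies

horror = get_movies_by_genre("horror")
print_movies(horror, title="Popular horror films", max_count=10)

on_netflix = get_movies_by_service("netflix")
print_movies(on_netflix, title="Popular on Netflix", max_count=10)
```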
--------------------------------------------------------------------------------
/examples/user_plot_statistics.py:
--------------------------------------------------------------------------------
1 | """
2 | Letterboxd User Statistics Plotter
3 |
4 | Creates visualizations of user movie watching patterns over time.
5 | - Monthly and daily viewing statistics by year
6 | - Multi-year comparison charts
7 | - Letterboxd-inspired styling
8 | """
9 |
10 | import matplotlib.pyplot as plt
11 | from letterboxdpy.user import User
12 | from letterboxdpy.constants.project import Colors, DAY_ABBREVIATIONS, MONTH_ABBREVIATIONS
13 | import argparse
14 | import sys
15 | from datetime import datetime
16 |
17 |
18 | class LetterboxdStatisticsPlotter:
19 | """Class for plotting Letterboxd user statistics."""
20 |
21 | def __init__(self, username: str):
22 | self.username = username
23 | self.stats_by_year = {}
24 |
25 | def gather_statistics_by_year(self, start_year: int, end_year: int) -> dict:
26 | """Fetch user statistics for each year."""
27 | self.stats_by_year = {}
28 | year_count = end_year - start_year + 1
29 |
30 | print(f"Fetching statistics for @{self.username}...")
31 | print(f"Processing {year_count} year(s): {start_year}-{end_year}")
32 |
33 | for year in range(start_year, end_year + 1):
34 | try:
35 | print(f"Fetching data for {year}...", end=" ")
36 | user = User(self.username)
37 | stats = user.get_wrapped(year)
38 | self.stats_by_year[year] = {
39 | "monthly": stats.get("months"),
40 | "daily": stats.get("days")
41 | }
42 | print("✓")
43 | except Exception as error:
44 |                 print(f"✗ ({error}; using empty data)")
45 | self.stats_by_year[year] = {
46 | "monthly": {i: 0 for i in range(1, 13)}, # 12 months with 0
47 | "daily": {i: 0 for i in range(1, 8)} # 7 days with 0
48 | }
49 |
50 | print(f"Data collection complete. Creating plot...")
51 | return self.stats_by_year
52 |
53 | def plot_statistics(self) -> None:
54 | if not self.stats_by_year:
55 | return
56 |
57 | def setup_figure():
58 | num_years = len(self.stats_by_year)
59 | if num_years == 1:
60 | fig, axes = plt.subplots(1, 2, figsize=(12, 4), facecolor=Colors.BG)
61 | return fig, [axes]
62 | else:
63 | fig, axes = plt.subplots(num_years, 2, figsize=(12, 3 * num_years), facecolor=Colors.BG)
64 | return fig, [axes] if num_years == 1 else axes
65 |
66 | def configure_figure(fig):
67 | years_range = f"{min(self.stats_by_year.keys())}-{max(self.stats_by_year.keys())}"
68 | fig.canvas.manager.set_window_title(f'Letterboxd Statistics - {self.username} ({years_range})')
69 | fig.suptitle(f'{self.username} - Movies Watched ({years_range})', fontsize=16, color='white')
70 |
71 | def style_axes(ax):
72 | ax.set_facecolor(Colors.BG)
73 | ax.tick_params(colors=Colors.TEXT)
74 | ax.spines['bottom'].set_color(Colors.TEXT)
75 | ax.spines['top'].set_visible(False)
76 | ax.spines['right'].set_visible(False)
77 | ax.spines['left'].set_color(Colors.TEXT)
78 |
79 | def get_axes_for_year(axes, i, num_years):
80 | if num_years == 1:
81 | return axes[0][0], axes[0][1]
82 | else:
83 | return axes[i, 0], axes[i, 1]
84 |
85 | days_labels = DAY_ABBREVIATIONS
86 | months_labels = MONTH_ABBREVIATIONS
87 |
88 | fig, axes = setup_figure()
89 | configure_figure(fig)
90 | num_years = len(self.stats_by_year)
91 |
92 | for i, (year, stats) in enumerate(self.stats_by_year.items()):
93 | daily_data = stats.get('daily', {})
94 | monthly_data = stats.get('monthly', {})
95 |
96 | daily_values = [daily_data.get(day, 0) for day in range(1, 8)]
97 | monthly_values = [monthly_data.get(month, 0) for month in range(1, 13)]
98 |
99 | ax_daily, ax_monthly = get_axes_for_year(axes, i, num_years)
100 |
101 | for ax in [ax_daily, ax_monthly]:
102 | style_axes(ax)
103 |
104 | ax_daily.bar(days_labels, daily_values, color=Colors.BLUE, alpha=0.85)
105 | ax_daily.set_title(f'{year} - Daily', color='white')
106 | ax_daily.set_ylabel('Movies', color=Colors.TEXT)
107 |
108 | ax_monthly.bar(months_labels, monthly_values, color=Colors.GREEN, alpha=0.85)
109 | ax_monthly.set_title(f'{year} - Monthly', color='white')
110 | ax_monthly.set_ylabel('Movies', color=Colors.TEXT)
111 |
112 | plt.tight_layout(rect=[0, 0, 1, 0.95])
113 | plt.show()
114 |
115 |
116 | def plot(self, start_year: int = None, end_year: int = None):
117 | """Gather statistics and create plot"""
118 | if start_year is None:
119 | current_year = datetime.now().year
120 | start_year = current_year - 1
121 | if end_year is None:
122 | end_year = datetime.now().year
123 |
124 | self.gather_statistics_by_year(start_year, end_year)
125 |
126 | if self.stats_by_year:
127 | self.plot_statistics()
128 | else:
129 | print(f"No statistics found for user: {self.username}")
130 |
131 | def run(self):
132 | """Main program loop"""
133 | sys.stdout.reconfigure(encoding="utf-8")
134 | parser = argparse.ArgumentParser(description="Visualize Letterboxd user statistics")
135 | parser.add_argument("--user", help="Letterboxd username")
136 | current_year = datetime.now().year
137 | parser.add_argument("--start-year", type=int, default=current_year-1, help=f"Start year (default: {current_year-1})")
138 | parser.add_argument("--end-year", type=int, default=current_year, help=f"End year (default: {current_year})")
139 | args = parser.parse_args()
140 |
141 | username = args.user
142 | if not username or not username.strip():
143 | username = input("Enter a Letterboxd username: ").strip()
144 |
145 | self.username = username
146 | self.plot(args.start_year, args.end_year)
147 |
148 |
149 | def main():
150 | """Legacy function compatibility"""
151 | LetterboxdStatisticsPlotter("").run()
152 |
153 |
154 | if __name__ == "__main__":
155 | main()
156 |
--------------------------------------------------------------------------------
/examples/user_rating_plot.py:
--------------------------------------------------------------------------------
1 | """
2 | Letterboxd Ratings Histogram
3 |
4 | Recreates the ratings distribution section of a Letterboxd profile with a clean, professional layout.
5 | - Half-star tick labels (½, ★, ★½, …, ★★★★★)
6 | - Shows username, total ratings, average, and most given rating
7 | - Letterboxd-inspired color scheme
8 | """
9 |
10 | import argparse
11 | import sys
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 |
15 | from letterboxdpy.user import User
16 | from letterboxdpy.utils.utils_terminal import get_input
17 | from letterboxdpy.utils.utils_validators import is_whitespace_or_empty
18 | from letterboxdpy.constants.project import Colors
19 |
20 |
21 | class LetterboxdRatingPlotter:
22 | def __init__(self, username: str = None):
23 | self.username = username
24 |
25 | def create_plot(self, ratings: dict):
26 | """Create Letterboxd-style rating distribution plot with enhancements"""
27 | rating_positions = np.arange(0.5, 5.5, 0.5)
28 | rating_counts = np.array([ratings.get(rating, 0) for rating in rating_positions])
29 | total_ratings = int(rating_counts.sum())
30 | average_rating = round(float((rating_positions * rating_counts).sum() / total_ratings), 2) if total_ratings else 0.0
31 | most_given_rating = float(rating_positions[rating_counts.argmax()]) if total_ratings else 0.5
32 |
33 | # Nested helpers for readability
34 | def draw_header_and_stats(axis, stats_axis, total_count: int) -> None:
35 | axis.text(0.02, 0.98, "R A T I N G S", transform=axis.transAxes,
36 | fontsize=12, color=Colors.TEXT, weight='bold', va='top', family='monospace')
37 | axis.text(0.98, 0.98, f"{total_count:,}", transform=axis.transAxes,
38 | fontsize=12, color=Colors.TEXT, weight='bold', va='top', ha='right')
39 | axis.text(0.02, 0.92, f"@{self.username}", transform=axis.transAxes,
40 | fontsize=11, color='white', weight='bold', va='top')
41 | stats_axis.text(0.5, 0.5, f"Average: {average_rating}★ • Total: {total_ratings:,} • Most Given: {most_given_rating}★",
42 | ha='center', va='center', fontsize=11, color=Colors.TEXT, weight='bold')
43 |
44 | def get_star_labels():
45 | positions = np.arange(0.5, 5.5, 0.5)
46 | labels = [
47 | ("½★" if r == 0.5 else ("★" * int(r) + "½" if r % 1 == 0.5 else "★" * int(r)))
48 | for r in positions
49 | ]
50 | return positions, labels
51 |
52 | def label_bars(axis, bars_, counts_) -> None:
53 | if len(counts_) == 0:
54 | return
55 | max_count = max(counts_)
56 | for bar, count in zip(bars_, counts_):
57 | if count == max_count:
58 | bar.set_color(Colors.GREEN)
59 | bar.set_alpha(1.0)
60 | if count > 0:
61 | axis.text(bar.get_x() + bar.get_width() / 2, count + max_count * 0.01,
62 | str(int(count)), ha="center", va="bottom", fontsize=8,
63 | color=Colors.TEXT, alpha=0.9)
64 |
65 | def style_axes(axis, counts_) -> None:
66 | axis.set_xlim(0.25, 5.25)
67 | max_count = max(counts_) if len(counts_) else 0
68 | axis.set_ylim(0, max_count * 1.12)
69 | tick_positions, tick_labels = get_star_labels()
70 | axis.set_xticks(tick_positions)
71 | axis.set_xticklabels(tick_labels, fontsize=9, color=Colors.TEXT)
72 | axis.set_yticks([])
73 | for spine in axis.spines.values():
74 | spine.set_visible(False)
75 | axis.grid(True, axis='y', alpha=0.1, color=Colors.TEXT, linestyle='-')
76 |
77 | # Create Letterboxd-style plot
78 | fig, (ax, ax_stats) = plt.subplots(
79 | 2, 1,
80 | figsize=(12, 8),
81 | gridspec_kw={"height_ratios": [0.86, 0.14], "hspace": 0},
82 | facecolor=Colors.BG,
83 | )
84 | for a in (ax, ax_stats):
85 | a.set_facecolor(Colors.BG)
86 | ax_stats.axis('off')
87 | fig.canvas.manager.set_window_title(f"RATINGS - {self.username}")
88 |
89 | bars = ax.bar(rating_positions, rating_counts, width=0.45, color=Colors.BLUE, alpha=0.85)
90 |
91 | # Header and bottom stats
92 | draw_header_and_stats(ax, ax_stats, total_ratings)
93 |
94 | # Bar labels and highlight
95 | label_bars(ax, bars, rating_counts)
96 |
97 | # Axes styling and ticks
98 | style_axes(ax, rating_counts)
99 |
100 | # Layout handled by GridSpec; light tightening only
101 | plt.tight_layout()
102 | plt.show()
103 |
104 | def fetch_ratings(self, username: str = None) -> dict:
105 | """Fetch user ratings from Letterboxd"""
106 | username = username or self.username
107 | ratings = {r: 0 for r in np.arange(0.5, 5.5, 0.5)}
108 |
109 | print(f"Fetching ratings for @{username}...")
110 | movies = User(username).get_films()["movies"]
111 | print(f"Processing {len(movies)} rated movies...")
112 |
113 | for movie in movies.values():
114 | if rating := movie.get("rating"):
115 | ratings[rating/2] += 1
116 |
117 | total_ratings = sum(ratings.values())
118 | print(f"Found {total_ratings} ratings. Creating plot...")
119 | return ratings
120 |
121 | def plot(self, username: str = None):
122 | """Fetch ratings and create plot"""
123 | if username:
124 | self.username = username
125 |
126 | ratings = self.fetch_ratings()
127 | if sum(ratings.values()) > 0:
128 | self.create_plot(ratings)
129 | else:
130 | print(f"No ratings found for user: {self.username}")
131 |
132 | def run(self):
133 | """Main program loop"""
134 | sys.stdout.reconfigure(encoding="utf-8")
135 | parser = argparse.ArgumentParser(description="Visualize Letterboxd user rating distribution.")
136 | parser.add_argument("--user", help="Letterboxd username to analyze")
137 |
138 | args = parser.parse_args()
139 |
140 | username = None if is_whitespace_or_empty(args.user) else args.user
141 | if not username:
142 | username = get_input("Enter Letterboxd username: ")
143 | self.plot(username)
144 |
145 |
146 | def main():
147 | """Legacy function compatibility"""
148 | LetterboxdRatingPlotter().run()
149 |
150 | if __name__ == "__main__":
151 | main()
--------------------------------------------------------------------------------
/examples/exports/users/nmcassa/films_by_rating.json:
--------------------------------------------------------------------------------
1 | {
2 | "movies": {
3 | "walle": {
4 | "name": "WALL\u00b7E",
5 | "id": "45994",
6 | "rating": 10,
7 | "year": 2008,
8 | "liked": true
9 | },
10 | "the-game": {
11 | "name": "The Game",
12 | "id": "50168",
13 | "rating": 10,
14 | "year": 1997,
15 | "liked": true
16 | },
17 | "interstellar": {
18 | "name": "Interstellar",
19 | "id": "117621",
20 | "rating": 10,
21 | "year": 2014,
22 | "liked": false
23 | },
24 | "goodfellas": {
25 | "name": "GoodFellas",
26 | "id": "51383",
27 | "rating": 10,
28 | "year": 1990,
29 | "liked": true
30 | },
31 | "children-of-men": {
32 | "name": "Children of Men",
33 | "id": "46911",
34 | "rating": 10,
35 | "year": 2006,
36 | "liked": true
37 | },
38 | "whiplash-2014": {
39 | "name": "Whiplash",
40 | "id": "171384",
41 | "rating": 10,
42 | "year": 2014,
43 | "liked": true
44 | },
45 | "dune-part-two": {
46 | "name": "Dune: Part Two",
47 | "id": "617443",
48 | "rating": 10,
49 | "year": 2024,
50 | "liked": false
51 | },
52 | "cure": {
53 | "name": "Cure",
54 | "id": "28195",
55 | "rating": 10,
56 | "year": 1997,
57 | "liked": true
58 | },
59 | "society-of-the-snow": {
60 | "name": "Society of the Snow",
61 | "id": "813840",
62 | "rating": 10,
63 | "year": 2023,
64 | "liked": true
65 | },
66 | "the-conversation": {
67 | "name": "The Conversation",
68 | "id": "51529",
69 | "rating": 10,
70 | "year": 1974,
71 | "liked": true
72 | },
73 | "godzilla-minus-one": {
74 | "name": "Godzilla Minus One",
75 | "id": "845706",
76 | "rating": 10,
77 | "year": 2023,
78 | "liked": true
79 | },
80 | "nausicaa-of-the-valley-of-the-wind": {
81 | "name": "Nausica\u00e4 of the Valley of the Wind",
82 | "id": "51969",
83 | "rating": 10,
84 | "year": 1984,
85 | "liked": true
86 | },
87 | "inglourious-basterds": {
88 | "name": "Inglourious Basterds",
89 | "id": "41352",
90 | "rating": 10,
91 | "year": 2009,
92 | "liked": true
93 | },
94 | "house": {
95 | "name": "House",
96 | "id": "35925",
97 | "rating": 10,
98 | "year": 1977,
99 | "liked": false
100 | },
101 | "the-big-lebowski": {
102 | "name": "The Big Lebowski",
103 | "id": "51935",
104 | "rating": 10,
105 | "year": 1998,
106 | "liked": true
107 | },
108 | "ponyo": {
109 | "name": "Ponyo",
110 | "id": "44594",
111 | "rating": 10,
112 | "year": 2008,
113 | "liked": true
114 | },
115 | "rocky": {
116 | "name": "Rocky",
117 | "id": "51090",
118 | "rating": 10,
119 | "year": 1976,
120 | "liked": true
121 | },
122 | "world-of-tomorrow": {
123 | "name": "World of Tomorrow",
124 | "id": "230808",
125 | "rating": 10,
126 | "year": 2015,
127 | "liked": true
128 | },
129 | "the-thing": {
130 | "name": "The Thing",
131 | "id": "51155",
132 | "rating": 10,
133 | "year": 1982,
134 | "liked": false
135 | },
136 | "kikis-delivery-service": {
137 | "name": "Kiki's Delivery Service",
138 | "id": "41360",
139 | "rating": 10,
140 | "year": 1989,
141 | "liked": true
142 | },
143 | "nope": {
144 | "name": "Nope",
145 | "id": "682547",
146 | "rating": 10,
147 | "year": 2022,
148 | "liked": true
149 | },
150 | "ratatouille": {
151 | "name": "Ratatouille",
152 | "id": "50558",
153 | "rating": 10,
154 | "year": 2007,
155 | "liked": true
156 | },
157 | "parasite-2019": {
158 | "name": "Parasite",
159 | "id": "426406",
160 | "rating": 10,
161 | "year": 2019,
162 | "liked": true
163 | },
164 | "everything-everywhere-all-at-once": {
165 | "name": "Everything Everywhere All at Once",
166 | "id": "474474",
167 | "rating": 10,
168 | "year": 2022,
169 | "liked": true
170 | },
171 | "the-king-of-comedy": {
172 | "name": "The King of Comedy",
173 | "id": "51794",
174 | "rating": 10,
175 | "year": 1982,
176 | "liked": true
177 | },
178 | "james-acaster-repertoire": {
179 | "name": "James Acaster: Repertoire",
180 | "id": "445120",
181 | "rating": 10,
182 | "year": 2018,
183 | "liked": true
184 | },
185 | "bo-burnham-make-happy": {
186 | "name": "Bo Burnham: Make Happy",
187 | "id": "335216",
188 | "rating": 10,
189 | "year": 2016,
190 | "liked": true
191 | },
192 | "the-truman-show": {
193 | "name": "The Truman Show",
194 | "id": "27256",
195 | "rating": 10,
196 | "year": 1998,
197 | "liked": true
198 | },
199 | "uncut-gems": {
200 | "name": "Uncut Gems",
201 | "id": "404266",
202 | "rating": 10,
203 | "year": 2019,
204 | "liked": true
205 | },
206 | "superbad": {
207 | "name": "Superbad",
208 | "id": "47776",
209 | "rating": 10,
210 | "year": 2007,
211 | "liked": true
212 | },
213 | "scott-pilgrim-vs-the-world": {
214 | "name": "Scott Pilgrim vs. the World",
215 | "id": "37833",
216 | "rating": 10,
217 | "year": 2010,
218 | "liked": true
219 | },
220 | "spider-man-into-the-spider-verse": {
221 | "name": "Spider-Man: Into the Spider-Verse",
222 | "id": "251943",
223 | "rating": 10,
224 | "year": 2018,
225 | "liked": true
226 | },
227 | "baby-driver": {
228 | "name": "Baby Driver",
229 | "id": "268950",
230 | "rating": 10,
231 | "year": 2017,
232 | "liked": true
233 | },
234 | "the-grand-budapest-hotel": {
235 | "name": "The Grand Budapest Hotel",
236 | "id": "95113",
237 | "rating": 10,
238 | "year": 2014,
239 | "liked": true
240 | },
241 | "moonrise-kingdom": {
242 | "name": "Moonrise Kingdom",
243 | "id": "70105",
244 | "rating": 10,
245 | "year": 2012,
246 | "liked": true
247 | },
248 | "isle-of-dogs-2018": {
249 | "name": "Isle of Dogs",
250 | "id": "333448",
251 | "rating": 10,
252 | "year": 2018,
253 | "liked": true
254 | }
255 | },
256 | "count": 36,
257 | "liked_count": 32,
258 | "rating_count": 36,
259 | "liked_percentage": 88.89,
260 | "rating_percentage": 100.0,
261 | "rating_average": 10.0
262 | }
--------------------------------------------------------------------------------
/letterboxdpy/pages/user_network.py:
--------------------------------------------------------------------------------
1 | import re
2 | from letterboxdpy.core.scraper import parse_url
3 | from letterboxdpy.constants.project import DOMAIN
4 | from letterboxdpy.core.exceptions import PageFetchError
5 | from letterboxdpy.avatar import Avatar
5 |
6 |
7 | class UserNetwork:
8 |
9 | def __init__(self, username: str) -> None:
10 | self.username = username
11 | self.following_url = f"{DOMAIN}/{self.username}/following"
12 | self.followers_url = f"{DOMAIN}/{self.username}/followers"
13 |
14 | def get_following(self) -> dict: return extract_network(self.username, 'following')
15 | def get_followers(self) -> dict: return extract_network(self.username, 'followers')
16 |
17 | def extract_network(username: str, section: str) -> dict:
18 | """
19 | Fetches the specified network section ('followers' or 'following') for the user.
20 | """
21 | assert section in ['followers', 'following'], "Section must be either 'followers' or 'following'"
22 |
23 | BASE_URL = f"{DOMAIN}/{username}/{section}"
24 | PERSONS_PER_PAGE = 25
25 |
26 | def fetch_page(page_num: int):
27 | """Fetches a single page of the user's network section."""
28 | try:
29 | return parse_url(f"{BASE_URL}/page/{page_num}")
30 | except Exception as e:
31 | raise PageFetchError(f"Failed to fetch page {page_num}: {e}") from e
32 |
33 | def extract_persons(dom) -> dict:
34 | """Extracts persons from a DOM object and returns them as a dictionary."""
35 | persons_dict = {}
36 |
37 | # Find the member table
38 | member_table = dom.find('table', class_='member-table')
39 | if not member_table:
40 | return persons_dict
41 |
42 | # Find all user rows
43 | user_rows = member_table.find_all('tr')
44 |
45 | for row in user_rows:
46 | # Get the person summary div
47 | person_summary = row.find('div', class_='person-summary')
48 | if not person_summary:
49 | continue
50 |
51 | # Extract avatar info
52 | avatar_link = person_summary.find('a', class_='avatar')
53 | if not avatar_link:
54 | continue
55 |
56 | # Extract basic info
57 | username = avatar_link['href'].replace('/', '')
58 | avatar_img = avatar_link.find('img')
59 | display_name = avatar_img['alt'] if avatar_img else username
60 | avatar_url = avatar_img['src'] if avatar_img else ''
61 |
62 | # Process avatar with Avatar class
63 | avatar_data = Avatar(avatar_url).upscaled_data if avatar_url else {'exists': False, 'upscaled': False, 'url': ''}
64 |
65 | # Extract name link
66 | name_link = person_summary.find('a', class_='name')
67 | if name_link:
68 | display_name = name_link.get_text(strip=True)
69 |
70 | # Extract metadata (followers, following)
71 | metadata = person_summary.find('small', class_='metadata')
72 | followers_count = None
73 | following_count = None
74 |
75 | if metadata:
76 | followers_link = metadata.find('a', href=lambda x: x and 'followers' in x)
77 | if followers_link:
78 | followers_text = followers_link.get_text(strip=True)
79 | # Extract number from "5 followers"
81 | followers_match = re.search(r'(\d+)', followers_text)
82 | if followers_match:
83 | followers_count = int(followers_match.group(1))
84 |
85 | following_link = metadata.find('a', href=lambda x: x and 'following' in x)
86 | if following_link:
87 | following_text = following_link.get_text(strip=True)
88 | # Extract number from "following 6"
89 | following_match = re.search(r'(\d+)', following_text)
90 | if following_match:
91 | following_count = int(following_match.group(1))
92 |
93 | # Extract stats from other columns
94 | watched_cell = row.find('td', class_='col-watched')
95 | watched_count = None
96 | if watched_cell:
97 | watched_link = watched_cell.find('a')
98 | if watched_link:
99 | watched_text = watched_link.get_text(strip=True)
101 | watched_match = re.search(r'(\d+)', watched_text)
102 | if watched_match:
103 | watched_count = int(watched_match.group(1))
104 |
105 | lists_cell = row.find('td', class_='col-lists')
106 | lists_count = None
107 | if lists_cell:
108 | lists_link = lists_cell.find('a')
109 | if lists_link:
110 | lists_text = lists_link.get_text(strip=True)
112 | lists_match = re.search(r'(\d+)', lists_text)
113 | if lists_match:
114 | lists_count = int(lists_match.group(1))
115 |
116 | likes_cell = row.find('td', class_='col-likes')
117 | likes_count = None
118 | if likes_cell:
119 | likes_link = likes_cell.find('a')
120 | if likes_link:
121 | likes_text = likes_link.get_text(strip=True)
123 | likes_match = re.search(r'(\d+)', likes_text)
124 | if likes_match:
125 | likes_count = int(likes_match.group(1))
126 |
127 | persons_dict[username] = {
128 | 'username': username,
129 | 'name': display_name,
130 | 'url': f"{DOMAIN}/{username}",
131 | 'avatar': avatar_data,
132 | 'followers': followers_count,
133 | 'following': following_count,
134 | 'watched': watched_count,
135 | 'lists': lists_count,
136 | 'likes': likes_count
137 | }
138 |
139 | return persons_dict
140 |
141 | users_list = {}
142 | page_num = 1
143 |
144 | while True:
145 | dom = fetch_page(page_num)
146 | persons = extract_persons(dom)
147 | users_list.update(persons)
148 |
149 | # Break if the number of persons fetched is less than a full page (end of list)
150 |         if len(persons) < PERSONS_PER_PAGE:
151 | break
152 |
153 | page_num += 1
154 |
155 | return users_list
--------------------------------------------------------------------------------
/letterboxdpy/user.py:
--------------------------------------------------------------------------------
1 | if __loader__.name == '__main__':
2 | import sys
3 | sys.path.append(sys.path[0] + '/..')
4 |
5 | import re
6 | from json import (
7 | dumps as json_dumps,
8 | loads as json_loads
9 | )
10 |
11 | from letterboxdpy.core.encoder import SecretsEncoder
12 | from letterboxdpy.constants.project import CURRENT_YEAR, CURRENT_MONTH, CURRENT_DAY
13 | from letterboxdpy.list import List as LetterboxdList
14 | from letterboxdpy.pages import (
15 | user_activity,
16 | user_diary,
17 | user_films,
18 | user_likes,
19 | user_lists,
20 | user_network,
21 | user_profile,
22 | user_reviews,
23 | user_tags,
24 | user_watchlist
25 | )
26 |
27 |
28 | class User:
29 |
30 | class UserPages:
31 |
32 | def __init__(self, username: str) -> None:
33 | self.activity = user_activity.UserActivity(username)
34 | self.diary = user_diary.UserDiary(username)
35 | self.films = user_films.UserFilms(username)
36 | self.likes = user_likes.UserLikes(username)
37 | self.lists = user_lists.UserLists(username)
38 | self.network = user_network.UserNetwork(username)
39 | self.profile = user_profile.UserProfile(username)
40 | self.reviews = user_reviews.UserReviews(username)
41 | self.tags = user_tags.UserTags(username)
42 | self.watchlist = user_watchlist.UserWatchlist(username)
43 |
44 | def __init__(self, username: str) -> None:
45 | assert re.match("^[A-Za-z0-9_]+$", username), "Invalid username"
46 |
47 | self.username = username.lower()
48 | self.pages = self.UserPages(self.username)
49 |
50 | self.url = self.get_url()
51 | self.id = self.get_id()
52 | self.is_hq = self.get_hq_status()
53 | self.display_name = self.get_display_name()
54 | self.bio = self.get_bio()
55 | self.location = self.get_location()
56 | self.website = self.get_website()
57 | self.watchlist_length = self.get_watchlist_length()
58 | self.stats = self.get_stats()
59 | self.favorites = self.get_favorites()
60 | self.avatar = self.get_avatar()
61 | self.recent = {
62 | 'watchlist': self.get_watchlist_recent(),
63 | 'diary': self.get_diary_recent()
64 | }
65 |
66 | def __str__(self) -> str:
67 | return json_dumps(self, indent=2, cls=SecretsEncoder, secrets=['pages'])
68 |
69 | def jsonify(self) -> dict:
70 | return json_loads(self.__str__())
71 |
72 | def get_activity(self) -> dict:
73 | return self.pages.activity.get_activity()
74 | def get_activity_following(self) -> dict:
75 | return self.pages.activity.get_activity_following()
76 |
77 | def get_diary(self, year: int = None, month: int = None, day: int = None, page: int = None) -> dict:
78 | return self.pages.diary.get_diary(year, month, day, page)
79 | def get_diary_year(self, year: int = CURRENT_YEAR) -> dict:
80 | return self.pages.diary.get_year(year)
81 | def get_diary_month(self, year: int = CURRENT_YEAR, month: int = CURRENT_MONTH) -> dict:
82 | return self.pages.diary.get_month(year, month)
83 | def get_diary_day(self, year: int = CURRENT_YEAR, month: int = CURRENT_MONTH, day: int = CURRENT_DAY) -> dict:
84 | return self.pages.diary.get_day(year, month, day)
85 | def get_wrapped(self, year: int = CURRENT_YEAR) -> dict:
86 | return self.pages.diary.get_wrapped(year)
87 |
88 | def get_films(self) -> dict:
89 | return self.pages.films.get_films()
90 | def get_films_by_rating(self, rating: float | int) -> dict:
91 | return self.pages.films.get_films_rated(rating)
92 | def get_films_not_rated(self) -> dict:
93 | return self.pages.films.get_films_not_rated()
94 | def get_genre_info(self) -> dict:
95 | return self.pages.films.get_genre_info()
96 |
97 | def get_liked_films(self) -> dict:
98 | return self.pages.likes.get_liked_films()
99 | def get_liked_reviews(self) -> dict:
100 | return self.pages.likes.get_liked_reviews()
101 | def get_liked_lists(self) -> dict:
102 | return self.pages.likes.get_liked_lists()
103 |
104 | def get_list(self, slug: str) -> LetterboxdList:
105 | return LetterboxdList(self.username, slug)
106 |
107 | def get_lists(self) -> dict:
108 | return self.pages.lists.get_lists()
109 |
110 | def get_following(self) -> dict:
111 | return self.pages.network.get_following()
112 | def get_followers(self) -> dict:
113 | return self.pages.network.get_followers()
114 |
115 | def get_url(self) -> str:
116 | return self.pages.profile.url
117 | def get_id(self) -> str:
118 | return self.pages.profile.get_id()
119 | def get_hq_status(self) -> bool:
120 | return self.pages.profile.get_hq_status()
121 | def get_display_name(self) -> str:
122 | return self.pages.profile.get_display_name()
123 | def get_bio(self) -> str:
124 | return self.pages.profile.get_bio()
125 | def get_location(self) -> str:
126 | return self.pages.profile.get_location()
127 | def get_website(self) -> str:
128 | return self.pages.profile.get_website()
129 | def get_watchlist_length(self) -> int:
130 | return self.pages.profile.get_watchlist_length()
131 | def get_stats(self) -> dict:
132 | return self.pages.profile.get_stats()
133 | def get_favorites(self) -> dict:
134 | return self.pages.profile.get_favorites()
135 | def get_avatar(self) -> str:
136 | return self.pages.profile.get_avatar()
137 | def get_watchlist_recent(self) -> dict:
138 | return self.pages.profile.get_watchlist_recent()
139 | def get_diary_recent(self) -> dict:
140 | return self.pages.profile.get_diary_recent()
141 |
142 | def get_reviews(self) -> dict:
143 | return self.pages.reviews.get_reviews()
144 |
145 | def get_user_tags(self) -> dict:
146 | return self.pages.tags.get_user_tags()
147 |
148 | def get_watchlist_count(self) -> int:
149 | return self.pages.watchlist.get_count()
150 | def get_watchlist_movies(self) -> dict:
151 | return self.pages.watchlist.get_movies()
152 |     def get_watchlist(self, filters: dict = None) -> dict:
153 | return self.pages.watchlist.get_watchlist(filters)
154 |
155 | if __name__ == "__main__":
156 | import argparse
157 | import sys
158 |
159 | # Reconfigure stdout encoding to UTF-8 to support non-ASCII characters
160 | sys.stdout.reconfigure(encoding='utf-8')
161 |
162 | # Parse command-line arguments
163 | parser = argparse.ArgumentParser()
164 | parser.add_argument('--user', dest="user", help="Username to gather stats on")
165 | args = parser.parse_args()
166 |
167 | # Extract username from command-line arguments or prompt user for input
168 | username = args.user or ''
169 |
170 | # Keep prompting user until a valid username is provided
171 | while not len(username.strip()):
172 | username = input('Enter username: ')
173 |
174 | # Display the username being processed
175 | print(f"Processing username: {username}")
176 |
177 | # Initialize a User instance with the provided username
178 | user_instance = User(username)
179 |
180 | # Print user instance(profile) data
181 | print(user_instance)
182 |
183 | # Iterate over user's film data and print each movie
184 | for item in user_instance.get_films()['movies'].items():
185 | print(item)
186 |
--------------------------------------------------------------------------------
/letterboxdpy/pages/user_films.py:
--------------------------------------------------------------------------------
1 | from letterboxdpy.core.scraper import parse_url
2 | from letterboxdpy.constants.project import DOMAIN, GENRES
3 |
4 |
5 | class UserFilms:
6 |
7 | def __init__(self, username: str) -> None:
8 | self.username = username
9 | self.url = f"{DOMAIN}/{self.username}/films"
10 |
11 | def get_films(self) -> dict:
12 | return extract_user_films(self.url)
13 |
14 | def get_films_rated(self, rating: float | int) -> dict:
15 | assert rating in [0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5], "Invalid rating"
16 | url = f"{self.url}/rated/{rating}/by/date"
17 | return extract_user_films(url)
18 |
19 | def get_films_not_rated(self) -> dict:
20 | url = f"{self.url}/rated/none/by/date"
21 | return extract_user_films(url)
22 |
23 | def get_genre_info(self):
24 | return extract_user_genre_info(self.username)
25 |
26 | def extract_user_films(url: str) -> dict:
27 | """Extracts user films and their details from the given URL"""
28 | FILMS_PER_PAGE = 12 * 6
29 |
30 | def process_page(page_number: int) -> dict:
31 | """Fetches and processes a page of user films."""
32 | dom = parse_url(f"{url}/page/{page_number}/")
33 | return extract_movies_from_user_watched(dom)
34 |
35 | def calculate_statistics(movies: dict) -> dict:
36 | """Calculates film statistics including liked and rating percentages."""
37 | liked_count = sum(movie['liked'] for movie in movies.values())
38 | rating_count = len([movie['rating'] for movie in movies.values() if movie['rating'] is not None])
39 |
40 | count = len(movies)
41 | liked_percentage = round(liked_count / count * 100, 2) if liked_count else 0.0
42 | rating_percentage = 0.0
43 | rating_average = 0.0
44 |
45 | if rating_count:
46 | ratings = [movie['rating'] for movie in movies.values() if movie['rating']]
47 | rating_percentage = round(rating_count / count * 100, 2)
48 | rating_average = round(sum(ratings) / rating_count, 2)
49 |
50 | return {
51 | 'count': count,
52 | 'liked_count': liked_count,
53 | 'rating_count': rating_count,
54 | 'liked_percentage': liked_percentage,
55 | 'rating_percentage': rating_percentage,
56 | 'rating_average': rating_average
57 | }
58 |
59 | movie_list = {'movies': {}}
60 | page = 0
61 |
62 | while True:
63 | page += 1
64 | movies = process_page(page)
65 | movie_list['movies'] |= movies
66 |
67 | if len(movies) < FILMS_PER_PAGE:
68 | stats = calculate_statistics(movie_list['movies'])
69 | movie_list.update(stats)
70 | break
71 |
72 | return movie_list
73 |
74 | def extract_movies_from_user_watched(dom, max=12*6) -> dict:
75 | """
76 | supports user watched films section
77 | """
78 | def _extract_rating_and_like_status(container):
79 | """Parse rating and like status from viewing data spans."""
80 |
81 | def _extract_rating_from_span(span):
82 | """Extract rating from span using pattern matching."""
83 | classes = span.get('class', [])
84 |
85 | # Skip if no rating-related classes found
86 | if not any('rating' in cls or 'rated-' in cls for cls in classes):
87 | return None
88 |
89 | # Define extraction patterns (modern first, legacy as fallback)
90 | patterns = [
91 | lambda cls: cls.startswith('rated-') and cls.split('-')[-1], # rated-X
92 | lambda cls: 'rating' in cls and '-' in cls and cls != 'rating' and cls.split('-')[-1] # rating-color-X
93 | ]
94 |
95 | for pattern in patterns:
96 | for cls in classes:
97 | try:
98 | rating_str = pattern(cls)
99 | if rating_str and rating_str.isdigit():
100 | return int(rating_str)
101 | except (ValueError, IndexError, AttributeError):
102 | continue
103 |
104 | return None
105 |
106 | def _extract_like_status(span):
107 | """Extract like status from span class."""
108 | return any('like' in cls for cls in span.get('class', []))
109 |
110 | poster_viewingdata = container.find("p", {"class": "poster-viewingdata"}) or container.p
111 | rating = None
112 | liked = False
113 |
114 | if poster_viewingdata and poster_viewingdata.span:
115 | for span in poster_viewingdata.find_all("span"):
116 | if rating is None:
117 | rating = _extract_rating_from_span(span)
118 | if not liked:
119 | liked = _extract_like_status(span)
120 |
121 | return rating, liked
122 |
123 | def _get_movie_details(container):
124 | """Extract complete movie information including rating and like status."""
125 | from letterboxdpy.utils.utils_string import extract_year_from_movie_name, clean_movie_name
126 |
127 | react_component = container.find("div", {"class": "react-component"}) or container.div
128 | if not react_component or 'data-film-id' not in react_component.attrs:
129 | return None
130 |
131 | rating, liked = _extract_rating_and_like_status(container)
132 |
133 | movie_slug = react_component.get('data-item-slug') or react_component.get('data-film-slug')
134 | movie_id = react_component['data-film-id']
135 | raw_name = react_component.get('data-item-name') or react_component.img['alt']
136 | movie_name = clean_movie_name(raw_name)
137 | year = extract_year_from_movie_name(raw_name)
138 |
139 | return movie_slug, {
140 | 'name': movie_name,
141 | "id": movie_id,
142 | "rating": rating,
143 | "year": year,
144 | "liked": liked
145 | }
146 |
147 | def _find_movie_containers(dom):
148 | """Find movie containers using modern structure with legacy fallback."""
149 | container_selectors = [
150 | ("li", {"class": "griditem"}), # Modern React structure
151 | ("li", {"class": "poster-container"}), # Legacy structure
152 | ("li", {"class": "posteritem"}) # Liked films structure
153 | ]
154 |
155 | for tag, attrs in container_selectors:
156 | containers = dom.find_all(tag, attrs)
157 | if containers:
158 | return containers
159 | return []
160 |
161 | containers = _find_movie_containers(dom)
162 |
163 | movies = {}
164 | for container in containers:
165 | if len(movies) >= max:
166 | break
167 |
168 | movie_details = _get_movie_details(container)
169 | if movie_details:
170 | slug, data = movie_details
171 | movies[slug] = data
172 |
173 | return movies
174 |
175 | def extract_user_genre_info(username: str) -> dict:
176 | ret = {}
177 | for genre in GENRES:
178 | dom = parse_url(f"{DOMAIN}/{username}/films/genre/{genre}/")
179 | data = dom.find("span", {"class": ["replace-if-you"], })
180 | data = data.next_sibling.replace(',', '')
181 | try:
182 | ret[genre] = [int(s) for s in data.split() if s.isdigit()][0]
183 | except IndexError:
184 | ret[genre] = 0
185 |
186 | return ret
--------------------------------------------------------------------------------
/letterboxdpy/pages/user_list.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from letterboxdpy.core.scraper import parse_url
4 | from letterboxdpy.constants.project import DOMAIN
5 | from letterboxdpy.utils.utils_parser import get_meta_content, get_movie_count_from_meta, get_body_content
6 | from letterboxdpy.utils.utils_url import check_url_match
7 | from letterboxdpy.utils.movies_extractor import extract_movies_from_vertical_list
8 | from letterboxdpy.utils.date_utils import DateUtils
9 |
10 |
11 | class ListMetaData(dict):
12 | """Type definition for list metadata"""
13 | pass
14 |
15 |
16 | class UserList:
17 | LIST_PATTERN = f'{DOMAIN}/%s/list/%s'
18 | LIST_ITEMS_PER_PAGE = 12*5
19 |
20 | def __init__(self, username: str, slug: str) -> None:
21 | assert re.match("^[A-Za-z0-9_]+$", username), "Invalid author"
22 |
23 | self.username = username
24 | self.slug = slug
25 | self.url = self.LIST_PATTERN % (username, slug)
26 | self.dom = parse_url(self.url)
27 |
28 | def __str__(self) -> str:
29 | return f"Not printable object of type: {self.__class__.__name__}"
30 |
31 | def get_title(self) -> str: return extract_title(self.dom)
32 | def get_author(self) -> str: return extract_author(self.dom)
33 | def get_description(self) -> str: return extract_description(self.dom)
34 |     def get_date_created(self) -> str | None: return extract_date_created(self.dom)
35 |     def get_date_updated(self) -> str | None: return extract_date_updated(self.dom)
36 | def get_tags(self) -> list: return extract_tags(self.dom)
37 | def get_movies(self) -> dict: return extract_movies(self.url, self.LIST_ITEMS_PER_PAGE)
38 | def get_count(self) -> int: return extract_count(self.dom)
39 | def get_list_id(self) -> str | None: return extract_list_id(self.dom)
40 | def get_list_meta(self, url: str) -> ListMetaData: return extract_list_meta(self.dom, url)
41 |
42 | def extract_list_id(dom) -> str | None:
43 | """
44 | Extracts the list ID from the list page DOM.
45 |
46 | Args:
47 | dom: BeautifulSoup DOM object of the list page
48 |
49 | Returns:
50 | List ID as string or None if not found
51 | """
52 | try:
53 | # Method 1: Look for data-report-url attribute in report link
54 | report_link = dom.find('span', {'data-report-url': True})
55 | if report_link:
56 | report_url = report_link.get('data-report-url')
57 | if report_url and 'filmlist:' in report_url:
58 | # Extract ID from pattern like "/ajax/filmlist:30052453/report-form"
60 | match = re.search(r'filmlist:(\d+)', report_url)
61 | if match:
62 | return match.group(1)
63 |
64 | # Method 2: Look for data-popmenu-id attribute
65 | report_menu = dom.find('a', {'data-popmenu-id': True})
66 | if report_menu:
67 | popmenu_id = report_menu.get('data-popmenu-id')
68 | if popmenu_id and 'list-' in popmenu_id:
69 | # Extract ID from pattern like "report-member-username-list-30052453"
71 | match = re.search(r'list-(\d+)$', popmenu_id)
72 | if match:
73 | return match.group(1)
74 |
75 | return None
76 | except Exception as e:
77 | print(f"Error extracting list ID: {e}")
78 | return None
79 |
80 | def extract_count(dom) -> int:
81 | """Extracts the number of films from the list DOM."""
82 | try:
83 | count = get_movie_count_from_meta(dom)
84 | if count is None:
85 | raise ValueError("Meta description not found or missing 'content' attribute.")
86 | return count
87 | except ValueError as e:
88 | raise RuntimeError("Failed to extract film count: " + str(e)) from e
89 |
90 | def extract_movies(list_url: str, items_per_page) -> dict:
91 | data = {}
92 |
93 | page = 1
94 | while True:
95 | dom = parse_url(f'{list_url}/page/{page}/')
96 | movies = extract_movies_from_vertical_list(dom)
97 | data |= movies
98 |
99 | if len(movies) < items_per_page:
100 | break
101 |
102 | page += 1
103 |
104 | return data
105 |
106 | def extract_title(dom) -> str:
107 | return get_meta_content(dom, property='og:title')
108 |
109 | def extract_author(dom) -> str:
110 | data = dom.find("span", attrs={'itemprop': 'name'})
111 | data = data.text if data else None
112 | return data
113 |
114 | def extract_description(dom) -> str:
115 | return get_meta_content(dom, property='og:description')
116 |
117 | def extract_date_created(dom) -> str | None:
118 | """Extract list creation date in ISO format."""
119 | # Look for published date span
120 | data = dom.find("span", {"class": "published is-updated"})
121 | if not data:
122 | data = dom.find("span", {"class": "published"})
123 |
124 | if data:
125 | time_element = data.findChild("time")
126 | if time_element and time_element.get('datetime'):
127 | return DateUtils.to_iso(time_element.get('datetime'))
128 |
129 | return None
130 |
131 | def extract_date_updated(dom) -> str | None:
132 | """Extract list update date in ISO format."""
133 | # Look for updated date span
134 | data = dom.find("span", {"class": "updated"})
135 | if not data:
136 | # Use published date if no separate update date
137 | data = dom.find("span", {"class": "published"})
138 |
139 | if data:
140 | time_element = data.findChild("time")
141 | if time_element and time_element.get('datetime'):
142 | return DateUtils.to_iso(time_element.get('datetime'))
143 |
144 | return None
145 |
146 | def extract_tags(dom) -> list:
147 | """
148 |     Scrapes the tag links from a Letterboxd list page and
149 |     extracts just the tag names into a clean list.
151 | """
152 | dom = dom.find("ul", {"class": ["tags"]})
153 |
154 | data = []
155 |
156 | if dom:
157 | dom = dom.findChildren("a")
158 | for item in dom:
159 | data.append(item.text)
160 |
161 | return data
162 |
163 | def extract_list_meta(dom, url: str) -> ListMetaData:
164 | """
165 | Extracts metadata from a Letterboxd list page.
166 | Args:
167 | dom: BeautifulSoup DOM object
168 | url: The original URL of the list
169 | Returns:
170 | ListMetaData: A dictionary containing list metadata and status
171 | """
172 | data: ListMetaData = {
173 | 'url': None,
174 | 'title': None,
175 | 'owner': None,
176 | 'list_id': None,
177 | 'is_available': False,
178 | 'error': None
179 | }
180 |
181 | try:
182 | # Extract basic metadata
183 | list_url = get_meta_content(dom, property='og:url')
184 | list_title = get_meta_content(dom, property='og:title')
185 | list_owner = get_body_content(dom, 'data-owner')
186 | list_id = extract_list_id(dom)
187 |
188 | # Check for URL redirection
189 | if not check_url_match(url, list_url):
190 | print(f'Redirected to {list_url}')
191 |
192 | # Update metadata
193 | data.update({
194 | 'url': list_url,
195 | 'title': list_title,
196 | 'owner': list_owner,
197 | 'list_id': list_id,
198 | 'is_available': True
199 | })
200 |
201 | except AttributeError as e:
202 | data['error'] = f"Missing required metadata: {str(e)}"
203 | print(f"Metadata extraction error: {e}")
204 | except Exception as e:
205 | data['error'] = f"Unexpected error: {str(e)}"
206 | print(f"Unexpected error while checking the list: {e}")
207 |
208 | return data
209 |
210 |
--------------------------------------------------------------------------------
/examples/exports/users/nmcassa/activity_following.json:
--------------------------------------------------------------------------------
1 | {
2 | "metadata": {
3 | "export_timestamp": "2025-08-28T17:48:04.507738",
4 | "source_url": "https://letterboxd.com/ajax/activity-pagination/nmcassa/following",
5 | "total_activities": 20
6 | },
7 | "activities": {
8 | "9684576241": {
9 | "activity_type": "basic",
10 | "timestamp": "2025-08-27T22:02:36.210000Z",
11 | "content": {
12 | "action": "added",
13 | "description": "Karsten added Hero to his watchlist",
14 | "movie": {
15 | "title": "Hero",
16 | "slug": "hero-2002",
17 | "url": "https://letterboxd.com/film/hero-2002/"
18 | }
19 | }
20 | },
21 | "9683113717": {
22 | "activity_type": "basic",
23 | "timestamp": "2025-08-27T18:17:08.676000Z",
24 | "content": {
25 | "action": "liked",
26 | "description": "Karsten watched, liked and rated Sorry, Baby ★★★½ on Wednesday Aug 27, 2025",
27 | "movie": {
28 | "title": "Sorry, Baby",
29 | "slug": "sorry-baby-2025",
30 | "url": "https://letterboxd.com/film/sorry-baby-2025/"
31 | }
32 | }
33 | },
34 | "9682558075": {
35 | "activity_type": "basic",
36 | "timestamp": "2025-08-27T16:31:53.506000Z",
37 | "content": {
38 | "action": "liked",
39 | "description": "Karsten liked slim’s ★★★★★ review of Y Tu Mamá También",
40 | "movie": {
41 | "title": "review of Y Tu Mamá También",
42 | "slug": "y-tu-mama-tambien",
43 | "url": "https://letterboxd.com/film/y-tu-mama-tambien/"
44 | }
45 | }
46 | },
47 | "9681535360": {
48 | "activity_type": "basic",
49 | "timestamp": "2025-08-27T12:57:15.051000Z",
50 | "content": {
51 | "action": "liked",
52 | "description": "ppark liked ramenfeedgg’s ★★★½ review of Caught Stealing",
53 | "movie": {
54 | "title": "review of Caught Stealing",
55 | "slug": "caught-stealing",
56 | "url": "https://letterboxd.com/film/caught-stealing/"
57 | }
58 | }
59 | },
60 | "9680204114": {
61 | "activity_type": "basic",
62 | "timestamp": "2025-08-27T06:11:03.542000Z",
63 | "content": {
64 | "action": "liked",
65 | "description": "Karsten watched, liked and rated Blue Sun Palace ★★★★ on Tuesday Aug 26, 2025",
66 | "movie": {
67 | "title": "Blue Sun Palace",
68 | "slug": "blue-sun-palace",
69 | "url": "https://letterboxd.com/film/blue-sun-palace/"
70 | }
71 | }
72 | },
73 | "9679590532": {
74 | "activity_type": "basic",
75 | "timestamp": "2025-08-27T03:32:01.964000Z",
76 | "content": {
77 | "action": "added",
78 | "description": "Ben Wold added Sentimental Value to his watchlist",
79 | "movie": {
80 | "title": "Sentimental Value",
81 | "slug": "sentimental-value-2025",
82 | "url": "https://letterboxd.com/film/sentimental-value-2025/"
83 | }
84 | }
85 | },
86 | "9679158181": {
87 | "activity_type": "basic",
88 | "timestamp": "2025-08-27T02:04:46.870000Z",
89 | "content": {
90 | "action": "liked",
91 | "description": "Karsten rewatched, liked and rated Interstellar ★★★★½ on Tuesday Aug 26, 2025",
92 | "movie": {
93 | "title": "Interstellar",
94 | "slug": "interstellar",
95 | "url": "https://letterboxd.com/film/interstellar/"
96 | }
97 | }
98 | },
99 | "9676274360": {
100 | "activity_type": "basic",
101 | "timestamp": "2025-08-26T18:15:50.098000Z",
102 | "content": {
103 | "action": "liked",
104 | "description": "Karsten rewatched, liked and rated The Exorcist ★★★★½ on Tuesday Aug 26, 2025",
105 | "movie": {
106 | "title": "The Exorcist",
107 | "slug": "1",
108 | "url": "https://letterboxd.com/film/1/"
109 | }
110 | }
111 | },
112 | "9673227177": {
113 | "activity_type": "basic",
114 | "timestamp": "2025-08-26T05:41:28.456000Z",
115 | "content": {
116 | "action": "liked",
117 | "description": "Karsten liked glanderco’s ★★★★★ review of Inglourious Basterds",
118 | "movie": {
119 | "title": "review of Inglourious Basterds",
120 | "slug": "inglourious-basterds",
121 | "url": "https://letterboxd.com/film/inglourious-basterds/"
122 | }
123 | }
124 | },
125 | "9673225825": {
126 | "activity_type": "basic",
127 | "timestamp": "2025-08-26T05:41:04.025000Z",
128 | "content": {
129 | "action": "liked",
130 | "description": "Karsten rewatched, liked and rated Inglourious Basterds ★★★★★ on Monday Aug 25, 2025",
131 | "movie": {
132 | "title": "Inglourious Basterds",
133 | "slug": "inglourious-basterds",
134 | "url": "https://letterboxd.com/film/inglourious-basterds/"
135 | }
136 | }
137 | },
138 | "9671860687": {
139 | "activity_type": "review",
140 | "timestamp": "2025-08-26T00:50:32.353000Z",
141 | "content": {
142 | "action": "rewatched",
143 | "description": "rewatched F1 ★★★★★★★★",
144 | "movie": {
145 | "title": "F1",
146 | "year": 2025,
147 | "slug": "f1",
148 | "url": "https://letterboxd.com/film/f1/"
149 | },
150 | "rating": 8,
151 | "review": {
152 | "content": "Major epicness",
153 | "contains_spoilers": false
154 | }
155 | }
156 | },
157 | "9670151089": {
158 | "activity_type": "review",
159 | "timestamp": "2025-08-25T20:32:44.324000Z",
160 | "content": {
161 | "action": "watched",
162 | "description": "watched It's a Mad, Mad, Mad, Mad World ★★★★★★★",
163 | "movie": {
164 | "title": "It's a Mad, Mad, Mad, Mad World",
165 | "year": 1963,
166 | "slug": "its-a-mad-mad-mad-mad-world",
167 | "url": "https://letterboxd.com/film/its-a-mad-mad-mad-mad-world/"
168 | },
169 | "rating": 7,
170 | "review": {
171 | "content": "some advice for those who don’t enjoy this: LIGHTEN UP",
172 | "contains_spoilers": false
173 | }
174 | }
175 | },
176 | "9669307400": {
177 | "activity_type": "basic",
178 | "timestamp": "2025-08-25T18:21:48.130000Z",
179 | "content": {
180 | "action": "liked",
181 | "description": "Karsten liked Carlos Valladares’s ★★★★ review of It's a Mad, Mad, Mad, Mad World",
182 | "movie": {
183 | "title": "review of It's a Mad, Mad, Mad, Mad World",
184 | "slug": "2",
185 | "url": "https://letterboxd.com/film/2/"
186 | }
187 | }
188 | },
189 | "9669148896": {
190 | "activity_type": "basic",
191 | "timestamp": "2025-08-25T17:54:46.259000Z",
192 | "content": {
193 | "action": "liked",
194 | "description": "Karsten watched, liked and rated The Night House ★★★½ on Monday Aug 25, 2025",
195 | "movie": {
196 | "title": "The Night House",
197 | "slug": "the-night-house",
198 | "url": "https://letterboxd.com/film/the-night-house/"
199 | }
200 | }
201 | },
202 | "9668253197": {
203 | "activity_type": "newlist",
204 | "timestamp": "2025-08-25T15:15:58.484000Z",
205 | "content": {
206 | "log_type": "newlist",
207 | "title": "ryanshubert listed TIFF 3 films",
208 | "film_count": "3 films",
209 | "target_list": {
210 | "name": "TIFF",
211 | "url": "/ryanshubert/list/tiff/"
212 | }
213 | }
214 | },
215 | "9666286088": {
216 | "activity_type": "basic",
217 | "timestamp": "2025-08-25T06:49:30.257000Z",
218 | "content": {
219 | "action": "watched",
220 | "description": "Karsten watched and liked Elephant on Sunday Aug 24, 2025",
221 | "movie": {
222 | "title": "Elephant",
223 | "slug": "elephant",
224 | "url": "https://letterboxd.com/film/elephant/"
225 | }
226 | }
227 | },
228 | "9664423024": {
229 | "activity_type": "basic",
230 | "timestamp": "2025-08-25T01:00:41.773000Z",
231 | "content": {
232 | "action": "rewatched",
233 | "description": "ryanshubert rewatched and rated Corpse Bride ★★★★ on Sunday Aug 24, 2025"
234 | }
235 | },
236 | "9663985114": {
237 | "activity_type": "review",
238 | "timestamp": "2025-08-24T23:55:13.715000Z",
239 | "content": {
240 | "action": "watched",
241 | "description": "watched Caught Stealing ★★★★★★★",
242 | "movie": {
243 | "title": "Caught Stealing",
244 | "year": 2025,
245 | "slug": "caught-stealing",
246 | "url": "https://letterboxd.com/film/caught-stealing/"
247 | },
248 | "rating": 7,
249 | "review": {
250 | "content": "Pulp Fiction vibes, Austin Butler has the most valid crash out of 2025.",
251 | "contains_spoilers": false
252 | }
253 | }
254 | },
255 | "9663971772": {
256 | "activity_type": "basic",
257 | "timestamp": "2025-08-24T23:53:19.359000Z",
258 | "content": {
259 | "action": "liked",
260 | "description": "ramenfeedgg liked ppark’s ★★½ review of Caught Stealing",
261 | "movie": {
262 | "title": "review of Caught Stealing",
263 | "slug": "caught-stealing",
264 | "url": "https://letterboxd.com/film/caught-stealing/"
265 | }
266 | }
267 | },
268 | "9661823853": {
269 | "activity_type": "basic",
270 | "timestamp": "2025-08-24T19:35:54.779000Z",
271 | "content": {
272 | "action": "watched",
273 | "description": "ryanshubert watched and rated Honey Don't! ★★½ on Sunday Aug 24, 2025",
274 | "movie": {
275 | "title": "Honey Don't!",
276 | "slug": "honey-dont",
277 | "url": "https://letterboxd.com/film/honey-dont/"
278 | }
279 | }
280 | }
281 | }
282 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # letterboxdpy
2 |
3 | [](https://badge.fury.io/py/letterboxdpy)
4 | [](https://pepy.tech/project/letterboxdpy)
5 | 
6 |
7 | ## Installation
8 |
9 | ### From PyPI
10 |
11 | You can easily install the stable version of `letterboxdpy` from PyPI using pip:
12 |
13 | ```bash
14 | pip install letterboxdpy
15 | ```
16 |
17 | ### From GitHub Repository
18 |
19 | Alternatively, if you wish to access the latest (potentially unstable) version directly from the GitHub repository, you can execute the following command:
20 |
21 | ```bash
22 | pip install git+https://github.com/nmcassa/letterboxdpy.git
23 | ```
24 |
25 | > [!WARNING]
26 | > Please be aware that installing directly from the GitHub repository might give you access to the most recent features and bug fixes, but it could also include changes that haven't been thoroughly tested and may not be stable for production use.
27 |
28 | ## User Object
29 |
30 | [Explore the file](letterboxdpy/user.py) | [Functions Documentation](/docs/user/funcs/)
31 |
32 | ```python
33 | from letterboxdpy.user import User
34 | user_instance = User("nmcassa")
35 | print(user_instance)
36 | ```
37 |
38 |
39 | Example User object response (truncated):
40 |
41 | ```json
42 | {
43 | "username": "nmcassa",
44 | "url": "https://letterboxd.com/nmcassa",
45 | "id": 1500306,
46 | "is_hq": false,
47 | "display_name": "nmcassa",
48 | "bio": null,
49 | "location": null,
50 | "website": null,
51 | "watchlist_length": 74,
52 | "stats": {
53 | "films": 594,
54 | "this_year": 74,
55 | "lists": 2,
56 | "following": 7,
57 | "followers": 7
58 | },
59 | "favorites": {
60 | "95113": {
61 | "slug": "the-grand-budapest-hotel",
62 | "name": "The Grand Budapest Hotel"
63 | },...
64 | },
65 | "avatar": {
66 | "exists": true,
67 | "upscaled": true,
68 | "url": "https://a.ltrbxd.com/resized/avatar/upload/1/5/0/0/3/0/6/shard/avtr-0-1000-0-1000-crop.jpg"
69 | },
70 | "recent": {
71 | "watchlist": {
72 | "1042841": {
73 | "id": "1042841",
74 | "slug": "the-contestant-2023",
75 | "name": "The Contestant"
76 | },...
77 | },
78 | "diary": {
79 | "months": {
80 | "9": {
81 | "22": [
82 | {
83 | "name": "The Substance",
84 | "slug": "the-substance"
85 | },
86 | {
87 | "name": "Whiplash",
88 | "slug": "1"
89 | }
90 | ],
91 | "13": [
92 | {
93 | "name": "Speak No Evil",
94 | "slug": "speak-no-evil-2024"
95 | }
96 | ],...
97 | },
98 | "8": {
99 | "30": [
100 | {
101 | "name": "Shaun of the Dead",
102 | "slug": "shaun-of-the-dead"
103 | }
104 | ],...
105 | }
106 | }
107 | }
108 | }
109 | }
110 | ```
111 |
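112 | Individual sections of the profile can also be fetched through `User` methods (see the functions documentation linked above), for example:
113 |
114 | ```python
115 | from letterboxdpy.user import User
116 |
117 | user_instance = User("nmcassa")
118 | print(user_instance.get_wrapped(2024))       # diary statistics for a single year
119 | print(user_instance.get_films_by_rating(5))  # films the user rated 5 stars
120 | ```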
112 |
113 | ## Movie Object
114 |
115 | [Explore the file](letterboxdpy/movie.py) | [Functions Documentation](/docs/movie/funcs/)
116 |
117 | ```python
118 | from letterboxdpy.movie import Movie
119 | movie_instance = Movie("v-for-vendetta")
120 | print(movie_instance)
121 | ```
122 |
123 |
124 | Example Movie object response (truncated):
125 |
126 | ```json
127 | {
128 | "scraper": {...},
129 | "url": "https://letterboxd.com/film/v-for-vendetta",
130 | "slug": "v-for-vendetta",
131 | "letterboxd_id": 51400,
132 | "title": "V for Vendetta",
133 | "original_title": null,
134 | "runtime": 132,
135 | "rating": 3.84,
136 | "year": 2005,
137 | "tmdb_link": "https://www.themoviedb.org/movie/752/",
138 | "imdb_link": "http://www.imdb.com/title/tt0434409/maindetails",
139 | "poster": "https://a.ltrbxd.com/resized/film-poster/5/1/4/0/0/51400-v-for-vendetta-0-230-0-345-crop.jpg",
140 | "banner": "https://a.ltrbxd.com/resized/sm/upload/mx/jg/tz/ni/v-for-vendetta-1920-1920-1080-1080-crop-000000.jpg",
141 | "tagline": "People should not be afraid of their governments. Governments should be afraid of their people.",
142 | "description": "In a world in which Great Britain has become a fascist state, a masked vigilante known only as \u201cV\u201d conducts guerrilla warfare against the oppressive British government. When V rescues a young woman from the secret police, he finds in her an ally with whom he can continue his fight to free the people of Britain.",
143 | "trailer": {
144 | "id": "V5VGq23aZ-g",
145 | "link": "https://www.youtube.com/watch?v=V5VGq23aZ-g",
146 | "embed_url": "https://www.youtube.com/embed/V5VGq23aZ-g"
147 | },
148 | "alternative_titles": [
149 | "Vendetta \u00fc\u00e7\u00fcn V",
150 | "O za osvetu",...
151 | ],
152 | "details": [
153 | {
154 | "type": "studio",
155 | "name": "Virtual Studios",
156 | "slug": "virtual-studios",
157 | "url": "https://letterboxd.com/studio/virtual-studios/"
158 | },...
159 | ],
160 | "genres": [
161 | {
162 | "type": "genre",
163 | "name": "Thriller",
164 | "slug": "thriller",
165 | "url": "https://letterboxd.com/films/genre/thriller/"
166 | },...
167 | ],
168 | "cast": [
169 | {
170 | "name": "Natalie Portman",
171 | "role_name": "Evey Hammond",
172 | "slug": "natalie-portman",
173 | "url": "https://letterboxd.com/actor/natalie-portman/"
174 | },...
175 | ],
176 | "crew": {
177 | "director": [
178 | {
179 | "name": "James McTeigue",
180 | "slug": "james-mcteigue",
181 | "url": "https://letterboxd.com/director/james-mcteigue/"
182 | }
183 | ],
184 | "producer": [
185 | {
186 | "name": "Grant Hill",
187 | "slug": "grant-hill",
188 | "url": "https://letterboxd.com/producer/grant-hill/"
189 | },...
190 | ],...
191 | },
192 | "popular_reviews": [
193 | {
194 | "reviewer": "zoey luke",
195 | "rating": " \u2605\u2605\u2605\u2605\u00bd ",
196 | "review": "I love natalie Portman and I hate the government"
197 | },...
198 | ]
199 | }
200 | }
201 | ```
202 |
203 |
204 | ## Search Object
205 |
206 | [Explore the file](letterboxdpy/search.py) | [Functions Documentation](/docs/search/funcs/)
207 |
208 | ```python
209 | from letterboxdpy.search import Search
210 | search_instance = Search("V for Vendetta", 'films')
211 | print(search_instance.get_results(max=5))
212 | ```
213 |
214 |
215 | Example Search object response (truncated):
216 |
217 | ```json
218 | {
219 | "available": true,
220 | "query": "V%20for%20Vendetta",
221 | "filter": "films",
222 | "end_page": 13,
223 | "count": 5,
224 | "results": [
225 | {
226 | "no": 1,
227 | "page": 1,
228 | "type": "film",
229 | "slug": "v-for-vendetta",
230 | "name": "V for Vendetta",
231 | "year": 2005,
232 | "url": "https://letterboxd.com/film/v-for-vendetta/",
233 | "poster": null,
234 | "directors": [
235 | {
236 | "name": "James McTeigue",
237 | "slug": "james-mcteigue",
238 | "url": "https://letterboxd.com/director/james-mcteigue/"
239 | }
240 | ]
241 | },
242 | {
243 | "no": 2,
244 | "page": 1,
245 | "type": "film",
246 | "slug": "lady-vengeance",
247 | "name": "Lady Vengeance",
248 | "year": 2005,
249 | "url": "https://letterboxd.com/film/lady-vengeance/",
250 | "poster": null,
251 | "directors": [
252 | {
253 | "name": "Park Chan-wook",
254 | "slug": "park-chan-wook",
255 | "url": "https://letterboxd.com/director/park-chan-wook/"
256 | }
257 | ]
258 | },...
259 | ]
260 | }
261 | ```
262 |
263 |
264 | ## List Object
265 |
266 | [Explore the file](letterboxdpy/list.py)
267 |
268 | ```python
269 | from letterboxdpy.list import List
270 | list_instance = List("hepburnluv", "classic-movies-for-beginners")
271 | print(list_instance)
272 | ```
273 |
274 |
275 | Example List object response (truncated):
276 |
277 | ```json
278 | {
279 | "scraper": {...},
280 | "url": "https://letterboxd.com/hepburnluv/list/classic-movies-for-beginners",
281 | "slug": "classic-movies-for-beginners",
282 | "username": "hepburnluv",
283 | "list_type": "list",
284 | "items_per_page": 60,
285 | "title": "classic movies for beginners.",
286 | "description": "old hollywood classic movies for you who wanna start watching. \u02d6\u207a\u2027\u208a\u02da \u2661 \u02da\u208a\u2027\u207a\u02d6. \u0741\u208a \u22b9 . \u0741(from easiest to hardest to watch) (these are my personal recommendations only) thank you guys for all the comments and likes <3",
287 | "movies": [
288 | [
289 | "The Wizard of Oz",
290 | "the-wizard-of-oz-1939"
291 | ],
292 | [
293 | "Roman Holiday",
294 | "roman-holiday"
295 | ],...
296 | ],
297 | "count": 66
298 | }
299 | ```
300 |
301 |
302 | ## Films Object
303 |
304 | [Explore the file](letterboxdpy/films.py) | [Functions Documentation](/docs/films/funcs/)
305 |
306 | ```python
307 | from letterboxdpy.films import Films
308 | ```
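309 |
310 | A minimal usage sketch, assuming the helpers documented in [`docs/films/funcs/`](/docs/films/funcs/) are exposed as methods of a `Films` instance created without arguments:
311 |
312 | ```python
313 | from letterboxdpy.films import Films
314 |
315 | # Hypothetical usage: the method name follows docs/films/funcs/get_movies_by_genre.md,
316 | # and the no-argument constructor is an assumption.
317 | films_instance = Films()
318 | print(films_instance.get_movies_by_genre("horror"))
319 | ```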
309 |
310 | ## Members Object
311 |
312 | [Explore the file](letterboxdpy/members.py) | [Functions Documentation](/docs/members/funcs/)
313 |
314 | ```python
315 | from letterboxdpy.members import Members
316 | ```
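317 |
318 | A minimal usage sketch, assuming the `top_users` helper documented in [`docs/members/funcs/top_users.md`](/docs/members/funcs/top_users.md) is exposed as a method of a `Members` instance created without arguments:
319 |
320 | ```python
321 | from letterboxdpy.members import Members
322 |
323 | # Hypothetical usage: the method name follows docs/members/funcs/top_users.md,
324 | # and the no-argument constructor is an assumption.
325 | members_instance = Members()
326 | print(members_instance.top_users())
327 | ```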
317 |
318 | ## Examples
319 |
320 | Example scripts demonstrating various features are available in the [`examples/`](examples/) directory.
321 |
322 | See [`examples/README.md`](examples/README.md) for detailed usage instructions.
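323 |
324 | For example, the plotting scripts accept a Letterboxd username via `--user` (assuming `letterboxdpy` and the example requirements are installed):
325 |
326 | ```bash
327 | python examples/user_rating_plot.py --user nmcassa
328 | python examples/user_plot_statistics.py --user nmcassa --start-year 2023 --end-year 2024
329 | ```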
323 |
324 | ## Testing
325 |
326 | You can test the library with the built-in `unittest` package from the CLI:
327 |
328 | ```zsh
329 | python -m unittest
330 | ```
331 |
332 | **Example**
333 | ```zsh
334 | python -m unittest tests/test_movie.py
335 | ```
336 |
337 | ## Stargazers over time
338 | [](https://starchart.cc/nmcassa/letterboxdpy)
--------------------------------------------------------------------------------