├── .gitattributes ├── requirements.txt ├── .gitignore ├── docs ├── members │ └── funcs │ │ └── top_users.md ├── films │ └── funcs │ │ ├── get_upcoming_movies.md │ │ ├── get_movies_by_year.md │ │ ├── print_movies.md │ │ ├── get_movies_by_theme.md │ │ ├── get_movies_by_decade.md │ │ ├── get_movies_by_similar.md │ │ ├── get_movies_by_mini_theme.md │ │ ├── get_movies_by_nanogenre.md │ │ ├── get_movies_by_service.md │ │ └── get_movies_by_genre.md ├── user │ └── funcs │ │ ├── user_films_liked.md │ │ ├── user_films_rated.md │ │ ├── extract_user_films.md │ │ ├── user_network.md │ │ ├── user_followers.md │ │ ├── user_following.md │ │ ├── user_lists.md │ │ ├── user_genre_info.md │ │ ├── user_tags.md │ │ ├── user_films.md │ │ ├── user_watchlist.md │ │ ├── user_diary.md │ │ ├── user_activity.md │ │ ├── user_reviews.md │ │ ├── user_liked_reviews.md │ │ └── user_wrapped.md ├── movie │ └── funcs │ │ ├── movie_watchers.md │ │ └── movie_details.md ├── search │ └── funcs │ │ └── get_film_slug_from_title.md └── check_docs.py ├── tests ├── run.tests.sh ├── test_user.py ├── test_search.py ├── test_scraper.py └── test_movie.py ├── examples ├── requirements.txt ├── exports │ └── users │ │ └── nmcassa │ │ ├── genre_info.json │ │ ├── lists.json │ │ ├── user_tags.json │ │ ├── followers.json │ │ ├── following.json │ │ ├── user.json │ │ ├── films_by_rating.json │ │ └── activity_following.json ├── README.md ├── follow_stats.py ├── search_and_export_lists.py ├── export_user_data.py ├── export_user_diary_posters.py ├── user_plot_statistics.py └── user_rating_plot.py ├── CONTRIBUTING.md ├── letterboxdpy ├── pages │ ├── movie_lists.py │ ├── user_lists.py │ ├── movie_similar.py │ ├── movie_details.py │ ├── movie_reviews.py │ ├── movie_members.py │ ├── user_tags.py │ ├── user_activity.py │ ├── user_reviews.py │ ├── user_watchlist.py │ ├── user_network.py │ ├── user_films.py │ └── user_list.py ├── utils │ ├── utils_transform.py │ ├── utils_string.py │ ├── utils_validators.py │ ├── utils_url.py │ ├── 
utils_terminal.py │ ├── utils_file.py │ ├── movies_extractor.py │ ├── date_utils.py │ └── lists_extractor.py ├── core │ ├── exceptions.py │ ├── decorators.py │ ├── encoder.py │ └── scraper.py ├── constants │ ├── project.py │ └── selectors.py ├── url.py ├── avatar.py ├── members.py ├── watchlist.py ├── list.py ├── movie.py ├── films.py └── user.py ├── LICENSE ├── pyproject.toml └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.31.0 2 | beautifulsoup4>=4.12.3 3 | lxml>=5.1.0 4 | validators -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python cache and bytecode 2 | __pycache__/ 3 | *.pyc 4 | *.pyo 5 | *~ 6 | 7 | # Build artifacts 8 | build/ 9 | dist/ 10 | *.egg-info/ 11 | 12 | # macOS 13 | .DS_Store 14 | 15 | # Windows 16 | desktop.ini 17 | -------------------------------------------------------------------------------- /docs/members/funcs/top_users.md: -------------------------------------------------------------------------------- 1 |

top_users(n: int) -> List

2 | 3 | **Documentation:** 4 | 5 | No documentation provided. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+top_users) 8 | -------------------------------------------------------------------------------- /docs/films/funcs/get_upcoming_movies.md: -------------------------------------------------------------------------------- 1 |

get_upcoming_movies() -> dict

2 | 3 | **Documentation:** 4 | 5 | No documentation provided. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+get_upcoming_movies) 8 | -------------------------------------------------------------------------------- /docs/films/funcs/get_movies_by_year.md: -------------------------------------------------------------------------------- 1 |

get_movies_by_year(year: int) -> dict

2 | 3 | **Documentation:** 4 | 5 | No documentation provided. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+get_movies_by_year) 8 | -------------------------------------------------------------------------------- /docs/films/funcs/print_movies.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | **Documentation:** 4 | 5 | Print movies in a formatted list. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+print_movies) 8 | -------------------------------------------------------------------------------- /docs/films/funcs/get_movies_by_theme.md: -------------------------------------------------------------------------------- 1 |

get_movies_by_theme(theme: str) -> dict

2 | 3 | **Documentation:** 4 | 5 | No documentation provided. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+get_movies_by_theme) 8 | -------------------------------------------------------------------------------- /docs/films/funcs/get_movies_by_decade.md: -------------------------------------------------------------------------------- 1 |

get_movies_by_decade(decade: int) -> dict

2 | 3 | **Documentation:** 4 | 5 | No documentation provided. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+get_movies_by_decade) 8 | -------------------------------------------------------------------------------- /docs/user/funcs/user_films_liked.md: -------------------------------------------------------------------------------- 1 |

user_films_liked(user: letterboxdpy.user.User) -> dict

2 | 3 | **Documentation:** 4 | 5 | No documentation provided. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+user_films_liked) 8 | -------------------------------------------------------------------------------- /docs/films/funcs/get_movies_by_similar.md: -------------------------------------------------------------------------------- 1 |

get_movies_by_similar(movie_slug: str) -> dict

2 | 3 | **Documentation:** 4 | 5 | No documentation provided. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+get_movies_by_similar) 8 | -------------------------------------------------------------------------------- /docs/films/funcs/get_movies_by_mini_theme.md: -------------------------------------------------------------------------------- 1 |

get_movies_by_mini_theme(theme: str) -> dict

2 | 3 | **Documentation:** 4 | 5 | No documentation provided. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+get_movies_by_mini_theme) 8 | -------------------------------------------------------------------------------- /docs/films/funcs/get_movies_by_nanogenre.md: -------------------------------------------------------------------------------- 1 |

get_movies_by_nanogenre(nanogenre: str) -> dict

2 | 3 | **Documentation:** 4 | 5 | No documentation provided. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+get_movies_by_nanogenre) 8 | -------------------------------------------------------------------------------- /docs/user/funcs/user_films_rated.md: -------------------------------------------------------------------------------- 1 |

user_films_rated(user: letterboxdpy.user.User, rating: float | int) -> dict

2 | 3 | **Documentation:** 4 | 5 | No documentation provided. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+user_films_rated) 8 | -------------------------------------------------------------------------------- /docs/user/funcs/extract_user_films.md: -------------------------------------------------------------------------------- 1 |

extract_user_films(user: letterboxdpy.user.User, url: str = None) -> dict

2 | 3 | **Documentation:** 4 | 5 | Extracts user films and their details from the given URL or returns all watched films. 6 | 7 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+extract_user_films) 8 | -------------------------------------------------------------------------------- /docs/films/funcs/get_movies_by_service.md: -------------------------------------------------------------------------------- 1 |

get_movies_by_service(service: str) -> dict

2 | 3 | **Documentation:** 4 | 5 | netflix, hulu, prime-video, disney-plus, itv-play, apple-tv, 6 | youtube-premium, amazon-prime-video, hbo-max, peacock, ... 7 | 8 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+get_movies_by_service) 9 | -------------------------------------------------------------------------------- /tests/run.tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | export PYTHONPATH=.. 6 | 7 | run_tests() { 8 | echo "Running tests..." 9 | python -m unittest discover -v 10 | } 11 | 12 | if run_tests; then 13 | echo "All tests passed!" 14 | else 15 | echo "Some tests failed. Check the output above." 16 | fi 17 | 18 | echo "Press any key to exit..." 19 | read -n 1 -s 20 | -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | # Examples requirements for letterboxdpy 2 | 3 | # Core dependencies (for all examples) 4 | requests>=2.31.0 5 | beautifulsoup4>=4.12.3 6 | lxml>=5.1.0 7 | validators 8 | 9 | # Visualization and plotting (user_rating_plot.py, user_plot_statistics.py) 10 | matplotlib>=3.5.0 11 | numpy>=1.21.0 12 | pillow>=8.0.0 # Required by matplotlib for image processing 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | ## Branch Strategy 4 | We follow a structured branch workflow: 5 | 6 | - **`develop`** → Main branch for integrating PRs. 7 | - **`staging`** → Testing branch, merged from `develop`. 8 | - **`main`** → Stable production branch, merged from `staging`. 9 | 10 | Please submit PRs to `develop`, not `main`. 11 | 12 | Thank you for your contributions! 
13 | -------------------------------------------------------------------------------- /docs/user/funcs/user_network.md: -------------------------------------------------------------------------------- 1 |

user_network(user: letterboxdpy.user.User, section: str) -> dict

2 | 3 | **Documentation:** 4 | 5 | Fetches followers or following based on the section and returns them as a dictionary. 6 | - `section`: the section to scrape; must be either 'followers' or 'following'. 7 | 8 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+user_network) 9 | -------------------------------------------------------------------------------- /docs/movie/funcs/movie_watchers.md: -------------------------------------------------------------------------------- 1 |

movie_watchers(movie object)

2 | 3 | ```python 4 | from letterboxdpy import movie 5 | movie_instance = movie.Movie("v-for-vendetta") 6 | print(movie.movie_watchers(movie_instance)) 7 | ``` 8 | 9 | ```json 10 | { 11 | "members": 1090230, 12 | "fans": 9923, 13 | "likes": 278496, 14 | "reviews": 42180, 15 | "lists": 98866 16 | } 17 | ``` -------------------------------------------------------------------------------- /docs/films/funcs/get_movies_by_genre.md: -------------------------------------------------------------------------------- 1 |

get_movies_by_genre(genre: str) -> dict

2 | 3 | **Documentation:** 4 | 5 | action, adventure, animation, comedy, crime, documentary, 6 | drama, family, fantasy, history, horror, music, mystery, 7 | romance, science-fiction, thriller, tv-movie, war, western 8 | 9 | [To be documented.](https://github.com/search?q=repo:nmcassa/letterboxdpy+get_movies_by_genre) 10 | -------------------------------------------------------------------------------- /examples/exports/users/nmcassa/genre_info.json: -------------------------------------------------------------------------------- 1 | { 2 | "action": 114, 3 | "adventure": 149, 4 | "animation": 123, 5 | "comedy": 291, 6 | "crime": 60, 7 | "documentary": 28, 8 | "drama": 197, 9 | "family": 127, 10 | "fantasy": 81, 11 | "history": 11, 12 | "horror": 64, 13 | "music": 18, 14 | "mystery": 55, 15 | "romance": 49, 16 | "science-fiction": 101, 17 | "thriller": 106, 18 | "tv-movie": 20, 19 | "war": 11, 20 | "western": 5 21 | } -------------------------------------------------------------------------------- /letterboxdpy/pages/movie_lists.py: -------------------------------------------------------------------------------- 1 | from letterboxdpy.utils.lists_extractor import ListsExtractor 2 | from letterboxdpy.constants.project import DOMAIN 3 | 4 | 5 | class MovieLists: 6 | """Movie lists page operations - lists containing this movie.""" 7 | 8 | def __init__(self, slug: str): 9 | """Initialize MovieLists with a movie slug.""" 10 | self.slug = slug 11 | self.url = f"{DOMAIN}/film/{slug}/lists" 12 | 13 | def get_lists(self) -> dict: return ListsExtractor.from_url(self.url) -------------------------------------------------------------------------------- /docs/user/funcs/user_followers.md: -------------------------------------------------------------------------------- 1 |

user_followers(user object)

2 | 3 | ```python 4 | from letterboxdpy import user 5 | user_instance = user.User("nmcassa") 6 | print(user.user_followers(user_instance)) 7 | ``` 8 | 9 |
10 | Click to expand user_followers method response 11 | 12 | ```json 13 | { 14 | "ppark": { 15 | "display_name": "ppark" 16 | }, 17 | "joacogarcia2023": { 18 | "display_name": "joacogarcia2023" 19 | }, 20 | "ryanshubert": { 21 | "display_name": "ryanshubert" 22 | },... 23 | } 24 | ``` 25 |
-------------------------------------------------------------------------------- /docs/user/funcs/user_following.md: -------------------------------------------------------------------------------- 1 |

user_following(user object)

2 | 3 | ```python 4 | from letterboxdpy import user 5 | user_instance = user.User("nmcassa") 6 | print(user.user_following(user_instance)) 7 | ``` 8 | 9 |
10 | Click to expand user_following method response 11 | 12 | ```json 13 | { 14 | "ppark": { 15 | "display_name": "ppark" 16 | }, 17 | "ryanshubert": { 18 | "display_name": "ryanshubert" 19 | }, 20 | "crescendohouse": { 21 | "display_name": "Crescendo House" 22 | },... 23 | } 24 | ``` 25 |
-------------------------------------------------------------------------------- /letterboxdpy/pages/user_lists.py: -------------------------------------------------------------------------------- 1 | from letterboxdpy.utils.lists_extractor import ListsExtractor 2 | from letterboxdpy.constants.project import DOMAIN 3 | 4 | 5 | class UserLists: 6 | 7 | def __init__(self, username: str) -> None: 8 | self.username = username 9 | self.url = f"{DOMAIN}/{self.username}/lists" 10 | 11 | def get_lists(self, max_lists: int = None) -> dict: 12 | return ListsExtractor.from_url(self.url, max_lists) 13 | 14 | if __name__ == "__main__": 15 | lists_instance = UserLists("fastfingertips") 16 | 17 | for id, data in lists_instance.get_lists()['lists'].items(): 18 | for key, value in data.items(): 19 | print(f"{key}: {value}") 20 | print("-"*100) -------------------------------------------------------------------------------- /tests/test_user.py: -------------------------------------------------------------------------------- 1 | from letterboxdpy.user import User 2 | import unittest 3 | 4 | 5 | class TestUser(unittest.TestCase): 6 | 7 | def setUp(self): 8 | self.user = User("nmcassa") 9 | 10 | def test_get_all_liked_films(self): 11 | movies = self.user.get_liked_films()['movies'] 12 | values = movies.values() 13 | 14 | self.assertTrue(all(value['liked'] for value in values)) 15 | 16 | def test_network_data(self): 17 | followers = self.user.get_followers() 18 | following = self.user.get_following() 19 | 20 | self.assertTrue(self.user.stats['followers'] == len(followers)) 21 | self.assertTrue(self.user.stats['following'] == len(following)) 22 | 23 | if __name__ == '__main__': 24 | unittest.main() -------------------------------------------------------------------------------- /docs/movie/funcs/movie_details.md: -------------------------------------------------------------------------------- 1 |

movie_details(movie object)

2 | 3 | ```python 4 | from letterboxdpy import movie 5 | movie_instance = movie.Movie("v-for-vendetta") 6 | print(movie.movie_details(movie_instance)) 7 | ``` 8 | 9 |
10 | Click to expand movie_details method response 11 | 12 | ```json 13 | { 14 | "Country": [ 15 | "Germany", 16 | "UK", 17 | "USA" 18 | ], 19 | "Studio": [ 20 | "Virtual Studios", 21 | "Anarchos Productions", 22 | "Silver Pictures", 23 | "F\u00fcnfte Babelsberg Film", 24 | "Warner Bros. Productions", 25 | "DC Vertigo" 26 | ], 27 | "Language": [ 28 | "English" 29 | ] 30 | } 31 | ``` 32 |
-------------------------------------------------------------------------------- /letterboxdpy/utils/utils_transform.py: -------------------------------------------------------------------------------- 1 | from letterboxdpy.constants.project import MONTH_ABBREVIATIONS 2 | 3 | def month_to_index(month_abbreviation): 4 | """Convert a month abbreviation to its index.""" 5 | try: 6 | return MONTH_ABBREVIATIONS.index(month_abbreviation) + 1 7 | except ValueError: 8 | return None 9 | 10 | def index_to_month(month_index): 11 | """Convert a month index to its abbreviation.""" 12 | if 1 <= month_index <= 12: 13 | return MONTH_ABBREVIATIONS[month_index - 1] 14 | return None 15 | 16 | def get_ajax_url(url: str) -> str: 17 | """ 18 | this function returns the ajax url of the given url. 19 | """ 20 | x = '.com/films' 21 | ax = ".com/films/ajax" 22 | 23 | return url if ax in url else url.replace(x, ax) -------------------------------------------------------------------------------- /docs/user/funcs/user_lists.md: -------------------------------------------------------------------------------- 1 |

user_lists(user object)

2 | 3 | ```python 4 | from letterboxdpy import user 5 | user_instance = user.User("nmcassa") 6 | print(user.user_lists(user_instance)) 7 | ``` 8 | 9 |
10 | Click to expand user_lists method response 11 | 12 | ```json 13 | { 14 | "lists": { 15 | "30052453": { 16 | "title": "DEF CON Movie List", 17 | "slug": "def-con-movie-list", 18 | "description": "The DEF CON Hacking Conference's suggested movie list. defcon.org/html/links/movie-list.html", 19 | "url": "https://letterboxd.com/nmcassa/list/def-con-movie-list/", 20 | "count": 11, 21 | "likes": 0, 22 | "comments": 0 23 | } 24 | }, 25 | "count": 1, 26 | "last_page": 1 27 | } 28 | ``` 29 |
-------------------------------------------------------------------------------- /docs/user/funcs/user_genre_info.md: -------------------------------------------------------------------------------- 1 |

user_genre_info(user object)

2 | 3 | ```python 4 | from letterboxdpy import user 5 | user_instance = user.User("nmcassa") 6 | print(user.user_genre_info(user_instance)) 7 | ``` 8 | 9 |
10 | Click to expand user_genre_info method response 11 | 12 | ```json 13 | { 14 | "action":55, 15 | "adventure":101, 16 | "animation":95, 17 | "comedy":188, 18 | "crime":22, 19 | "documentary":16, 20 | "drama":94, 21 | "family":109, 22 | "fantasy":54, 23 | "history":5, 24 | "horror":27, 25 | "music":9, 26 | "mystery":30, 27 | "romance":29, 28 | "science-fiction":48, 29 | "thriller":43, 30 | "tv-movie":13, 31 | "war":4, 32 | "western":5 33 | } 34 | ``` 35 |
-------------------------------------------------------------------------------- /docs/user/funcs/user_tags.md: -------------------------------------------------------------------------------- 1 |

user_tags(user object)

2 | 3 | ```python 4 | from letterboxdpy import user 5 | user_instance = user.User("nmcassa") 6 | result = user.user_tags(user_instance) 7 | print(result) 8 | ``` 9 | 10 |
11 | Click to expand user_tags method response 12 | 13 | ```json 14 | { 15 | "films": {"tags": {"lol": {...}}, "count": 1}, 16 | "diary": {"tags": {"lol": {...}}, "count": 1}, 17 | "reviews": {"tags": {"lol": {...}}, "count": 1}, 18 | "lists": { 19 | "tags": { 20 | "hacking": { 21 | "name": "hacking", 22 | "title": "hacking", 23 | "link": "/nmcassa/tag/hacking/lists/", 24 | "count": 1, 25 | "no": 1 26 | } 27 | }, 28 | "count": 1 29 | }, 30 | "count": 4 31 | } 32 | ``` 33 |
34 | -------------------------------------------------------------------------------- /examples/exports/users/nmcassa/lists.json: -------------------------------------------------------------------------------- 1 | { 2 | "limit": null, 3 | "count": 2, 4 | "last_page": 1, 5 | "lists": { 6 | "46710824": { 7 | "title": "Movies to Watch with Priscilla Park", 8 | "slug": "movies-to-watch-with-priscilla-park", 9 | "description": "", 10 | "url": "https://letterboxd.com/nmcassa/list/movies-to-watch-with-priscilla-park/", 11 | "count": 19, 12 | "likes": 1, 13 | "comments": 0 14 | }, 15 | "30052453": { 16 | "title": "DEF CON Movie List", 17 | "slug": "def-con-movie-list", 18 | "description": "The DEF CON Hacking Conference's suggested movie list. defcon.org/html/links/movie-list.html", 19 | "url": "https://letterboxd.com/nmcassa/list/def-con-movie-list/", 20 | "count": 11, 21 | "likes": 2, 22 | "comments": 0 23 | } 24 | } 25 | } -------------------------------------------------------------------------------- /docs/search/funcs/get_film_slug_from_title.md: -------------------------------------------------------------------------------- 1 |

get_film_slug_from_title(title: str) -> str

2 | 3 | **Documentation:** 4 | 5 | Searches for a film by title and returns its Letterboxd slug. 6 | 7 | **Parameters:** 8 | - `title` (str): The title of the film to search for 9 | 10 | **Returns:** 11 | - `str`: The film slug (e.g., "dune-2021") or `None` if not found 12 | 13 | **Example:** 14 | ```python 15 | from letterboxdpy.search import get_film_slug_from_title 16 | 17 | # Get slug for a specific film 18 | slug = get_film_slug_from_title("Dune") 19 | print(slug) # Output: "dune-2021" 20 | 21 | # Handle case when film is not found 22 | slug = get_film_slug_from_title("NonexistentMovie123") 23 | print(slug) # Output: None 24 | ``` 25 | 26 | **Note:** This function returns the first search result. For more specific results, use the `Search` class directly. 27 | -------------------------------------------------------------------------------- /tests/test_search.py: -------------------------------------------------------------------------------- 1 | from letterboxdpy.search import Search, get_film_slug_from_title 2 | import unittest 3 | 4 | 5 | class TestSearch(unittest.TestCase): 6 | 7 | def setUp(self): 8 | self.movie_name = "V for Vendetta" 9 | #self.movie_director_name = "James McTeigue" 10 | #self.movie_year = 2006 11 | self.q = Search(self.movie_name, 'films') 12 | 13 | def test_film_search(self): 14 | data = self.q.get_results() 15 | self.assertTrue(len(data['results']) > 0) 16 | 17 | def test_film_search_with_max(self): 18 | data = self.q.get_results(max=1) 19 | self.assertTrue(data['count'] == 1) 20 | self.assertTrue(len(data['results']) == 1) 21 | 22 | def test_get_film_slug_from_title(self): 23 | slug = get_film_slug_from_title(self.movie_name) 24 | self.assertEqual(slug, 'v-for-vendetta') 25 | 26 | if __name__ == '__main__': 27 | unittest.main() -------------------------------------------------------------------------------- /docs/user/funcs/user_films.md: -------------------------------------------------------------------------------- 1 |

user_films(user object)

2 | 3 | ```python 4 | from letterboxdpy import user 5 | user_instance = user.User("nmcassa") 6 | print(user.user_films(user_instance)) 7 | ``` 8 | 9 |
10 | Click to expand the demo response for user_films method or view the full response 11 | 12 | ```json 13 | { 14 | "movies": { 15 | "civil-war-2024": { 16 | "name": "Civil War", 17 | "id": "834656", 18 | "rating": 3, 19 | "liked": false 20 | }, 21 | "monkey-man": { 22 | "name": "Monkey Man", 23 | "id": "488751", 24 | "rating": 9, 25 | "liked": true 26 | },... 27 | }, 28 | "count": 560, 29 | "liked_count": 80, 30 | "rating_count": 518, 31 | "liked_percentage": 14.29, 32 | "rating_percentage": 92.5, 33 | "rating_average": 6.47 34 | } 35 | ``` 36 |
-------------------------------------------------------------------------------- /letterboxdpy/core/exceptions.py: -------------------------------------------------------------------------------- 1 | class PageFetchError(Exception): 2 | """Custom exception for errors related to fetching pages.""" 3 | pass 4 | 5 | class PageLoadError(Exception): 6 | """Raised when loading a page from a given URL fails.""" 7 | def __init__(self, url, message="Failed to load the page"): 8 | super().__init__(f"{message}: {url}") 9 | self.url = url 10 | 11 | class InvalidResponseError(Exception): 12 | """Exception raised when an HTTP response is invalid or unexpected.""" 13 | pass 14 | 15 | class CustomEncoderError(Exception): 16 | """Custom error class to represent errors that occur during encoding.""" 17 | 18 | def __init__(self, message: str, *args): 19 | super().__init__(message, *args) 20 | self.message = message 21 | 22 | def __str__(self): 23 | return f"CustomEncoderError: {self.message}" 24 | 25 | class PrivateRouteError(Exception): 26 | """Exception raised when a private route is accessed.""" 27 | pass -------------------------------------------------------------------------------- /tests/test_scraper.py: -------------------------------------------------------------------------------- 1 | from letterboxdpy.core.scraper import Scraper, url_encode 2 | from bs4 import BeautifulSoup 3 | import unittest 4 | 5 | 6 | class TestScraper(unittest.TestCase): 7 | 8 | def setUp(self): 9 | self.scraper = Scraper("letterboxd.com") 10 | 11 | self.valid_film_url = "https://letterboxd.com/film/dune-part-two/" 12 | self.invalid_film_url = "https://letterboxd.com/film/duneparttwo/" 13 | 14 | def test_valid_film_url(self): 15 | self.assertIsInstance( 16 | self.scraper.get_page(self.valid_film_url), BeautifulSoup 17 | ) 18 | 19 | def test_invalid_film_url(self): 20 | with self.assertRaises(Exception): 21 | self.scraper.get_page(self.invalid_film_url) 22 | 23 | def test_url_encode(self): 24 | query = 
"Dune: Part Two" 25 | encoded_query = url_encode(query) 26 | self.assertEqual(encoded_query, "Dune%3A%20Part%20Two") 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /letterboxdpy/pages/movie_similar.py: -------------------------------------------------------------------------------- 1 | from letterboxdpy.constants.project import DOMAIN 2 | 3 | 4 | class MovieSimilar: 5 | """Movie similar page operations - similar movies functionality.""" 6 | 7 | def __init__(self, slug: str): 8 | """Initialize MovieSimilar with a movie slug.""" 9 | self.slug = slug 10 | self.url = f"https://letterboxd.com/films/ajax/like/{slug}" 11 | 12 | def get_similar_movies(self) -> dict: 13 | """Get movies similar to this movie.""" 14 | return extract_similar_movies(self.url) 15 | 16 | 17 | def extract_similar_movies(url: str) -> dict: 18 | """Extract movies similar to the given movie.""" 19 | from letterboxdpy.films import Films # Avoid circular import 20 | 21 | # Using the AJAX endpoint for similar movies 22 | return Films(url).movies 23 | 24 | if __name__ == "__main__": 25 | similar_instance = MovieSimilar("v-for-vendetta") 26 | 27 | print(f"Movie: {similar_instance.slug}") 28 | for id, data in similar_instance.get_similar_movies().items(): 29 | print(id, data) 30 | -------------------------------------------------------------------------------- /docs/user/funcs/user_watchlist.md: -------------------------------------------------------------------------------- 1 |

user_watchlist(user object)

2 | 3 | ```python 4 | from letterboxdpy import user 5 | user_instance = user.User("nmcassa") 6 | watchlist_result = user.user_watchlist(user_instance, {'genre':['action','-drama']}) 7 | print(watchlist_result) 8 | ``` 9 | 10 |
11 | Click to expand user_watchlist method response 12 | 13 | ```json 14 | { 15 | "available": true, 16 | "count": 57, 17 | "data_count": 6, 18 | "last_page": 1, 19 | "filters": { 20 | "genre": [ 21 | "action", 22 | "-drama" 23 | ] 24 | }, 25 | "data": { 26 | "51397": { 27 | "name": "From Dusk Till Dawn", 28 | "slug": "from-dusk-till-dawn", 29 | "no": 6, 30 | "page": 1, 31 | "url": "https://letterboxd.com/films/from-dusk-till-dawn/" 32 | },... 33 | "62780": { 34 | "name": "Mad Max: Fury Road", 35 | "slug": "mad-max-fury-road", 36 | "no": 1, 37 | "page": 1, 38 | "url": "https://letterboxd.com/films/mad-max-fury-road/" 39 | } 40 | } 41 | } 42 | ``` 43 |
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 nmcassa 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "letterboxdpy" 7 | version = "5.3.7" 8 | dependencies = [ 9 | "requests>=2.31.0", 10 | "beautifulsoup4>=4.12.3", 11 | "lxml>=5.1.0", 12 | "validators" 13 | ] 14 | requires-python = ">=3.7" 15 | authors = [ 16 | { name="Nicholas Cassarino", email="nmcassa804@outlook.com" }, 17 | ] 18 | maintainers = [ 19 | { name="FastFingertips", email="fastfingertips@gmail.com" } 20 | ] 21 | description = "A letterboxd webscraper" 22 | readme = "README.md" 23 | license = "MIT" 24 | license-files = ["LICENSE"] 25 | classifiers = [ 26 | "Programming Language :: Python :: 3", 27 | "License :: OSI Approved :: MIT License", 28 | "Operating System :: OS Independent", 29 | ] 30 | keywords = ["letterboxd", "webscraper", "movie", "film", "rating", "review", "watchlist", "diary"] 31 | 32 | [project.urls] 33 | Repository = "https://github.com/nmcassa/letterboxdpy" 34 | Documentation = "https://github.com/nmcassa/letterboxdpy" 35 | "Bug Tracker" = "https://github.com/nmcassa/letterboxdpy/issues" 36 | "Source Code" = "https://github.com/nmcassa/letterboxdpy/archive/refs/heads/main.zip" 37 | 38 | [tool.hatch.build.targets.wheel] 39 | packages = ["letterboxdpy"] 40 | -------------------------------------------------------------------------------- /letterboxdpy/utils/utils_string.py: -------------------------------------------------------------------------------- 1 | def remove_prefix(text: str, prefix: str) -> str: 2 | """Remove a specific prefix from a string if it exists.""" 3 | return text[len(prefix):] if text.startswith(prefix) else text 4 | 5 | def strip_prefix(method_name: str, prefix: str = 'get_') -> str: 6 | """Remove the 'get_' prefix from a method name if it exists.""" 7 | return 
def clean_movie_name(movie_name: str) -> str:
    """Remove a parenthesised four-digit year from a movie name.

    The whitespace directly in front of the year is removed together with
    it, so a year in the middle of the title does not leave a double space
    behind. Any other whitespace inside the title is left untouched.

    Args:
        movie_name: Title that may contain a year such as ``"(1999)"``.
            ``None`` or an empty string is treated as an empty title.

    Returns:
        The title without the year, stripped of leading/trailing whitespace.

    Example:
        clean_movie_name("The Matrix (1999)") -> "The Matrix"
        clean_movie_name("Blade Runner (1982) Final Cut") -> "Blade Runner Final Cut"
        clean_movie_name("Inception") -> "Inception"
    """
    import re

    # Same year token as extract_year_from_movie_name, plus the whitespace
    # that precedes it so removing the year does not leave a gap.
    year_with_leading_space = r'\s*\(\d{4}\)'
    return re.sub(year_with_leading_space, '', movie_name or '').strip()
51 | }, 52 | "count": 1 53 | }, 54 | "total_count": 4 55 | } -------------------------------------------------------------------------------- /tests/test_movie.py: -------------------------------------------------------------------------------- 1 | from letterboxdpy.movie import Movie 2 | import unittest 3 | 4 | 5 | class TestMovie(unittest.TestCase): 6 | 7 | def setUp(self): 8 | self.movie = Movie("v-for-vendetta") 9 | 10 | def test_get_not_exists_banner_movie(self): 11 | instance = Movie("avatar-4") # upcoming 2029 12 | data = instance.banner 13 | self.assertIsNone(data) 14 | 15 | def test_get_exists_banner_movie(self): 16 | data = self.movie.banner 17 | self.assertIsNotNone(data) 18 | 19 | def test_get_movie_title(self): 20 | data = self.movie.title 21 | self.assertEqual(data, "V for Vendetta") 22 | 23 | def test_get_movie_year(self): 24 | data = self.movie.year 25 | self.assertEqual(data, 2005) 26 | 27 | def test_movie_original_title_nullable(self): 28 | data = self.movie.original_title 29 | self.assertIsNone(data) 30 | 31 | def test_non_english_movie_original_title(self): 32 | movie = Movie("parasite-2019") 33 | self.assertEqual(movie.title, "Parasite") 34 | self.assertIsNotNone(movie.original_title) 35 | self.assertNotEqual(movie.title, movie.original_title) 36 | self.assertEqual(movie.original_title, "기생충") 37 | 38 | 39 | if __name__ == '__main__': 40 | unittest.main() -------------------------------------------------------------------------------- /letterboxdpy/constants/project.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | # Date/Time Constants 4 | now = datetime.now() 5 | CURRENT_YEAR = now.year 6 | CURRENT_MONTH = now.month 7 | CURRENT_DAY = now.day 8 | MONTH_ABBREVIATIONS = [ 9 | "Jan", "Feb", "Mar", "Apr", "May", "Jun", 10 | "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" 11 | ] 12 | DAY_ABBREVIATIONS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] 13 | 14 | # Domain/URL 
Constants 15 | URL_PROTOCOLS = ['http://', 'https://'] 16 | 17 | DOMAIN_FULL = 'letterboxd.com' 18 | DOMAIN_SHORT = 'boxd.it' 19 | 20 | # Base URLs 21 | DOMAIN = f'https://{DOMAIN_FULL}' 22 | SITE = f'{DOMAIN}/' 23 | SITE_SHORT = f'https://{DOMAIN_SHORT}/' 24 | 25 | DOMAIN_MATCHES = [f'{DOMAIN_FULL}/', f'{DOMAIN_SHORT}/'] 26 | 27 | # Movie-Related Constants 28 | VALID_RATINGS = {0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5} 29 | GENRES = [ 30 | "action", "adventure", "animation", "comedy", "crime", 31 | "documentary", "drama", "family", "fantasy", "history", 32 | "horror", "music", "mystery", "romance", "science-fiction", 33 | "thriller", "tv-movie", "war", "western" 34 | ] 35 | 36 | # Letterboxd Theme Colors 37 | class Colors: 38 | BG = '#14181C' # Letterboxd dark background 39 | BLUE = '#456' # Letterboxd blue 40 | GREEN = '#00C030' # Letterboxd green 41 | TEXT = '#9AB' # Letterboxd text gray -------------------------------------------------------------------------------- /letterboxdpy/constants/selectors.py: -------------------------------------------------------------------------------- 1 | """ 2 | CSS and HTML selectors for web scraping. 3 | 4 | Defines BeautifulSoup selectors used to extract data from 5 | Letterboxd pages including films, metadata, and error messages. 6 | """ 7 | 8 | from dataclasses import dataclass 9 | from typing import Dict, Tuple, TypeAlias 10 | 11 | 12 | Selector: TypeAlias = Tuple[str, Dict[str, str]] 13 | 14 | @dataclass 15 | class FilmSelectors: 16 | """Selectors for film list elements""" 17 | #
18 | LIST: Selector = ('div', {'class': 'js-list-entries'}) 19 | #

Aliens vs Predator: Requiem

20 | HEADLINE: Selector = ('h2', {'class': 'name'}) 21 | # 2007 22 | YEAR: Selector = ('span', {'class': 'releasedate'}) 23 | 24 | @dataclass 25 | class MetaSelectors: 26 | """Selectors for meta elements""" 27 | DESCRIPTION: Selector = ('meta', {'name': 'description'}) 28 | 29 | @dataclass 30 | class PageSelectors: 31 | """Selectors for page elements""" 32 | ERROR_BODY: Selector = ('body', {'class': 'error'}) 33 | ERROR_MESSAGE: Selector = ('section', {'class': 'message'}) 34 | LAST_PAGE: Selector = ('div', {'class': 'paginate-pages'}) 35 | ARTICLES: Selector = ('ul', {'class': 'poster-list -p70 film-list clear film-details-list'}) -------------------------------------------------------------------------------- /letterboxdpy/utils/utils_validators.py: -------------------------------------------------------------------------------- 1 | import re 2 | import validators 3 | 4 | def is_url(url) -> bool: 5 | """ 6 | this function checks if the URL is valid or not, 7 | and returns a boolean value as the result. 
def is_valid_email(value):
    """Check if the given string is a valid email address.

    The check is a deliberately simple shape test
    (``local@domain.tld``), not a full RFC 5322 validation.

    Args:
        value: Candidate value; anything that is not a ``str`` is rejected.

    Returns:
        True when the *entire* string has the expected shape, else False.
    """
    if not isinstance(value, str):
        return False
    email_pattern = r"[\w\.-]+@[\w\.-]+\.\w+"
    # fullmatch instead of match + '$': '$' also matches just before a
    # trailing newline, which let "user@example.com\n" pass as valid.
    return re.fullmatch(email_pattern, value) is not None
def get_list_slug(url) -> str:
    """
    extract the slug from a URL containing '/list/'.
    example: 'https://letterboxd.com/fastfingertips/list/list_name/' -> 'list_name'

    Only the path segment directly after '/list/' is returned, so trailing
    sub-pages (e.g. '.../list/list_name/detail/') no longer get glued onto
    the slug.

    Raises:
        ValueError: If the URL does not contain '/list/'.
    """
    marker = '/list/'
    _, found, tail = url.partition(marker)
    if not found:
        # Same exception type str.index() raised before, with a clearer message.
        raise ValueError(f"URL does not contain '{marker}': {url}")
    # Keep the first segment only; anything after the next '/' is a sub-page.
    return tail.split('/', 1)[0]
def extract_movie_extended_details(dom) -> dict:
    """Extract detailed movie information from details page.

    Looks up the ``tab-details`` container in ``dom`` and sorts the text of
    its links into studios, countries and languages based on the link path:
    ``/studio/...``, ``/films/country/...`` and ``/films/language/...``.

    Args:
        dom: Parsed page object exposing ``find``; the container it returns
            must expose ``find_all("a")`` and each link ``get`` and ``text``.

    Returns:
        A dict with the keys ``'country'``, ``'studio'`` and ``'language'``,
        each mapping to a list of stripped link texts (empty lists when the
        details tab or matching links are missing).
    """
    data = {
        'country': [],
        'studio': [],
        'language': []
    }

    dom_details = dom.find("div", {"id": ["tab-details"]})
    if not dom_details:
        return data

    for a in dom_details.find_all("a"):
        # Anchors without an href are skipped instead of raising KeyError.
        href = a.get('href') or ''
        segments = [part for part in href.split('/') if part]
        if not segments:
            continue

        # Classify by path segments rather than fixed character offsets.
        if segments[0] == 'studio':
            key = 'studio'
        elif (segments[0] == 'films' and len(segments) > 1
                and segments[1] in ('country', 'language')):
            key = segments[1]
        else:
            continue

        data[key].append(a.text.strip())

    return data

user_diary(user object)

2 | 3 | ```python 4 | from letterboxdpy import user 5 | user_instance = user.User("nmcassa") 6 | print(user.user_diary(user_instance)) 7 | ``` 8 | 9 |
10 | Click to expand the demo response for user_diary method or view the full response 11 | 12 | ```json 13 | { 14 | "entries": { 15 | "513520182": { 16 | "name": "Black Swan", 17 | "slug": "black-swan", 18 | "id": "20956", 19 | "release": 2010, 20 | "runtime": 108, 21 | "rewatched": false, 22 | "rating": 9, 23 | "liked": true, 24 | "reviewed": false, 25 | "date": { 26 | "year": 2024, 27 | "month": 1, 28 | "day": 15 29 | }, 30 | "page": 1 31 | },... 32 | ...}, 33 | "129707465": { 34 | "name": "mid90s", 35 | "slug": "mid90s", 36 | "id": "370451", 37 | "release": 2018, 38 | "runtime": 86, 39 | "rewatched": false, 40 | "rating": 8, 41 | "liked": false, 42 | "reviewed": false, 43 | "date": { 44 | "year": 2020, 45 | "month": 10, 46 | "day": 20 47 | }, 48 | "page": 7 49 | } 50 | }, 51 | "count": 337, 52 | "last_page": 7 53 | } 54 | ``` 55 |
class SecretsEncoder(JSONEncoder):
    """JSON encoder that excludes specified attributes from the output."""

    def __init__(self, secrets: list = None, **kwargs):
        """Create the encoder.

        Args:
            secrets: Attribute names to leave out of the encoded object.
                Defaults to ``['secrets']`` when omitted.
            **kwargs: Forwarded unchanged to ``JSONEncoder``.

        Raises:
            TypeError: If ``secrets`` is not a list or contains non-strings.
            ValueError: If ``secrets`` is an empty list.
        """
        # None sentinel instead of a list literal default: a list default is
        # a single object shared by every call.
        if secrets is None:
            secrets = ['secrets']
        if not isinstance(secrets, list):
            raise TypeError("secrets must be a list")
        if not secrets:
            raise ValueError("secrets must not be empty")
        if not all(isinstance(attr, str) for attr in secrets):
            raise TypeError("All elements in secrets must be strings")

        # Stored as a set for fast membership tests in default().
        self.secrets = set(secrets)
        super().__init__(**kwargs)

    def default(self, o):
        """Encodes the object to JSON format excluding specified attributes.

        Raises:
            CustomEncoderError: If ``o`` has no ``__dict__`` to serialize.
        """
        if not hasattr(o, '__dict__'):
            raise CustomEncoderError(f"Object of type {type(o).__name__} has no __dict__ attribute")
        return {k: v for k, v in o.__dict__.items() if k not in self.secrets}
letterboxdpy.utils.utils_parser import extract_numeric_text 4 | 5 | 6 | class MovieMembers: 7 | """Movie members page operations - watchers statistics.""" 8 | 9 | def __init__(self, slug: str): 10 | """Initialize MovieMembers with a movie slug.""" 11 | self.slug = slug 12 | self.url = f"{DOMAIN}/film/{slug}/members" 13 | self.dom = parse_url(self.url) 14 | 15 | def get_watchers_stats(self) -> dict: 16 | """Get movie watchers' statistics.""" 17 | return extract_movie_watchers_stats(self.dom) 18 | 19 | # TODO: /fans, /likes, /reviews, /lists 20 | 21 | def extract_movie_watchers_stats(dom) -> dict: 22 | """Extract movie watchers' statistics from members page.""" 23 | try: 24 | # Extract watchers data from DOM. 25 | stats = {} 26 | content_nav = dom.find("div", {"id": "content-nav"}) 27 | if content_nav: 28 | for a in content_nav.find_all("a", title=True): 29 | a_text = a.text.strip().lower() 30 | a_title = a['title'] 31 | count = extract_numeric_text(a_title) 32 | stats[a_text] = count 33 | return stats 34 | except Exception as e: 35 | raise RuntimeError("Failed to retrieve movie watchers' statistics") from e 36 | 37 | if __name__ == "__main__": 38 | members_instance = MovieMembers("v-for-vendetta") 39 | 40 | print(f"Movie: {members_instance.slug}") 41 | for key, value in members_instance.get_watchers_stats().items(): 42 | print(f"{key}: {value}") -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Letterboxd Examples 2 | 3 | Example scripts demonstrating `letterboxdpy` library features. 4 | 5 | ## Installation 6 | 7 | ```bash 8 | pip install -e . 9 | pip install -r examples/requirements.txt 10 | ``` 11 | 12 | ## Examples 13 | 14 | **`user_rating_plot.py`** 15 | Creates a rating distribution histogram with Letterboxd styling. 
16 | ```bash 17 | python examples/user_rating_plot.py --user 18 | ``` 19 | 20 | **`user_plot_statistics.py`** 21 | Visualizes movie watching patterns over time with monthly and daily statistics. 22 | ```bash 23 | python examples/user_plot_statistics.py --user --start-year 2020 --end-year 2024 24 | ``` 25 | 26 | **`follow_stats.py`** 27 | Analyzes follow relationships, followback ratios, and mutual follows. 28 | ```bash 29 | echo | python examples/follow_stats.py 30 | ``` 31 | 32 | **`export_user_data.py`** 33 | Exports all user data (films, reviews, lists, followers, etc.) to JSON files. 34 | ```bash 35 | echo | python examples/export_user_data.py 36 | ``` 37 | 38 | **`export_user_diary_posters.py`** 39 | Downloads movie posters from diary entries and organizes them by year. 40 | ```bash 41 | echo | python examples/export_user_diary_posters.py 42 | ``` 43 | 44 | **`search_and_export_lists.py`** 45 | Searches for lists by query and exports them to CSV format. 46 | ```bash 47 | echo -e "query\n3" > input.txt 48 | Get-Content input.txt | python examples/search_and_export_lists.py 49 | ``` 50 | 51 | ## Requirements 52 | 53 | - **Core**: requests, beautifulsoup4, lxml, validators 54 | - **Visualization**: matplotlib, numpy, pillow 55 | - **Data Processing**: pandas 56 | 57 | See `requirements.txt` for details. 
def get_input(prompt: str, *, index: int = None, expected_type: type = str) -> object:
    """Retrieve value from command-line argument or prompt user for input.

    Args:
        prompt: Text shown when the value has to be asked interactively.
        index: Position in ``sys.argv`` to read first. ``None`` skips the
            command-line lookup; ``0`` is a valid position.
        expected_type: Callable used to convert the raw string.

    Returns:
        The converted value, taken from ``sys.argv[index]`` when that exists
        and converts cleanly, otherwise from the first non-empty, convertible
        answer typed by the user.

    Note:
        Exits the process with status 0 if the user presses Ctrl+C at the
        prompt.
    """
    def convert(value):
        return expected_type(value)

    # 'is not None' rather than truthiness so that index 0 is honoured,
    # matching get_arg() in this module.
    if index is not None:
        try:
            return convert(sys.argv[index])
        except (IndexError, ValueError):
            # Missing or unconvertible argument: fall back to prompting.
            pass

    while True:
        try:
            value = input(prompt).strip()
            if value:
                return convert(value)
        except ValueError:
            # Unconvertible answer: ask again.
            pass
        except KeyboardInterrupt:
            print("\nKeyboard interrupt detected. Exiting...")
            sys.exit(0)

get_activity()

2 | 3 | ```python 4 | from letterboxdpy.user import User 5 | user_instance = User("nmcassa") 6 | print(user_instance.get_activity()) 7 | ``` 8 | 9 |
10 | Click to expand the demo response for get_activity method 11 | 12 | ```json 13 | { 14 | "metadata": { 15 | "export_timestamp": "2025-08-28T17:31:22.001861", 16 | "source_url": "https://letterboxd.com/ajax/activity-pagination/nmcassa", 17 | "total_activities": 3 18 | }, 19 | "activities": { 20 | "9659817024": { 21 | "activity_type": "review", 22 | "timestamp": "2025-08-24T14:40:23.000000Z", 23 | "content": { 24 | "action": "watched", 25 | "description": "nmcassa watched and rated The Matrix ★★★★★", 26 | "movie": { 27 | "title": "The Matrix", 28 | "year": 1999, 29 | "slug": "the-matrix", 30 | "url": "https://letterboxd.com/film/the-matrix/" 31 | }, 32 | "rating": 5.0 33 | } 34 | }, 35 | "9624102431": { 36 | "activity_type": "basic", 37 | "timestamp": "2025-08-19T16:49:13.000000Z", 38 | "content": { 39 | "action": "liked", 40 | "description": "nmcassa liked Ben Wold's review of Superman", 41 | "movie": { 42 | "title": "Superman", 43 | "slug": "superman", 44 | "url": "https://letterboxd.com/film/superman/" 45 | } 46 | } 47 | }, 48 | "9624100380": { 49 | "activity_type": "basic", 50 | "timestamp": "2025-08-19T16:48:50.000000Z", 51 | "content": { 52 | "action": "added", 53 | "description": "nmcassa added The Substance to their watchlist", 54 | "movie": { 55 | "title": "The Substance", 56 | "slug": "the-substance", 57 | "url": "https://letterboxd.com/film/the-substance/" 58 | } 59 | } 60 | } 61 | } 62 | } 63 | ``` 64 |
-------------------------------------------------------------------------------- /letterboxdpy/url.py: -------------------------------------------------------------------------------- 1 | def get_live_feed_url() -> str: 2 | # total watches and last reviews 3 | return "https://letterboxd.com/csi/films-live-feed/" 4 | 5 | def get_metadata_url() -> str: 6 | return "https://letterboxd.com/ajax/letterboxd-metadata/" 7 | 8 | # -- FILM -- 9 | 10 | def get_popular_lists_url(film_slug: str) -> str: 11 | # top lists 12 | return f"https://letterboxd.com/csi/film/{film_slug}/popular-lists/" 13 | 14 | def get_recent_reviews_url(film_slug: str) -> str: 15 | # last reviews 16 | return f"https://letterboxd.com/csi/film/{film_slug}/recent-reviews/" 17 | 18 | def get_rating_histogram_url(film_slug: str) -> str: 19 | # fan count and ratings 20 | return f"https://letterboxd.com/csi/film/{film_slug}/rating-histogram/" 21 | 22 | def get_user_actions_url(film_slug: str) -> str: 23 | return f"https://letterboxd.com/csi/film/{film_slug}/sidebar-user-actions/" 24 | 25 | def get_stats_url(film_slug: str) -> str: 26 | # watches, lists and likes 27 | return f"https://letterboxd.com/csi/film/{film_slug}/stats/" 28 | 29 | def get_news_url(film_slug: str) -> str: 30 | # posts: journal, video, etc. 
31 | return f"https://letterboxd.com/csi/film/{film_slug}/news/" 32 | 33 | def get_availability_url(film_slug: str) -> str: 34 | # trailer and services 35 | return f"https://letterboxd.com/csi/film/{film_slug}/availability/" 36 | 37 | """ 38 | # -- USER -- 39 | 40 | def get_user_homepage_url() -> str: 41 | return "https://letterboxd.com/ajax/user-homepage/" 42 | 43 | def get_friend_reviews_url(film_slug: str) -> str: 44 | return f"https://letterboxd.com/csi/film/{film_slug}/friend-reviews/" 45 | 46 | def get_friend_activity_url(film_slug: str) -> str: 47 | return f"https://letterboxd.com/csi/film/{film_slug}/friend-activity/" 48 | 49 | def get_own_reviews_url(film_slug: str) -> str: 50 | return f"https://letterboxd.com/csi/film/{film_slug}/own-reviews/" 51 | 52 | def get_likes_reviews_url(film_slug: str) -> str: 53 | return "https://letterboxd.com/csi/film/{film_slug}/liked-reviews/" 54 | """ -------------------------------------------------------------------------------- /letterboxdpy/utils/utils_file.py: -------------------------------------------------------------------------------- 1 | import os 2 | from json import dump as json_dump 3 | from typing import Union 4 | 5 | 6 | def save_data(path: str, data: dict, format: str = 'json') -> None: 7 | """Save data to a file in the specified format.""" 8 | if format == 'json': 9 | save_json(path, data) 10 | else: 11 | raise ValueError(f"Unsupported format '{format}'. 
def build_click_url(file_path: str, protocol: str = 'file') -> str:
    """Build a clickable file URL with the specified protocol.

    Args:
        file_path: For ``'file'``, a path resolved against the current
            working directory; for ``'http'``/``'https'``, the host and path
            to append after the scheme.
        protocol: One of ``'file'``, ``'http'`` or ``'https'``.

    Returns:
        The URL string, e.g. ``file:///home/user/out.json`` or
        ``https://example.com/x``.

    Raises:
        ValueError: If ``protocol`` is not supported.
    """
    if protocol == 'file':
        # Equivalent to build_path(os.getcwd(), file_path): join + normalize.
        absolute = os.path.normpath(os.path.join(os.getcwd(), file_path))
        url_path = absolute.replace(os.sep, '/')
        # An absolute POSIX path already begins with '/'. Dropping it avoids
        # 'file:////...' (four slashes); Windows drive paths are unaffected.
        if url_path.startswith('/') and not url_path.startswith('//'):
            url_path = url_path[1:]
        return f"file:///{url_path}"
    elif protocol in ['http', 'https']:
        return f"{protocol}://{file_path}"
    else:
        raise ValueError(f"Unsupported protocol '{protocol}'")

user_reviews(user object)

3 | 4 | ```python 5 | from letterboxdpy import user 6 | user_instance = user.User("nmcassa") 7 | print(user.user_reviews(user_instance)) 8 | ``` 9 | 10 |
11 | Click to expand user_reviews method response 12 | 13 | ```json 14 | { 15 | "reviews": { 16 | "495592379": { 17 | "movie": { 18 | "name": "Poor Things", 19 | "slug": "poor-things-2023", 20 | "id": "710352", 21 | "release": 2023, 22 | "link": "https://letterboxd.com/film/poor-things-2023/" 23 | }, 24 | "type": "Watched", 25 | "no": 0, 26 | "link": "https://letterboxd.com/nmcassa/film/poor-things-2023/", 27 | "rating": 6, 28 | "review": { 29 | "content": "It looks like AI art and weird movie", 30 | "spoiler": false 31 | }, 32 | "date": { 33 | "year": 2023, 34 | "month": 12, 35 | "day": 26 36 | }, 37 | "page": 1 38 | }, 39 | "152420824": { 40 | "movie": { 41 | "name": "I'm Thinking of Ending Things", 42 | "slug": "im-thinking-of-ending-things", 43 | "id": "430806", 44 | "release": 2020, 45 | "link": "https://letterboxd.com/film/im-thinking-of-ending-things/" 46 | }, 47 | "type": "Watched", 48 | "no": 0, 49 | "link": "https://letterboxd.com/nmcassa/film/im-thinking-of-ending-things/", 50 | "rating": 8, 51 | "review": { 52 | "content": "yeah i dont get it", 53 | "spoiler": false 54 | }, 55 | "date": { 56 | "year": 2021, 57 | "month": 2, 58 | "day": 14 59 | }, 60 | "page": 1 61 | } 62 | }, 63 | "count": 7, 64 | "last_page": 1 65 | } 66 | ``` 67 |
class Avatar:
    """Class to manage avatar URLs and upscale them if necessary."""

    # Default upscale size
    UPSCALE_SIZE = (1000, 1000)
    # List of default sizes to check against
    DEFAULT_SIZES = [(80, 80), (220, 220)]

    def __init__(self, url: str) -> None:
        """Initialize Avatar with the provided URL.

        Args:
            url: Image URL. An empty or scheme-less value (no ``//``) is
                accepted and treated as "no avatar" instead of raising.
        """
        # Top levels: avatar:a, statics:s, secure
        self.top_level = self._first_host_label(url)
        self.avatar_exists = self.top_level == 'a'
        # Storing the URL without query parameters if the avatar exists
        self.url = url.split('?')[0] if self.avatar_exists else url
        # Initializing data dictionary with the initial state
        self.data = {
            'exists': self.avatar_exists,
            'upscaled': False,
            'url': self.url
        }
        # Storing a copy of data for internal use
        self._upscaled_data = self.data.copy()

    @staticmethod
    def _first_host_label(url: str) -> str:
        """Return the text between '//' and the first '.', or '' if absent."""
        parts = (url or '').split('.')[0].split('//')
        # Without a '//' there is only one part; indexing [1] used to raise
        # IndexError here.
        return parts[1] if len(parts) > 1 else ''

    @property
    def upscaled_data(self) -> dict:
        """Return upscaled avatar data if applicable.

        When the avatar exists and its URL contains one of the default size
        patterns (e.g. ``220-0-220``), the returned dict has ``'upscaled'``
        set to True and the size swapped for ``UPSCALE_SIZE`` in ``'url'``.
        Otherwise the initial data is returned unchanged.
        """
        if self.avatar_exists:
            pattern_upscale = '-0-'.join(map(str, self.UPSCALE_SIZE))
            for default_size in self.DEFAULT_SIZES:
                pattern_default = '-0-'.join(map(str, default_size))
                # If a match is found, update the data with upscaled information
                if pattern_default in self.url:
                    self._upscaled_data.update({
                        'upscaled': True,
                        'url': self.url.replace(pattern_default, pattern_upscale)
                    })
        return self._upscaled_data
Exception as e: 48 | raise RuntimeError(f"An error occurred: {e}") 49 | -------------------------------------------------------------------------------- /letterboxdpy/members.py: -------------------------------------------------------------------------------- 1 | if __loader__.name == '__main__': 2 | import sys 3 | sys.path.append(sys.path[0] + '/..') 4 | 5 | from json import ( 6 | dump as json_dump, 7 | dumps as json_dumps, 8 | loads as json_loads 9 | ) 10 | import re 11 | from typing import List 12 | from letterboxdpy.core.encoder import Encoder 13 | from letterboxdpy.core.scraper import parse_url 14 | 15 | 16 | class Members: 17 | """Class for handling member data from Letterboxd.""" 18 | 19 | MEMBERS_YEAR_TOP = "https://letterboxd.com/members/popular/this/year/" 20 | MEMBERS_PER_PAGE = 30 21 | 22 | def __init__(self, url: str = ""): 23 | """Initialize Members with the base URL.""" 24 | self.url = url 25 | 26 | def self_check_value(self, value: str) -> None: 27 | """Check if the value contains only valid characters.""" 28 | if not re.match("^[A-Za-z0-9_]+$", value): 29 | raise ValueError(f"Invalid {self.__class__.__name__}") 30 | 31 | def __str__(self) -> str: 32 | """Return a JSON string representation of the instance.""" 33 | return json_dumps(self, indent=2, cls=Encoder) 34 | 35 | def jsonify(self) -> dict: 36 | """Convert the instance to a JSON dictionary.""" 37 | return json_loads(self.__str__()) 38 | 39 | # -- FUNCTIONS -- 40 | 41 | def top_users(max:int = 100) -> List: 42 | """Fetch the top n members from the Letterboxd popular members page.""" 43 | # max 256 page? 
def extract_user_tags(url: str) -> dict:
    """Collect a user's tags from the films, diary, reviews and lists pages.

    Args:
        url: Base tags URL; each page name is appended as ``{url}/{page}``.

    Returns:
        A dict with one entry per page (``films``, ``diary``, ``reviews``,
        ``lists``), each holding ``tags`` (tag data keyed by slug) and
        ``count`` (number of tags), plus ``total_count`` summing the four
        counts.
    """
    BASE_URL = url
    PAGES = ['films', 'diary', 'reviews', 'lists']

    def parse_tag(tag) -> dict:
        """Extract tag information from a single tag element."""
        anchor = tag.a
        link = anchor['href']
        # A missing or empty counter is treated as a single use, as before;
        # a missing <span> used to raise AttributeError.
        counter = tag.span
        raw_count = counter.text.strip() if counter is not None else ''
        # Tolerate thousands separators such as "1,234" rather than failing
        # in int(). NOTE(review): whether the site ever emits them is not
        # confirmed from here.
        count = int(raw_count.replace(',', '') or 1)
        return {
            'name': anchor.text.strip(),
            'title': anchor['title'],
            # Third path segment from the end of the link.
            'slug': link.split('/')[-3],
            'link': DOMAIN + link,
            'count': count,
        }

    def extract_tags(page: str) -> dict:
        """Extract tags from the page."""
        dom = parse_url(f"{BASE_URL}/{page}")
        tags_ul = dom.find("ul", {"class": "tags-columns"})
        data = {}

        if not tags_ul:
            return data

        index = 1
        for tag in tags_ul.find_all("li"):
            anchor = tag.a
            # Skip list items without a linked anchor; an <li> with no <a>
            # at all used to raise AttributeError here.
            if anchor is None or 'href' not in anchor.attrs:
                continue
            tag_info = parse_tag(tag)
            tag_info['no'] = index
            data[tag_info['slug']] = tag_info
            index += 1

        return data

    data = {}
    for page in PAGES:
        tags = extract_tags(page)
        data[page] = {
            'tags': tags,
            'count': len(tags)
        }

    data['total_count'] = sum(data[page]['count'] for page in PAGES)

    return data
class FollowStatsAnalyzer:
    """Compute follow/follower relationship statistics for one user."""

    def __init__(self, username: str):
        """Store the username and build the matching ``user.User`` object."""
        self.username = username
        self.user_instance = user.User(username)

    def analyze(self) -> dict:
        """Fetch followers and following, then return their statistics."""
        follower_data = self.user_instance.get_followers()
        following_data = self.user_instance.get_following()
        return self._calculate_stats(following_data, follower_data)

    def _calculate_stats(self, following: dict, followers: dict) -> dict:
        """Build the summary and detail sections from the two mappings.

        Args:
            following: Mapping keyed by the usernames the user follows.
            followers: Mapping keyed by the usernames following the user.

        Returns:
            A dict with ``summary`` (counts plus the followback ratio as a
            percentage rounded to two decimals, ``0`` when nobody is
            followed) and ``details`` (the three username lists).
        """
        followed_names = set(following.keys())
        follower_names = set(followers.keys())

        one_way = list(followed_names - follower_names)
        mutual = list(followed_names & follower_names)
        admirers = list(follower_names - followed_names)

        if followed_names:
            ratio = round(len(mutual) / len(followed_names) * 100, 2)
        else:
            ratio = 0

        summary = {
            'total_following': len(followed_names),
            'total_followers': len(follower_names),
            'mutual_follows': len(mutual),
            'not_followback_count': len(one_way),
            'fans_count': len(admirers),
            'followback_ratio': ratio,
        }
        details = {
            'not_followback': one_way,
            'followback': mutual,
            'fans': admirers,
        }
        return {'summary': summary, 'details': details}
def save_results_to_csv(list_instance: List, csv_file: str) -> None:
    """Save a list's movies to a CSV file under ``./exports/lists``.

    The file has a ``LetterboxdURI,Title`` header followed by one row per
    movie, taken from each movie's ``url`` and ``name`` entries.

    Args:
        list_instance: Object whose ``movies`` attribute is a mapping of
            movie data dicts.
        csv_file: File name to create inside the export directory.
    """
    directory = os.path.join(os.getcwd(), 'exports', 'lists')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)

    file_name = os.path.join(directory, csv_file)

    # Read the movies before opening the file: if reading them fails, no
    # truncated, header-only CSV is left behind.
    movies = list_instance.movies

    with open(file_name, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['LetterboxdURI', 'Title'])
        writer.writerows(
            [movie_data['url'], movie_data['name']]
            for movie_data in movies.values()
        )
    print(f"Data successfully saved to {csv_file}. Movies: {len(movies)}")
class Watchlist:
    """A user's watchlist: URL, item count and lazily loaded movies.

    Attributes:
        username: The validated username.
        pages: ``WatchlistPages`` holder for the page object used to fetch
            data.
        url: Watchlist URL, taken from the page object.
        count: Value returned by the page object's ``get_count()``, fetched
            at construction time.
    """

    class WatchlistPages:
        """Holds the page object backing a ``Watchlist``."""

        def __init__(self, username: str) -> None:
            self.watchlist = user_watchlist.UserWatchlist(username)

    def __init__(self, username: str) -> None:
        """Validate the username and load the watchlist URL and count.

        Args:
            username: ASCII letters, digits and underscores only.

        Raises:
            AssertionError: If ``username`` is empty or contains any other
                character, including a trailing newline.
        """
        # fullmatch instead of match + '$': '$' also matches just before a
        # trailing newline, so "name\n" used to pass and end up in the URL.
        # NOTE(review): kept as an assert so the raised type is unchanged for
        # existing callers; asserts are stripped under `python -O`.
        assert re.fullmatch("[A-Za-z0-9_]+", username), "Invalid author"

        self.username = username
        self.pages = self.WatchlistPages(self.username)

        self.url = self.get_url()
        self.count = self.get_count()

        # Filled on first access of the `movies` property.
        self._movies = None

    # Properties
    @property
    def movies(self) -> dict:
        """Movies in the watchlist, fetched once and cached afterwards."""
        if self._movies is None:
            self._movies = self.get_movies()
        return self._movies

    # Magic Methods
    def __len__(self) -> int:
        """Return the stored watchlist count."""
        return self.count

    def __str__(self) -> str:
        """Return a JSON string of the instance, hiding the ``pages`` field."""
        return json_dumps(self, indent=2, cls=SecretsEncoder, secrets=['pages'])

    def jsonify(self) -> dict:
        """Return the JSON representation parsed back into a dict."""
        return json_loads(str(self))

    # Data Retrieval Methods
    def get_owner(self): ...
    def get_url(self) -> str: return self.pages.watchlist.url
    def get_count(self) -> int: return self.pages.watchlist.get_count()
    def get_movies(self) -> dict: return self.pages.watchlist.get_movies()

user_liked_reviews(user object)

2 | 3 | ```python 4 | from letterboxdpy import user 5 | user_instance = user.User("nmcassa") 6 | print(user.user_liked_reviews(user_instance)) 7 | ``` 8 | 9 |
10 | Click to expand user_liked_reviews method response 11 | 12 | ```json 13 | { 14 | "reviews": { 15 | "666730921": { 16 | "type": "Rewatched", 17 | "no": 0, 18 | "url": "https://letterboxd.com/ppark/film/mean-girls/", 19 | "rating": 8, 20 | "review": { 21 | "content": "Refreshing", 22 | "spoiler": false, 23 | "date": { 24 | "year": 2024, 25 | "month": 9, 26 | "day": 7 27 | } 28 | }, 29 | "user": { 30 | "username": "ppark", 31 | "display_name": "ppark", 32 | "url": "https://letterboxd.com/ppark/" 33 | }, 34 | "movie": { 35 | "name": "Mean Girls", 36 | "slug": "mean-girls", 37 | "id": "46049", 38 | "release": 2004, 39 | "url": "https://letterboxd.com/film/mean-girls/" 40 | }, 41 | "page": 1 42 | }, 43 | ... 44 | "80658991": { 45 | "type": "Added", 46 | "no": 0, 47 | "url": "https://letterboxd.com/kurstboy/film/the-departed/", 48 | "rating": 9, 49 | "review": { 50 | "content": "Great way to end my Scorsese binge!That final shot is perfect and the whole third act feels tight as hell. The entire film is rich with interesting approaches to the subject matter which is fitting for a plot that grabs your attention within the first 5 minutes. Scorsese is just spitballing here and throwing every idea at the wall, his love for filmmaking shines brighter here than in something like Hugo. Don't know what to add to the table\u2026", 51 | "spoiler": false, 52 | "date": { 53 | "year": 2019, 54 | "month": 11, 55 | "day": 24 56 | } 57 | }, 58 | "user": { 59 | "username": "Kurstboy", 60 | "display_name": "Karsten", 61 | "url": "https://letterboxd.com/kurstboy/" 62 | }, 63 | "movie": { 64 | "name": "The Departed", 65 | "slug": "the-departed", 66 | "id": "51042", 67 | "release": 2006, 68 | "url": "https://letterboxd.com/film/the-departed/" 69 | }, 70 | "page": 2 71 | } 72 | } 73 | } 74 | ``` 75 |
def extract_activity(ajax_url: str) -> dict:
    """Fetch an activity feed and convert its sections into a dict.

    Args:
        ajax_url: URL of the activity feed to parse.

    Returns:
        A dict with ``metadata`` (``export_timestamp``, ``source_url``,
        ``total_activities``) and ``activities``, which maps each activity
        id to its ``activity_type``, ``timestamp`` and ``content``. Only
        ``review``, ``basic`` and ``newlist`` events are included.
    """
    from datetime import datetime, timezone

    def _process_log(section, event_type) -> dict:
        """Process activity log and extract data."""
        log_id = section["data-activity-id"]
        date = parse_activity_datetime(section.find("time")['datetime'])
        log_title = get_log_title(section)
        log_type = get_log_type(event_type, section)
        log_item_slug = get_log_item_slug(event_type, section)

        # Build activity data structure
        log_data = {
            'activity_type': event_type,
            'timestamp': build_time_data(date),
            'content': {}
        }

        # Process content by activity type
        if event_type == 'review':
            log_data['content'] = process_review_activity(
                section, log_type, log_item_slug)
        elif event_type == 'basic':
            log_data['content'] = process_basic_activity(
                section, log_title, log_type, log_item_slug)
        elif event_type == 'newlist':
            log_data['content'] = process_newlist_activity(
                section, log_title, log_type)

        return {log_id: log_data}

    data = {
        'metadata': {
            # The ISO format ends in a literal 'Z' (UTC), so the timestamp
            # is taken in UTC rather than from naive local time.
            'export_timestamp': DateUtils.format_to_iso(datetime.now(timezone.utc)),
            'source_url': ajax_url,
            'total_activities': 0
        },
        'activities': {}
    }

    dom = parse_url(ajax_url)
    sections = dom.find_all("section")

    if not sections:
        return data

    for section in sections:
        event_type = get_event_type(section)
        if event_type in ('review', 'basic', 'newlist'):
            data['activities'].update(_process_log(section, event_type))
        # .get(): a <section> without a class attribute used to raise
        # KeyError here instead of simply being skipped.
        elif 'no-activity-message' in (section.get('class') or []):
            break

    # Counted once after the loop; same value as updating per iteration.
    data['metadata']['total_activities'] = len(data['activities'])

    return data
and .md files, returning functions without corresponding .md files.""" 24 | missing_md = [func for func in functions if func not in md_files] 25 | return missing_md 26 | 27 | def create_md_file_for_missing_function(func_name, module, directory): 28 | """Creates a .md file for a missing function with its signature.""" 29 | file_path = os.path.join(directory, f"{func_name}.md") 30 | func = getattr(module, func_name) 31 | 32 | signature = str(inspect.signature(func)) 33 | docstring = inspect.getdoc(func) or "No documentation provided." 34 | 35 | with open(file_path, 'w') as file: 36 | file.write(f'

{func_name}{signature}

def check_modules_for_missing_md(modules):
    """Report on, and create, missing per-function .md files.

    For every ``name -> module`` pair the functions defined in the module
    are compared with the .md files in ``./<name>/funcs`` (created when
    absent). A stub file is written for each undocumented function and a
    status line is printed for every function.
    """
    docs_root = "."
    for label, mod in modules.items():
        print(f"{label}:")
        defined = get_defined_functions(mod)
        funcs_dir = os.path.join(docs_root, label, 'funcs')

        directory_missing = not os.path.exists(funcs_dir)
        if directory_missing:
            print(f"Directory {funcs_dir} does not exist. Creating...")
            os.makedirs(funcs_dir, exist_ok=True)

        documented = get_existing_md_files(funcs_dir)
        undocumented = check_missing_md_files(defined, documented)

        # Write a stub for everything that has no page yet.
        for name in undocumented:
            create_md_file_for_missing_function(name, mod, funcs_dir)
            print(f"✗ {name}.md missing and created.")

        # Then confirm the pages that were already present.
        already_documented = [name for name in defined if name in documented]
        for name in already_documented:
            print(f"✓ {name}.md exists")

        if not undocumented:
            print("All functions have corresponding .md files.")
        print()
# -- MAIN --

# `title` and `pause` are Windows shell built-ins; elsewhere they only
# produce "command not found" noise, so they are skipped there.
IS_WINDOWS = os.name == 'nt'


def set_console_title(text: str) -> None:
    """Set the console window title on Windows; do nothing elsewhere."""
    if IS_WINDOWS:
        os.system(f'title {text}')


username = get_input('Enter username: ', index=1)
user_instance = user.User(username)

current_directory = os.getcwd()

# Export directories
EXAMPLES_DIR = build_path(current_directory, 'examples')
EXPORTS_DIR = build_path(EXAMPLES_DIR, 'exports')
USERS_FOLDER = build_path(EXPORTS_DIR, 'users')
USER_FOLDER = build_path(USERS_FOLDER, user_instance.username)
directories = [EXAMPLES_DIR, EXPORTS_DIR, USERS_FOLDER, USER_FOLDER]
check_and_create_dirs(directories)

start_time = time.time()

# Save user instance data
user_data_path = build_path(USER_FOLDER, 'user')
save_json(user_data_path, user_instance.jsonify())

# Export data for each method
# If you want to add a new method, add it here
# With arg: [user.User.user_watchlist, {'filters': {'genre': ['action', '-drama']}}],
methods = [
    user.User.get_activity,
    user.User.get_activity_following,
    user.User.get_diary,
    user.User.get_wrapped,
    user.User.get_films,
    [user.User.get_films_by_rating, {'rating':5}],
    user.User.get_films_not_rated,
    user.User.get_genre_info,
    user.User.get_liked_films,
    user.User.get_liked_reviews,
    user.User.get_lists,
    user.User.get_following,
    user.User.get_followers,
    user.User.get_reviews,
    user.User.get_user_tags,
    user.User.get_watchlist_movies,
    user.User.get_watchlist,
]
# Width used to zero-pad the running method number.
methods_str_length = len(str(len(methods)))

print('\nExporting data...')
for no, method in enumerate(methods, 1):
    method_start_time = time.time()

    # An entry is either a bare method or a [method, kwargs] pair.
    args = {}
    if isinstance(method, list):
        method, args = method

    method_name = method.__name__
    method_name_without_prefix = strip_prefix(method_name)

    set_console_title(f'[{len(methods)}/{no:0>{methods_str_length}}] Exporting {method_name}...')
    print(f'[{len(methods)}/{no:0>{methods_str_length}}]: Processing "{method_name}" method',
          end=f' with args: {args}...\r' if args else '...\r')

    # Unpacking an empty dict passes no keyword arguments.
    data = method(user_instance, **args)

    file_path = build_path(USER_FOLDER, method_name_without_prefix)
    save_json(file_path, data)

    print(f'{time.time() - method_start_time:<7.2f} seconds - {method_name:<22} - {build_click_url(file_path)}.json')

set_console_title('Completed!')
print('\nProcessing complete!')
print(f'\tTotal time: {time.time() - start_time:.2f} seconds')

print('\tAt', build_click_url(USER_FOLDER), end='\n\n')
if IS_WINDOWS:
    # Keep the console window open when launched by double-click.
    os.system('pause')
def extract_movies_from_vertical_list(dom, max_items=20*5) -> dict:
    """
    Collect movies from a vertically laid out movie list.

    Used in:
    - User watchlists (/user/username/watchlist/)
    - User lists (/user/username/list/list-name/)
    - User films pages
    - Search results

    Args:
        dom: BeautifulSoup DOM object
        max_items: Upper bound on the number of movies returned

    Returns:
        dict: Film IDs mapped to ``slug``, ``name``, ``year`` and ``url``
    """
    def get_movie_data(item):
        """Return ``(film_id, details)`` for a container, or None if unusable."""
        from letterboxdpy.utils.utils_string import extract_year_from_movie_name, clean_movie_name

        # <li> containers wrap the data-carrying div; anything else is
        # treated as that element itself.
        if item.name == "li":
            component = item.find("div", {"class": "react-component"})
        else:
            component = item

        if not component or 'data-film-id' not in component.attrs:
            return None

        film_id = component['data-film-id']
        slug = component.get('data-item-slug') or component.get('data-film-slug')
        # Fall back to the poster's alt text when no item name is present.
        title_with_year = component.get('data-item-name') or component.img['alt']

        details = {
            "slug": slug,
            "name": clean_movie_name(title_with_year),
            "year": extract_year_from_movie_name(title_with_year),
            'url': f'https://letterboxd.com/film/{slug}/'
        }
        return film_id, details

    # Prefer "posteritem" entries; only look for "griditem" when none exist.
    candidates = dom.find_all("li", {"class": "posteritem"})
    if not candidates:
        candidates = dom.find_all("li", {"class": "griditem"})

    collected = {}
    for element in candidates:
        if len(collected) >= max_items:
            break

        parsed = get_movie_data(element)
        if not parsed:
            continue

        film_id, details = parsed
        collected[film_id] = details

    return collected

user_wrapped(user object)

2 | 3 | ```python 4 | from letterboxdpy import user 5 | user_instance = user.User("nmcassa") 6 | print(user.user_wrapped(user_instance, 2023)) 7 | ``` 8 | 9 |
10 | Click to expand the demo response for user_wrapped method or view the full response 11 | 12 | ```json 13 | { 14 | "year": 2023, 15 | "logged": 120, 16 | "total_review": 2, 17 | "hours_watched": 223, 18 | "total_runtime": 13427, 19 | "first_watched": { 20 | "332289592": { 21 | "name": "The Gift", 22 | "slug": "the-gift-2015-1", 23 | "id": "255927", 24 | "release": 2015, 25 | "runtime": 108, 26 | "actions": { 27 | "rewatched": false, 28 | "rating": 6, 29 | "liked": false, 30 | "reviewed": false 31 | }, 32 | "date": { 33 | "year": 2023, 34 | "month": 1, 35 | "day": 1 36 | }, 37 | "page": { 38 | "url": "https://letterboxd.com/nmcassa/films/diary/for/2023/page/3/", 39 | "no": 3 40 | } 41 | } 42 | }, 43 | "last_watched": { 44 | "495592379": {...} 45 | }, 46 | "movies": { 47 | "495592379": { 48 | "name": "Poor Things", 49 | "slug": "poor-things-2023", 50 | "id": "710352", 51 | "release": 2023, 52 | "runtime": 141, 53 | "actions": { 54 | "rewatched": false, 55 | "rating": 6, 56 | "liked": false, 57 | "reviewed": true 58 | }, 59 | "date": { 60 | "year": 2023, 61 | "month": 12, 62 | "day": 26 63 | }, 64 | "page": { 65 | "url": "https://letterboxd.com/nmcassa/films/diary/for/2023/page/1/", 66 | "no": 1 67 | } 68 | },... 
69 | }, 70 | "months": { 71 | "1": 21, 72 | "2": 7, 73 | "3": 7, 74 | "4": 6, 75 | "5": 11, 76 | "6": 9, 77 | "7": 15, 78 | "8": 11, 79 | "9": 5, 80 | "10": 9, 81 | "11": 7, 82 | "12": 12 83 | }, 84 | "days": { 85 | "1": 18, 86 | "2": 14, 87 | "3": 9, 88 | "4": 17, 89 | "5": 14, 90 | "6": 27, 91 | "7": 21 92 | }, 93 | "milestones": { 94 | "50": { 95 | "413604382": { 96 | "name": "Richard Pryor: Live in Concert", 97 | "slug": "richard-pryor-live-in-concert", 98 | "id": "37594", 99 | "release": 1979, 100 | "runtime": 78, 101 | "actions": { 102 | "rewatched": false, 103 | "rating": 7, 104 | "liked": false, 105 | "reviewed": false 106 | }, 107 | "date": { 108 | "year": 2023, 109 | "month": 7, 110 | "day": 13 111 | }, 112 | "page": { 113 | "url": "https://letterboxd.com/nmcassa/films/diary/for/2023/page/1/", 114 | "no": 1 115 | } 116 | } 117 | }, 118 | "100": { 119 | "347318246": {...} 120 | } 121 | } 122 | } 123 | ``` 124 |
-------------------------------------------------------------------------------- /letterboxdpy/utils/date_utils.py: -------------------------------------------------------------------------------- 1 | """Date utilities for consistent ISO 8601 format across letterboxdpy.""" 2 | 3 | from datetime import datetime 4 | 5 | 6 | class InvalidDateFormatError(Exception): 7 | """Raised when date format is not recognized.""" 8 | pass 9 | 10 | 11 | class DateUtils: 12 | """Centralized date utilities for consistent date handling.""" 13 | 14 | ISO_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" 15 | ISO_FORMAT_NO_MICROSECONDS = "%Y-%m-%dT%H:%M:%SZ" 16 | 17 | @staticmethod 18 | def parse_letterboxd_date(date_input) -> datetime | None: 19 | """Parse various date formats into datetime object.""" 20 | if date_input is None: 21 | return None 22 | if isinstance(date_input, datetime): 23 | return date_input 24 | if isinstance(date_input, dict): 25 | return DateUtils._parse_date_dict(date_input) 26 | if isinstance(date_input, str): 27 | return DateUtils._parse_date_string(date_input) 28 | raise InvalidDateFormatError(f"Unsupported date format: {type(date_input)}") 29 | 30 | @staticmethod 31 | def _parse_date_dict(date_dict: dict) -> datetime: 32 | """Parse date dictionary format.""" 33 | year = date_dict.get('year') 34 | month = date_dict.get('month') 35 | day = date_dict.get('day') 36 | 37 | if not all(isinstance(x, int) and x is not None for x in [year, month, day]): 38 | raise InvalidDateFormatError("Invalid date dictionary") 39 | if not (1 <= month <= 12) or not (1 <= day <= 31): 40 | raise InvalidDateFormatError("Invalid date values") 41 | 42 | return datetime(year, month, day) 43 | 44 | @staticmethod 45 | def _parse_date_string(date_string: str) -> datetime: 46 | """Parse ISO date string format.""" 47 | try: 48 | return datetime.strptime(date_string, DateUtils.ISO_FORMAT) 49 | except ValueError: 50 | try: 51 | return datetime.strptime(date_string, DateUtils.ISO_FORMAT_NO_MICROSECONDS) 52 | 
except ValueError: 53 | raise InvalidDateFormatError(f"Invalid ISO date string: {date_string}") 54 | 55 | @staticmethod 56 | def format_to_iso(date_obj: datetime | None) -> str | None: 57 | """Format datetime object to ISO 8601 string.""" 58 | if date_obj is None: 59 | return None 60 | if not isinstance(date_obj, datetime): 61 | raise InvalidDateFormatError(f"Expected datetime object, got {type(date_obj)}") 62 | return date_obj.strftime(DateUtils.ISO_FORMAT) 63 | 64 | @staticmethod 65 | def dict_to_iso(date_dict: dict) -> str: 66 | """Convert date dictionary to ISO format string.""" 67 | date_obj = DateUtils._parse_date_dict(date_dict) 68 | return DateUtils.format_to_iso(date_obj) 69 | 70 | @staticmethod 71 | def iso_to_dict(iso_string: str) -> dict: 72 | """Convert ISO string to date dictionary.""" 73 | date_obj = DateUtils._parse_date_string(iso_string) 74 | return {'year': date_obj.year, 'month': date_obj.month, 'day': date_obj.day} 75 | 76 | @staticmethod 77 | def to_iso(date_input) -> str | None: 78 | """Convert any date format to ISO string.""" 79 | date_obj = DateUtils.parse_letterboxd_date(date_input) 80 | return DateUtils.format_to_iso(date_obj) 81 | 82 | 83 | # Backward compatibility functions 84 | def parse_activity_datetime(date_string: str) -> datetime: 85 | """Parse datetime string (backward compatibility).""" 86 | return DateUtils._parse_date_string(date_string) 87 | 88 | 89 | def build_time_data(date_obj: datetime) -> str: 90 | """Build ISO timestamp string (backward compatibility).""" 91 | return DateUtils.format_to_iso(date_obj) -------------------------------------------------------------------------------- /examples/exports/users/nmcassa/followers.json: -------------------------------------------------------------------------------- 1 | { 2 | "ramenfeedgg": { 3 | "username": "ramenfeedgg", 4 | "name": "ramenfeedgg", 5 | "url": "https://letterboxd.com/ramenfeedgg", 6 | "avatar": { 7 | "exists": false, 8 | "upscaled": false, 9 | "url": 
"https://s.ltrbxd.com/static/img/avatar80-CTtJ8HSs.png" 10 | }, 11 | "followers": 5, 12 | "following": 6, 13 | "watched": 171, 14 | "lists": 0, 15 | "likes": 19 16 | }, 17 | "ben24wold": { 18 | "username": "ben24wold", 19 | "name": "Ben Wold", 20 | "url": "https://letterboxd.com/ben24wold", 21 | "avatar": { 22 | "exists": true, 23 | "upscaled": true, 24 | "url": "https://a.ltrbxd.com/resized/avatar/twitter/1/7/9/3/7/5/9/shard/http___pbs.twimg.com_profile_images_1258867765965963269_SckczIvD-0-1000-0-1000-crop.jpg" 25 | }, 26 | "followers": 37, 27 | "following": 15, 28 | "watched": 662, 29 | "lists": 37, 30 | "likes": 467 31 | }, 32 | "ppark": { 33 | "username": "ppark", 34 | "name": "ppark", 35 | "url": "https://letterboxd.com/ppark", 36 | "avatar": { 37 | "exists": true, 38 | "upscaled": true, 39 | "url": "https://a.ltrbxd.com/resized/avatar/upload/5/9/0/5/5/1/0/shard/avtr-0-1000-0-1000-crop.jpg" 40 | }, 41 | "followers": 7, 42 | "following": 6, 43 | "watched": 876, 44 | "lists": 1, 45 | "likes": 372 46 | }, 47 | "ryanshubert": { 48 | "username": "ryanshubert", 49 | "name": "ryanshubert", 50 | "url": "https://letterboxd.com/ryanshubert", 51 | "avatar": { 52 | "exists": true, 53 | "upscaled": true, 54 | "url": "https://a.ltrbxd.com/resized/avatar/upload/2/3/6/9/1/6/5/shard/avtr-0-1000-0-1000-crop.jpg" 55 | }, 56 | "followers": 22, 57 | "following": 32, 58 | "watched": 1, 59 | "lists": 7, 60 | "likes": 791 61 | }, 62 | "crescendohouse": { 63 | "username": "crescendohouse", 64 | "name": "Crescendo House", 65 | "url": "https://letterboxd.com/crescendohouse", 66 | "avatar": { 67 | "exists": true, 68 | "upscaled": true, 69 | "url": "https://a.ltrbxd.com/resized/avatar/twitter/4/7/4/8/0/8/9/shard/http___pbs.twimg.com_profile_images_1373370791618830336_1qVBFR8N-0-1000-0-1000-crop.jpg" 70 | }, 71 | "followers": 322, 72 | "following": 859, 73 | "watched": 5, 74 | "lists": 1, 75 | "likes": 142 76 | }, 77 | "brendonyu668": { 78 | "username": "brendonyu668", 79 | "name": 
"Brendonyu668", 80 | "url": "https://letterboxd.com/brendonyu668", 81 | "avatar": { 82 | "exists": true, 83 | "upscaled": true, 84 | "url": "https://a.ltrbxd.com/resized/avatar/upload/2/3/4/2/2/3/2/shard/avtr-0-1000-0-1000-crop.jpg" 85 | }, 86 | "followers": 36, 87 | "following": 206, 88 | "watched": 1, 89 | "lists": 130, 90 | "likes": 67 91 | }, 92 | "pdrew1211": { 93 | "username": "pdrew1211", 94 | "name": "Parker Bobbitt", 95 | "url": "https://letterboxd.com/pdrew1211", 96 | "avatar": { 97 | "exists": true, 98 | "upscaled": true, 99 | "url": "https://a.ltrbxd.com/resized/avatar/upload/1/3/2/0/6/2/8/shard/avtr-0-1000-0-1000-crop.jpg" 100 | }, 101 | "followers": 777, 102 | "following": 2, 103 | "watched": 1, 104 | "lists": 27, 105 | "likes": 58 106 | }, 107 | "jordynhf": { 108 | "username": "jordynhf", 109 | "name": "jordynhf", 110 | "url": "https://letterboxd.com/jordynhf", 111 | "avatar": { 112 | "exists": true, 113 | "upscaled": true, 114 | "url": "https://a.ltrbxd.com/resized/avatar/upload/2/2/9/0/1/4/8/shard/avtr-0-1000-0-1000-crop.jpg" 115 | }, 116 | "followers": 22, 117 | "following": 23, 118 | "watched": 852, 119 | "lists": 2, 120 | "likes": 201 121 | } 122 | } -------------------------------------------------------------------------------- /examples/exports/users/nmcassa/following.json: -------------------------------------------------------------------------------- 1 | { 2 | "ramenfeedgg": { 3 | "username": "ramenfeedgg", 4 | "name": "ramenfeedgg", 5 | "url": "https://letterboxd.com/ramenfeedgg", 6 | "avatar": { 7 | "exists": false, 8 | "upscaled": false, 9 | "url": "https://s.ltrbxd.com/static/img/avatar80-CTtJ8HSs.png" 10 | }, 11 | "followers": 5, 12 | "following": 6, 13 | "watched": 171, 14 | "lists": 0, 15 | "likes": 19 16 | }, 17 | "ben24wold": { 18 | "username": "ben24wold", 19 | "name": "Ben Wold", 20 | "url": "https://letterboxd.com/ben24wold", 21 | "avatar": { 22 | "exists": true, 23 | "upscaled": true, 24 | "url": 
"https://a.ltrbxd.com/resized/avatar/twitter/1/7/9/3/7/5/9/shard/http___pbs.twimg.com_profile_images_1258867765965963269_SckczIvD-0-1000-0-1000-crop.jpg" 25 | }, 26 | "followers": 37, 27 | "following": 15, 28 | "watched": 662, 29 | "lists": 37, 30 | "likes": 467 31 | }, 32 | "ppark": { 33 | "username": "ppark", 34 | "name": "ppark", 35 | "url": "https://letterboxd.com/ppark", 36 | "avatar": { 37 | "exists": true, 38 | "upscaled": true, 39 | "url": "https://a.ltrbxd.com/resized/avatar/upload/5/9/0/5/5/1/0/shard/avtr-0-1000-0-1000-crop.jpg" 40 | }, 41 | "followers": 7, 42 | "following": 6, 43 | "watched": 876, 44 | "lists": 1, 45 | "likes": 372 46 | }, 47 | "joacogarcia2023": { 48 | "username": "joacogarcia2023", 49 | "name": "joacogarcia2023", 50 | "url": "https://letterboxd.com/joacogarcia2023", 51 | "avatar": { 52 | "exists": true, 53 | "upscaled": true, 54 | "url": "https://a.ltrbxd.com/resized/avatar/upload/1/0/1/4/0/6/6/7/shard/avtr-0-1000-0-1000-crop.jpg" 55 | }, 56 | "followers": 14, 57 | "following": 6, 58 | "watched": 198, 59 | "lists": 0, 60 | "likes": 1 61 | }, 62 | "ryanshubert": { 63 | "username": "ryanshubert", 64 | "name": "ryanshubert", 65 | "url": "https://letterboxd.com/ryanshubert", 66 | "avatar": { 67 | "exists": true, 68 | "upscaled": true, 69 | "url": "https://a.ltrbxd.com/resized/avatar/upload/2/3/6/9/1/6/5/shard/avtr-0-1000-0-1000-crop.jpg" 70 | }, 71 | "followers": 22, 72 | "following": 32, 73 | "watched": 1, 74 | "lists": 7, 75 | "likes": 791 76 | }, 77 | "connoreatspants": { 78 | "username": "connoreatspants", 79 | "name": "ConnorEatsPants", 80 | "url": "https://letterboxd.com/connoreatspants", 81 | "avatar": { 82 | "exists": true, 83 | "upscaled": true, 84 | "url": "https://a.ltrbxd.com/resized/avatar/upload/3/6/8/9/5/9/8/shard/avtr-0-1000-0-1000-crop.jpg" 85 | }, 86 | "followers": 44, 87 | "following": 21, 88 | "watched": 162, 89 | "lists": 0, 90 | "likes": 116 91 | }, 92 | "kurstboy": { 93 | "username": "kurstboy", 94 | "name": 
class List:
    """Facade over a single Letterboxd user list.

    Construction builds the backing page object (``user_list.UserList``) and
    eagerly copies the list metadata (url, title, author, description, dates,
    tags, count, id) onto the instance by calling the ``get_*`` delegates
    below. The film collection is resolved lazily through ``movies``.
    """

    class ListPages:
        """Container for the page objects a ``List`` delegates to."""

        def __init__(self, username: str, slug: str) -> None:
            self.list = user_list.UserList(username, slug)

    def __init__(self, username: str, slug: str = None) -> None:
        """Validate ``username`` and populate the list metadata.

        Args:
            username: List owner; letters, digits and underscores only.
            slug: Slug of the list, forwarded to ``user_list.UserList``.

        Raises:
            AssertionError: If ``username`` contains other characters.
        """
        # Kept as an assert so callers that catch AssertionError keep working.
        assert re.match("^[A-Za-z0-9_]+$", username), "Invalid author"

        self.username = username.lower()
        self.slug = slug
        self.pages = self.ListPages(self.username, self.slug)

        # Cache slot for the lazily resolved ``movies`` property.
        self._movies = None

        self.url = self.get_url()
        self.title = self.get_title()
        self.author = self.get_author()
        self.description = self.get_description()
        self.date_created = self.get_date_created()
        self.date_updated = self.get_date_updated()
        self.tags = self.get_tags()
        self.count = self.get_count()
        self.list_id = self.get_list_id()

    # Properties
    @property
    def movies(self) -> dict:
        """Return the list's movies, fetching them on first access only."""
        if self._movies is None:
            self._movies = self.get_movies()
        return self._movies

    # Magic Methods
    def __len__(self) -> int:
        """Return the stored ``count`` attribute."""
        return self.count

    def __getattr__(self, name):
        """Report a missing attribute.

        Python calls ``__getattr__`` only after regular lookup has already
        failed, including when a property getter raised ``AttributeError``.
        Looking the name up again here could therefore never succeed in the
        normal case and, for a failing property such as ``movies``, re-ran
        the getter. Raising directly keeps the same exception type and
        message without the repeated work.
        """
        raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")

    def __getitem__(self, key: str):
        """Allow ``instance['attr']`` access; unknown names raise ``KeyError``."""
        try:
            return object.__getattribute__(self, key)
        except AttributeError:
            raise KeyError(f"'{self.__class__.__name__}' object has no key '{key}'")

    def __str__(self) -> str:
        """Serialize the instance to indented JSON, hiding the ``pages`` attribute."""
        return json_dumps(self, indent=2, cls=SecretsEncoder, secrets=['pages'])

    def jsonify(self) -> dict:
        """Return the JSON form of ``__str__`` parsed back into Python objects."""
        return json_loads(self.__str__())

    # Data Retrieval Methods (thin delegates to the list page object)
    def get_url(self) -> str: return self.pages.list.url
    def get_title(self) -> str: return self.pages.list.get_title()
    def get_author(self) -> str: return self.pages.list.get_author()
    def get_description(self) -> str: return self.pages.list.get_description()
    def get_date_created(self) -> list: return self.pages.list.get_date_created()
    def get_date_updated(self) -> list: return self.pages.list.get_date_updated()
    def get_tags(self) -> list: return self.pages.list.get_tags()
    def get_movies(self) -> dict: return self.pages.list.get_movies()
    def get_count(self) -> int: return self.pages.list.get_count()
    def get_list_id(self) -> str: return self.pages.list.get_list_id()
    def get_list_meta(self, url: str) -> "ListMetaData": return self.pages.list.get_list_meta(url)
38 | "log_url": "https://letterboxd.com/nmcassa/film/rocky/" 39 | }, 40 | "46911": { 41 | "slug": "children-of-men", 42 | "name": "Children of Men", 43 | "url": "https://letterboxd.com/film/children-of-men/", 44 | "year": 2006, 45 | "log_url": "https://letterboxd.com/nmcassa/film/children-of-men/" 46 | } 47 | }, 48 | "avatar": { 49 | "exists": true, 50 | "upscaled": true, 51 | "url": "https://a.ltrbxd.com/resized/avatar/upload/1/5/0/0/3/0/6/shard/avtr-0-1000-0-1000-crop.jpg" 52 | }, 53 | "recent": { 54 | "watchlist": { 55 | "45577": { 56 | "id": "45577", 57 | "slug": "human-traffic", 58 | "name": "Human Traffic", 59 | "year": 1999 60 | }, 61 | "19921": { 62 | "id": "19921", 63 | "slug": "the-fighter-2010", 64 | "name": "The Fighter", 65 | "year": 2010 66 | }, 67 | "46431": { 68 | "id": "46431", 69 | "slug": "rounders", 70 | "name": "Rounders", 71 | "year": 1998 72 | }, 73 | "45224": { 74 | "id": "45224", 75 | "slug": "thief", 76 | "name": "Thief", 77 | "year": 1981 78 | }, 79 | "32345": { 80 | "id": "32345", 81 | "slug": "taste-of-cherry", 82 | "name": "Taste of Cherry", 83 | "year": 1997 84 | } 85 | }, 86 | "diary": { 87 | "months": { 88 | "9": { 89 | "12": [ 90 | { 91 | "name": "Toy Story", 92 | "slug": "toy-story" 93 | }, 94 | { 95 | "name": "Cars", 96 | "slug": "cars" 97 | } 98 | ], 99 | "6": [ 100 | { 101 | "name": "Full Metal Jacket", 102 | "slug": "full-metal-jacket" 103 | }, 104 | { 105 | "name": "Up", 106 | "slug": "up" 107 | } 108 | ] 109 | }, 110 | "8": { 111 | "30": [ 112 | { 113 | "name": "WALL\u00b7E", 114 | "slug": "walle" 115 | } 116 | ], 117 | "25": [ 118 | { 119 | "name": "F1", 120 | "slug": "f1" 121 | } 122 | ], 123 | "24": [ 124 | { 125 | "name": "Caught Stealing", 126 | "slug": "caught-stealing" 127 | } 128 | ], 129 | "23": [ 130 | { 131 | "name": "Zodiac", 132 | "slug": "zodiac" 133 | } 134 | ], 135 | "18": [ 136 | { 137 | "name": "Superman", 138 | "slug": "superman-2025" 139 | } 140 | ], 141 | "16": [ 142 | { 143 | "name": "The Game", 144 | 
class Scraper:
    """A class for scraping and parsing web pages.

    All fetching helpers are classmethods and read the class-level
    ``headers``, ``builder`` and ``timeout`` attributes.

    NOTE(review): ``__init__`` stores per-instance headers, but the
    classmethods below only consult ``cls.headers``, so instance-level
    overrides are not applied to requests — confirm whether that is intended.
    """

    headers = {
        "referer": DOMAIN,
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }
    builder = "lxml"
    # Seconds to wait for the server; without a timeout a stalled
    # connection would block the caller indefinitely.
    timeout = 30

    def __init__(self, domain: str = headers['referer'], user_agent: str = headers["user-agent"]):
        """Initialize the scraper with the specified domain and user-agent."""
        self.headers = {
            "referer": domain,
            "user-agent": user_agent
        }

    @classmethod
    def get_page(cls, url: str) -> BeautifulSoup:
        """Fetch, check, and parse the HTML content from the specified URL.

        Raises:
            PageLoadError: If the request itself fails.
            PrivateRouteError: If the server answers with status 403.
            InvalidResponseError: For any other non-200 status.
        """
        response = cls._fetch(url)
        cls._check_for_errors(url, response)
        return cls._parse_html(response)

    @classmethod
    def _fetch(cls, url: str) -> requests.Response:
        """Fetch the HTML content from the specified URL."""
        try:
            return requests.get(url, headers=cls.headers, timeout=cls.timeout)
        except requests.RequestException as e:
            # Chain the cause so the underlying network error stays visible.
            raise PageLoadError(url, str(e)) from e

    @classmethod
    def _check_for_errors(cls, url: str, response: requests.Response) -> None:
        """Check the response for errors and raise an exception if found."""
        if response.status_code == 200:
            return

        error_message = cls._get_error_message(response)
        formatted_error_message = cls._format_error(url, response, error_message)
        if response.status_code == 403:
            raise PrivateRouteError(formatted_error_message)
        raise InvalidResponseError(formatted_error_message)

    @classmethod
    def _get_error_message(cls, response: requests.Response) -> str:
        """Extract the error message from the response, if available.

        Falls back to a generic message when the page has no
        ``section.message`` element or that element has no ``<strong>`` child.
        """
        dom = BeautifulSoup(response.text, cls.builder)
        message_section = dom.find("section", {"class": "message"})
        strong = message_section.strong if message_section is not None else None
        return strong.text if strong is not None else "Unknown error occurred"

    @classmethod
    def _format_error(cls, url: str, response: requests.Response, message: str) -> str:
        """Format the error message for logging or raising exceptions."""
        return json_dumps({
            'code': response.status_code,
            'reason': str(response.reason),
            'url': url,
            'message': message
        }, indent=2)

    @classmethod
    def _parse_html(cls, response: requests.Response) -> BeautifulSoup:
        """Parse the HTML content from the response."""
        try:
            return BeautifulSoup(response.text, cls.builder)
        except Exception as e:
            raise Exception(f"Error parsing response: {e}") from e


def parse_url(url: str) -> BeautifulSoup:
    """Fetch and parse the HTML content from the specified URL using the Scraper class."""
    return Scraper.get_page(url)


def url_encode(query: str, safe: str = '') -> str:
    """URL encode the given query."""
    return requests.utils.quote(query, safe=safe)
class UserReviews:
    """Accessor for the review pages of one user."""

    def __init__(self, username: str) -> None:
        self.username = username
        self.url = f"{DOMAIN}/{self.username}/films/reviews"

    def get_reviews(self): return extract_user_reviews(self.url)


def extract_user_reviews(url: str) -> dict:
    """
    Returns a dictionary containing user reviews. The keys are unique log IDs,
    and each value is a dictionary with details about the review,
    including movie information, review type, rating, review content, date, etc.

    Pages are requested as ``{url}/page/{n}/`` starting at 1 until a page
    holds fewer than ``LOGS_PER_PAGE`` reviews. A page without the
    ``div.viewing-list`` container is treated as an empty page, so users with
    no reviews (or a review count that is an exact multiple of the page size)
    end the loop cleanly instead of failing on ``None``.

    Returns:
        dict: ``{'reviews': {log_id: {...}}, 'count': int, 'last_page': int}``
    """
    LOGS_PER_PAGE = 12

    page = 0
    data = {'reviews': {}}
    while True:
        page += 1
        dom = parse_url(f"{url}/page/{page}/")

        container = dom.find("div", {"class": ["viewing-list"]})
        # No container (div.viewing-list) means there is nothing to read here.
        logs = container.find_all("article") if container is not None else []

        for log in logs:
            # Handle react structure
            react_component = log.parent.find("div", {"class": "react-component"}) or log.parent.div

            movie_name = log.a.text
            slug = react_component.get('data-item-slug') or react_component.get('data-film-slug')
            # str: unique id of the movie.
            movie_id = react_component['data-film-id']

            # Release year: the first span whose text is exactly four digits.
            release = None
            for span in log.find_all('span'):
                span_text = span.text.strip() if span.text else ''
                if span_text.isdigit() and len(span_text) == 4:
                    release = int(span_text)
                    break

            movie_link = f"{DOMAIN}/film/{slug}/"
            # str: unique id of the review.
            log_id = log['data-object-id'].split(':')[-1]
            log_link = DOMAIN + log.a['href']

            # int: there can be multiple reviews for a movie, counted from zero.
            #   first review:  /username/film/movie_name/  or  /username/film/movie_name/0/
            #   second review: /username/film/movie_name/1/
            # The number, when present, is the last segment of the url.
            log_no = log_link.split(slug)[-1]
            log_no = int(log_no.replace('/', '')) if log_no.count('/') == 2 else 0

            # int | None: numeric rating taken from the last css class (1-10).
            rating = log.find("span", {"class": ["rating"], })
            rating = int(rating['class'][-1].split('-')[-1]) if rating else None

            # str, flag: review text; the spoiler warning is checked to
            # include or exclude the first paragraph.
            review, spoiler = parse_review_text(log)

            date = log.find("span", {"class": ["date"], })
            # str: type of log, such as:
            #   'Rewatched': (in diary) review, watched and rewatched
            #   'Watched':   (in diary) review and watched
            #   'Added':     (not in diary) review
            log_type = date.find_previous_sibling().text.strip()
            # dict: date of the review, e.g. {'year': 2024, 'month': 1, 'day': 1}
            date = parse_review_date(log_type, date)

            data['reviews'][log_id] = {
                # static
                'movie': {
                    'name': movie_name,
                    'slug': slug,
                    'id': movie_id,
                    'release': release,
                    'link': movie_link,
                },
                # dynamic
                'type': log_type,
                'no': log_no,
                'link': log_link,
                'rating': rating,
                'review': {
                    'content': review,
                    'spoiler': spoiler
                },
                'date': date,
                'page': page,
            }

        if len(logs) < LOGS_PER_PAGE:
            # Short (or empty) page: this was the last one.
            data['count'] = len(data['reviews'])
            data['last_page'] = page
            break

    return data
class ListsExtractor:
    """Common lists collection extraction functionality.

    Extracts collections of lists from Letterboxd pages:
    - User's created lists
    - Lists containing a specific movie
    - Popular lists containing a movie
    """

    # Shared selectors for all list types
    SELECTORS = {
        'list_set': ('section', {'class': 'list-set'}),
        'lists': ('section', {'class': 'list'}),
        'title': ('h2', {'class': 'title'}),
        'description': ('div', {'class': 'body-text'}),
        'value': ('span', {'class': 'value'}),
        'likes': ('a', {'class': 'icon-like'}),
        'comments': ('a', {'class': 'icon-comment'}),
    }

    LISTS_PER_PAGE = 12

    @classmethod
    def from_url(cls, base_url: str, max_lists: int = None) -> dict:
        """
        Extract lists collection from URL.

        Pages are fetched one after another until an empty page, a page with
        fewer than ``LISTS_PER_PAGE`` items, or ``max_lists`` collected lists.

        Args:
            base_url: Base URL without page parameter
            max_lists: Maximum number of lists to return (optional limit);
                a falsy value (``None`` or ``0``) means no limit

        Returns:
            dict: Contains 'lists', 'count', 'last_page' and 'limit'.
                'limit' is ``True`` when the limit stopped the extraction,
                otherwise it holds the ``max_lists`` value that was passed.
        """
        data = {'limit': max_lists, 'count': 0, 'last_page': 1, 'lists': {}}
        page = 1
        # Tracked separately from data['limit']: that key starts out holding
        # max_lists itself, so testing it for truthiness would stop paging
        # after the first page whenever a limit was requested.
        limit_reached = False

        while True:
            lists = cls._fetch_page_data(base_url, page)

            if not lists:
                break

            for item in lists:
                data['lists'] |= cls._extract_list_data(item)

                if max_lists and len(data['lists']) >= max_lists:
                    limit_reached = True
                    break

            if limit_reached or len(lists) < cls.LISTS_PER_PAGE:
                # Is last page or limit reached
                break

            page += 1

        if limit_reached:
            data['limit'] = True
        data['count'] = len(data['lists'])
        data['last_page'] = page

        return data

    @classmethod
    def _fetch_page_data(cls, base_url: str, page: int):
        """Fetch page ``page`` and return its ``article.list-summary`` elements."""
        dom = parse_url(f'{base_url}/page/{page}')
        return dom.find_all('article', {'class': 'list-summary'})

    @classmethod
    def _extract_list_data(cls, item) -> dict:
        """Extract data from a list item.

        Returns:
            dict: Single-entry mapping of the list id to its title, slug,
                description, url, count, likes and comments.
        """

        def get_id() -> str:
            return item['data-film-list-id']

        def get_title() -> str:
            title_elem = item.find('h2', {'class': 'name'})
            return title_elem.text.strip()

        def get_description() -> str:
            description = item.find('div', {'class': ['notes', 'body-text']})
            if description:
                paragraphs = description.find_all('p')
                return '\n'.join([p.text for p in paragraphs])
            return ""

        def get_url() -> str:
            title_elem = item.find('h2', {'class': 'name'})
            return DOMAIN + title_elem.a['href']

        def get_slug() -> str:
            # The url ends with a slash, so the slug is the second-to-last part.
            return get_url().split('/')[-2]

        def get_count() -> int:
            value_elem = item.find(*cls.SELECTORS['value'])
            if value_elem:
                count = extract_numeric_text(value_elem.text)
                return count if count is not None else 0
            # Same fallback as likes/comments when the element is missing.
            return 0

        def get_likes() -> int:
            likes = item.find(*cls.SELECTORS['likes'])
            if likes:
                return extract_and_convert_shorthand(likes)
            return 0

        def get_comments() -> int:
            comments = item.find(*cls.SELECTORS['comments'])
            if comments:
                return extract_and_convert_shorthand(comments)
            return 0

        return {
            get_id(): {
                'title': get_title(),
                'slug': get_slug(),
                'description': get_description(),
                'url': get_url(),
                'count': get_count(),
                'likes': get_likes(),
                'comments': get_comments()
            }
        }
class Settings:
    """Run options for the poster export."""

    def __init__(self, foldering=True, size_check=False):
        self.foldering = foldering  # Group posters into one folder per year
        self.size_check = size_check  # Re-download and compare sizes of existing files


class Path:
    """Small filesystem helpers used by the exporter."""

    @staticmethod
    def check_path(*paths):
        """Create each directory in order if it is missing, reporting what was done."""
        for path in paths:
            if not os.path.exists(path):
                os.mkdir(path)
                print('Directory created:', path)
            else:
                print('Directory found:', path)

    @staticmethod
    def save(file_path, data):
        """Write ``data`` (bytes) to ``file_path``, replacing any existing file."""
        with open(file_path, 'wb') as f:
            f.write(data)


class App:
    """Download the poster of every diary entry of one user."""

    EXPORTS_DIR = "exports"
    EXPORTS_USERS_DIR = os.path.join(EXPORTS_DIR, "users")
    # Seconds to wait for a poster download before giving up on that entry.
    REQUEST_TIMEOUT = 30

    def __init__(self, username):
        self.username = username.lower()
        self.USER_FOLDER = os.path.join(self.EXPORTS_USERS_DIR, self.username)
        self.USER_POSTERS_DIR = os.path.join(self.USER_FOLDER, "posters")

        self.user = user.User(self.username)
        self.data = self.user.get_diary()
        self.config = Settings()

        self.foldering = self.config.foldering
        self.size_check = self.config.size_check

    def get_poster_url(self, slug):
        """Return the poster image url for ``slug``, without its query string."""
        poster_ajax = f"https://letterboxd.com/ajax/poster/film/{slug}/std/500x750/"
        poster_page = parse_url(poster_ajax)
        return poster_page.img['srcset'].split('?')[0]

    def run(self):
        """Walk the diary entries and save one poster file per entry.

        ``count`` is decremented once per entry and only used for progress
        output. Existing files are skipped outright unless ``size_check`` is
        enabled, in which case the poster is downloaded again and rewritten
        only when its size differs from the file on disk.
        """
        count = self.data['count']
        entries = self.data['entries']
        # Value of ``count`` when the current run of skipped files started.
        already_start = 0

        if not count:
            print('No entries found')
            return

        print(f'Processing {count} entries..')

        Path.check_path(
            self.EXPORTS_DIR,
            self.EXPORTS_USERS_DIR,
            self.USER_FOLDER,
            self.USER_POSTERS_DIR
        )

        if self.foldering:
            years_dir = os.path.join(self.USER_POSTERS_DIR, 'years')
            Path.check_path(years_dir)
            previous_year = None

        for v in entries.values():
            date = v["date"]

            file_date = "-".join(map(str, date.values()))
            file_dated_name = f"{file_date}_{v['slug']}.jpg"

            if self.foldering:
                current_year = str(date['year'])
                year_dir = os.path.join(years_dir, current_year)
                if previous_year != current_year:
                    # Only touch the filesystem when the year changes.
                    previous_year = current_year
                    Path.check_path(year_dir)
                file_path = os.path.join(year_dir, file_dated_name)
            else:
                file_path = os.path.join(self.USER_POSTERS_DIR, file_dated_name)

            if os.path.exists(file_path):
                if not self.size_check:
                    if not already_start:
                        already_start = count
                    count -= 1
                    continue

                print(f'{count} - Poster file already exists, checking size..')

            if (already_start - count) > 1:
                print(f'Have already processed {already_start - count} entries, skipping {count}..')
                already_start = 0

            poster_url = self.get_poster_url(v['slug'])
            response = requests.get(poster_url, timeout=self.REQUEST_TIMEOUT)

            if not response.ok:
                # Never store an error page under a .jpg name; move on instead.
                print(f'{count} - Download failed with status {response.status_code}, skipping..')
                count -= 1
                continue

            if os.path.exists(file_path):
                # Compare against the bytes that would be written; the
                # Content-Length header is optional and may be absent.
                if os.stat(file_path).st_size == len(response.content):
                    print(f'{count} - File already exists and has same size as new file, skipping..')
                    count -= 1
                    continue
                print(f'Rewriting {file_path}..')

            Path.save(file_path, response.content)
            print(f'{count} - Wrote {file_path}')
            count -= 1

        print('Processing complete!')
        click_url = 'file:///' + os.path.join(os.getcwd(), self.USER_POSTERS_DIR).replace("\\", "/")
        print('At', click_url)
class UserWatchlist:
    """Accessor for a user's watchlist pages."""

    FILMS_PER_PAGE = 7*4

    def __init__(self, username: str) -> None:
        self.username = username
        self.url = f"{DOMAIN}/{self.username}/watchlist"

    def __str__(self) -> str:
        return f"Not printable object of type: {self.__class__.__name__}"

    def get_owner(self): ...
    def get_count(self) -> int: return extract_count(self.url)
    def get_movies(self) -> dict: return extract_movies(self.url, self.FILMS_PER_PAGE)
    def get_watchlist(self, filters: dict=None) -> dict: return extract_watchlist(self.username, filters)


def extract_count(url: str) -> int:
    """Extracts the number of films from the watchlist page's DOM.

    Reads ``data-num-entries`` from ``div.s-watchlist-content`` when present,
    otherwise the first token of ``span.js-watchlist-count`` with thousands
    separators removed.

    Raises:
        ValueError: If neither element yields a count.
    """
    dom = parse_url(url)

    watchlist_div = dom.find("div", class_="s-watchlist-content")
    if watchlist_div and "data-num-entries" in watchlist_div.attrs:
        return int(watchlist_div["data-num-entries"])

    count_span = dom.find("span", class_="js-watchlist-count")

    if count_span:
        count = count_span.text.split()[0]
        return int(count.replace(",", ""))

    raise ValueError("Watchlist count could not be extracted from DOM")


def extract_watchlist(username: str, filters: dict = None) -> dict:
    """
    Extracts a user's watchlist from the platform.

    filter examples:
        - keys: decade, year, genre

        # positive genre & negative genre (start with '-')
        - {genre: ['mystery']} <- same -> {genre: 'mystery'}
        - {genre: ['-mystery']} <- same -> {genre: '-mystery'}

        # multiple genres
        - {genre: ['mystery', 'comedy'], decade: '1990s'}
        - {genre: ['mystery', '-comedy'], year: '2019'}
        - /decade/1990s/genre/action+-drama/
            ^^---> {'decade':'1990s','genre':['action','-drama']}

    Returns:
        dict: 'available', 'count', 'last_page', 'filters' and 'data', where
            'data' maps film id to name, slug, year, page, url and 'no'.
            'no' is numbered in reverse, so the first film found carries the
            highest number.
    """
    # Imported once per call rather than once per film container.
    from letterboxdpy.utils.utils_string import extract_year_from_movie_name, clean_movie_name

    data = {
        'available': False,
        'count': 0,
        'last_page': None,
        'filters': filters,
        'data': {}
    }

    films_per_page = UserWatchlist.FILMS_PER_PAGE  # 7 rows * 4 columns

    # Built without a trailing slash so that appending "/page/N" below does
    # not produce a double slash in the requested url.
    base_url = f"{DOMAIN}/{username}/watchlist"

    # Construct the URL with filters if provided
    if filters and isinstance(filters, dict):
        for key, values in filters.items():
            if not isinstance(values, list):
                values = [values]
            base_url += f"/{key}/" + "+".join([str(v) for v in values])

    def extract_movie_info(container) -> dict[str, str | int | None] | None:
        """Extract film ID, slug, name, and year from watchlist container.

        Returns:
            dict: {"id": str, "slug": str, "name": str, "year": int|None} or None if extraction fails

        Example:
            Input: container with "The Matrix (1999)"
            Output: {"id": "12345", "slug": "the-matrix", "name": "The Matrix", "year": 1999}
        """
        node = container.find("div", {"class": "react-component"}) or container.div
        if not node or 'data-film-id' not in node.attrs:
            return None

        raw_name = node.get('data-item-name')
        if not raw_name and node.img is not None:
            # Fall back to the poster's alt text when the attribute is absent.
            raw_name = node.img.get('alt')
        if not raw_name:
            return None

        return {
            "id": node['data-film-id'],
            "slug": node.get('data-item-slug') or node.get('data-film-slug'),
            "name": clean_movie_name(raw_name),
            "year": extract_year_from_movie_name(raw_name)
        }

    page = 1
    no = 1
    while True:
        dom = parse_url(f'{base_url}/page/{page}')
        containers = dom.find_all("li", {"class": "griditem"}) or dom.find_all("li", {"class": ["poster-container"]})

        for container in containers:
            movie_info = extract_movie_info(container)
            if movie_info:
                data['data'][movie_info["id"]] = {
                    'name': movie_info["name"],
                    'slug': movie_info["slug"],
                    'year': movie_info["year"],
                    'page': page,
                    'url': f"{DOMAIN}/film/{movie_info['slug']}/",
                    'no': no
                }
                no += 1

        if len(containers) < films_per_page:
            break
        page += 1

    # Set the count of films and availability
    data['count'] = len(data['data'])
    data['available'] = data['count'] > 0
    data['last_page'] = page

    # Reverse numbering for films
    for film in data['data'].values():
        film['no'] = data['count'] - film['no'] + 1

    return data
class Movie:
    """A single Letterboxd film, addressed by its slug.

    Construction builds one page object per film page (see ``MoviePages``)
    and then copies every profile-page field onto the instance. Data from
    the details, lists, members, reviews and similar pages is only
    requested when the matching ``get_*`` method is called.
    """

    class MoviePages:
        """Holds one page object per Letterboxd page of the film."""

        def __init__(self, slug: str) -> None:
            self.profile = movie_profile.MovieProfile(slug)
            self.details = movie_details.MovieDetails(slug)
            self.lists = movie_lists.MovieLists(slug)
            self.members = movie_members.MovieMembers(slug)
            self.reviews = movie_reviews.MovieReviews(slug)
            self.similar = movie_similar.MovieSimilar(slug)

    def __init__(self, slug: str) -> None:
        assert isinstance(slug, str), f"Movie slug must be a string, not {type(slug)}"

        self.slug = slug
        self.pages = self.MoviePages(self.slug)

        self.url = self.get_url()

        # Scalar fields taken from the profile page.
        self.id = self.get_id()
        self.title = self.get_title()
        self.original_title = self.get_original_title()
        self.runtime = self.get_runtime()
        self.rating = self.get_rating()
        self.year = self.get_year()
        self.tmdb_link = self.get_tmdb_link()
        self.imdb_link = self.get_imdb_link()
        self.poster = self.get_poster()
        self.banner = self.get_banner()
        self.tagline = self.get_tagline()

        # Larger / structured fields, still from the profile page.
        self.description = self.get_description()
        self.trailer = self.get_trailer()
        self.alternative_titles = self.get_alternative_titles()
        self.details = self.get_details()
        self.genres = self.get_genres()
        self.cast = self.get_cast()
        self.crew = self.get_crew()
        self.popular_reviews = self.get_popular_reviews()

    def __str__(self) -> str:
        """Serialize the instance as indented JSON, leaving out ``pages``."""
        return json_dumps(self, indent=2, cls=SecretsEncoder, secrets=['pages'])

    def jsonify(self) -> dict:
        """Return the JSON form of the instance parsed back into a dict."""
        return json_loads(str(self))

    # ---- profile page -------------------------------------------------
    def get_url(self) -> str:
        """Return the profile page URL."""
        return self.pages.profile.url

    def get_id(self) -> str:
        """Return the film id from the profile page."""
        return self.pages.profile.get_id()

    def get_title(self) -> str:
        """Return the title from the profile page."""
        return self.pages.profile.get_title()

    def get_original_title(self) -> str:
        """Return the original title from the profile page."""
        return self.pages.profile.get_original_title()

    def get_runtime(self) -> int:
        """Return the runtime from the profile page."""
        return self.pages.profile.get_runtime()

    def get_rating(self) -> float:
        """Return the rating from the profile page."""
        return self.pages.profile.get_rating()

    def get_year(self) -> int:
        """Return the release year from the profile page."""
        return self.pages.profile.get_year()

    def get_tmdb_link(self) -> str:
        """Return the TMDB link from the profile page."""
        return self.pages.profile.get_tmdb_link()

    def get_imdb_link(self) -> str:
        """Return the IMDb link from the profile page."""
        return self.pages.profile.get_imdb_link()

    def get_poster(self) -> str:
        """Return the poster from the profile page."""
        return self.pages.profile.get_poster()

    def get_banner(self) -> str:
        """Return the banner from the profile page."""
        return self.pages.profile.get_banner()

    def get_tagline(self) -> str:
        """Return the tagline from the profile page."""
        return self.pages.profile.get_tagline()

    def get_description(self) -> str:
        """Return the description from the profile page."""
        return self.pages.profile.get_description()

    def get_trailer(self) -> dict:
        """Return the trailer data from the profile page."""
        return self.pages.profile.get_trailer()

    def get_alternative_titles(self) -> list:
        """Return the alternative titles from the profile page."""
        return self.pages.profile.get_alternative_titles()

    def get_details(self) -> list:
        """Return the details section of the profile page."""
        return self.pages.profile.get_details()

    def get_genres(self) -> list:
        """Return the genres from the profile page."""
        return self.pages.profile.get_genres()

    def get_cast(self) -> list:
        """Return the cast from the profile page."""
        return self.pages.profile.get_cast()

    def get_crew(self) -> dict:
        """Return the crew from the profile page."""
        return self.pages.profile.get_crew()

    def get_popular_reviews(self) -> list:
        """Return the popular reviews shown on the profile page."""
        return self.pages.profile.get_popular_reviews()

    # ---- details page -------------------------------------------------
    def get_details_from_details(self) -> dict:
        """Return the extended details from the dedicated details page."""
        return self.pages.details.get_extended_details()

    # ---- lists page ---------------------------------------------------
    def get_lists(self) -> dict:
        """Return the lists reported by the lists page."""
        return self.pages.lists.get_lists()

    # ---- members page -------------------------------------------------
    def get_watchers_stats(self) -> dict:
        """Return the watcher statistics from the members page."""
        return self.pages.members.get_watchers_stats()

    # ---- reviews page -------------------------------------------------
    def get_reviews(self) -> dict:
        """Return the reviews from the reviews page."""
        return self.pages.reviews.get_reviews()

    def get_reviews_by_rating(self, rating: float) -> dict:
        """Return the reviews page results for the given ``rating``."""
        return self.pages.reviews.get_reviews_by_rating(rating)

    # ---- similar page -------------------------------------------------
    def get_similar_movies(self) -> dict:
        """Return the films from the similar-films page."""
        return self.pages.similar.get_similar_movies()
class Films:
    """Fetch movies from Letterboxd based on different URLs.

    Two URL families are understood:

    * ``.com/films/...`` pages, parsed with
      ``extract_movies_from_horizontal_list``;
    * ``.com/film/...`` pages, parsed with
      ``extract_movies_from_vertical_list``.

    The listing is scraped lazily on first access of ``movies`` and cached
    on the instance afterwards.
    """
    # Size of a full page for each layout; a page that yields fewer entries
    # than this is treated as the last one.
    VERTICAL_MAX = 20*5
    HORIZONTAL_MAX = 12*6

    def __init__(self, url: str):
        """Initialize Films class with a URL."""
        self.url = url
        self.ajax_url = get_ajax_url(url)
        self._movies = None  # filled on first access of ``movies``

    @property
    def movies(self) -> dict:
        """Get movies from the URL (scraped once, then cached)."""
        if self._movies is None:
            self._movies = self.get_movies()
        return self._movies

    @property
    def count(self) -> int:
        """Return the count of movies."""
        return len(self.movies)

    # Magic Methods
    def __len__(self) -> int:
        return self.count

    def __getitem__(self, key: str):
        return self.movies[key]

    def get_movies(self) -> dict:
        """Scrape and return a dictionary of movies from Letterboxd.

        Pages ``/page/1``, ``/page/2``, ... of ``self.ajax_url`` are fetched
        until one comes back with fewer entries than a full page.

        Raises:
            ValueError: if ``self.url`` belongs to neither supported URL
                family. Previously such a URL was never parsed and never
                hit a stop condition, so the loop requested pages forever.
        """
        # The layout depends only on the URL, so decide once instead of on
        # every page.
        if '.com/films/' in self.url:
            # https://letterboxd.com/films/popular/
            # https://letterboxd.com/films/like/v-for-vendetta/
            extract = extract_movies_from_horizontal_list
            page_size = self.HORIZONTAL_MAX
        elif '.com/film/' in self.url:
            # https://letterboxd.com/film/the-shawshank-redemption/similar/
            extract = extract_movies_from_vertical_list
            page_size = self.VERTICAL_MAX
        else:
            raise ValueError(
                f"Unsupported Letterboxd URL (expected '.com/films/' or '.com/film/'): {self.url}"
            )

        movies = {}
        page = 1
        while True:
            dom = parse_url(self.ajax_url + f"/page/{page}")
            new_movies = extract(dom)
            movies |= new_movies
            if len(new_movies) < page_size:
                break
            page += 1

        return movies
def get_upcoming_movies() -> dict:
    """Return the films from the "popular this week / upcoming" listing."""
    listing = Films("https://letterboxd.com/films/popular/this/week/upcoming/")
    return listing.movies


@assert_instance(int)
def get_movies_by_decade(decade: int) -> dict:
    """Return this week's popular films of a decade (``1990`` -> ``1990s``)."""
    listing = Films(f"https://letterboxd.com/films/ajax/popular/this/week/decade/{decade}s/")
    return listing.movies


@assert_instance(int)
def get_movies_by_year(year: int) -> dict:
    """Return this week's popular films released in ``year``."""
    listing = Films(f"https://letterboxd.com/films/ajax/popular/this/week/year/{year}/")
    return listing.movies


@assert_instance(str)
def get_movies_by_genre(genre: str) -> dict:
    """Return the films listed under a genre slug.

    Genre slugs:
    action, adventure, animation, comedy, crime, documentary,
    drama, family, fantasy, history, horror, music, mystery,
    romance, science-fiction, thriller, tv-movie, war, western
    """
    listing = Films(f"https://letterboxd.com/films/ajax/genre/{genre}")
    return listing.movies


@assert_instance(str)
def get_movies_by_service(service: str) -> dict:
    """Return this week's popular films available on a streaming service.

    Service slugs include:
    netflix, hulu, prime-video, disney-plus, itv-play, apple-tv,
    youtube-premium, amazon-prime-video, hbo-max, peacock, ...
    """
    listing = Films(f"https://letterboxd.com/films/popular/this/week/on/{service}/")
    return listing.movies
@assert_instance(str)
def get_movies_by_theme(theme: str) -> dict:
    """Return the films listed under a theme slug."""
    listing = Films(f"https://letterboxd.com/films/ajax/theme/{theme}")
    return listing.movies


@assert_instance(str)
def get_movies_by_nanogenre(nanogenre: str) -> dict:
    """Return the films listed under a nanogenre slug."""
    listing = Films(f"https://letterboxd.com/films/ajax/nanogenre/{nanogenre}/")
    return listing.movies


@assert_instance(str)
def get_movies_by_mini_theme(theme: str) -> dict:
    """Return the films listed under a mini-theme slug."""
    listing = Films(f"https://letterboxd.com/films/ajax/mini-theme/{theme}")
    return listing.movies


def print_movies(movies, title=None, max_count=None):
    """Print a numbered table of movies.

    Args:
        movies: mapping of movie id -> dict with at least a ``'name'`` key.
        title: optional heading; when given it is printed with the total
            number of movies, followed by a separator line.
        max_count: optional cap on the number of rows; a falsy value
            (``None`` or ``0``) prints every movie.
    """
    rule = '*' * 32

    if title:
        print(f"\n{title} -- ({len(movies)} movies)", end=f"\n{rule}\n")

    for position, (film_id, film) in enumerate(movies.items(), start=1):
        over_limit = bool(max_count) and position > max_count
        if over_limit:
            break
        print(f"{position:<8} {film_id:<8} {film['name']}")

    # The closing separator is printed even when no title was given.
    print(f"{rule}\n")
class LetterboxdStatisticsPlotter:
    """Plot per-weekday and per-month viewing counts of a Letterboxd user.

    Typical use is ``LetterboxdStatisticsPlotter(name).plot(2022, 2024)``,
    or ``run()`` to read the user and year range from the command line.
    """

    def __init__(self, username: str):
        self.username = username
        # year -> {"monthly": {...}, "daily": {...}}; filled by
        # gather_statistics_by_year().
        self.stats_by_year = {}

    def gather_statistics_by_year(self, start_year: int, end_year: int) -> dict:
        """Fetch user statistics for each year in ``start_year..end_year``.

        Collection is best-effort: when a year cannot be fetched, the reason
        is printed and zero-filled data is stored for that year so the plot
        still gets a row for it.

        Returns:
            ``self.stats_by_year``; empty when ``start_year > end_year``.
        """
        self.stats_by_year = {}
        year_count = end_year - start_year + 1

        print(f"Fetching statistics for @{self.username}...")
        print(f"Processing {year_count} year(s): {start_year}-{end_year}")

        # User() eagerly loads the whole profile on construction, so build it
        # once and reuse it. It is created inside the try so that a failure
        # is still handled (and retried) per year, as before.
        user = None
        for year in range(start_year, end_year + 1):
            print(f"Fetching data for {year}...", end=" ")
            try:
                if user is None:
                    user = User(self.username)
                stats = user.get_wrapped(year)
                yearly = {
                    # A missing key must not leak None into plot_statistics().
                    "monthly": stats.get("months") or {},
                    "daily": stats.get("days") or {},
                }
            except Exception as error:
                print(f"✗ (using empty data: {error})")
                yearly = {
                    "monthly": {i: 0 for i in range(1, 13)},  # 12 months with 0
                    "daily": {i: 0 for i in range(1, 8)},     # 7 days with 0
                }
            else:
                print("✓")
            self.stats_by_year[year] = yearly

        print("Data collection complete. Creating plot...")
        return self.stats_by_year

    def plot_statistics(self) -> None:
        """Show one row of (daily, monthly) bar charts per gathered year.

        Does nothing when no statistics have been gathered.
        """
        if not self.stats_by_year:
            return

        num_years = len(self.stats_by_year)
        fig_height = 4 if num_years == 1 else 3 * num_years
        # squeeze=False always returns a 2-D array of axes, so a single year
        # needs no special indexing.
        fig, axes = plt.subplots(
            num_years, 2,
            figsize=(12, fig_height),
            facecolor=Colors.BG,
            squeeze=False,
        )

        years_range = f"{min(self.stats_by_year)}-{max(self.stats_by_year)}"
        fig.canvas.manager.set_window_title(f'Letterboxd Statistics - {self.username} ({years_range})')
        fig.suptitle(f'{self.username} - Movies Watched ({years_range})', fontsize=16, color='white')

        def style_axes(ax):
            ax.set_facecolor(Colors.BG)
            ax.tick_params(colors=Colors.TEXT)
            ax.spines['bottom'].set_color(Colors.TEXT)
            ax.spines['top'].set_visible(False)
            ax.spines['right'].set_visible(False)
            ax.spines['left'].set_color(Colors.TEXT)

        for row, (year, stats) in enumerate(self.stats_by_year.items()):
            # "or {}" also covers entries stored as None by older callers.
            daily_data = stats.get('daily') or {}
            monthly_data = stats.get('monthly') or {}

            daily_values = [daily_data.get(day, 0) for day in range(1, 8)]
            monthly_values = [monthly_data.get(month, 0) for month in range(1, 13)]

            ax_daily, ax_monthly = axes[row, 0], axes[row, 1]
            for ax in (ax_daily, ax_monthly):
                style_axes(ax)

            ax_daily.bar(DAY_ABBREVIATIONS, daily_values, color=Colors.BLUE, alpha=0.85)
            ax_daily.set_title(f'{year} - Daily', color='white')
            ax_daily.set_ylabel('Movies', color=Colors.TEXT)

            ax_monthly.bar(MONTH_ABBREVIATIONS, monthly_values, color=Colors.GREEN, alpha=0.85)
            ax_monthly.set_title(f'{year} - Monthly', color='white')
            ax_monthly.set_ylabel('Movies', color=Colors.TEXT)

        plt.tight_layout(rect=[0, 0, 1, 0.95])  # leave room for the suptitle
        plt.show()

    def plot(self, start_year: int = None, end_year: int = None):
        """Gather statistics and create plot.

        ``start_year`` defaults to last year and ``end_year`` to the current
        year.
        """
        if start_year is None:
            current_year = datetime.now().year
            start_year = current_year - 1
        if end_year is None:
            end_year = datetime.now().year

        self.gather_statistics_by_year(start_year, end_year)

        if self.stats_by_year:
            self.plot_statistics()
        else:
            print(f"No statistics found for user: {self.username}")

    def run(self):
        """Main program loop: parse CLI arguments, prompt if needed, plot."""
        sys.stdout.reconfigure(encoding="utf-8")
        parser = argparse.ArgumentParser(description="Visualize Letterboxd user statistics")
        parser.add_argument("--user", help="Letterboxd username")
        current_year = datetime.now().year
        parser.add_argument("--start-year", type=int, default=current_year-1, help=f"Start year (default: {current_year-1})")
        parser.add_argument("--end-year", type=int, default=current_year, help=f"End year (default: {current_year})")
        args = parser.parse_args()

        username = args.user
        if not username or not username.strip():
            username = input("Enter a Letterboxd username: ").strip()

        self.username = username
        self.plot(args.start_year, args.end_year)
class LetterboxdRatingPlotter:
    """Draw a Letterboxd-style histogram of one user's film ratings."""

    def __init__(self, username: str = None):
        self.username = username

    def create_plot(self, ratings: dict):
        """Render and show the rating distribution.

        ``ratings`` maps a star value (0.5, 1.0, ..., 5.0) to the number of
        films given that rating; missing values count as zero.
        """
        steps = np.arange(0.5, 5.5, 0.5)
        tallies = np.array([ratings.get(step, 0) for step in steps])
        total_ratings = int(tallies.sum())
        if total_ratings:
            average_rating = round(float((steps * tallies).sum() / total_ratings), 2)
            most_given_rating = float(steps[tallies.argmax()])
        else:
            average_rating = 0.0
            most_given_rating = 0.5

        def star_label(value) -> str:
            # 0.5 has its own label; other half steps get a trailing "½".
            if value == 0.5:
                return "½★"
            stars = "★" * int(value)
            if value % 1 == 0.5:
                return stars + "½"
            return stars

        # Figure: histogram on top, thin statistics strip below.
        fig, (ax, ax_stats) = plt.subplots(
            2, 1,
            figsize=(12, 8),
            gridspec_kw={"height_ratios": [0.86, 0.14], "hspace": 0},
            facecolor=Colors.BG,
        )
        ax.set_facecolor(Colors.BG)
        ax_stats.set_facecolor(Colors.BG)
        ax_stats.axis('off')
        fig.canvas.manager.set_window_title(f"RATINGS - {self.username}")

        bars = ax.bar(steps, tallies, width=0.45, color=Colors.BLUE, alpha=0.85)

        # Header (title, total, username) and the bottom statistics line.
        ax.text(0.02, 0.98, "R A T I N G S", transform=ax.transAxes,
                fontsize=12, color=Colors.TEXT, weight='bold', va='top', family='monospace')
        ax.text(0.98, 0.98, f"{total_ratings:,}", transform=ax.transAxes,
                fontsize=12, color=Colors.TEXT, weight='bold', va='top', ha='right')
        ax.text(0.02, 0.92, f"@{self.username}", transform=ax.transAxes,
                fontsize=11, color='white', weight='bold', va='top')
        ax_stats.text(0.5, 0.5,
                      f"Average: {average_rating}★ • Total: {total_ratings:,} • Most Given: {most_given_rating}★",
                      ha='center', va='center', fontsize=11, color=Colors.TEXT, weight='bold')

        # Highlight every bar that reaches the maximum and label non-empty bars.
        peak = max(tallies)
        for bar, tally in zip(bars, tallies):
            if tally == peak:
                bar.set_color(Colors.GREEN)
                bar.set_alpha(1.0)
            if tally <= 0:
                continue
            ax.text(bar.get_x() + bar.get_width() / 2, tally + peak * 0.01,
                    str(int(tally)), ha="center", va="bottom", fontsize=8,
                    color=Colors.TEXT, alpha=0.9)

        # Axes: star tick labels, no y ticks, no frame, faint horizontal grid.
        ax.set_xlim(0.25, 5.25)
        ax.set_ylim(0, peak * 1.12)
        tick_positions = np.arange(0.5, 5.5, 0.5)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels([star_label(p) for p in tick_positions], fontsize=9, color=Colors.TEXT)
        ax.set_yticks([])
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.grid(True, axis='y', alpha=0.1, color=Colors.TEXT, linestyle='-')

        # Layout is handled by the GridSpec; only a light tightening here.
        plt.tight_layout()
        plt.show()

    def fetch_ratings(self, username: str = None) -> dict:
        """Return a ``{stars: count}`` histogram of the user's rated films.

        Falls back to ``self.username`` when ``username`` is not given. The
        stored rating is halved to obtain the star value; films without a
        rating are skipped.
        """
        target = username or self.username
        buckets = dict.fromkeys(np.arange(0.5, 5.5, 0.5), 0)

        print(f"Fetching ratings for @{target}...")
        films = User(target).get_films()["movies"]
        print(f"Processing {len(films)} rated movies...")

        for film in films.values():
            score = film.get("rating")
            if score:
                buckets[score/2] += 1

        found = sum(buckets.values())
        print(f"Found {found} ratings. Creating plot...")
        return buckets

    def plot(self, username: str = None):
        """Fetch the ratings (optionally switching user) and plot them."""
        if username:
            self.username = username

        histogram = self.fetch_ratings()
        if sum(histogram.values()) <= 0:
            print(f"No ratings found for user: {self.username}")
            return
        self.create_plot(histogram)

    def run(self):
        """Main program loop: read ``--user`` or prompt for it, then plot."""
        sys.stdout.reconfigure(encoding="utf-8")
        parser = argparse.ArgumentParser(description="Visualize Letterboxd user rating distribution.")
        parser.add_argument("--user", help="Letterboxd username to analyze")

        args = parser.parse_args()

        username = args.user
        if is_whitespace_or_empty(username) or not username:
            username = get_input("Enter Letterboxd username: ")
        self.plot(username)
| "walle": { 4 | "name": "WALL\u00b7E", 5 | "id": "45994", 6 | "rating": 10, 7 | "year": 2008, 8 | "liked": true 9 | }, 10 | "the-game": { 11 | "name": "The Game", 12 | "id": "50168", 13 | "rating": 10, 14 | "year": 1997, 15 | "liked": true 16 | }, 17 | "interstellar": { 18 | "name": "Interstellar", 19 | "id": "117621", 20 | "rating": 10, 21 | "year": 2014, 22 | "liked": false 23 | }, 24 | "goodfellas": { 25 | "name": "GoodFellas", 26 | "id": "51383", 27 | "rating": 10, 28 | "year": 1990, 29 | "liked": true 30 | }, 31 | "children-of-men": { 32 | "name": "Children of Men", 33 | "id": "46911", 34 | "rating": 10, 35 | "year": 2006, 36 | "liked": true 37 | }, 38 | "whiplash-2014": { 39 | "name": "Whiplash", 40 | "id": "171384", 41 | "rating": 10, 42 | "year": 2014, 43 | "liked": true 44 | }, 45 | "dune-part-two": { 46 | "name": "Dune: Part Two", 47 | "id": "617443", 48 | "rating": 10, 49 | "year": 2024, 50 | "liked": false 51 | }, 52 | "cure": { 53 | "name": "Cure", 54 | "id": "28195", 55 | "rating": 10, 56 | "year": 1997, 57 | "liked": true 58 | }, 59 | "society-of-the-snow": { 60 | "name": "Society of the Snow", 61 | "id": "813840", 62 | "rating": 10, 63 | "year": 2023, 64 | "liked": true 65 | }, 66 | "the-conversation": { 67 | "name": "The Conversation", 68 | "id": "51529", 69 | "rating": 10, 70 | "year": 1974, 71 | "liked": true 72 | }, 73 | "godzilla-minus-one": { 74 | "name": "Godzilla Minus One", 75 | "id": "845706", 76 | "rating": 10, 77 | "year": 2023, 78 | "liked": true 79 | }, 80 | "nausicaa-of-the-valley-of-the-wind": { 81 | "name": "Nausica\u00e4 of the Valley of the Wind", 82 | "id": "51969", 83 | "rating": 10, 84 | "year": 1984, 85 | "liked": true 86 | }, 87 | "inglourious-basterds": { 88 | "name": "Inglourious Basterds", 89 | "id": "41352", 90 | "rating": 10, 91 | "year": 2009, 92 | "liked": true 93 | }, 94 | "house": { 95 | "name": "House", 96 | "id": "35925", 97 | "rating": 10, 98 | "year": 1977, 99 | "liked": false 100 | }, 101 | "the-big-lebowski": 
{ 102 | "name": "The Big Lebowski", 103 | "id": "51935", 104 | "rating": 10, 105 | "year": 1998, 106 | "liked": true 107 | }, 108 | "ponyo": { 109 | "name": "Ponyo", 110 | "id": "44594", 111 | "rating": 10, 112 | "year": 2008, 113 | "liked": true 114 | }, 115 | "rocky": { 116 | "name": "Rocky", 117 | "id": "51090", 118 | "rating": 10, 119 | "year": 1976, 120 | "liked": true 121 | }, 122 | "world-of-tomorrow": { 123 | "name": "World of Tomorrow", 124 | "id": "230808", 125 | "rating": 10, 126 | "year": 2015, 127 | "liked": true 128 | }, 129 | "the-thing": { 130 | "name": "The Thing", 131 | "id": "51155", 132 | "rating": 10, 133 | "year": 1982, 134 | "liked": false 135 | }, 136 | "kikis-delivery-service": { 137 | "name": "Kiki's Delivery Service", 138 | "id": "41360", 139 | "rating": 10, 140 | "year": 1989, 141 | "liked": true 142 | }, 143 | "nope": { 144 | "name": "Nope", 145 | "id": "682547", 146 | "rating": 10, 147 | "year": 2022, 148 | "liked": true 149 | }, 150 | "ratatouille": { 151 | "name": "Ratatouille", 152 | "id": "50558", 153 | "rating": 10, 154 | "year": 2007, 155 | "liked": true 156 | }, 157 | "parasite-2019": { 158 | "name": "Parasite", 159 | "id": "426406", 160 | "rating": 10, 161 | "year": 2019, 162 | "liked": true 163 | }, 164 | "everything-everywhere-all-at-once": { 165 | "name": "Everything Everywhere All at Once", 166 | "id": "474474", 167 | "rating": 10, 168 | "year": 2022, 169 | "liked": true 170 | }, 171 | "the-king-of-comedy": { 172 | "name": "The King of Comedy", 173 | "id": "51794", 174 | "rating": 10, 175 | "year": 1982, 176 | "liked": true 177 | }, 178 | "james-acaster-repertoire": { 179 | "name": "James Acaster: Repertoire", 180 | "id": "445120", 181 | "rating": 10, 182 | "year": 2018, 183 | "liked": true 184 | }, 185 | "bo-burnham-make-happy": { 186 | "name": "Bo Burnham: Make Happy", 187 | "id": "335216", 188 | "rating": 10, 189 | "year": 2016, 190 | "liked": true 191 | }, 192 | "the-truman-show": { 193 | "name": "The Truman Show", 194 
| "id": "27256", 195 | "rating": 10, 196 | "year": 1998, 197 | "liked": true 198 | }, 199 | "uncut-gems": { 200 | "name": "Uncut Gems", 201 | "id": "404266", 202 | "rating": 10, 203 | "year": 2019, 204 | "liked": true 205 | }, 206 | "superbad": { 207 | "name": "Superbad", 208 | "id": "47776", 209 | "rating": 10, 210 | "year": 2007, 211 | "liked": true 212 | }, 213 | "scott-pilgrim-vs-the-world": { 214 | "name": "Scott Pilgrim vs. the World", 215 | "id": "37833", 216 | "rating": 10, 217 | "year": 2010, 218 | "liked": true 219 | }, 220 | "spider-man-into-the-spider-verse": { 221 | "name": "Spider-Man: Into the Spider-Verse", 222 | "id": "251943", 223 | "rating": 10, 224 | "year": 2018, 225 | "liked": true 226 | }, 227 | "baby-driver": { 228 | "name": "Baby Driver", 229 | "id": "268950", 230 | "rating": 10, 231 | "year": 2017, 232 | "liked": true 233 | }, 234 | "the-grand-budapest-hotel": { 235 | "name": "The Grand Budapest Hotel", 236 | "id": "95113", 237 | "rating": 10, 238 | "year": 2014, 239 | "liked": true 240 | }, 241 | "moonrise-kingdom": { 242 | "name": "Moonrise Kingdom", 243 | "id": "70105", 244 | "rating": 10, 245 | "year": 2012, 246 | "liked": true 247 | }, 248 | "isle-of-dogs-2018": { 249 | "name": "Isle of Dogs", 250 | "id": "333448", 251 | "rating": 10, 252 | "year": 2018, 253 | "liked": true 254 | } 255 | }, 256 | "count": 36, 257 | "liked_count": 32, 258 | "rating_count": 36, 259 | "liked_percentage": 88.89, 260 | "rating_percentage": 100.0, 261 | "rating_average": 10.0 262 | } -------------------------------------------------------------------------------- /letterboxdpy/pages/user_network.py: -------------------------------------------------------------------------------- 1 | from letterboxdpy.core.scraper import parse_url 2 | from letterboxdpy.constants.project import DOMAIN 3 | from letterboxdpy.core.exceptions import PageFetchError 4 | from letterboxdpy.avatar import Avatar 5 | 6 | 7 | class UserNetwork: 8 | 9 | def __init__(self, username: str) 
def extract_network(username: str, section: str) -> dict:
    """
    Fetches the specified network section ('followers' or 'following') for the user.

    Pages are requested one after another, starting at page 1, until a page
    yields fewer members than a full page.

    Args:
        username: Profile whose network section is read.
        section: Either 'followers' or 'following'.

    Returns:
        A dict keyed by member username. Every value holds the keys
        'username', 'name', 'url', 'avatar', 'followers', 'following',
        'watched', 'lists' and 'likes'. A count is None when the row does
        not contain it.

    Raises:
        AssertionError: If ``section`` is not one of the two supported values.
        PageFetchError: If a page could not be fetched.
    """
    # Imported exactly once, before any nested helper runs. The previous code
    # imported `re` inside conditional branches of the row loop, so a row with
    # a "following" link but no "followers" link hit an unbound local name.
    import re

    assert section in ['followers', 'following'], "Section must be either 'followers' or 'following'"

    BASE_URL = f"{DOMAIN}/{username}/{section}"
    PERSONS_PER_PAGE = 25
    # Accept thousands separators so "1,234" is read as 1234 rather than 1.
    COUNT_PATTERN = re.compile(r'\d[\d,]*')

    def fetch_page(page_num: int):
        """Fetches a single page of the user's network section."""
        try:
            return parse_url(f"{BASE_URL}/page/{page_num}")
        except Exception as e:
            raise PageFetchError(f"Failed to fetch page {page_num}: {e}") from e

    def parse_count(element):
        """Returns the first integer found in the element's text, or None."""
        if not element:
            return None
        match = COUNT_PATTERN.search(element.get_text(strip=True))
        if not match:
            return None
        return int(match.group().replace(',', ''))

    def metadata_count(metadata, keyword: str):
        """Returns the count shown by the metadata link whose href contains keyword."""
        if not metadata:
            return None
        return parse_count(metadata.find('a', href=lambda x: x and keyword in x))

    def cell_count(row, css_class: str):
        """Returns the count shown by the link inside the given table cell."""
        cell = row.find('td', class_=css_class)
        if not cell:
            return None
        return parse_count(cell.find('a'))

    def extract_persons(dom) -> dict:
        """Extracts persons from a DOM object and returns them as a dictionary."""
        persons_dict = {}

        member_table = dom.find('table', class_='member-table')
        if not member_table:
            return persons_dict

        for row in member_table.find_all('tr'):
            person_summary = row.find('div', class_='person-summary')
            if not person_summary:
                continue

            avatar_link = person_summary.find('a', class_='avatar')
            if not avatar_link:
                continue

            # The profile href looks like "/name/"; dropping slashes leaves the name.
            person = avatar_link['href'].replace('/', '')
            avatar_img = avatar_link.find('img')
            display_name = avatar_img['alt'] if avatar_img else person
            avatar_url = avatar_img['src'] if avatar_img else ''

            if avatar_url:
                avatar_data = Avatar(avatar_url).upscaled_data
            else:
                avatar_data = {'exists': False, 'upscaled': False, 'url': ''}

            # The explicit name link, when present, wins over the image alt text.
            name_link = person_summary.find('a', class_='name')
            if name_link:
                display_name = name_link.get_text(strip=True)

            metadata = person_summary.find('small', class_='metadata')

            persons_dict[person] = {
                'username': person,
                'name': display_name,
                'url': f"{DOMAIN}/{person}",
                'avatar': avatar_data,
                'followers': metadata_count(metadata, 'followers'),
                'following': metadata_count(metadata, 'following'),
                'watched': cell_count(row, 'col-watched'),
                'lists': cell_count(row, 'col-lists'),
                'likes': cell_count(row, 'col-likes')
            }

        return persons_dict

    users_list = {}
    page_num = 1

    while True:
        dom = fetch_page(page_num)
        persons = extract_persons(dom)
        users_list.update(persons)

        # A short page means the end of the list has been reached.
        if len(persons) < PERSONS_PER_PAGE:
            break

        page_num += 1

    return users_list
user_diary.UserDiary(username) 35 | self.films = user_films.UserFilms(username) 36 | self.likes = user_likes.UserLikes(username) 37 | self.lists = user_lists.UserLists(username) 38 | self.network = user_network.UserNetwork(username) 39 | self.profile = user_profile.UserProfile(username) 40 | self.reviews = user_reviews.UserReviews(username) 41 | self.tags = user_tags.UserTags(username) 42 | self.watchlist = user_watchlist.UserWatchlist(username) 43 | 44 | def __init__(self, username: str) -> None: 45 | assert re.match("^[A-Za-z0-9_]+$", username), "Invalid username" 46 | 47 | self.username = username.lower() 48 | self.pages = self.UserPages(self.username) 49 | 50 | self.url = self.get_url() 51 | self.id = self.get_id() 52 | self.is_hq = self.get_hq_status() 53 | self.display_name = self.get_display_name() 54 | self.bio = self.get_bio() 55 | self.location = self.get_location() 56 | self.website = self.get_website() 57 | self.watchlist_length = self.get_watchlist_length() 58 | self.stats = self.get_stats() 59 | self.favorites = self.get_favorites() 60 | self.avatar = self.get_avatar() 61 | self.recent = { 62 | 'watchlist': self.get_watchlist_recent(), 63 | 'diary': self.get_diary_recent() 64 | } 65 | 66 | def __str__(self) -> str: 67 | return json_dumps(self, indent=2, cls=SecretsEncoder, secrets=['pages']) 68 | 69 | def jsonify(self) -> dict: 70 | return json_loads(self.__str__()) 71 | 72 | def get_activity(self) -> dict: 73 | return self.pages.activity.get_activity() 74 | def get_activity_following(self) -> dict: 75 | return self.pages.activity.get_activity_following() 76 | 77 | def get_diary(self, year: int = None, month: int = None, day: int = None, page: int = None) -> dict: 78 | return self.pages.diary.get_diary(year, month, day, page) 79 | def get_diary_year(self, year: int = CURRENT_YEAR) -> dict: 80 | return self.pages.diary.get_year(year) 81 | def get_diary_month(self, year: int = CURRENT_YEAR, month: int = CURRENT_MONTH) -> dict: 82 | return 
self.pages.diary.get_month(year, month) 83 | def get_diary_day(self, year: int = CURRENT_YEAR, month: int = CURRENT_MONTH, day: int = CURRENT_DAY) -> dict: 84 | return self.pages.diary.get_day(year, month, day) 85 | def get_wrapped(self, year: int = CURRENT_YEAR) -> dict: 86 | return self.pages.diary.get_wrapped(year) 87 | 88 | def get_films(self) -> dict: 89 | return self.pages.films.get_films() 90 | def get_films_by_rating(self, rating: float | int) -> dict: 91 | return self.pages.films.get_films_rated(rating) 92 | def get_films_not_rated(self) -> dict: 93 | return self.pages.films.get_films_not_rated() 94 | def get_genre_info(self) -> dict: 95 | return self.pages.films.get_genre_info() 96 | 97 | def get_liked_films(self) -> dict: 98 | return self.pages.likes.get_liked_films() 99 | def get_liked_reviews(self) -> dict: 100 | return self.pages.likes.get_liked_reviews() 101 | def get_liked_lists(self) -> dict: 102 | return self.pages.likes.get_liked_lists() 103 | 104 | def get_list(self, slug: str) -> LetterboxdList: 105 | return LetterboxdList(self.username, slug) 106 | 107 | def get_lists(self) -> dict: 108 | return self.pages.lists.get_lists() 109 | 110 | def get_following(self) -> dict: 111 | return self.pages.network.get_following() 112 | def get_followers(self) -> dict: 113 | return self.pages.network.get_followers() 114 | 115 | def get_url(self) -> str: 116 | return self.pages.profile.url 117 | def get_id(self) -> str: 118 | return self.pages.profile.get_id() 119 | def get_hq_status(self) -> bool: 120 | return self.pages.profile.get_hq_status() 121 | def get_display_name(self) -> str: 122 | return self.pages.profile.get_display_name() 123 | def get_bio(self) -> str: 124 | return self.pages.profile.get_bio() 125 | def get_location(self) -> str: 126 | return self.pages.profile.get_location() 127 | def get_website(self) -> str: 128 | return self.pages.profile.get_website() 129 | def get_watchlist_length(self) -> int: 130 | return 
self.pages.profile.get_watchlist_length() 131 | def get_stats(self) -> dict: 132 | return self.pages.profile.get_stats() 133 | def get_favorites(self) -> dict: 134 | return self.pages.profile.get_favorites() 135 | def get_avatar(self) -> str: 136 | return self.pages.profile.get_avatar() 137 | def get_watchlist_recent(self) -> dict: 138 | return self.pages.profile.get_watchlist_recent() 139 | def get_diary_recent(self) -> dict: 140 | return self.pages.profile.get_diary_recent() 141 | 142 | def get_reviews(self) -> dict: 143 | return self.pages.reviews.get_reviews() 144 | 145 | def get_user_tags(self) -> dict: 146 | return self.pages.tags.get_user_tags() 147 | 148 | def get_watchlist_count(self) -> int: 149 | return self.pages.watchlist.get_count() 150 | def get_watchlist_movies(self) -> dict: 151 | return self.pages.watchlist.get_movies() 152 | def get_watchlist(self, filters:dict=None) -> dict: 153 | return self.pages.watchlist.get_watchlist(filters) 154 | 155 | if __name__ == "__main__": 156 | import argparse 157 | import sys 158 | 159 | # Reconfigure stdout encoding to UTF-8 to support non-ASCII characters 160 | sys.stdout.reconfigure(encoding='utf-8') 161 | 162 | # Parse command-line arguments 163 | parser = argparse.ArgumentParser() 164 | parser.add_argument('--user', dest="user", help="Username to gather stats on") 165 | args = parser.parse_args() 166 | 167 | # Extract username from command-line arguments or prompt user for input 168 | username = args.user or '' 169 | 170 | # Keep prompting user until a valid username is provided 171 | while not len(username.strip()): 172 | username = input('Enter username: ') 173 | 174 | # Display the username being processed 175 | print(f"Processing username: {username}") 176 | 177 | # Initialize a User instance with the provided username 178 | user_instance = User(username) 179 | 180 | # Print user instance(profile) data 181 | print(user_instance) 182 | 183 | # Iterate over user's film data and print each movie 184 | for 
def extract_user_films(url: str) -> dict:
    """
    Collect all films listed under ``url`` together with summary statistics.

    Pages are read in order until one holds fewer films than a full page.
    The result has a 'movies' dict plus the keys 'count', 'liked_count',
    'rating_count', 'liked_percentage', 'rating_percentage' and
    'rating_average'.
    """
    FILMS_PER_PAGE = 12 * 6

    def summarize(films: dict) -> dict:
        """Build the statistics entries for the collected films."""
        total = len(films)
        liked_total = sum(film['liked'] for film in films.values())
        rated_total = sum(1 for film in films.values() if film['rating'] is not None)

        if liked_total:
            liked_share = round(liked_total / total * 100, 2)
        else:
            liked_share = 0.0

        if rated_total:
            rating_sum = sum(film['rating'] for film in films.values() if film['rating'])
            rated_share = round(rated_total / total * 100, 2)
            mean_rating = round(rating_sum / rated_total, 2)
        else:
            rated_share = 0.0
            mean_rating = 0.0

        return {
            'count': total,
            'liked_count': liked_total,
            'rating_count': rated_total,
            'liked_percentage': liked_share,
            'rating_percentage': rated_share,
            'rating_average': mean_rating
        }

    result = {'movies': {}}
    page_number = 1

    while True:
        dom = parse_url(f"{url}/page/{page_number}/")
        page_movies = extract_movies_from_user_watched(dom)
        result['movies'] |= page_movies

        # Fewer films than a full page: this was the last page.
        if len(page_movies) < FILMS_PER_PAGE:
            break
        page_number += 1

    result.update(summarize(result['movies']))
    return result
container.find("p", {"class": "poster-viewingdata"}) or container.p 111 | rating = None 112 | liked = False 113 | 114 | if poster_viewingdata and poster_viewingdata.span: 115 | for span in poster_viewingdata.find_all("span"): 116 | if rating is None: 117 | rating = _extract_rating_from_span(span) 118 | if not liked: 119 | liked = _extract_like_status(span) 120 | 121 | return rating, liked 122 | 123 | def _get_movie_details(container): 124 | """Extract complete movie information including rating and like status.""" 125 | from letterboxdpy.utils.utils_string import extract_year_from_movie_name, clean_movie_name 126 | 127 | react_component = container.find("div", {"class": "react-component"}) or container.div 128 | if not react_component or 'data-film-id' not in react_component.attrs: 129 | return None 130 | 131 | rating, liked = _extract_rating_and_like_status(container) 132 | 133 | movie_slug = react_component.get('data-item-slug') or react_component.get('data-film-slug') 134 | movie_id = react_component['data-film-id'] 135 | raw_name = react_component.get('data-item-name') or react_component.img['alt'] 136 | movie_name = clean_movie_name(raw_name) 137 | year = extract_year_from_movie_name(raw_name) 138 | 139 | return movie_slug, { 140 | 'name': movie_name, 141 | "id": movie_id, 142 | "rating": rating, 143 | "year": year, 144 | "liked": liked 145 | } 146 | 147 | def _find_movie_containers(dom): 148 | """Find movie containers using modern structure with legacy fallback.""" 149 | container_selectors = [ 150 | ("li", {"class": "griditem"}), # Modern React structure 151 | ("li", {"class": "poster-container"}), # Legacy structure 152 | ("li", {"class": "posteritem"}) # Liked films structure 153 | ] 154 | 155 | for tag, attrs in container_selectors: 156 | containers = dom.find_all(tag, attrs) 157 | if containers: 158 | return containers 159 | return [] 160 | 161 | containers = _find_movie_containers(dom) 162 | 163 | movies = {} 164 | for container in containers: 165 | if 
def extract_user_genre_info(username: str) -> dict:
    """
    Count the films a user has logged for every genre in GENRES.

    One genre page is fetched per genre. The count is read from the text
    node that directly follows the 'replace-if-you' span.

    Args:
        username: Profile whose genre pages are read.

    Returns:
        Dict mapping each genre to its film count. The count is 0 when the
        marker span, its trailing text, or a number inside that text is
        missing.
    """
    genre_counts = {}
    for genre in GENRES:
        dom = parse_url(f"{DOMAIN}/{username}/films/genre/{genre}/")
        marker = dom.find("span", {"class": ["replace-if-you"]})

        # The span may be absent, and its sibling may be missing or another
        # tag; only a plain text node can carry the count.
        trailing = marker.next_sibling if marker is not None else None
        text = trailing.replace(',', '') if isinstance(trailing, str) else ''

        genre_counts[genre] = next(
            (int(token) for token in text.split() if token.isdigit()),
            0,
        )

    return genre_counts
get_description(self) -> str: return extract_description(self.dom) 34 | def get_date_created(self) -> list: return extract_date_created(self.dom) 35 | def get_date_updated(self) -> list: return extract_date_updated(self.dom) 36 | def get_tags(self) -> list: return extract_tags(self.dom) 37 | def get_movies(self) -> dict: return extract_movies(self.url, self.LIST_ITEMS_PER_PAGE) 38 | def get_count(self) -> int: return extract_count(self.dom) 39 | def get_list_id(self) -> str | None: return extract_list_id(self.dom) 40 | def get_list_meta(self, url: str) -> ListMetaData: return extract_list_meta(self.dom, url) 41 | 42 | def extract_list_id(dom) -> str | None: 43 | """ 44 | Extracts the list ID from the list page DOM. 45 | 46 | Args: 47 | dom: BeautifulSoup DOM object of the list page 48 | 49 | Returns: 50 | List ID as string or None if not found 51 | """ 52 | try: 53 | # Method 1: Look for data-report-url attribute in report link 54 | report_link = dom.find('span', {'data-report-url': True}) 55 | if report_link: 56 | report_url = report_link.get('data-report-url') 57 | if report_url and 'filmlist:' in report_url: 58 | # Extract ID from pattern like "/ajax/filmlist:30052453/report-form" 59 | import re 60 | match = re.search(r'filmlist:(\d+)', report_url) 61 | if match: 62 | return match.group(1) 63 | 64 | # Method 2: Look for data-popmenu-id attribute 65 | report_menu = dom.find('a', {'data-popmenu-id': True}) 66 | if report_menu: 67 | popmenu_id = report_menu.get('data-popmenu-id') 68 | if popmenu_id and 'list-' in popmenu_id: 69 | # Extract ID from pattern like "report-member-username-list-30052453" 70 | import re 71 | match = re.search(r'list-(\d+)$', popmenu_id) 72 | if match: 73 | return match.group(1) 74 | 75 | return None 76 | except Exception as e: 77 | print(f"Error extracting list ID: {e}") 78 | return None 79 | 80 | def extract_count(dom) -> int: 81 | """Extracts the number of films from the list DOM.""" 82 | try: 83 | count = get_movie_count_from_meta(dom) 
def extract_tags(dom) -> list:
    """
    Return the tag names shown on a Letterboxd list page.

    The names are the texts of all links inside the <ul> with class 'tags'.

    Args:
        dom: BeautifulSoup DOM object of the list page

    Returns:
        List of tag names in document order; empty when the page has no
        tag list.
    """
    tag_list = dom.find("ul", {"class": ["tags"]})
    if not tag_list:
        return []

    # find_all replaces the legacy camelCase alias findChildren.
    return [link.text for link in tag_list.find_all("a")]
-------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "export_timestamp": "2025-08-28T17:48:04.507738", 4 | "source_url": "https://letterboxd.com/ajax/activity-pagination/nmcassa/following", 5 | "total_activities": 20 6 | }, 7 | "activities": { 8 | "9684576241": { 9 | "activity_type": "basic", 10 | "timestamp": "2025-08-27T22:02:36.210000Z", 11 | "content": { 12 | "action": "added", 13 | "description": "Karsten added Hero to his watchlist", 14 | "movie": { 15 | "title": "Hero", 16 | "slug": "hero-2002", 17 | "url": "https://letterboxd.com/film/hero-2002/" 18 | } 19 | } 20 | }, 21 | "9683113717": { 22 | "activity_type": "basic", 23 | "timestamp": "2025-08-27T18:17:08.676000Z", 24 | "content": { 25 | "action": "liked", 26 | "description": "Karsten watched, liked and rated Sorry, Baby ★★★½ on Wednesday Aug 27, 2025", 27 | "movie": { 28 | "title": "Sorry, Baby", 29 | "slug": "sorry-baby-2025", 30 | "url": "https://letterboxd.com/film/sorry-baby-2025/" 31 | } 32 | } 33 | }, 34 | "9682558075": { 35 | "activity_type": "basic", 36 | "timestamp": "2025-08-27T16:31:53.506000Z", 37 | "content": { 38 | "action": "liked", 39 | "description": "Karsten liked slim’s ★★★★★ review of Y Tu Mamá También", 40 | "movie": { 41 | "title": "review of Y Tu Mamá También", 42 | "slug": "y-tu-mama-tambien", 43 | "url": "https://letterboxd.com/film/y-tu-mama-tambien/" 44 | } 45 | } 46 | }, 47 | "9681535360": { 48 | "activity_type": "basic", 49 | "timestamp": "2025-08-27T12:57:15.051000Z", 50 | "content": { 51 | "action": "liked", 52 | "description": "ppark liked ramenfeedgg’s ★★★½ review of Caught Stealing", 53 | "movie": { 54 | "title": "review of Caught Stealing", 55 | "slug": "caught-stealing", 56 | "url": "https://letterboxd.com/film/caught-stealing/" 57 | } 58 | } 59 | }, 60 | "9680204114": { 61 | "activity_type": "basic", 62 | "timestamp": "2025-08-27T06:11:03.542000Z", 63 | "content": { 64 | "action": "liked", 65 | "description": 
"Karsten watched, liked and rated Blue Sun Palace ★★★★ on Tuesday Aug 26, 2025", 66 | "movie": { 67 | "title": "Blue Sun Palace", 68 | "slug": "blue-sun-palace", 69 | "url": "https://letterboxd.com/film/blue-sun-palace/" 70 | } 71 | } 72 | }, 73 | "9679590532": { 74 | "activity_type": "basic", 75 | "timestamp": "2025-08-27T03:32:01.964000Z", 76 | "content": { 77 | "action": "added", 78 | "description": "Ben Wold added Sentimental Value to his watchlist", 79 | "movie": { 80 | "title": "Sentimental Value", 81 | "slug": "sentimental-value-2025", 82 | "url": "https://letterboxd.com/film/sentimental-value-2025/" 83 | } 84 | } 85 | }, 86 | "9679158181": { 87 | "activity_type": "basic", 88 | "timestamp": "2025-08-27T02:04:46.870000Z", 89 | "content": { 90 | "action": "liked", 91 | "description": "Karsten rewatched, liked and rated Interstellar ★★★★½ on Tuesday Aug 26, 2025", 92 | "movie": { 93 | "title": "Interstellar", 94 | "slug": "interstellar", 95 | "url": "https://letterboxd.com/film/interstellar/" 96 | } 97 | } 98 | }, 99 | "9676274360": { 100 | "activity_type": "basic", 101 | "timestamp": "2025-08-26T18:15:50.098000Z", 102 | "content": { 103 | "action": "liked", 104 | "description": "Karsten rewatched, liked and rated The Exorcist ★★★★½ on Tuesday Aug 26, 2025", 105 | "movie": { 106 | "title": "The Exorcist", 107 | "slug": "1", 108 | "url": "https://letterboxd.com/film/1/" 109 | } 110 | } 111 | }, 112 | "9673227177": { 113 | "activity_type": "basic", 114 | "timestamp": "2025-08-26T05:41:28.456000Z", 115 | "content": { 116 | "action": "liked", 117 | "description": "Karsten liked glanderco’s ★★★★★ review of Inglourious Basterds", 118 | "movie": { 119 | "title": "review of Inglourious Basterds", 120 | "slug": "inglourious-basterds", 121 | "url": "https://letterboxd.com/film/inglourious-basterds/" 122 | } 123 | } 124 | }, 125 | "9673225825": { 126 | "activity_type": "basic", 127 | "timestamp": "2025-08-26T05:41:04.025000Z", 128 | "content": { 129 | "action": "liked", 
130 | "description": "Karsten rewatched, liked and rated Inglourious Basterds ★★★★★ on Monday Aug 25, 2025", 131 | "movie": { 132 | "title": "Inglourious Basterds", 133 | "slug": "inglourious-basterds", 134 | "url": "https://letterboxd.com/film/inglourious-basterds/" 135 | } 136 | } 137 | }, 138 | "9671860687": { 139 | "activity_type": "review", 140 | "timestamp": "2025-08-26T00:50:32.353000Z", 141 | "content": { 142 | "action": "rewatched", 143 | "description": "rewatched F1 ★★★★★★★★", 144 | "movie": { 145 | "title": "F1", 146 | "year": 2025, 147 | "slug": "f1", 148 | "url": "https://letterboxd.com/film/f1/" 149 | }, 150 | "rating": 8, 151 | "review": { 152 | "content": "Major epicness", 153 | "contains_spoilers": false 154 | } 155 | } 156 | }, 157 | "9670151089": { 158 | "activity_type": "review", 159 | "timestamp": "2025-08-25T20:32:44.324000Z", 160 | "content": { 161 | "action": "watched", 162 | "description": "watched It's a Mad, Mad, Mad, Mad World ★★★★★★★", 163 | "movie": { 164 | "title": "It's a Mad, Mad, Mad, Mad World", 165 | "year": 1963, 166 | "slug": "its-a-mad-mad-mad-mad-world", 167 | "url": "https://letterboxd.com/film/its-a-mad-mad-mad-mad-world/" 168 | }, 169 | "rating": 7, 170 | "review": { 171 | "content": "some advice for those who don’t enjoy this: LIGHTEN UP", 172 | "contains_spoilers": false 173 | } 174 | } 175 | }, 176 | "9669307400": { 177 | "activity_type": "basic", 178 | "timestamp": "2025-08-25T18:21:48.130000Z", 179 | "content": { 180 | "action": "liked", 181 | "description": "Karsten liked Carlos Valladares’s ★★★★ review of It's a Mad, Mad, Mad, Mad World", 182 | "movie": { 183 | "title": "review of It's a Mad, Mad, Mad, Mad World", 184 | "slug": "2", 185 | "url": "https://letterboxd.com/film/2/" 186 | } 187 | } 188 | }, 189 | "9669148896": { 190 | "activity_type": "basic", 191 | "timestamp": "2025-08-25T17:54:46.259000Z", 192 | "content": { 193 | "action": "liked", 194 | "description": "Karsten watched, liked and rated The Night 
House ★★★½ on Monday Aug 25, 2025", 195 | "movie": { 196 | "title": "The Night House", 197 | "slug": "the-night-house", 198 | "url": "https://letterboxd.com/film/the-night-house/" 199 | } 200 | } 201 | }, 202 | "9668253197": { 203 | "activity_type": "newlist", 204 | "timestamp": "2025-08-25T15:15:58.484000Z", 205 | "content": { 206 | "log_type": "newlist", 207 | "title": "ryanshubert listed TIFF 3 films", 208 | "film_count": "3 films", 209 | "target_list": { 210 | "name": "TIFF", 211 | "url": "/ryanshubert/list/tiff/" 212 | } 213 | } 214 | }, 215 | "9666286088": { 216 | "activity_type": "basic", 217 | "timestamp": "2025-08-25T06:49:30.257000Z", 218 | "content": { 219 | "action": "watched", 220 | "description": "Karsten watched and liked Elephant on Sunday Aug 24, 2025", 221 | "movie": { 222 | "title": "Elephant", 223 | "slug": "elephant", 224 | "url": "https://letterboxd.com/film/elephant/" 225 | } 226 | } 227 | }, 228 | "9664423024": { 229 | "activity_type": "basic", 230 | "timestamp": "2025-08-25T01:00:41.773000Z", 231 | "content": { 232 | "action": "rewatched", 233 | "description": "ryanshubert rewatched and rated Corpse Bride ★★★★ on Sunday Aug 24, 2025" 234 | } 235 | }, 236 | "9663985114": { 237 | "activity_type": "review", 238 | "timestamp": "2025-08-24T23:55:13.715000Z", 239 | "content": { 240 | "action": "watched", 241 | "description": "watched Caught Stealing ★★★★★★★", 242 | "movie": { 243 | "title": "Caught Stealing", 244 | "year": 2025, 245 | "slug": "caught-stealing", 246 | "url": "https://letterboxd.com/film/caught-stealing/" 247 | }, 248 | "rating": 7, 249 | "review": { 250 | "content": "Pulp Fiction vibes, Austin Butler has the most valid crash out of 2025.", 251 | "contains_spoilers": false 252 | } 253 | } 254 | }, 255 | "9663971772": { 256 | "activity_type": "basic", 257 | "timestamp": "2025-08-24T23:53:19.359000Z", 258 | "content": { 259 | "action": "liked", 260 | "description": "ramenfeedgg liked ppark’s ★★½ review of Caught Stealing", 261 | 
"movie": { 262 | "title": "review of Caught Stealing", 263 | "slug": "caught-stealing", 264 | "url": "https://letterboxd.com/film/caught-stealing/" 265 | } 266 | } 267 | }, 268 | "9661823853": { 269 | "activity_type": "basic", 270 | "timestamp": "2025-08-24T19:35:54.779000Z", 271 | "content": { 272 | "action": "watched", 273 | "description": "ryanshubert watched and rated Honey Don't! ★★½ on Sunday Aug 24, 2025", 274 | "movie": { 275 | "title": "Honey Don't!", 276 | "slug": "honey-dont", 277 | "url": "https://letterboxd.com/film/honey-dont/" 278 | } 279 | } 280 | } 281 | } 282 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # letterboxdpy 2 | 3 | [![PyPI version](https://badge.fury.io/py/letterboxdpy.svg)](https://badge.fury.io/py/letterboxdpy) 4 | [![Downloads](https://static.pepy.tech/personalized-badge/letterboxdpy?period=total&units=none&left_color=grey&right_color=blue&left_text=Downloads)](https://pepy.tech/project/letterboxdpy) 5 | ![format](https://img.shields.io/pypi/format/letterboxdpy) 6 | 7 | ## Installation 8 | 9 | ### From PyPI 10 | 11 | You can easily install the stable version of `letterboxdpy` from PyPI using pip: 12 | 13 | ```bash 14 | pip install letterboxdpy 15 | ``` 16 | 17 | ### From GitHub Repository 18 | 19 | Alternatively, if you wish to access the latest (potentially unstable) version directly from the GitHub repository, you can execute the following command: 20 | 21 | ```bash 22 | pip install git+https://github.com/nmcassa/letterboxdpy.git 23 | ``` 24 | 25 | > [!WARNING] 26 | > Please be aware that installing directly from the GitHub repository might give you access to the most recent features and bug fixes, but it could also include changes that haven't been thoroughly tested and may not be stable for production use. 27 | 28 |

User Object

29 | 30 | [Explore the file](letterboxdpy/user.py) | [Functions Documentation](/docs/user/funcs/) 31 | 32 | ```python 33 | from letterboxdpy.user import User 34 | user_instance = User("nmcassa") 35 | print(user_instance) 36 | ``` 37 | 38 |
39 | Click to expand User object response 40 | 41 | ```json 42 | { 43 | "username": "nmcassa", 44 | "url": "https://letterboxd.com/nmcassa", 45 | "id": 1500306, 46 | "is_hq": false, 47 | "display_name": "nmcassa", 48 | "bio": null, 49 | "location": null, 50 | "website": null, 51 | "watchlist_length": 74, 52 | "stats": { 53 | "films": 594, 54 | "this_year": 74, 55 | "lists": 2, 56 | "following": 7, 57 | "followers": 7 58 | }, 59 | "favorites": { 60 | "95113": { 61 | "slug": "the-grand-budapest-hotel", 62 | "name": "The Grand Budapest Hotel" 63 | },... 64 | }, 65 | "avatar": { 66 | "exists": true, 67 | "upscaled": true, 68 | "url": "https://a.ltrbxd.com/resized/avatar/upload/1/5/0/0/3/0/6/shard/avtr-0-1000-0-1000-crop.jpg" 69 | }, 70 | "recent": { 71 | "watchlist": { 72 | "1042841": { 73 | "id": "1042841", 74 | "slug": "the-contestant-2023", 75 | "name": "The Contestant" 76 | },... 77 | }, 78 | "diary": { 79 | "months": { 80 | "9": { 81 | "22": [ 82 | { 83 | "name": "The Substance", 84 | "slug": "the-substance" 85 | }, 86 | { 87 | "name": "Whiplash", 88 | "slug": "1" 89 | } 90 | ], 91 | "13": [ 92 | { 93 | "name": "Speak No Evil", 94 | "slug": "speak-no-evil-2024" 95 | } 96 | ],... 97 | }, 98 | "8": { 99 | "30": [ 100 | { 101 | "name": "Shaun of the Dead", 102 | "slug": "shaun-of-the-dead" 103 | } 104 | ],... 105 | } 106 | } 107 | } 108 | } 109 | } 110 | ``` 111 |
112 | 113 |

## Movie Object

114 | 115 | [Explore the file](letterboxdpy/movie.py) | [Functions Documentation](/docs/movie/funcs/) 116 | 117 | ```python 118 | from letterboxdpy.movie import Movie 119 | movie_instance = Movie("v-for-vendetta") 120 | print(movie_instance) 121 | ``` 122 | 123 |
124 | Click to expand Movie object response 125 | 126 | ```json 127 | { 128 | "scraper": {...}, 129 | "url": "https://letterboxd.com/film/v-for-vendetta", 130 | "slug": "v-for-vendetta", 131 | "letterboxd_id": 51400, 132 | "title": "V for Vendetta", 133 | "original_title": null, 134 | "runtime": 132, 135 | "rating": 3.84, 136 | "year": 2005, 137 | "tmdb_link": "https://www.themoviedb.org/movie/752/", 138 | "imdb_link": "http://www.imdb.com/title/tt0434409/maindetails", 139 | "poster": "https://a.ltrbxd.com/resized/film-poster/5/1/4/0/0/51400-v-for-vendetta-0-230-0-345-crop.jpg", 140 | "banner": "https://a.ltrbxd.com/resized/sm/upload/mx/jg/tz/ni/v-for-vendetta-1920-1920-1080-1080-crop-000000.jpg", 141 | "tagline": "People should not be afraid of their governments. Governments should be afraid of their people.", 142 | "description": "In a world in which Great Britain has become a fascist state, a masked vigilante known only as \u201cV\u201d conducts guerrilla warfare against the oppressive British government. When V rescues a young woman from the secret police, he finds in her an ally with whom he can continue his fight to free the people of Britain.", 143 | "trailer": { 144 | "id": "V5VGq23aZ-g", 145 | "link": "https://www.youtube.com/watch?v=V5VGq23aZ-g", 146 | "embed_url": "https://www.youtube.com/embed/V5VGq23aZ-g" 147 | }, 148 | "alternative_titles": [ 149 | "Vendetta \u00fc\u00e7\u00fcn V", 150 | "O za osvetu",... 151 | ], 152 | "details": [ 153 | { 154 | "type": "studio", 155 | "name": "Virtual Studios", 156 | "slug": "virtual-studios", 157 | "url": "https://letterboxd.com/studio/virtual-studios/" 158 | },... 159 | ], 160 | "genres": [ 161 | { 162 | "type": "genre", 163 | "name": "Thriller", 164 | "slug": "thriller", 165 | "url": "https://letterboxd.com/films/genre/thriller/" 166 | },... 
167 | ], 168 | "cast": [ 169 | { 170 | "name": "Natalie Portman", 171 | "role_name": "Evey Hammond", 172 | "slug": "natalie-portman", 173 | "url": "https://letterboxd.com/actor/natalie-portman/" 174 | },... 175 | ], 176 | "crew": { 177 | "director": [ 178 | { 179 | "name": "James McTeigue", 180 | "slug": "james-mcteigue", 181 | "url": "https://letterboxd.com/director/james-mcteigue/" 182 | } 183 | ], 184 | "producer": [ 185 | { 186 | "name": "Grant Hill", 187 | "slug": "grant-hill", 188 | "url": "https://letterboxd.com/producer/grant-hill/" 189 | },... 190 | ],... 191 | }, 192 | "popular_reviews": [ 193 | { 194 | "reviewer": "zoey luke", 195 | "rating": " \u2605\u2605\u2605\u2605\u00bd ", 196 | "review": "I love natalie Portman and I hate the government" 197 | },... 198 | ] 199 | } 201 | ``` 202 |
203 | 204 |

## Search Object

205 | 206 | [Explore the file](letterboxdpy/search.py) | [Functions Documentation](/docs/search/funcs/) 207 | 208 | ```python 209 | from letterboxdpy.search import Search 210 | search_instance = Search("V for Vendetta", 'films') 211 | print(search_instance.get_results(max=5)) 212 | ``` 213 | 214 |
215 | Click to expand Search object response 216 | 217 | ```json 218 | { 219 | "available": true, 220 | "query": "V%20for%20Vendetta", 221 | "filter": "films", 222 | "end_page": 13, 223 | "count": 5, 224 | "results": [ 225 | { 226 | "no": 1, 227 | "page": 1, 228 | "type": "film", 229 | "slug": "v-for-vendetta", 230 | "name": "V for Vendetta", 231 | "year": 2005, 232 | "url": "https://letterboxd.com/film/v-for-vendetta/", 233 | "poster": null, 234 | "directors": [ 235 | { 236 | "name": "James McTeigue", 237 | "slug": "james-mcteigue", 238 | "url": "https://letterboxd.com/director/james-mcteigue/" 239 | } 240 | ] 241 | }, 242 | { 243 | "no": 2, 244 | "page": 1, 245 | "type": "film", 246 | "slug": "lady-vengeance", 247 | "name": "Lady Vengeance", 248 | "year": 2005, 249 | "url": "https://letterboxd.com/film/lady-vengeance/", 250 | "poster": null, 251 | "directors": [ 252 | { 253 | "name": "Park Chan-wook", 254 | "slug": "park-chan-wook", 255 | "url": "https://letterboxd.com/director/park-chan-wook/" 256 | } 257 | ] 258 | },... 259 | ] 260 | } 261 | ``` 262 |
263 | 264 |

## List Object

265 | 266 | [Explore the file](letterboxdpy/list.py) 267 | 268 | ```python 269 | from letterboxdpy.list import List 270 | list_instance = List("hepburnluv", "classic-movies-for-beginners") 271 | print(list_instance) 272 | ``` 273 | 274 |
275 | Click to expand List object response 276 | 277 | ```json 278 | { 279 | "scraper": {...}, 280 | "url": "https://letterboxd.com/hepburnluv/list/classic-movies-for-beginners", 281 | "slug": "classic-movies-for-beginners", 282 | "username": "hepburnluv", 283 | "list_type": "list", 284 | "items_per_page": 60, 285 | "title": "classic movies for beginners.", 286 | "description": "old hollywood classic movies for you who wanna start watching. \u02d6\u207a\u2027\u208a\u02da \u2661 \u02da\u208a\u2027\u207a\u02d6. \u0741\u208a \u22b9 . \u0741(from easiest to hardest to watch) (these are my personal recommendations only) thank you guys for all the comments and likes <3", 287 | "movies": [ 288 | [ 289 | "The Wizard of Oz", 290 | "the-wizard-of-oz-1939" 291 | ], 292 | [ 293 | "Roman Holiday", 294 | "roman-holiday" 295 | ],... 296 | ], 297 | "count": 66 298 | } 299 | ``` 300 |
301 | 302 |

## Films Object

303 | 304 | [Explore the file](letterboxdpy/films.py) | [Functions Documentation](/docs/films/funcs/) 305 | 306 | ```python 307 | from letterboxdpy.films import Films 308 | ``` 309 | 310 |

## Members Object

311 | 312 | [Explore the file](letterboxdpy/members.py) | [Functions Documentation](/docs/members/funcs/) 313 | 314 | ```python 315 | from letterboxdpy.members import Members 316 | ``` 317 | 318 |

## Examples

319 | 320 | Example scripts demonstrating various features are available in the [`examples/`](examples/) directory. 321 | 322 | See [`examples/README.md`](examples/README.md) for detailed usage instructions. 323 | 324 |

## Testing

325 | 326 | You can run the library's test suite with Python's built-in `unittest` module from the command line: 327 | 328 | ```zsh 329 | python -m unittest 330 | ``` 331 | 332 | **Example** 333 | ```zsh 334 | python -m unittest tests/test_movie.py 335 | ``` 336 | 337 | ## Stargazers over time 338 | [![Stargazers over time](https://starchart.cc/nmcassa/letterboxdpy.svg?background=%2300000000&axis=%23848D97&line=%23238636)](https://starchart.cc/nmcassa/letterboxdpy) --------------------------------------------------------------------------------