├── MANIFEST.SKIP ├── t ├── 01_base.t ├── m5_get_movie_by_title_without_rate.t ├── m4_get_movie_by_title_single_match.t ├── m3_get_movie_by_wrong_title.t ├── m7_get_movie_by_info_offline.t ├── p3_get_person_by_wrong_name.t ├── m6_get_movie_by_title_with_cr_casts.t ├── p2_get_person_by_code.t ├── p4_get_person_info_offline.t ├── p1_get_person_by_name.t ├── m2_get_movie_by_title.t ├── m1_get_movie_by_code.t └── tv1_get_tv_series_by_title.t ├── Todo ├── MANIFEST ├── META.yml ├── README ├── Makefile.PL ├── lib └── IMDB │ ├── Persons.pm │ ├── BaseClass.pm │ └── Film.pm ├── ChangeLog └── LICENSE /MANIFEST.SKIP: -------------------------------------------------------------------------------- 1 | .arch-ids 2 | {arch} 3 | blib/ 4 | .svn 5 | patches 6 | -------------------------------------------------------------------------------- /t/01_base.t: -------------------------------------------------------------------------------- 1 | # 2 | # Base test for IMDB::Film 3 | # 4 | 5 | use strict; 6 | use warnings; 7 | use Test::More tests => 2; 8 | 9 | use_ok('IMDB::Film'); 10 | use_ok('IMDB::Persons'); 11 | -------------------------------------------------------------------------------- /t/m5_get_movie_by_title_without_rate.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | 3 | use Test::More tests => 2; 4 | 5 | use IMDB::Film; 6 | 7 | my $crit = 'jonny zer'; 8 | my %pars = (cache => 0, debug => 0, crit => $crit); 9 | 10 | my $obj = new IMDB::Film(%pars); 11 | is($obj->code, '0412158', 'Movies IMDB Code'); 12 | is($obj->rating, $obj->rating, 'Movie Rating'); 13 | -------------------------------------------------------------------------------- /t/m4_get_movie_by_title_single_match.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | 3 | use Test::More tests => 2; 4 | use IMDB::Film; 5 | 6 | 7 | my $crit = 'Con Air'; 8 | my %pars = (cache => 0, debug => 0, crit => $crit); 9 | 10 | my $obj = new IMDB::Film(%pars); 11 | 12 | is($obj->code, '0118880', 'search code'); 13 | 14 | is(scalar(@{$obj->matched}), 0, 'Matched results'); 15 | -------------------------------------------------------------------------------- /t/m3_get_movie_by_wrong_title.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | 3 | use Test::More tests => 3; 4 | use IMDB::Film; 5 | 6 | my $crit = 'hhhhhhhhhhh'; 7 | my %pars = (cache => 0, debug => 0, crit => $crit); 8 | 9 | my $obj = new IMDB::Film(%pars); 10 | 11 | is($obj->error, 'Not Found', 'error'); 12 | is($obj->status, 0, 'status'); 13 | is($obj->code, undef, 'code'); 14 | 15 | -------------------------------------------------------------------------------- /t/m7_get_movie_by_info_offline.t: -------------------------------------------------------------------------------- 1 | use Test::More tests => 4; 2 | 3 | use IMDB::Film; 4 | 5 | my $obj = new IMDB::Film(crit => 't/test.html', debug => 0, cache => 0); 6 | 7 | is($obj->status, 1, 'Object status'); 8 | is($obj->code, '0332452', 'Movie IMDB Code'); 9 | is($obj->title, 'Troy', 'Movie Title'); 10 | is($obj->cast->[0]{name}, 'Julian Glover', 'Movie Person'); 11 | -------------------------------------------------------------------------------- /t/p3_get_person_by_wrong_name.t: -------------------------------------------------------------------------------- 1 | use Test::More tests => 4; 2 | 3 | use IMDB::Persons; 4 | 5 | my %pars = (crit => 'hhhhhhhhhhhhh', cache => 0, debug => 0); 6 | 7 | my $person = new IMDB::Persons(%pars); 8 | 9 | is($person->error, 'Not Found', 'error'); 10 | is($person->status, 0, 'status'); 11 | is($person->code, undef, 'code'); 12 | is($person->name, '', 'name'); 13 | -------------------------------------------------------------------------------- /Todo: -------------------------------------------------------------------------------- 1 | TODO list for Perl module IMDB::Film 2 | 3 | - Add possibility to indicate is it TV series or not (http://www.imdb.com/title/tt0367279/). 4 | - Add retrieving TV series episodes (http://www.imdb.com/title/tt0367279/episodes#season-1). 5 | - Add functionality to get additional information such as Full Cast and Crew, 6 | sound track etc. 7 | - Move all methods with needed additional connect to IMDB.com into separte module. 8 | 9 | -------------------------------------------------------------------------------- /t/m6_get_movie_by_title_with_cr_casts.t: -------------------------------------------------------------------------------- 1 | # 2 | # Test retrieving list of cast in case of Credited cast. 3 | # 4 | 5 | use strict; 6 | 7 | use Test::More tests => 2; 8 | use IMDB::Film; 9 | 10 | my $crit = '0326272'; 11 | my %pars = (cache => 0, debug => 0, crit => $crit); 12 | 13 | my $obj = new IMDB::Film(%pars); 14 | is($obj->title, 'Three Sopranos', 'Movie title'); 15 | my $cast = $obj->cast; 16 | is_deeply($cast->[0], {id => '1202207', name => 'Kathleen Cassello', role => 'Herself'}, 'cast'); 17 | 18 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | ChangeLog 2 | lib/IMDB/BaseClass.pm 3 | lib/IMDB/Film.pm 4 | lib/IMDB/Persons.pm 5 | LICENSE 6 | Makefile.PL 7 | MANIFEST This list of files 8 | MANIFEST.SKIP 9 | META.yml 10 | README 11 | t/01_base.t 12 | t/m1_get_movie_by_code.t 13 | t/m2_get_movie_by_title.t 14 | t/m3_get_movie_by_wrong_title.t 15 | t/m4_get_movie_by_title_single_match.t 16 | t/m5_get_movie_by_title_without_rate.t 17 | t/m6_get_movie_by_title_with_cr_casts.t 18 | t/m7_get_movie_by_info_offline.t 19 | t/p1_get_person_by_name.t 20 | t/p2_get_person_by_code.t 21 | t/p3_get_person_by_wrong_name.t 22 | t/p4_get_person_info_offline.t 23 | t/test.html 24 | t/test_p.html 25 | Todo 26 | -------------------------------------------------------------------------------- /META.yml: -------------------------------------------------------------------------------- 1 | --- #YAML:1.0 2 | name: IMDB-Film 3 | version: 0.51 4 | abstract: OO Perl interface to the movies database IMDB. 5 | author: 6 | - Michael Stepanov 7 | license: unknown 8 | distribution_type: module 9 | configure_requires: 10 | ExtUtils::MakeMaker: 0 11 | build_requires: 12 | ExtUtils::MakeMaker: 0 13 | requires: 14 | Cache::FileCache: 0 15 | Carp: 0 16 | Digest::SHA1: 0 17 | Error: 0 18 | HTML::Entities: 0 19 | HTML::TokeParser: 2.28 20 | LWP::Simple: 1.41 21 | Pod::Checker: 0 22 | Text::Unidecode: 0 23 | no_index: 24 | directory: 25 | - t 26 | - inc 27 | generated_by: ExtUtils::MakeMaker version 6.56 28 | meta-spec: 29 | url: http://module-build.sourceforge.net/META-spec-v1.4.html 30 | version: 1.4 31 | -------------------------------------------------------------------------------- /t/p2_get_person_by_code.t: -------------------------------------------------------------------------------- 1 | use Test::More tests => 6; 2 | 3 | use IMDB::Persons; 4 | 5 | my %person_info = ( 6 | code => '0000129', 7 | id => '0000129', 8 | name => qq{Tom Cruise}, 9 | mini_bio => qq{If you had told 14 year old Franciscan seminary student Thomas Cruise Mapother IV that one day in the not too distant future he would be considered one of the top 100 movie stars of all time, he would have probably grinned and told you that his ambition was to become a priest. Nonetheless, this sensitive...}, 10 | date_of_birth => qq{3 July 1962}, 11 | place_of_birth => qq{Syracuse, New York, USA}, 12 | ); 13 | 14 | my %pars = (crit => $person_info{code}, cache => 0, debug => 0); 15 | my $p = new IMDB::Persons(%pars); 16 | 17 | is($p->code, $person_info{code}, 'code'); 18 | is($p->name, $person_info{name}, 'name'); 19 | is($p->date_of_birth, $person_info{date_of_birth}, 'date_of_birth'); 20 | is($p->place_of_birth, $person_info{place_of_birth}, 'place_of_birth'); 21 | is($p->mini_bio, $person_info{mini_bio}, 'mini_bio'); 22 | like($p->photo, qr#\.jpg#i, 'photo'); 23 | -------------------------------------------------------------------------------- /t/p4_get_person_info_offline.t: -------------------------------------------------------------------------------- 1 | use Test::More tests => 5; 2 | 3 | use IMDB::Persons; 4 | 5 | my %person_info = ( 6 | code => '0000129', 7 | id => '0000129', 8 | name => qq{Tom Cruise}, 9 | mini_bio => qq{If you had told 14 year old Franciscan seminary student Thomas Cruise Mapother IV that one day in the not too distant future he would be considered one of the top 100 movie stars of all time, he would have probably grinned and told you that his ambition was to become a priest. Nonetheless, this sensitive...}, 10 | date_of_birth => qq{3 July 1962}, 11 | place_of_birth => qq{Syracuse, New York, USA}, 12 | photo => '38m.jpg', 13 | ); 14 | 15 | my %pars = (crit => 't/test_p.html', cache => 0, debug => 0); 16 | my $p = new IMDB::Persons(%pars); 17 | 18 | # FIXME 19 | #is($p->code, $person_info{code}, 'code'); 20 | is($p->name, $person_info{name}, 'name'); 21 | is($p->date_of_birth, $person_info{date_of_birth}, 'date_of_birth'); 22 | is($p->place_of_birth, $person_info{place_of_birth}, 'place_of_birth'); 23 | is($p->mini_bio, $person_info{mini_bio}, 'mini_bio'); 24 | like($p->photo, qr#\.jpg#i, 'photo'); 25 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | IMDB-Film 2 | ====================== 3 | 4 | IMDB::Film is OO Perl interface to the database of films 5 | IMDB (www.imdb.com). It allows to retrieve information 6 | about movies by its IMDB code or title. Also, there is a 7 | possibility to get information about IMDB persons (actors, 8 | actresses, directors etc) by their name of code. 9 | 10 | For more details please read POD of that module. 11 | 12 | INSTALLATION 13 | 14 | To install this module type the following: 15 | 16 | perl Makefile.PL 17 | make 18 | make test 19 | make install 20 | 21 | To install this module in the specific path use: 22 | 23 | perl Makefile.PL PREFIX=/path/to/your/lib 24 | 25 | To install the module in your home directory use that command: 26 | 27 | perl Makefile.PL PREFIX=YOU_HOME/path/to/your/lib 28 | 29 | Note: you don't need to have a root privileges to do that! 30 | 31 | Also, you can use CPAN shell: 32 | 33 | > perl -MCPAN -e shell 34 | cpan> install IMDB::Film 35 | 36 | or use command 'cpan': 37 | 38 | > cpan install IMDB::Film 39 | 40 | DEPENDENCIES 41 | 42 | This module requires these other modules and libraries: 43 | 44 | HTML::TokeParser 45 | LWP::Simple 46 | Cache::Cache 47 | 48 | AUTHOR 49 | 50 | Michael Stepanov (stepanov.michael@gmail.com) 51 | 52 | COPYRIGHT AND LICENCE 53 | 54 | Copyright (C) 2004 - 2010 by Michael Stepanov. 55 | 56 | This library is free software; you can redistribute it and/or modify 57 | it under the same terms as Perl itself. 58 | -------------------------------------------------------------------------------- /Makefile.PL: -------------------------------------------------------------------------------- 1 | use ExtUtils::MakeMaker; 2 | 3 | use strict; 4 | 5 | my $online_tests = ExtUtils::MakeMaker::prompt('Do you want to perform online tests during "make test" phase?', 'no') =~ /^\s*(y)/i; 6 | my $tests = join ' ', glob ($online_tests ? 't/0*.t t/m*.t' : 't/0*.t'); 7 | 8 | if($online_tests) { 9 | my $person_test = ExtUtils::MakeMaker::prompt('Do you want to perform online tests for IMDB persons during "make test" phase?', 'no') =~ /^\s*(y)/i; 10 | $tests .= ' ' . join ' ', glob 't/p*.t' if $person_test; 11 | 12 | my $extra_test = ExtUtils::MakeMaker::prompt('Do you want to perform extra online tests during "make test" phase?', 'no') =~ /^\s*(y)/i; 13 | $tests .= ' ' . join ' ', glob 't/n*.t' if $extra_test; 14 | 15 | my $pod_test = ExtUtils::MakeMaker::prompt('Do you want to perform POD checking during "make test" phase?', 'no') =~ /^\s*(y)/i; 16 | $tests .= ' ' . join ' ', glob 't/y*.t' if $pod_test; 17 | } 18 | 19 | WriteMakefile( 20 | NAME => 'IMDB::Film', 21 | VERSION_FROM => 'lib/IMDB/BaseClass.pm', 22 | PREREQ_PM => { 23 | 'HTML::TokeParser' => 2.28, 24 | 'LWP::Simple' => 1.41, 25 | 'Cache::FileCache' => 0, 26 | 'Carp' => 0, 27 | 'Error' => 0, 28 | 'Digest::SHA1' => 0, 29 | 'Pod::Checker' => 0, 30 | 'HTML::Entities' => 0, 31 | 'Text::Unidecode' => 0, 32 | }, 33 | 34 | test => {TESTS => $tests}, 35 | ($] >= 5.005 ? ## Add these new keywords supported since 5.005 36 | (ABSTRACT_FROM => 'lib/IMDB/Film.pm', 37 | AUTHOR => 'Michael Stepanov ') : ()), 38 | ); 39 | -------------------------------------------------------------------------------- /t/p1_get_person_by_name.t: -------------------------------------------------------------------------------- 1 | use Test::More tests => 6; 2 | 3 | use IMDB::Persons; 4 | use Data::Dumper; 5 | 6 | my %person_info = ( 7 | code => '0000129', 8 | id => '0000129', 9 | name => qq{Tom Cruise}, 10 | mini_bio => qq{If you had told 14 year old Franciscan seminary student Thomas Cruise Mapother IV that one day in the not too distant future he would be considered one of the top 100 movie stars of all time, he would have probably grinned and told you that his ambition was to become a priest. Nonetheless, this sensitive...}, 11 | date_of_birth => qq{3 July 1962}, 12 | place_of_birth => qq{Syracuse, New York, USA}, 13 | photo => '/images/M/MV5BMTI4MzUyMTI1N15BMl5BanBnXkFtZTcwOTg3NTYyMQ@@._V1._SX100_SY140_.jpg', 14 | film => { 15 | 'title' => 'Mission: Impossible III', 16 | 'role' => 'Ethan Hunt', 17 | 'year' => '2006', 18 | 'code' => '0317919' 19 | }, 20 | genres => ['Documentary', 'News', 'Talk-Show', 'Comedy'], 21 | plot_keywords => ['Number In Title', 'TV Special', 'Awards Show', 'Non Fiction'], 22 | ); 23 | 24 | my %pars = (crit => $person_info{name}, cache => 0, debug => 0); 25 | my $p = new IMDB::Persons(%pars); 26 | 27 | is($p->code, $person_info{code}, 'code'); 28 | is($p->name, $person_info{name}, 'name'); 29 | is($p->date_of_birth, $person_info{date_of_birth}, 'date_of_birth'); 30 | is($p->place_of_birth, $person_info{place_of_birth}, 'place_of_birth'); 31 | is($p->mini_bio, $person_info{mini_bio}, 'mini_bio'); 32 | like($p->photo, qr#\.jpg#i, 'photo'); 33 | 34 | # FIXME: Temporary disabled 35 | #my $list = $p->filmography(); 36 | #my $f = 0; 37 | #for my $movie(@{$list->{'Actor'}}) { 38 | # if($movie->{title} eq $person_info{film}->{title}) { 39 | # is($movie->{code}, $person_info{film}->{code}, 'movie code'); 40 | # is($movie->{year}, $person_info{film}->{year}, 'movie code'); 41 | # is($movie->{role}, $person_info{film}->{role}, 'movie code'); 42 | # $f = 1; 43 | # last; 44 | # } 45 | #} 46 | 47 | #is($f, 1, 'filmography'); 48 | -------------------------------------------------------------------------------- /t/m2_get_movie_by_title.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test::More tests => 19; 5 | 6 | use IMDB::Film; 7 | 8 | my $crit = 'Troy'; 9 | my %films = ( 10 | code => '0332452', 11 | id => '0332452', 12 | title => 'Troy', 13 | year => '2004', 14 | genres => [qw(Action Drama War Adventure Romance)], 15 | country => [qw(Malta UK USA)], 16 | language => [qw(English)], 17 | company => 'Warner Bros. Pictures', 18 | plot => qq{An adaptation of Homer's great epic, the film follows the assault on Troy by the united Greek forces and chronicles the fates of the men involved.}, 19 | storyline => qq{It is the year 1250 B.C. during the late Bronze age. Two emerging nations begin to clash after Paris, the Trojan prince, convinces Helen, Queen of Sparta, to leave her husband Menelaus, and sail with him back to Troy. After Menelaus finds out that his wife was taken by the Trojans, he asks his brother Agamemnom to help him get her back. Agamemnon sees this as an opportunity for power. So they set off with 1,000 ships holding 50,000 Greeks to Troy. With the help of Achilles, the Greeks are able to fight the never before defeated Trojans. But they come to a stop by Hector, Prince of Troy. The whole movie shows their battle struggles, and the foreshadowing of fate in this remake by Wolfgang Petersen of Homer's "The Iliad.}, 20 | full_plot => qq{It is the year 1250 B.C. during the late Bronze age. Two emerging nations begin to clash after Paris, the Trojan prince, convinces Helen, Queen of Sparta, to leave her husband Menelaus, and sail with him back to Troy. After Menelaus finds out that his wife was taken by the Trojans, he asks his brother Agamemnom to help him get her back. Agamemnon sees this as an opportunity for power. So they set off with 1,000 ships holding 50,000 Greeks to Troy. With the help of Achilles, the Greeks are able to fight the never before defeated Trojans. But they come to a stop by Hector, Prince of Troy. The whole movie shows their battle struggles, and the foreshadowing of fate in this remake by Wolfgang Petersen of Homer's "The Iliad."}, 21 | cover => qq{MV5BMTU1MjM4NTA5Nl5BMl5BanBnXkFtZTcwOTE3NzA1MQ@@._V1._SX100_SY114_.jpg}, 22 | cast => [{ id => '0002103', name => 'Julian Glover', role => 'Triopas'}, 23 | { id => '0004051', name => 'Brian Cox', role => 'Agamemnon'}, 24 | { id => '0428923', name => 'Nathan Jones', role => 'Boagrius'}, 25 | { id => '0549538', name => 'Adoni Maropis', role => 'Agamemnon\'s Officer'}, 26 | { id => '0808559', name => 'Jacob Smith', role => 'Messenger Boy'}, 27 | { id => '0000093', name => 'Brad Pitt', role => 'Achilles'}, 28 | { id => '0795344', name => 'John Shrapnel', role => 'Nestor'}, 29 | { id => '0322407', name => 'Brendan Gleeson', role => 'Menelaus'}, 30 | { id => '1208167', name => 'Diane Kruger', role => 'Helen'}, 31 | { id => '0051509', name => 'Eric Bana', role => 'Hector'}, 32 | { id => '0089217', name => 'Orlando Bloom', role => 'Paris'}, 33 | { id => '1595495', name => 'Siri Svegler', role => 'Polydora'}, 34 | { id => '1595480', name => 'Lucie Barat', role => 'Helen\'s Handmaiden'}, 35 | { id => '0094297', name => 'Ken Bones', role => 'Hippasus'}, 36 | { id => '0146439', name => 'Manuel Cauchi', role => 'Old Spartan Fisherman'}, 37 | ], 38 | 39 | directors => [{id => '0000583', name => 'Wolfgang Petersen'}], 40 | writers => [{id => '0392955', name => 'Homer'}, 41 | {id => '1125275', name => 'David Benioff'}], 42 | duration => '163 min', 43 | aspect_ratio => '2.35 : 1', 44 | rating => '6.9', 45 | votes => '98918', 46 | ); 47 | 48 | my %pars = (cache => 0, debug => 0, crit => $crit); 49 | 50 | my $obj = new IMDB::Film(%pars); 51 | isa_ok($obj, 'IMDB::Film'); 52 | 53 | my @countries = sort(@{$obj->country}); 54 | 55 | is($obj->code, $films{code}, 'Movie IMDB Code'); 56 | is($obj->id, $films{id}, 'Movie IMDB ID'); 57 | is($obj->title, $films{title}, 'Movie Title'); 58 | is($obj->year, $films{year}, 'Movie Production Year'); 59 | like($obj->plot, qr/$films{plot}/, 'Movie Plot'); 60 | like($obj->storyline, qr/$films{storyline}/, 'Movie Plot'); 61 | like($obj->cover, '/\.jpg/i', 'Movie Cover'); 62 | is_deeply($obj->cast, $films{cast}, 'Movie Cast'); 63 | is($obj->language->[0], $films{language}[0], 'Movie Language'); 64 | is($countries[0], $films{country}[0], 'Movie Country'); 65 | is($obj->genres->[0], $films{genres}[0], 'Movie Genre'); 66 | like($obj->full_plot, qr/$films{full_plot}/, 'Movie full plot'); 67 | is($obj->duration, $films{duration}, 'Movie Duration'); 68 | is($obj->aspect_ratio, $films{aspect_ratio}, 'Movie Aspect Ratio'); 69 | is($obj->company, $films{company}, 'Company'); 70 | 71 | my($rating, $votes) = $obj->rating(); 72 | cmp_ok($rating, '>=', $films{rating}, 'Rating'); 73 | cmp_ok($votes, '>=', $films{votes}, 'Votes'); 74 | cmp_ok($obj->rating(), '>=', $films{rating}, 'Rating'); 75 | -------------------------------------------------------------------------------- /t/m1_get_movie_by_code.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use Test::More tests => 21; 5 | 6 | use IMDB::Film; 7 | 8 | my $crit = '0332452'; 9 | my %films = ( 10 | code => '0332452', 11 | id => '0332452', 12 | title => 'Troy', 13 | year => '2004', 14 | genres => [qw(Action Drama War Adventure Romance)], 15 | country => [qw(Malta UK USA)], 16 | language => [qw(English)], 17 | company => 'Warner Bros. Pictures', 18 | duration => '163 min', 19 | plot => qq{An adaptation of Homer's great epic, the film follows the assault on Troy by the united Greek forces and chronicles the fates of the men involved.}, 20 | storyline => qq{It is the year 1250 B.C. during the late Bronze age. Two emerging nations begin to clash after Paris, the Trojan prince, convinces Helen, Queen of Sparta, to leave her husband Menelaus, and sail with him back to Troy. After Menelaus finds out that his wife was taken by the Trojans, he asks his brother Agamemnom to help him get her back. Agamemnon sees this as an opportunity for power. So they set off with 1,000 ships holding 50,000 Greeks to Troy. With the help of Achilles, the Greeks are able to fight the never before defeated Trojans. But they come to a stop by Hector, Prince of Troy. The whole movie shows their battle struggles, and the foreshadowing of fate in this remake by Wolfgang Petersen of Homer's "The Iliad.}, 21 | full_plot => qq{It is the year 1250 B.C. during the late Bronze age. Two emerging nations begin to clash after Paris, the Trojan prince, convinces Helen, Queen of Sparta, to leave her husband Menelaus, and sail with him back to Troy. After Menelaus finds out that his wife was taken by the Trojans, he asks his brother Agamemnom to help him get her back. Agamemnon sees this as an opportunity for power. So they set off with 1,000 ships holding 50,000 Greeks to Troy. With the help of Achilles, the Greeks are able to fight the never before defeated Trojans. But they come to a stop by Hector, Prince of Troy. The whole movie shows their battle struggles, and the foreshadowing of fate in this remake by Wolfgang Petersen of Homer's "The Iliad."}, 22 | cover => qq{MV5BMTU1MjM4NTA5Nl5BMl5BanBnXkFtZTcwOTE3NzA1MQ@@._V1._SX100_SY114_.jpg}, 23 | cast => [{ id => '0002103', name => 'Julian Glover', role => 'Triopas'}, 24 | { id => '0004051', name => 'Brian Cox', role => 'Agamemnon'}, 25 | { id => '0428923', name => 'Nathan Jones', role => 'Boagrius'}, 26 | { id => '0549538', name => 'Adoni Maropis', role => 'Agamemnon\'s Officer'}, 27 | { id => '0808559', name => 'Jacob Smith', role => 'Messenger Boy'}, 28 | { id => '0000093', name => 'Brad Pitt', role => 'Achilles'}, 29 | { id => '0795344', name => 'John Shrapnel', role => 'Nestor'}, 30 | { id => '0322407', name => 'Brendan Gleeson', role => 'Menelaus'}, 31 | { id => '1208167', name => 'Diane Kruger', role => 'Helen'}, 32 | { id => '0051509', name => 'Eric Bana', role => 'Hector'}, 33 | { id => '0089217', name => 'Orlando Bloom', role => 'Paris'}, 34 | { id => '1595495', name => 'Siri Svegler', role => 'Polydora'}, 35 | { id => '1595480', name => 'Lucie Barat', role => 'Helen\'s Handmaiden'}, 36 | { id => '0094297', name => 'Ken Bones', role => 'Hippasus'}, 37 | { id => '0146439', name => 'Manuel Cauchi', role => 'Old Spartan Fisherman'}, 38 | ], 39 | 40 | directors => [{id => '0000583', name => 'Wolfgang Petersen'}], 41 | writers => [{id => '0392955', name => 'Homer'}, 42 | {id => '1125275', name => 'David Benioff'}], 43 | mpaa_info => 'Rated R for graphic violence and some sexuality/nudity', 44 | ); 45 | 46 | my %pars = (cache => 0, debug => 0, crit => $crit); 47 | 48 | my $obj = new IMDB::Film(%pars); 49 | isa_ok($obj, 'IMDB::Film'); 50 | 51 | my @countries = sort(@{$obj->country}); 52 | 53 | is($obj->code, $films{code}, 'Movie IMDB Code'); 54 | is($obj->id, $films{id}, 'Movie IMDB ID'); 55 | is($obj->title, $films{title}, 'Movie Title'); 56 | is($obj->year, $films{year}, 'Movie Production Year'); 57 | like($obj->plot, qr/$films{plot}/, 'Movie Plot'); 58 | like($obj->storyline, qr/$films{storyline}/, 'Movie Plot'); 59 | like($obj->cover, '/\.jpg/i', 'Movie Cover'); 60 | is_deeply($obj->cast, $films{cast}, 'Movie Cast'); 61 | is($obj->language->[0], $films{language}[0], 'Movie Language'); 62 | is($countries[0], $films{country}[0], 'Movie Country'); 63 | is($obj->genres->[0], $films{genres}[0], 'Movie Genre'); 64 | like($obj->full_plot, qr/$films{full_plot}/, 'Movie full plot'); 65 | is($obj->duration, $films{duration}, 'Movie Duration'); 66 | is($obj->mpaa_info, $films{mpaa_info}, 'MPAA'); 67 | is($obj->company, $films{company}, 'Company'); 68 | 69 | my($rate, $num) = $obj->rating(); 70 | like($rate, qr/\d+/, 'Movie rating'); 71 | like($num, qr/\d+/, 'Rated people'); 72 | 73 | $rate = $obj->rating; 74 | like($rate, qr/\d+/, 'Movie rating'); 75 | 76 | #my $certs = $obj->certifications; 77 | #is($certs->{USA}, 'R', 'Movie Certifications'); 78 | 79 | is_deeply($obj->directors, $films{directors}, 'Movie Directors'); 80 | is_deeply($obj->writers, $films{writers}, 'Movie Writers'); 81 | 82 | #my $rec_movies = $obj->recommendation_movies(); 83 | #my($code, $title) = each %$rec_movies; 84 | #like($code, qr/\d+/, 'Recommedation movies'); 85 | -------------------------------------------------------------------------------- /lib/IMDB/Persons.pm: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | IMDB::Persons - Perl extension for retrieving movies persons 4 | from IMDB.com 5 | 6 | =head1 SYNOPSIS 7 | 8 | use IMDB::Persons; 9 | 10 | # 11 | # Retrieve a person information by IMDB code 12 | # 13 | my $person = new IMDB::Persons(crit => '0000129'); 14 | 15 | or 16 | 17 | # 18 | # Retrieve a person information by name 19 | # 20 | my $person = new IMDB::Persons(crit => 'Tom Cruise'); 21 | 22 | or 23 | 24 | # 25 | # Process already stored HTML page from IMDB 26 | # 27 | my $person = new IMDB::Persons(file => 'imdb.html'); 28 | 29 | if($person->status) { 30 | print "Name: ".$person->name."\n"; 31 | print "Birth Date: ".$person->date_of_birth."\n"; 32 | } else { 33 | print "Something wrong: ".$person->error."!\n"; 34 | } 35 | 36 | =head1 DESCRIPTION 37 | 38 | IMDB::Persons allows to retrieve an information about 39 | IMDB persons (actors, actresses, directors etc): full name, 40 | photo, date and place of birth, mini bio and filmography. 41 | 42 | =cut 43 | 44 | package IMDB::Persons; 45 | 46 | use strict; 47 | use warnings; 48 | 49 | use Carp; 50 | 51 | use Data::Dumper; 52 | 53 | use base qw(IMDB::BaseClass); 54 | 55 | use fields qw( _name 56 | _date_of_birth 57 | _place_of_birth 58 | _photo 59 | _mini_bio 60 | _filmography_types 61 | _filmography 62 | _genres 63 | _plot_keywords 64 | ); 65 | 66 | use vars qw($VERSION %FIELDS); 67 | 68 | use constant FORCED => 1; 69 | use constant CLASS_NAME => 'IMDB::Persons'; 70 | use constant MAIN_TAG => 'h4'; 71 | 72 | BEGIN { 73 | $VERSION = '0.51'; 74 | } 75 | 76 | { 77 | my %_defaults = ( 78 | cache => 0, 79 | debug => 0, 80 | error => [], 81 | matched => [], 82 | cache_exp => '1 h', 83 | host => 'www.imdb.com', 84 | query => 'name/nm', 85 | search => 'find?nm=on;mx=20;q=', 86 | status => 0, 87 | timeout => 10, 88 | user_agent => 'Mozilla/5.0', 89 | ); 90 | 91 | sub _get_default_attrs { keys %_defaults } 92 | sub _get_default_value { 93 | my($self, $attr) = @_; 94 | $_defaults{$attr}; 95 | } 96 | } 97 | 98 | =head1 Object Private Methods 99 | 100 | =over 4 101 | 102 | =item _init() 103 | 104 | Initialize a new object. 105 | 106 | =cut 107 | 108 | sub _init { 109 | my CLASS_NAME $self = shift; 110 | my %args = @_; 111 | 112 | croak "Person IMDB ID or Name should be defined!" if !$args{crit} && !$args{file}; 113 | 114 | $self->SUPER::_init(%args); 115 | my $name = $self->name(); 116 | 117 | for my $prop (grep { /^_/ && !/^_name$/ } sort keys %FIELDS) { 118 | ($prop) = $prop =~ /^_(.*)/; 119 | $self->$prop(); 120 | } 121 | } 122 | 123 | =item _search_person() 124 | 125 | Implements a logic to search IMDB persons by their names. 126 | 127 | =cut 128 | 129 | sub _search_person { 130 | my CLASS_NAME $self = shift; 131 | 132 | return $self->SUPER::_search_results('\/name\/nm(\d+)', '/a'); 133 | } 134 | 135 | sub fields { 136 | my CLASS_NAME $self = shift; 137 | return \%FIELDS; 138 | } 139 | 140 | 141 | =back 142 | 143 | =head1 Object Public Methods 144 | 145 | =over 4 146 | 147 | =item name() 148 | 149 | Retrieve a person full name 150 | 151 | my $person_name = $person->name(); 152 | 153 | =cut 154 | 155 | sub name { 156 | my CLASS_NAME $self = shift; 157 | if(!defined $self->{'_name'}) { 158 | my $parser = $self->_parser(FORCED); 159 | 160 | $parser->get_tag('title'); 161 | my $title = $parser->get_text(); 162 | $title =~ s#\s*\-\s*IMDB##i; 163 | 164 | $self->_show_message("Title=$title", 'DEBUG'); 165 | 166 | # Check if we have some search results 167 | my $no_matches = 1; 168 | while(my $tag = $parser->get_tag('td')) { 169 | if($tag->[1]->{class} && $tag->[1]->{class} eq 'media_strip_header') { 170 | $no_matches = 0; 171 | last; 172 | } 173 | } 174 | 175 | if($title =~ /imdb\s+name\s+search/i && !$no_matches) { 176 | $self->_show_message("Go to search page ...", 'DEBUG'); 177 | $title = $self->_search_person(); 178 | } 179 | 180 | $title = '' if $title =~ /IMDb Name Search/i; 181 | if($title) { 182 | $self->status(1); 183 | $self->retrieve_code($parser, 'http://www.imdb.com/name/nm(\d+)') unless $self->code; 184 | } else { 185 | $self->status(0); 186 | $self->error('Not Found'); 187 | } 188 | 189 | $title =~ s/^imdb\s+\-\s+//i; 190 | $self->{'_name'} = $title; 191 | } 192 | 193 | return $self->{'_name'}; 194 | } 195 | 196 | =item mini_bio() 197 | 198 | Returns a mini bio for specified IMDB person 199 | 200 | my $mini_bio = $person->mini_bio(); 201 | 202 | =cut 203 | 204 | sub mini_bio { 205 | my CLASS_NAME $self = shift; 206 | if(!defined $self->{_mini_bio}) { 207 | my $parser = $self->_parser(FORCED); 208 | while(my $tag = $parser->get_tag('div') ) { 209 | last if $tag->[1]->{class} && $tag->[1]->{class} eq 'infobar'; 210 | } 211 | 212 | my $tag = $parser->get_tag('p'); 213 | $self->{'_mini_bio'} = $parser->get_trimmed_text('a'); 214 | } 215 | return $self->{'_mini_bio'}; 216 | } 217 | 218 | =item date_of_birth() 219 | 220 | Returns a date of birth of IMDB person in format 'day' 'month caption' 'year': 221 | 222 | my $d_birth = $person->date_of_birth(); 223 | 224 | =cut 225 | 226 | #TODO: add date convertion in different formats. 227 | sub date_of_birth { 228 | my CLASS_NAME $self = shift; 229 | if(!defined $self->{'_date_of_birth'}) { 230 | my $parser = $self->_parser(FORCED); 231 | while(my $tag = $parser->get_tag(MAIN_TAG)) { 232 | my $text = $parser->get_text; 233 | last if $text =~ /^Born/i; 234 | } 235 | 236 | my $date = ''; 237 | my $year = ''; 238 | my $place = ''; 239 | while(my $tag = $parser->get_tag()) { 240 | last if $tag->[0] eq '/td'; 241 | 242 | if($tag->[0] eq 'a') { 243 | my $text = $parser->get_text(); 244 | next unless $text; 245 | 246 | SWITCH: for($tag->[1]->{href}) { 247 | /birth_monthday/i && do { $date = $text; $date =~ s#(\w+)\s(\d+)#$2 $1#; last SWITCH; }; 248 | /birth_year/i && do { $year = $text; last SWITCH; }; 249 | /birth_place/i && do { $place = $text; last SWITCH; }; 250 | } 251 | } 252 | } 253 | 254 | $self->{'_date_of_birth'} = {date => "$date $year", place => $place}; 255 | } 256 | 257 | return $self->{'_date_of_birth'}{'date'}; 258 | } 259 | 260 | =item place_of_birth() 261 | 262 | Returns a name of place of the birth 263 | 264 | my $place = $person->place_of_birth(); 265 | 266 | =cut 267 | 268 | sub place_of_birth { 269 | my CLASS_NAME $self = shift; 270 | return $self->{'_date_of_birth'}{'place'}; 271 | } 272 | 273 | =item photo() 274 | 275 | Return a path to the person's photo 276 | 277 | my $photo = $person->photo(); 278 | 279 | =cut 280 | 281 | sub photo { 282 | my CLASS_NAME $self = shift; 283 | if(!defined $self->{'_photo'}) { 284 | my $tag; 285 | my $parser = $self->_parser(FORCED); 286 | while($tag = $parser->get_tag('img')) { 287 | if($tag->[1]->{alt} && $tag->[1]->{alt} eq $self->name . ' Picture') { 288 | $self->{'_photo'} = $tag->[1]{src}; 289 | last; 290 | } 291 | } 292 | 293 | $self->{'_photo'} = 'No Photo' unless $self->{'_photo'}; 294 | } 295 | 296 | return $self->{'_photo'}; 297 | } 298 | 299 | =item filmography() 300 | 301 | Returns a person's filmography as a hash of arrays with following structure: 302 | 303 | my $fg = $person->filmography(); 304 | 305 | __DATA__ 306 | $fg = { 307 | 'Section' => [ 308 | { title => 'movie title', 309 | role => 'person role', 310 | year => 'year of movie production', 311 | code => 'IMDB code of movie', 312 | } 313 | ]; 314 | } 315 | 316 | The section can be In Development, Actor, Self, Thanks, Archive Footage, Producer etc. 317 | 318 | =cut 319 | 320 | sub filmography { 321 | my CLASS_NAME $self = shift; 322 | 323 | my $films; 324 | if(!$self->{'_filmography'}) { 325 | my $parser = $self->_parser(FORCED); 326 | while(my $tag = $parser->get_tag('h2')) { 327 | 328 | my $text = $parser->get_text; 329 | last if $text && $text =~ /filmography/i; 330 | } 331 | 332 | my $key = 'Unknown'; 333 | while(my $tag = $parser->get_tag()) { 334 | 335 | last if $tag->[0] eq 'script'; # Netx section after filmography 336 | 337 | if($tag->[0] eq 'h5') { 338 | my $caption = $parser->get_trimmed_text('h5', '/a'); 339 | 340 | $key = $caption if $caption; 341 | $key =~ s/://; 342 | 343 | $self->_show_message("FILMOGRAPHY: key=$key; caption=$caption; trimmed=".$parser->get_trimmed_text('h5', '/a'), 'DEBUG'); 344 | } 345 | 346 | if($tag->[0] eq 'a' && $tag->[1]->{href} && $tag->[1]{href} =~ m!title\/tt(\d+)!) { 347 | my $title = $parser->get_text(); 348 | my $text = $parser->get_trimmed_text('br', '/li'); 349 | 350 | $self->_show_message("link: $title --> $text", 'DEBUG'); 351 | 352 | my $code = $1; 353 | my($year, $role) = $text =~ m!\((\d+)\)\s.+\.+\s(.+)!; 354 | push @{$films->{$key}}, { title => $title, 355 | code => $code, 356 | year => $year, 357 | role => $role, 358 | }; 359 | } 360 | } 361 | 362 | $self->{'_filmography'} = $films; 363 | 364 | } else { 365 | $self->_show_message("filmography defined!", 'DEBUG'); 366 | } 367 | 368 | return $self->{'_filmography'}; 369 | } 370 | 371 | =item genres() 372 | 373 | Retrieve a list of movie genres for specified person: 374 | 375 | my $genres = $persons->genres; 376 | 377 | =cut 378 | 379 | sub genres { 380 | my CLASS_NAME $self = shift; 381 | 382 | unless($self->{_genres}) { 383 | my @genres = $self->_get_common_array_propery('genres'); 384 | $self->{_genres} = \@genres; 385 | } 386 | 387 | $self->{_genres}; 388 | } 389 | 390 | =item plot_keywords() 391 | 392 | Retrieve a list of keywords for movies where specified person plays: 393 | 394 | my $keywords = $persons->plot_keywords; 395 | 396 | =cut 397 | 398 | sub plot_keywords { 399 | my CLASS_NAME $self = shift; 400 | 401 | unless($self->{_plot_keywords}) { 402 | my @keywords = $self->_get_common_array_propery('plot keywords'); 403 | $self->{_plot_keywords} = \@keywords; 404 | } 405 | 406 | $self->{_plot_keywords}; 407 | } 408 | 409 | sub _get_common_array_propery { 410 | my CLASS_NAME $self = shift; 411 | my $target = shift || ''; 412 | 413 | my $parser = $self->_parser(FORCED); 414 | while(my $tag = $parser->get_tag(MAIN_TAG)) { 415 | my $text = $parser->get_text(); 416 | last if $text =~ /$target/i; 417 | } 418 | 419 | my @res = (); 420 | while(my $tag = $parser->get_tag('a')) { 421 | last if $tag->[1]->{class} && $tag->[1]->{class} =~ /tn15more/i; 422 | push @res, $parser->get_text; 423 | } 424 | 425 | return @res; 426 | } 427 | 428 | sub filmography_types { 429 | my CLASS_NAME $self = shift; 430 | } 431 | 432 | sub DESTROY { 433 | my $self = shift; 434 | } 435 | 436 | 1; 437 | 438 | __END__ 439 | 440 | =back 441 | 442 | =head1 EXPORTS 443 | 444 | No Matches.=head1 BUGS 445 | 446 | Please, send me any found bugs by email: stepanov.michael@gmail.com. 447 | 448 | =head1 SEE ALSO 449 | 450 | IMDB::Film 451 | IMDB::BaseClass 452 | WWW::Yahoo::Movies 453 | HTML::TokeParser 454 | 455 | =head1 AUTHOR 456 | 457 | Mikhail Stepanov AKA nite_man (stepanov.michael@gmail.com) 458 | 459 | =head1 COPYRIGHT 460 | 461 | Copyright (c) 2004 - 2007, Mikhail Stepanov. 462 | This module is free software. It may be used, redistributed and/or 463 | modified under the same terms as Perl itself. 464 | 465 | =cut 466 | -------------------------------------------------------------------------------- /t/tv1_get_tv_series_by_title.t: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | use utf8; 5 | 6 | use Test::More tests => 19; 7 | 8 | use IMDB::Film; 9 | 10 | my $crit = 'Fawlty Towers'; 11 | my %tv = ( 12 | code => '0072500', 13 | id => '0072500', 14 | title => '"Fawlty Towers"', 15 | year => '1975', 16 | genres => [qw(Comedy)], 17 | country => [qw(UK)], 18 | language => [qw(English)], 19 | company => 'British Broadcasting Corporation (BBC)', 20 | plot => qq{Hotel owner Basil Fawlty's incompetence, short fuse, and arrogance form a combination that ensures accidents and trouble are never far away.}, 21 | storyline => qq{Inept and manic English hotel owner and manager, Basil Fawlty, isn't cut out for his job. He's intolerant, rude and paranoid. All hell frequently breaks loose as Basil tries to run the hotel, constantly under verbal (and sometime physical) attack from his unhelpful wife Sybil, and hindered by the incompetent, but easy target, Manuel; their Spanish waiter.}, 22 | full_plot => qq{Inept and manic English hotel owner and manager, Basil Fawlty, isn't cut out for his job. He's intolerant, rude and paranoid. All hell frequently breaks loose as Basil tries to run the hotel, constantly under verbal (and sometime physical) attack from his unhelpful wife Sybil, and hindered by the incompetent, but easy target, Manuel; their Spanish waiter.}, 23 | cover => qq{MV5BMTU1MjM4NTA5Nl5BMl5BanBnXkFtZTcwOTE3NzA1MQ@@._V1._SX100_SY114_.jpg}, 24 | cast => [{ 25 | 'name' => 'John Cleese', 26 | 'id' => '0000092', 27 | 'role' => 'Basil Fawlty (12 episodes, 1975-1979)' 28 | }, 29 | { 30 | 'name' => 'Prunella Scales', 31 | 'id' => '0768795', 32 | 'role' => 'Sybil Fawlty (12 episodes, 1975-1979)' 33 | }, 34 | { 35 | 'name' => 'Andrew Sachs', 36 | 'id' => '0755133', 37 | 'role' => 'Manuel (12 episodes, 1975-1979)' 38 | }, 39 | { 40 | 'name' => 'Connie Booth', 41 | 'id' => '0095665', 42 | 'role' => 'Polly Sherman (12 episodes, 1975-1979)' 43 | }, 44 | { 45 | 'name' => 'Ballard Berkeley', 46 | 'id' => '0075326', 47 | 'role' => 'Major Gowen (12 episodes, 1975-1979)' 48 | }, 49 | { 50 | 'name' => 'Gilly Flower', 51 | 'id' => '0283148', 52 | 'role' => 'Miss Agatha Tibbs (12 episodes, 1975-1979)' 53 | }, 54 | { 55 | 'name' => 'Renee Roberts', 56 | 'id' => '0731513', 57 | 'role' => 'Miss Ursula Gatsby (12 episodes, 1975-1979)' 58 | }, 59 | { 60 | 'name' => 'Brian Hall', 61 | 'id' => '0355363', 62 | 'role' => 'Terry (6 episodes, 1979)' 63 | }], 64 | episodes => [ 65 | { 66 | 'date' => '19 September 1975', 67 | 'plot' => 'Basil is delighted when a member of the aristocracy chooses to stay at the Fawlty Towers, snubbing the normal guests who frequent the hotel. What Basil does not know, however, is that the man is not a Lord but a confidence trickster who attempts to steal his collection of rare coins. As Basil\'s desperation to keep such an \'upstanding\' guest increases, he becomes increasingly tolerant towards the man\'s behavior, only to have the guest arrested in a police raid on the hotel.', 68 | 'title' => 'A Touch of Class', 69 | 'id' => '0578584', 70 | 'episode' => '1', 71 | 'season' => '1' 72 | }, 73 | { 74 | 'date' => '26 September 1975', 75 | 'plot' => 'Over his wife\'s objections, Basil hires his usual firm of incompetents (O\'Reilly\'s) to do some carpentry work in the hotel lobby. With both Basil and Sybil out for the afternoon, they leave Polly in charge. She decides to have a nap, leaving the English-challenged Manuel in charge. When Basil returns, he finds a solid wall where once stood doors to the dining room and still no door to the kitchen per his work order. Sybil takes an I-told-you-so attitude and insists he hire a qualified builder to fix the mess, but Basil decides to get O\'Reilly back to right the wrong, but he still makes a mess of it as well.', 76 | 'title' => 'The Builders', 77 | 'id' => '0578589', 78 | 'episode' => '2', 79 | 'season' => '1' 80 | }, 81 | { 82 | 'date' => '3 October 1975', 83 | 'plot' => 'Basil Fawlty\'s puritanical streak comes out when a young couple he suspects of not being married tries to book a double room. He\'s ready to give them rooms on separate floors until Sybil steps in. It turns out that the couple know Polly but when he sees her coming out of their room - she was trying on a dress her friend had brought her for the wedding they would be attending - Basil begins to think everyone has gone mad. All that\'s left is for another guest, Mrs. Peignoir, to get tipsy and make a pass at him. For Manuel, it\'s his birthday and a night of celebrating takes its toll.', 84 | 'title' => 'The Wedding Party', 85 | 'id' => '0578594', 86 | 'episode' => '3', 87 | 'season' => '1' 88 | }, 89 | { 90 | 'date' => '10 October 1975', 91 | 'plot' => 'Basil Fawlty goes on the alert when he hears there are hotel inspectors in the area.', 92 | 'title' => 'The Hotel Inspectors', 93 | 'id' => '0578591', 94 | 'episode' => '4', 95 | 'season' => '1' 96 | }, 97 | { 98 | 'date' => '17 October 1975', 99 | # I can't work out how to compare é, so lets cheat 100 | #'plot' => 'The Fawltys are very pleased with their new chef, Kurt. He was recommended to them by their friend André who runs a nice restaurant in town. All in all, he\'s working out quite well and they decide to have a gourmet evening on Thursdays to try and promote, in Basil\'s eye at any rate, a better class of clientÞle. Kurt gets along well with everyone and seems to have a particular interest in Manuel. When the big day finally arrives, they find themselves with only four guests booked for the dining room - which Sybil puts down to Basil\'s advert in the local paper saying \'no riff-raff\' - and there seems to be something about Kurt that AndrÚ forgot to mention. It\'s pandemonium as a manic Basil tries to make up for their collective lapses.', 101 | 'title' => 'Gourmet Night', 102 | 'id' => '0578587', 103 | 'episode' => '5', 104 | 'season' => '1' 105 | }, 106 | { 107 | 'date' => '24 October 1975', 108 | 'plot' => 'A moose head to be hung, a fire drill to be conducted, and German guests are all a bit much for Basil to handle while Sybil\'s in hospital.', 109 | 'title' => 'The Germans', 110 | 'id' => '0578590', 111 | 'episode' => '6', 112 | 'season' => '1' 113 | }, 114 | { 115 | 'date' => '19 February 1979', 116 | 'plot' => 'When Mrs.Richards, a demanding woman who is hard of hearing, checks into the hotel, Basil, Sybil and Polly find themselves with a very difficult customer. Meanwhile, Basil bets on a horse that is sure to win, however he must not let Sybil find out.', 117 | 'title' => 'Communication Problems', 118 | 'id' => '0578586', 119 | 'episode' => '1', 120 | 'season' => '2' 121 | }, 122 | { 123 | 'date' => '26 February 1979', 124 | 'plot' => 'Confusion arises when Basil tries to catch a girl in a playboy\'s room after hours, all the while unnerved by a psychiatrist\'s presence.', 125 | 'title' => 'The Psychiatrist', 126 | 'id' => '0578593', 127 | 'episode' => '2', 128 | 'season' => '2' 129 | }, 130 | { 131 | 'date' => '5 March 1979', 132 | 'plot' => 'Customer dissatisfaction with the Fawlty Towers dining experience comes to a head when an insistent American comes for a stay.', 133 | 'title' => 'Waldorf Salad', 134 | 'id' => '0578595', 135 | 'episode' => '3', 136 | 'season' => '2' 137 | }, 138 | { 139 | 'date' => '12 March 1979', 140 | 'plot' => 'One of the guests has died in his sleep, but Basil thinks it\'s due to serving him spoiled food.', 141 | 'title' => 'The Kipper and the Corpse', 142 | 'id' => '0578592', 143 | 'episode' => '4', 144 | 'season' => '2' 145 | }, 146 | { 147 | 'date' => '26 March 1979', 148 | 'plot' => 'Basil plans a surprise anniversary party for Sybil but, thinking he\'s forgotten it again, she walks out just before the guests arrive.', 149 | 'title' => 'The Anniversary', 150 | 'id' => '0578588', 151 | 'episode' => '5', 152 | 'season' => '2' 153 | }, 154 | { 155 | 'date' => '25 October 1979', 156 | 'plot' => 'Basil and the rest of the staff are in deep trouble when the health inspector turns up and delivers an enormous list of problems with the hotel. Things become even worse when Manuel\'s rat gets loose in the hotel.', 157 | 'title' => 'Basil the Rat', 158 | 'id' => '0578585', 159 | 'episode' => '6', 160 | 'season' => '2' 161 | } 162 | ], 163 | directors => [{id => '0000583', name => 'Wolfgang Petersen'}], 164 | writers => [{id => '0392955', name => 'Homer'}, 165 | {id => '1125275', name => 'David Benioff'}], 166 | duration => '30 min (12 episodes)', 167 | aspect_ratio => '1.33 : 1', 168 | rating => '6.9', 169 | votes => '17394', 170 | ); 171 | 172 | my %pars = (cache => 0, debug => 0, crit => $crit); 173 | 174 | my $obj = new IMDB::Film(%pars); 175 | isa_ok($obj, 'IMDB::Film'); 176 | 177 | my @countries = sort(@{$obj->country}); 178 | 179 | is($obj->code, $tv{code}, 'TV IMDB Code'); 180 | is($obj->id, $tv{id}, 'TV IMDB ID'); 181 | is($obj->title, $tv{title}, 'TV Title'); 182 | is($obj->year, $tv{year}, 'TV Production Year'); 183 | is($obj->plot, $tv{plot}, 'TV Plot'); 184 | is($obj->storyline, $tv{storyline}, 'TV Storyline'); 185 | #like($obj->cover, '/\.jpg/i', 'TV Cover'); FIXME: do not currently get cover 186 | is_deeply($obj->cast, $tv{cast}, 'TV Cast'); 187 | delete $obj->episodes->[4]->{plot}; # because I can't work out how to compare é 188 | is_deeply($obj->episodes, $tv{episodes}, 'TV Episodes'); 189 | is($obj->language->[0], $tv{language}[0], 'TV Language'); 190 | is($countries[0], $tv{country}[0], 'TV Country'); 191 | is($obj->genres->[0], $tv{genres}[0], 'TV Genre'); 192 | is($obj->full_plot, $tv{full_plot}, 'TV full plot'); 193 | is($obj->duration, $tv{duration}, 'TV Duration'); 194 | is($obj->aspect_ratio, $tv{aspect_ratio}, 'TV Aspect Ratio'); 195 | is($obj->company, $tv{company}, 'Company'); 196 | 197 | my($rating, $votes) = $obj->rating(); 198 | cmp_ok($rating, '>=', $tv{rating}, 'Rating'); 199 | cmp_ok($votes, '>=', $tv{votes}, 'Votes'); 200 | cmp_ok($obj->rating(), '>=', $tv{rating}, 'Rating'); 201 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | Revision history for Perl module IMDB::Film 2 | 3 | 0.52 Feb 15 2012 4 | - Fixed retrieving episodes of TV series following IMDB change (ticket #74679) [ARJONES]; 5 | 6 | 0.51 Sep 28 2011 7 | - fixed retrieving of moive rating (ticket #71117); 8 | - fixed official movie sites test. 9 | 10 | 0.50 Aug 09 2011 11 | - fixed retrieving of movie rating (ticket #69049); 12 | - fixed returning of MPAA rating; 13 | - fixed parsing of movie title; 14 | - fixed parsing of person name; 15 | - fixed variuos tests. 16 | 17 | 0.49 Nov 25 2010 18 | - fixed issue with returning the episodes of TV series; 19 | - made movie kind low case to have back compatibility; 20 | - fixed retrieving year of movie; 21 | - fixed retrieving bio of the movie person; 22 | - fixed tests; 23 | 24 | 0.48 Oct 19 2010 25 | - fixed bug with retrieving of movie cover if the title contains special HTML symbols, ticket #62254; 26 | - added returning of default IMDB image in case if movie doesn't have a cover; 27 | - fixed issue with retrieving of movie's plot; 28 | - added a new method - storyline; 29 | - fixed bug with return of movie's kind; 30 | - fixed issue with not completed year period, i.e 2004-, ticket #62174; 31 | - added a few new tests. 32 | 33 | 0.47 Oct 12 2010 34 | - fixed module functionality according to new IMDB design. 35 | 36 | 0.46 Sep 10 2010 37 | - added possibility to get big size covers; 38 | - fixed retrieving of cover for non-English movies; 39 | - fixed bug with parsing of the movie title on the search page, ticket #55739; 40 | - fixed plot parsing. 41 | 42 | 0.45 Mar 19 2010 43 | - fixed parsing of movie top info (thanks to Stefan Göbel for the patch); 44 | - added new test for top movie info; 45 | - fixed a few tests. 46 | 47 | 0.44 Mar 17 2010 48 | - fixed issue with retrieving movie by its title (thanks to Peter Valdemar 49 | Mørch for patch); 50 | - fixed issue with parsing person birthday and birthplace; 51 | - fixed tests. 52 | 53 | 0.43 Nov 12 2009 54 | - fixed issue with retrieving plot keywords instead of plot (thanks to Justin 55 | Fletcher for the bugreport and patch, for ticket #51240); 56 | - fixe issue with parsing Top 250 or Bottom 100 movie property (thans for 57 | Byju for bugreport); 58 | - fixed parsing AKA movie info (thanks to Milos Vavrek for bugreport, 59 | ticket #52729); 60 | 61 | 0.42 Oct 8 2009 62 | - added keywords for the film; 63 | - fixed issue with retrieving of plot contained a link (thanks to Byju for 64 | the bug-report); 65 | - fixed issue with displaying special HTML symbols (ticket #49060); 66 | - fixed POD documentation. 67 | 68 | 0.41 Jul 30 2009 69 | - added retrieving of recommendation movies; 70 | - fixed issue with using of uninitialized value in pattern match (thanks 71 | to Byju); 72 | - fixed issue with wrong item in the list of movie directors. 73 | 74 | 0.40 Jul 11 2009 75 | - added top info into rating array (requested by Geoffrey Hoffman); 76 | - fixed pod errors (thanks to Bas Zoetekouw, ticket #47433). 77 | 78 | 0.39 Jul 03 2009 79 | - added additional attributes such "re-release", "DVD premiere" etc from 80 | the release date page (requested by Vincent Lefevre, ticket #47457); 81 | - added support for companies and movie connections (thanks to Justin for 82 | the patch, ticket #42756); 83 | - fixed retrieving release date information (reported by Vincent Lefevre, 84 | ticket #47457); 85 | - reorganize tests. 86 | 87 | 0.38 Jun 30 2009 88 | - fixed issue with empty cast list (thanks to Andréas Bratell); 89 | - fixed issue with retrieving metadata for movies with code less then 7 90 | digits (thanks to Simon), ticket #47422; 91 | - fixed POD documentation (thanks to Bas Zoetekouw), ticket #47433). 92 | 93 | 0.37 May 25 2009 94 | - added MPAA info for the specified movie; 95 | - fixed issue with retrieving of filmography for the specified actor or 96 | actresse (thanks to Cento for the bugreport); 97 | - fixed issue with retrieving metadata for movies with title contained 98 | only digits, i.e. 300 (thanks to Eric Johnson for bugreport); 99 | 100 | 0.36 Jan 30 2009 101 | - fixed issue with parsing TV Shows information (thanks to Tom); 102 | - fixed issue with retrieving movie person name; 103 | - fixed a few test; 104 | 105 | 0.35 29 Sep 2008 106 | - fixed a few bugs. 107 | 108 | 0.34 Jun 12 2008 109 | - fixed issue with parsing user rating (thanks to Faidon Liambotis for his 110 | patch); 111 | - fixed module documentation. 112 | 113 | 0.33 Jun 09 2008 114 | - fixed a bug with parsing a list of countries; 115 | - fixed a bug with parsing a list of language; 116 | - fixed a bug with parsing a list of certificates; 117 | - fixed a bug with list of matches (thanks to Dan Faerch for bug-report and 118 | patch); 119 | - fixed a bug with searching by movie title in case if it a number (thanks 120 | to Brian Wilson for bug-report and suggestion). 121 | 122 | 0.32 Dec 28 2007 123 | - fixed a bug with retrieving movie cast (thanks to David Sullivan for his 124 | patch); 125 | - added tests for the TV series; 126 | - fixed a bug with parsing a person filmography. 127 | 128 | 0.31 Nov 16 2007 129 | - added functionality to retrieve data of TV series: indicate is it TV 130 | series of not and getting a list of episodes (thanks to Branislav Gerzo 131 | for the patch); 132 | - added new tests to check TV series functionlity; 133 | - modified a procedure of getting trivia text; 134 | - fixed a bug with retrieving cast details (thanks to Matthias Hopf for 135 | patch). 136 | 137 | 0.30 Sep 13 2007 138 | - modified a functionlity to retrieve cast; 139 | - fixed a bug with return a goofs (thanks to John Norton for bug-report); 140 | - added localization of variable $/ (thanks to John Norton for bug-report); 141 | - fixed a few bugs in the tests. 142 | 143 | 0.29 Jul 18 2007 144 | - added possibility to get movie by its title and year (good idea given by Ohad Ben-Cohen); 145 | - modified a search procedure to fit a new IMDB layout (thanks to Danial 146 | Pearce, Peter Valdemar Mørch for bug-reports and patches); 147 | - modified retrieving a list of official sites and released dates; 148 | - fixed a bug with parsing writers movie info in case if there is only one 149 | person (thanks to Szel Miklos for bug-report). 150 | 151 | 0.28 May 07 2007 152 | - added a new method - aspect_ratio, to get movie aspect ratio; 153 | - fixed a bug with retrieving movie runtime (thanks to Steve Meier for bugreport). 154 | 155 | 0.27 Apr 16 2007 156 | - fixed a bug with parsing of list of movie directors and writers (thanks to Nick Johnston for 157 | his patch and Benjamin Juang and Bas for bugreports). 158 | 159 | 0.26 Apr 02 2007 160 | - fixed a bug with parsing directors and writers (thanks to Andy Dales for the bugreport); 161 | - added a few new tests; 162 | - updated a module documentation. 163 | 164 | 0.25 Mar 02 2007 165 | - fixed a bug with retrieving of movie rating (thanks to Arne Brutschy, Ulrich Tueshaus and 166 | Nick Johnston for bug reports and patches); 167 | - fixed a retrieving of movie certifications (thanks to Nick Johnston); 168 | - added new tests for rating and certifications. 169 | 170 | 0.24 Feb 20 2007 171 | - modified a logic of IMDB::Film and IMDB::Person according to the new 172 | layout of IMDB site. 173 | 174 | 0.23 Dec 19 2006 175 | - added a new method to retrieve a release dates (thanks to Danial Pearce); 176 | - added a new method to retrieve a list of quotes (thanks to buu); 177 | - fixed a bug with retrieving movie cover in case when its title contains 178 | some special symbils (thanks to Thomas Hoff); 179 | - added tests for new functionality. 180 | 181 | 0.22 Aug 1 2006 182 | - modified a procedure of parsing cast (thanks to drmarker for contibution); 183 | - removed a request for retrieving of movie official sites from the base initialization 184 | procedure (thanks to Danial Pearce); 185 | - fixed a bug with parsing movie title if the title contains some special symbols such '*' 186 | (thanks to Matthew Bone for bugreport); 187 | - fixed a bug with retrieving a cover if it isn't uploaded (Brano Gerzo). 188 | 189 | 0.21 May 17 2006 190 | - added retrieving official site urls for specified movie; 191 | - added possibility to clean cached data; 192 | - added new test for official sites; 193 | - fixed a bug with retrieving a filmography of specified movie person (thanks 194 | to Przemek Klys); 195 | - fixed a bug with test of full plot of movie; 196 | - fixed a bug with test of movie trivia. 197 | 198 | 0.20 Mar 10 2006 199 | - added possibility to specify a path for cached data (thanks to Brano Gerzo); 200 | - added new statuses to separate objects retrieved from internet, cache or file; 201 | - added test to check cache functionality. 202 | 203 | 0.19 Jan 16 2006 204 | - fixed a bug related with search by movie title contained some special symbols such "&" 205 | (thanks to Peter Backman); 206 | - fixed a bug with retrieving a movie cover (thanks to Len Kranendonk); 207 | - fixed a bug with retrieving a list of cast of TV series (thanks to Bas); 208 | - added prerequest module Digest::SHA1 (thanks to CPAN testers). 209 | 210 | 0.18 Dec 30 2005 211 | - fixed a bug with retrieving ID of writers (thanks to Brano Gerzo for bugreport); 212 | - fixed a bug with retrieving a list of writers if there is a link 'more'; 213 | - fixed a documentation of method 'awards'. 214 | 215 | 0.17 Dec 15 2005 216 | - moved functionality to get a page via HTTP to the separate method 217 | to it for getting of movie full plot; 218 | - fixed a bug with retrieving full plot (thanks to halcyon); 219 | - fixed a bug related with matched results if there is an only one; 220 | - fixed module documentation; 221 | - added new tests to cover bugs described above. 222 | 223 | 0.16 Dec 14 2005 224 | - added retrieving of AKA info (requested by Brano Gerzo); 225 | - added retriving of movie trivia; 226 | - added retriving of movie goofs; 227 | - added retrieving of movie avards; 228 | - fixed a bug with adding full plot into cache. 229 | 230 | 0.15 Nov 18 2005 231 | - added possibility to pass HTML page from IMDB instead of connection 232 | and getting page online (thanks to Brano Gerzo for idea); 233 | - switched to LWP::Simple from LWP::UserAgent; 234 | - added possibility to specify timeout and user agent for HTTP request; 235 | - removed a retrieving of a full plot from initialization stage; 236 | - improved test suite. 237 | 238 | 0.14 Aug 22 2005 239 | - fixed a bug with search film by title (reported by Scott D'Aquila 240 | and Kwesi Leggett); 241 | - fixed a bug with search person by name; 242 | - fixed a bug with define proxy address (reported by Bas Zoetekouw); 243 | - fixed bugs with movies and persons data in the test script. 244 | 245 | 0.13 Jun 03 2005 246 | - fixed a bug with craching of search when no any matches were 247 | found (reported by Peter Bäckman); 248 | - added a new property - status which indicates is successful or 249 | not information was retrieved from IMDB; 250 | - added a new property - id which is the same as code because 251 | it's not clear that IMDB code is like id. 252 | 253 | 0.12 Mar 07 2005 254 | - fixed a bug with retrieving information about movie 255 | persons in case no proxy server; 256 | - added an suppression of pseudo-hash warnings. 257 | 258 | 0.11 Mar 04 2005 259 | - fixed a bug with retrieving a list of casts 260 | in case if there are only credited casts (thanks to 261 | Steve Rushe); 262 | - fixed a bug with define a proxy in the class IMDB::Persons. 263 | - fixed a bug with retrieve a list of casts if there are 264 | only complete credited cast; 265 | - fixed a bug with assing a person IMDB code in case when we 266 | make a search by its name; 267 | - fixed a bug with assing a movie IMDB code when movie detailed 268 | page doesn't contain rating; 269 | 270 | 0.10 Jan 07 2005 271 | - moved all common functionality into base class; 272 | - added a new class to retrieve an information about IMDB persons; 273 | - modified a test; 274 | - fixed a bug with undefined value in the method 'rating'. 275 | - fixed a bug with using fields. 276 | 277 | 0.05 Dec 28 2004 278 | - added a new method - full_plot to retrieve a full novie plot; 279 | - fixed a bug with retrieve a title and production year of movie in case of search by 280 | movie title; 281 | - fixed bugs with test. 282 | 283 | 0.04 Sep 09 2004 284 | - added roles to the retrieving data about cast; 285 | - added possibility to retrieve number of votes; 286 | - fixed a bug with retrieving directors for movie which doesn't contains writers info. 287 | 288 | 0.03 Jul 31 2004 289 | - fixed bug with retrieving film information by its title; 290 | - corrected class documentation; 291 | 292 | 0.02 Jul 28 2004 293 | - fixed class documentation; 294 | 295 | 0.01 Jul 27 2004 296 | - original version; created by ExtUtils::ModuleMaker 0.32 297 | 298 | 299 | -------------------------------------------------------------------------------- /lib/IMDB/BaseClass.pm: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | IMDB::BaseClass - a base class for IMDB::Film and IMDB::Persons. 4 | 5 | =head1 SYNOPSIS 6 | 7 | use base qw(IMDB::BaseClass); 8 | 9 | =head1 DESCRIPTION 10 | 11 | IMDB::BaseClass implements a base functionality for IMDB::Film 12 | and IMDB::Persons. 13 | 14 | =cut 15 | 16 | package IMDB::BaseClass; 17 | 18 | use strict; 19 | use warnings; 20 | 21 | use HTML::TokeParser; 22 | use LWP::Simple qw($ua get); 23 | use Cache::FileCache; 24 | use Text::Unidecode qw(unidecode); 25 | use HTML::Entities; 26 | use Carp; 27 | 28 | use Data::Dumper; 29 | 30 | use constant MAIN_TAG => 'h4'; 31 | use constant ID_LENGTH => 6; 32 | 33 | use vars qw($VERSION %FIELDS $AUTOLOAD %STATUS_DESCR); 34 | 35 | BEGIN { 36 | $VERSION = '0.51'; 37 | 38 | %STATUS_DESCR = ( 39 | 0 => 'Empty', 40 | 1 => 'Filed', 41 | 2 => 'Fresh', 42 | 3 => 'Cached', 43 | ); 44 | } 45 | 46 | use constant FORCED => 1; 47 | use constant CLASS_NAME => 'IMDB::BaseClass'; 48 | 49 | use constant FROM_FILE => 1; 50 | use constant FROM_INTERNET => 2; 51 | use constant FROM_CACHE => 3; 52 | 53 | use fields qw( content 54 | parser 55 | matched 56 | proxy 57 | error 58 | cache 59 | host 60 | query 61 | search 62 | cacheObj 63 | cache_exp 64 | cache_root 65 | clear_cache 66 | debug 67 | status 68 | file 69 | timeout 70 | user_agent 71 | decode_html 72 | _code 73 | ); 74 | 75 | =head2 Constructor and initialization 76 | 77 | =over 4 78 | 79 | =item new() 80 | 81 | Object's constructor. You should pass as parameter movie title or IMDB code. 82 | 83 | my $imdb = new IMDB::Film(crit => ); 84 | 85 | or 86 | 87 | my $imdb = new IMDB::Film(crit => ); 88 | 89 | Also, you can specify following optional parameters: 90 | 91 | - proxy - define proxy server name and port; 92 | - debug - switch on debug mode (on by default); 93 | - cache - cache or not of content retrieved pages. 94 | 95 | =cut 96 | 97 | sub new { 98 | my $caller = shift; 99 | my $class = ref($caller) || $caller; 100 | my $self = fields::new($class); 101 | $self->_init(@_); 102 | return $self; 103 | } 104 | 105 | =item _init() 106 | 107 | Initialize object. It gets list of service class properties and assign value to them from input 108 | parameters or from the hash with default values. 109 | 110 | =cut 111 | 112 | sub _init { 113 | my CLASS_NAME $self = shift; 114 | my %args = @_; 115 | 116 | no warnings 'deprecated'; 117 | 118 | for my $prop ( keys %{ $self->fields } ) { 119 | unless($prop =~ /^_/) { 120 | $self->{$prop} = defined $args{$prop} ? $args{$prop} : $self->_get_default_value($prop); 121 | } 122 | } 123 | 124 | if($self->_cache()) { 125 | $self->_cacheObj( new Cache::FileCache( { default_expires_in => $self->_cache_exp, 126 | cache_root => $self->_cache_root } ) ); 127 | 128 | $self->_cacheObj->clear() if $self->_clear_cache; 129 | } 130 | 131 | if($self->_proxy) { $ua->proxy(['http', 'ftp'], $self->_proxy()) } 132 | else { $ua->env_proxy() } 133 | 134 | $ua->timeout($self->timeout); 135 | $ua->agent($self->user_agent); 136 | 137 | $self->_content( $args{crit} ); 138 | $self->_parser(); 139 | } 140 | 141 | =item user_agent() 142 | 143 | Define an user agent for HTTP request. It's 'Mozilla/5.0' by default. 144 | For more information refer to LWP::UserAgent. 145 | 146 | =cut 147 | 148 | sub user_agent { 149 | my CLASS_NAME $self = shift; 150 | if(@_) { $self->{user_agent} = shift } 151 | return $self->{user_agent} 152 | } 153 | 154 | =item timeout() 155 | 156 | Define a timeout for HTTP request in seconds. By default it's 10 sec. 157 | For more information refer to LWP::UserAgent. 158 | 159 | =cut 160 | 161 | sub timeout { 162 | my CLASS_NAME $self = shift; 163 | if(@_) { $self->{timeout} = shift } 164 | return $self->{timeout} 165 | } 166 | 167 | =item code() 168 | 169 | Get IMDB film code. 170 | 171 | my $code = $film->code(); 172 | 173 | =cut 174 | 175 | sub code { 176 | my CLASS_NAME $self = shift; 177 | if(@_) { $self->{_code} = shift } 178 | return $self->{_code}; 179 | } 180 | 181 | =item id() 182 | 183 | Get IMDB film id (actually, it's the same as code). 184 | 185 | my $id = $film->id(); 186 | 187 | =cut 188 | 189 | sub id { 190 | my CLASS_NAME $self = shift; 191 | if(@_) { $self->{_code} = shift } 192 | return $self->{_code}; 193 | } 194 | 195 | =item _proxy() 196 | 197 | Store address of proxy server. You can pass a proxy name as parameter into 198 | object constructor: 199 | 200 | my $imdb = new IMDB::Film(code => 111111, proxy => 'my.proxy.host:8080'); 201 | 202 | or you can define environment variable 'http_host'. For exanple, for Linux 203 | you shoud do a following: 204 | 205 | export http_proxy=my.proxy.host:8080 206 | 207 | =cut 208 | 209 | sub _proxy { 210 | my CLASS_NAME $self = shift; 211 | if(@_) { $self->{proxy} = shift } 212 | return $self->{proxy}; 213 | } 214 | 215 | sub _decode_html { 216 | my CLASS_NAME $self = shift; 217 | if(@_) { $self->{decode_html} = shift } 218 | return $self->{decode_html}; 219 | } 220 | 221 | =item _cache() 222 | 223 | Store cache flag. Indicate use file cache to store content page or not: 224 | 225 | my $imdb = new IMDB::Film(code => 111111, cache => 1); 226 | 227 | =cut 228 | 229 | sub _cache { 230 | my CLASS_NAME $self = shift; 231 | if(@_) { $self->{cache} = shift } 232 | return $self->{cache} 233 | } 234 | 235 | =item _clear_cache 236 | 237 | Store flag clear_cache which is indicated clear exisisting cache or not (false by default): 238 | 239 | my $imdb = new IMDB::Film(code => 111111, cache => 1, clear_cache => 1); 240 | 241 | =cut 242 | 243 | sub _clear_cache { 244 | my CLASS_NAME $self = shift; 245 | if($_) { $self->{clear_cache} = shift } 246 | return $self->{clear_cache}; 247 | } 248 | 249 | =item _cacheObj() 250 | 251 | In case of using cache, we create new Cache::File object and store it in object's 252 | propery. For more details about Cache::File please see Cache::Cache documentation. 253 | 254 | =cut 255 | 256 | sub _cacheObj { 257 | my CLASS_NAME $self = shift; 258 | if(@_) { $self->{cacheObj} = shift } 259 | return $self->{cacheObj} 260 | } 261 | 262 | =item _cache_exp() 263 | 264 | In case of using cache, we can define value time of cache expire. 265 | 266 | my $imdb = new IMDB::Film(code => 111111, cache_exp => '1 h'); 267 | 268 | For more details please see Cache::Cache documentation. 269 | 270 | =cut 271 | 272 | sub _cache_exp { 273 | my CLASS_NAME $self = shift; 274 | if(@_) { $self->{cache_exp} = shift } 275 | return $self->{cache_exp} 276 | } 277 | 278 | sub _cache_root { 279 | my CLASS_NAME $self = shift; 280 | $self->{cache_root} = shift if @_; 281 | 282 | $self->_show_message("CACHE ROOT is " . $self->{cache_root}, 'DEBUG'); 283 | 284 | return $self->{cache_root}; 285 | } 286 | 287 | sub _show_message { 288 | my CLASS_NAME $self = shift; 289 | my $msg = shift || 'Unknown error'; 290 | my $type = shift || 'ERROR'; 291 | 292 | return if $type =~ /^debug$/i && !$self->_debug(); 293 | 294 | if($type =~ /(debug|info|warn)/i) { carp "[$type] $msg" } 295 | else { croak "[$type] $msg" } 296 | } 297 | 298 | =item _host() 299 | 300 | Store IMDB host name. You can pass this value in object constructor: 301 | 302 | my $imdb = new IMDB::Film(code => 111111, host => 'us.imdb.com'); 303 | 304 | By default, it uses 'www.imdb.com'. 305 | 306 | =cut 307 | 308 | sub _host { 309 | my CLASS_NAME $self = shift; 310 | if(@_) { $self->{host} = shift } 311 | return $self->{host} 312 | } 313 | 314 | =item _query() 315 | 316 | Store query string to retrieve film by its ID. You can define 317 | different value for that: 318 | 319 | my $imdb = new IMDB::Film(code => 111111, query => 'some significant string'); 320 | 321 | Default value is 'title/tt'. 322 | 323 | B 325 | 326 | =cut 327 | 328 | sub _query { 329 | my CLASS_NAME $self = shift; 330 | if(@_) { $self->{query} = shift } 331 | return $self->{query} 332 | } 333 | 334 | =item _search() 335 | 336 | Store search string to find film by its title. You can define 337 | different value for that: 338 | 339 | my $imdb = new IMDB::Film(code => 111111, seach => 'some significant string'); 340 | 341 | Default value is 'Find?select=Titles&for='. 342 | 343 | =cut 344 | 345 | sub _search { 346 | my CLASS_NAME $self = shift; 347 | if(@_) { $self->{search} = shift } 348 | return $self->{search} 349 | } 350 | 351 | =item _debug() 352 | 353 | Indicate to use DEBUG mode to display some debug messages: 354 | 355 | my $imdb = new IMDB::Film(code => 111111, debug => 1); 356 | 357 | By default debug mode is switched off. 358 | 359 | =cut 360 | 361 | sub _debug { 362 | my CLASS_NAME $self = shift; 363 | if(@_) { $self->{debug} = shift } 364 | return $self->{debug} 365 | } 366 | 367 | =item _content() 368 | 369 | Connect to the IMDB, retrieve page according to crit: by film 370 | IMDB ID or its title and store content of that page in the object 371 | property. 372 | In case using cache, first check if page was already stored in the 373 | cache then retrieve page from the cache else store content of the 374 | page in the cache. 375 | 376 | =cut 377 | 378 | sub _content { 379 | my CLASS_NAME $self = shift; 380 | if(@_) { 381 | my $crit = shift || ''; 382 | my $page; 383 | 384 | $self->code($crit) if $crit =~ /^\d{6,8}$/; 385 | $page = $self->_cacheObj()->get($crit) if $self->_cache(); 386 | 387 | $self->_show_message("CRIT: $crit", 'DEBUG'); 388 | 389 | unless($page) { 390 | if( -f $crit ) { 391 | $self->_show_message("Parse IMDB HTML file ...", 'DEBUG'); 392 | 393 | local $/; 394 | undef $/; 395 | open FILE, $crit or die "Cannot open off-line IMDB file: $!!"; 396 | $page = ; 397 | close FILE; 398 | $self->status(FROM_FILE); 399 | } else { 400 | $self->_show_message("Retrieving page from internet ...", 'DEBUG'); 401 | 402 | my $url = 'http://'.$self->_host().'/'. 403 | ( $crit =~ /^\d+$/ && length($crit) >= ID_LENGTH ? $self->_query() : $self->_search() ).$crit; 404 | 405 | $page = $self->_get_page_from_internet($url); 406 | $self->status(FROM_INTERNET); 407 | } 408 | 409 | $self->_cacheObj()->set($crit, $page, $self->_cache_exp()) if $self->_cache(); 410 | } else { 411 | $self->_show_message("Retrieving page from cache ...", 'DEBUG'); 412 | $self->status(FROM_CACHE); 413 | } 414 | 415 | $self->{content} = \$page; 416 | } 417 | 418 | $self->{content}; 419 | } 420 | 421 | sub _get_page_from_internet { 422 | my CLASS_NAME $self = shift; 423 | my $url = shift; 424 | 425 | $self->_show_message("URL is [$url]...", 'DEBUG'); 426 | 427 | print "Calling URL: $url!\n"; 428 | 429 | my $page = get($url); 430 | 431 | unless($page) { 432 | $self->error("Cannot retieve an url: [$url]!"); 433 | $self->_show_message("Cannot retrieve url [$url]", 'CRITICAL'); 434 | } 435 | 436 | return $page; 437 | } 438 | 439 | =item _parser() 440 | 441 | Setup HTML::TokeParser and store. To have possibility to inherite that class 442 | we should every time initialize parser using stored content of page. 443 | For more information please see HTML::TokeParser documentation. 444 | 445 | =cut 446 | 447 | sub _parser { 448 | my CLASS_NAME $self = shift; 449 | my $forced = shift || 0; 450 | my $page = shift || undef; 451 | 452 | if($forced) { 453 | my $content = defined $page ? $page : $self->_content(); 454 | 455 | my $parser = new HTML::TokeParser($content) or croak "[CRITICAL] Cannot create HTML parser: $!!"; 456 | $self->{parser} = $parser; 457 | } 458 | 459 | return $self->{parser}; 460 | } 461 | 462 | =item _get_simple_prop() 463 | 464 | Retrieve a simple movie property which surrownded by . 465 | 466 | =cut 467 | 468 | sub _get_simple_prop { 469 | my CLASS_NAME $self = shift; 470 | my $target = shift || ''; 471 | 472 | my $parser = $self->_parser(FORCED); 473 | 474 | while(my $tag = $parser->get_tag(MAIN_TAG)) { 475 | my $text = $parser->get_text; 476 | 477 | $self->_show_message("[$tag->[0]] $text --> $target", 'DEBUG'); 478 | last if $text =~ /$target/i; 479 | } 480 | 481 | my $end_tag = '/a'; 482 | $end_tag = '/div' if $target eq 'trivia'; 483 | $end_tag = 'span' if $target eq 'Production Co'; 484 | $end_tag = '/div' if $target eq 'aspect ratio'; 485 | 486 | my $res = $parser->get_trimmed_text($end_tag); 487 | 488 | $res =~ s/\s+(see )?more$//i; 489 | 490 | $self->_show_message("RES: $res", 'DEBUG'); 491 | 492 | $res = $self->_decode_special_symbols($res); 493 | 494 | return $res; 495 | } 496 | 497 | sub _search_results { 498 | my CLASS_NAME $self = shift; 499 | my $pattern = shift || croak 'Please, specify search pattern!'; 500 | my $end_tag = shift || '/li'; 501 | my $year = shift; 502 | 503 | my(@matched, @guess_res, %matched_hash); 504 | my $parser = $self->_parser(); 505 | 506 | my $count = 0; 507 | while( my $tag = $parser->get_tag('a') ) { 508 | my $href = $tag->[1]{href}; 509 | my $title = $parser->get_trimmed_text('a', $end_tag); 510 | 511 | $self->_show_message("TITLE: " . $title, 'DEBUG'); 512 | next if $title =~ /\[IMG\]/i or $href =~ /pro.imdb.com/; 513 | 514 | # Remove garbage from the first title 515 | $title =~ s/(\n|\r)//g; 516 | $title =~ s/\s*\.media_strip_thumbs.*//m; 517 | 518 | if(my($id) = $href =~ /$pattern/) { 519 | $matched_hash{$id} = {title => $title, 'pos' => $count++}; 520 | @guess_res = ($id, $title) if $year && $title =~ /$year/ && !@guess_res; 521 | } 522 | } 523 | 524 | @matched = map { {title => $matched_hash{$_}->{title}, id => $_} } 525 | sort { $matched_hash{$a}->{'pos'} <=> $matched_hash{$b}->{'pos'} } keys %matched_hash; 526 | 527 | $self->matched(\@matched); 528 | 529 | $self->_show_message("matched: " . Dumper(\@matched), 'DEBUG'); 530 | $self->_show_message("guess: " . Dumper(\@guess_res), 'DEBUG'); 531 | 532 | my($title, $id); 533 | if(@guess_res) { 534 | ($id, $title) = @guess_res; 535 | } else { 536 | $title = $matched[0]->{title}; 537 | $id = $matched[0]->{id}; 538 | } 539 | 540 | $self->_content($id); 541 | $self->_parser(FORCED); 542 | 543 | return $title; 544 | } 545 | 546 | =item matched() 547 | 548 | Retrieve list of matched films each element of which is hash reference - 549 | { id => , title => : 550 | 551 | my @matched = @{ $film->matched() }; 552 | 553 | Note: if movie was matched by title unambiguously it won't be present in this array! 554 | 555 | =cut 556 | 557 | sub matched { 558 | my CLASS_NAME $self = shift; 559 | if(@_) { $self->{matched} = shift } 560 | return $self->{matched}; 561 | } 562 | 563 | sub status { 564 | my CLASS_NAME $self = shift; 565 | if(@_) { $self->{status} = shift } 566 | return $self->{status}; 567 | } 568 | 569 | sub status_descr { 570 | my CLASS_NAME $self = shift; 571 | return $STATUS_DESCR{$self->{status}} || $self->{status}; 572 | } 573 | 574 | sub retrieve_code { 575 | my CLASS_NAME $self = shift; 576 | my $parser = shift; 577 | my $pattern = shift; 578 | my($id, $tag); 579 | 580 | while($tag = $parser->get_tag('link')) { 581 | if($tag->[1]{href} && $tag->[1]{href} =~ m!$pattern!) { 582 | $self->code($1); 583 | last; 584 | } 585 | } 586 | } 587 | 588 | =item error() 589 | 590 | Return string which contains error messages separated by \n: 591 | 592 | my $errors = $film->error(); 593 | 594 | =cut 595 | 596 | sub error { 597 | my CLASS_NAME $self = shift; 598 | if(@_) { push @{ $self->{error} }, shift() } 599 | return join("\n", @{ $self->{error} }) if $self->{error}; 600 | } 601 | 602 | sub _decode_special_symbols { 603 | my($self, $text) = @_; 604 | if($self->_decode_html) { 605 | $text = unidecode(decode_entities($text)); 606 | } 607 | return $text; 608 | } 609 | 610 | sub AUTOLOAD { 611 | my $self = shift; 612 | my($class, $method) = $AUTOLOAD =~ /(.*)::(.*)/; 613 | my($pack, $file, $line) = caller; 614 | 615 | carp "Method [$method] not found in the class [$class]!\n Called from $pack at line $line"; 616 | } 617 | 618 | sub DESTROY { 619 | my $self = shift; 620 | } 621 | 622 | 1; 623 | 624 | __END__ 625 | 626 | =back 627 | 628 | =head1 EXPORTS 629 | 630 | Nothing 631 | 632 | =head1 BUGS 633 | 634 | Please, send me any found bugs by email: stepanov.michael@gmail.com. 635 | 636 | =head1 SEE ALSO 637 | 638 | IMDB::Persons 639 | IMDB::Film 640 | WWW::Yahoo::Movies 641 | HTML::TokeParser 642 | 643 | =head1 AUTHOR 644 | 645 | Mikhail Stepanov AKA nite_man (stepanov.michael@gmail.com) 646 | 647 | =head1 COPYRIGHT 648 | 649 | Copyright (c) 2004 - 2007, Mikhail Stepanov. 650 | This module is free software. It may be used, redistributed and/or 651 | modified under the same terms as Perl itself. 652 | 653 | =cut 654 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Terms of Perl itself 2 | 3 | a) the GNU General Public License as published by the Free 4 | Software Foundation; either version 1, or (at your option) any 5 | later version, or 6 | b) the "Artistic License" 7 | 8 | --------------------------------------------------------------------------- 9 | 10 | The General Public License (GPL) 11 | Version 2, June 1991 12 | 13 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 675 Mass Ave, 14 | Cambridge, MA 02139, USA. Everyone is permitted to copy and distribute 15 | verbatim copies of this license document, but changing it is not allowed. 16 | 17 | Preamble 18 | 19 | The licenses for most software are designed to take away your freedom to share 20 | and change it. By contrast, the GNU General Public License is intended to 21 | guarantee your freedom to share and change free software--to make sure the 22 | software is free for all its users. This General Public License applies to most of 23 | the Free Software Foundation's software and to any other program whose 24 | authors commit to using it. (Some other Free Software Foundation software is 25 | covered by the GNU Library General Public License instead.) You can apply it to 26 | your programs, too. 27 | 28 | When we speak of free software, we are referring to freedom, not price. Our 29 | General Public Licenses are designed to make sure that you have the freedom 30 | to distribute copies of free software (and charge for this service if you wish), that 31 | you receive source code or can get it if you want it, that you can change the 32 | software or use pieces of it in new free programs; and that you know you can do 33 | these things. 34 | 35 | To protect your rights, we need to make restrictions that forbid anyone to deny 36 | you these rights or to ask you to surrender the rights. These restrictions 37 | translate to certain responsibilities for you if you distribute copies of the 38 | software, or if you modify it. 39 | 40 | For example, if you distribute copies of such a program, whether gratis or for a 41 | fee, you must give the recipients all the rights that you have. You must make 42 | sure that they, too, receive or can get the source code. And you must show 43 | them these terms so they know their rights. 44 | 45 | We protect your rights with two steps: (1) copyright the software, and (2) offer 46 | you this license which gives you legal permission to copy, distribute and/or 47 | modify the software. 48 | 49 | Also, for each author's protection and ours, we want to make certain that 50 | everyone understands that there is no warranty for this free software. If the 51 | software is modified by someone else and passed on, we want its recipients to 52 | know that what they have is not the original, so that any problems introduced by 53 | others will not reflect on the original authors' reputations. 54 | 55 | Finally, any free program is threatened constantly by software patents. We wish 56 | to avoid the danger that redistributors of a free program will individually obtain 57 | patent licenses, in effect making the program proprietary. To prevent this, we 58 | have made it clear that any patent must be licensed for everyone's free use or 59 | not licensed at all. 60 | 61 | The precise terms and conditions for copying, distribution and modification 62 | follow. 63 | 64 | GNU GENERAL PUBLIC LICENSE 65 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND 66 | MODIFICATION 67 | 68 | 0. This License applies to any program or other work which contains a notice 69 | placed by the copyright holder saying it may be distributed under the terms of 70 | this General Public License. The "Program", below, refers to any such program 71 | or work, and a "work based on the Program" means either the Program or any 72 | derivative work under copyright law: that is to say, a work containing the 73 | Program or a portion of it, either verbatim or with modifications and/or translated 74 | into another language. (Hereinafter, translation is included without limitation in 75 | the term "modification".) Each licensee is addressed as "you". 76 | 77 | Activities other than copying, distribution and modification are not covered by 78 | this License; they are outside its scope. The act of running the Program is not 79 | restricted, and the output from the Program is covered only if its contents 80 | constitute a work based on the Program (independent of having been made by 81 | running the Program). Whether that is true depends on what the Program does. 82 | 83 | 1. You may copy and distribute verbatim copies of the Program's source code as 84 | you receive it, in any medium, provided that you conspicuously and appropriately 85 | publish on each copy an appropriate copyright notice and disclaimer of warranty; 86 | keep intact all the notices that refer to this License and to the absence of any 87 | warranty; and give any other recipients of the Program a copy of this License 88 | along with the Program. 89 | 90 | You may charge a fee for the physical act of transferring a copy, and you may at 91 | your option offer warranty protection in exchange for a fee. 92 | 93 | 2. You may modify your copy or copies of the Program or any portion of it, thus 94 | forming a work based on the Program, and copy and distribute such 95 | modifications or work under the terms of Section 1 above, provided that you also 96 | meet all of these conditions: 97 | 98 | a) You must cause the modified files to carry prominent notices stating that you 99 | changed the files and the date of any change. 100 | 101 | b) You must cause any work that you distribute or publish, that in whole or in 102 | part contains or is derived from the Program or any part thereof, to be licensed 103 | as a whole at no charge to all third parties under the terms of this License. 104 | 105 | c) If the modified program normally reads commands interactively when run, you 106 | must cause it, when started running for such interactive use in the most ordinary 107 | way, to print or display an announcement including an appropriate copyright 108 | notice and a notice that there is no warranty (or else, saying that you provide a 109 | warranty) and that users may redistribute the program under these conditions, 110 | and telling the user how to view a copy of this License. (Exception: if the 111 | Program itself is interactive but does not normally print such an announcement, 112 | your work based on the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If identifiable 115 | sections of that work are not derived from the Program, and can be reasonably 116 | considered independent and separate works in themselves, then this License, 117 | and its terms, do not apply to those sections when you distribute them as 118 | separate works. But when you distribute the same sections as part of a whole 119 | which is a work based on the Program, the distribution of the whole must be on 120 | the terms of this License, whose permissions for other licensees extend to the 121 | entire whole, and thus to each and every part regardless of who wrote it. 122 | 123 | Thus, it is not the intent of this section to claim rights or contest your rights to 124 | work written entirely by you; rather, the intent is to exercise the right to control 125 | the distribution of derivative or collective works based on the Program. 126 | 127 | In addition, mere aggregation of another work not based on the Program with the 128 | Program (or with a work based on the Program) on a volume of a storage or 129 | distribution medium does not bring the other work under the scope of this 130 | License. 131 | 132 | 3. You may copy and distribute the Program (or a work based on it, under 133 | Section 2) in object code or executable form under the terms of Sections 1 and 2 134 | above provided that you also do one of the following: 135 | 136 | a) Accompany it with the complete corresponding machine-readable source 137 | code, which must be distributed under the terms of Sections 1 and 2 above on a 138 | medium customarily used for software interchange; or, 139 | 140 | b) Accompany it with a written offer, valid for at least three years, to give any 141 | third party, for a charge no more than your cost of physically performing source 142 | distribution, a complete machine-readable copy of the corresponding source 143 | code, to be distributed under the terms of Sections 1 and 2 above on a medium 144 | customarily used for software interchange; or, 145 | 146 | c) Accompany it with the information you received as to the offer to distribute 147 | corresponding source code. (This alternative is allowed only for noncommercial 148 | distribution and only if you received the program in object code or executable 149 | form with such an offer, in accord with Subsection b above.) 150 | 151 | The source code for a work means the preferred form of the work for making 152 | modifications to it. For an executable work, complete source code means all the 153 | source code for all modules it contains, plus any associated interface definition 154 | files, plus the scripts used to control compilation and installation of the 155 | executable. However, as a special exception, the source code distributed need 156 | not include anything that is normally distributed (in either source or binary form) 157 | with the major components (compiler, kernel, and so on) of the operating system 158 | on which the executable runs, unless that component itself accompanies the 159 | executable. 160 | 161 | If distribution of executable or object code is made by offering access to copy 162 | from a designated place, then offering equivalent access to copy the source 163 | code from the same place counts as distribution of the source code, even though 164 | third parties are not compelled to copy the source along with the object code. 165 | 166 | 4. You may not copy, modify, sublicense, or distribute the Program except as 167 | expressly provided under this License. Any attempt otherwise to copy, modify, 168 | sublicense or distribute the Program is void, and will automatically terminate 169 | your rights under this License. However, parties who have received copies, or 170 | rights, from you under this License will not have their licenses terminated so long 171 | as such parties remain in full compliance. 172 | 173 | 5. You are not required to accept this License, since you have not signed it. 174 | However, nothing else grants you permission to modify or distribute the Program 175 | or its derivative works. These actions are prohibited by law if you do not accept 176 | this License. Therefore, by modifying or distributing the Program (or any work 177 | based on the Program), you indicate your acceptance of this License to do so, 178 | and all its terms and conditions for copying, distributing or modifying the 179 | Program or works based on it. 180 | 181 | 6. Each time you redistribute the Program (or any work based on the Program), 182 | the recipient automatically receives a license from the original licensor to copy, 183 | distribute or modify the Program subject to these terms and conditions. You 184 | may not impose any further restrictions on the recipients' exercise of the rights 185 | granted herein. You are not responsible for enforcing compliance by third parties 186 | to this License. 187 | 188 | 7. If, as a consequence of a court judgment or allegation of patent infringement 189 | or for any other reason (not limited to patent issues), conditions are imposed on 190 | you (whether by court order, agreement or otherwise) that contradict the 191 | conditions of this License, they do not excuse you from the conditions of this 192 | License. If you cannot distribute so as to satisfy simultaneously your obligations 193 | under this License and any other pertinent obligations, then as a consequence 194 | you may not distribute the Program at all. For example, if a patent license would 195 | not permit royalty-free redistribution of the Program by all those who receive 196 | copies directly or indirectly through you, then the only way you could satisfy 197 | both it and this License would be to refrain entirely from distribution of the 198 | Program. 199 | 200 | If any portion of this section is held invalid or unenforceable under any particular 201 | circumstance, the balance of the section is intended to apply and the section as 202 | a whole is intended to apply in other circumstances. 203 | 204 | It is not the purpose of this section to induce you to infringe any patents or other 205 | property right claims or to contest validity of any such claims; this section has 206 | the sole purpose of protecting the integrity of the free software distribution 207 | system, which is implemented by public license practices. Many people have 208 | made generous contributions to the wide range of software distributed through 209 | that system in reliance on consistent application of that system; it is up to the 210 | author/donor to decide if he or she is willing to distribute software through any 211 | other system and a licensee cannot impose that choice. 212 | 213 | This section is intended to make thoroughly clear what is believed to be a 214 | consequence of the rest of this License. 215 | 216 | 8. If the distribution and/or use of the Program is restricted in certain countries 217 | either by patents or by copyrighted interfaces, the original copyright holder who 218 | places the Program under this License may add an explicit geographical 219 | distribution limitation excluding those countries, so that distribution is permitted 220 | only in or among countries not thus excluded. In such case, this License 221 | incorporates the limitation as if written in the body of this License. 222 | 223 | 9. The Free Software Foundation may publish revised and/or new versions of the 224 | General Public License from time to time. Such new versions will be similar in 225 | spirit to the present version, but may differ in detail to address new problems or 226 | concerns. 227 | 228 | Each version is given a distinguishing version number. If the Program specifies a 229 | version number of this License which applies to it and "any later version", you 230 | have the option of following the terms and conditions either of that version or of 231 | any later version published by the Free Software Foundation. If the Program does 232 | not specify a version number of this License, you may choose any version ever 233 | published by the Free Software Foundation. 234 | 235 | 10. If you wish to incorporate parts of the Program into other free programs 236 | whose distribution conditions are different, write to the author to ask for 237 | permission. For software which is copyrighted by the Free Software Foundation, 238 | write to the Free Software Foundation; we sometimes make exceptions for this. 239 | Our decision will be guided by the two goals of preserving the free status of all 240 | derivatives of our free software and of promoting the sharing and reuse of 241 | software generally. 242 | 243 | NO WARRANTY 244 | 245 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS 246 | NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 247 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE 248 | COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM 249 | "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR 250 | IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 251 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE 252 | ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 253 | PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, 254 | YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR 255 | CORRECTION. 256 | 257 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED 258 | TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY 259 | WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS 260 | PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 261 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES 262 | ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM 263 | (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 264 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 265 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY 266 | OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS 267 | BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 268 | 269 | END OF TERMS AND CONDITIONS 270 | 271 | 272 | --------------------------------------------------------------------------- 273 | 274 | The Artistic License 275 | 276 | Preamble 277 | 278 | The intent of this document is to state the conditions under which a Package 279 | may be copied, such that the Copyright Holder maintains some semblance of 280 | artistic control over the development of the package, while giving the users of the 281 | package the right to use and distribute the Package in a more-or-less customary 282 | fashion, plus the right to make reasonable modifications. 283 | 284 | Definitions: 285 | 286 | - "Package" refers to the collection of files distributed by the Copyright 287 | Holder, and derivatives of that collection of files created through textual 288 | modification. 289 | - "Standard Version" refers to such a Package if it has not been modified, 290 | or has been modified in accordance with the wishes of the Copyright 291 | Holder. 292 | - "Copyright Holder" is whoever is named in the copyright or copyrights for 293 | the package. 294 | - "You" is you, if you're thinking about copying or distributing this Package. 295 | - "Reasonable copying fee" is whatever you can justify on the basis of 296 | media cost, duplication charges, time of people involved, and so on. (You 297 | will not be required to justify it to the Copyright Holder, but only to the 298 | computing community at large as a market that must bear the fee.) 299 | - "Freely Available" means that no fee is charged for the item itself, though 300 | there may be fees involved in handling the item. It also means that 301 | recipients of the item may redistribute it under the same conditions they 302 | received it. 303 | 304 | 1. You may make and give away verbatim copies of the source form of the 305 | Standard Version of this Package without restriction, provided that you duplicate 306 | all of the original copyright notices and associated disclaimers. 307 | 308 | 2. You may apply bug fixes, portability fixes and other modifications derived from 309 | the Public Domain or from the Copyright Holder. A Package modified in such a 310 | way shall still be considered the Standard Version. 311 | 312 | 3. You may otherwise modify your copy of this Package in any way, provided 313 | that you insert a prominent notice in each changed file stating how and when 314 | you changed that file, and provided that you do at least ONE of the following: 315 | 316 | a) place your modifications in the Public Domain or otherwise 317 | make them Freely Available, such as by posting said modifications 318 | to Usenet or an equivalent medium, or placing the modifications on 319 | a major archive site such as ftp.uu.net, or by allowing the 320 | Copyright Holder to include your modifications in the Standard 321 | Version of the Package. 322 | 323 | b) use the modified Package only within your corporation or 324 | organization. 325 | 326 | c) rename any non-standard executables so the names do not 327 | conflict with standard executables, which must also be provided, 328 | and provide a separate manual page for each non-standard 329 | executable that clearly documents how it differs from the Standard 330 | Version. 331 | 332 | d) make other distribution arrangements with the Copyright Holder. 333 | 334 | 4. You may distribute the programs of this Package in object code or executable 335 | form, provided that you do at least ONE of the following: 336 | 337 | a) distribute a Standard Version of the executables and library 338 | files, together with instructions (in the manual page or equivalent) 339 | on where to get the Standard Version. 340 | 341 | b) accompany the distribution with the machine-readable source of 342 | the Package with your modifications. 343 | 344 | c) accompany any non-standard executables with their 345 | corresponding Standard Version executables, giving the 346 | non-standard executables non-standard names, and clearly 347 | documenting the differences in manual pages (or equivalent), 348 | together with instructions on where to get the Standard Version. 349 | 350 | d) make other distribution arrangements with the Copyright Holder. 351 | 352 | 5. You may charge a reasonable copying fee for any distribution of this Package. 353 | You may charge any fee you choose for support of this Package. You may not 354 | charge a fee for this Package itself. However, you may distribute this Package in 355 | aggregate with other (possibly commercial) programs as part of a larger 356 | (possibly commercial) software distribution provided that you do not advertise 357 | this Package as a product of your own. 358 | 359 | 6. The scripts and library files supplied as input to or produced as output from 360 | the programs of this Package do not automatically fall under the copyright of this 361 | Package, but belong to whomever generated them, and may be sold 362 | commercially, and may be aggregated with this Package. 363 | 364 | 7. C or perl subroutines supplied by you and linked into this Package shall not 365 | be considered part of this Package. 366 | 367 | 8. Aggregation of this Package with a commercial distribution is always permitted 368 | provided that the use of this Package is embedded; that is, when no overt attempt 369 | is made to make this Package's interfaces visible to the end user of the 370 | commercial distribution. Such use shall not be construed as a distribution of 371 | this Package. 372 | 373 | 9. The name of the Copyright Holder may not be used to endorse or promote 374 | products derived from this software without specific prior written permission. 375 | 376 | 10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR 377 | IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 378 | WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR 379 | PURPOSE. 380 | 381 | The End 382 | 383 | 384 | -------------------------------------------------------------------------------- /lib/IMDB/Film.pm: -------------------------------------------------------------------------------- 1 | =head1 NAME 2 | 3 | IMDB::Film - OO Perl interface to the movies database IMDB. 4 | 5 | =head1 SYNOPSIS 6 | 7 | use IMDB::Film; 8 | 9 | # 10 | # Retrieve a movie information by its IMDB code 11 | # 12 | my $imdbObj = new IMDB::Film(crit => 227445); 13 | 14 | or 15 | 16 | # 17 | # Retrieve a movie information by its title 18 | # 19 | my $imdbObj = new IMDB::Film(crit => 'Troy'); 20 | 21 | or 22 | 23 | # 24 | # Parse already stored HTML page from IMDB 25 | # 26 | my $imdbObj = new IMDB::Film(crit => 'troy.html'); 27 | 28 | if($imdbObj->status) { 29 | print "Title: ".$imdbObj->title()."\n"; 30 | print "Year: ".$imdbObj->year()."\n"; 31 | print "Plot Symmary: ".$imdbObj->plot()."\n"; 32 | } else { 33 | print "Something wrong: ".$imdbObj->error; 34 | } 35 | 36 | =head1 DESCRIPTION 37 | 38 | =head2 Overview 39 | 40 | IMDB::Film is an object-oriented interface to the IMDB. 41 | You can use that module to retrieve information about film: 42 | title, year, plot etc. 43 | 44 | =cut 45 | 46 | package IMDB::Film; 47 | 48 | use strict; 49 | use warnings; 50 | 51 | use base qw(IMDB::BaseClass); 52 | 53 | use Carp; 54 | use Data::Dumper; 55 | 56 | use fields qw( _title 57 | _kind 58 | _year 59 | _episodes 60 | _episodeof 61 | _summary 62 | _cast 63 | _directors 64 | _writers 65 | _cover 66 | _language 67 | _country 68 | _top_info 69 | _rating 70 | _genres 71 | _tagline 72 | _plot 73 | _also_known_as 74 | _certifications 75 | _duration 76 | _full_plot 77 | _trivia 78 | _goofs 79 | _awards 80 | _official_sites 81 | _release_dates 82 | _aspect_ratio 83 | _mpaa_info 84 | _company 85 | _connections 86 | _full_companies 87 | _recommendation_movies 88 | _plot_keywords 89 | _big_cover_url 90 | _big_cover_page 91 | _storyline 92 | full_plot_url 93 | ); 94 | 95 | use vars qw( $VERSION %FIELDS %FILM_CERT %FILM_KIND $PLOT_URL ); 96 | 97 | use constant CLASS_NAME => 'IMDB::Film'; 98 | use constant FORCED => 1; 99 | use constant USE_CACHE => 1; 100 | use constant DEBUG_MOD => 1; 101 | use constant EMPTY_OBJECT => 0; 102 | use constant MAIN_TAG => 'h4'; 103 | 104 | BEGIN { 105 | $VERSION = '0.51'; 106 | 107 | # Convert age gradation to the digits 108 | # TODO: Store this info into constant file 109 | %FILM_CERT = ( G => 'All', 110 | R => 16, 111 | 'NC-17' => 16, 112 | PG => 13, 113 | 'PG-13' => 13 114 | ); 115 | 116 | %FILM_KIND = ( '' => 'movie', 117 | TV => 'tv movie', 118 | V => 'video movie', 119 | mini => 'tv mini series', 120 | VG => 'video game', 121 | S => 'tv series', 122 | E => 'episode' 123 | ); 124 | } 125 | 126 | { 127 | my %_defaults = ( 128 | cache => 0, 129 | debug => 0, 130 | error => [], 131 | cache_exp => '1 h', 132 | cache_root => '/tmp', 133 | matched => [], 134 | host => 'www.imdb.com', 135 | query => 'title/tt', 136 | search => 'find?s=tt;q=', 137 | status => 0, 138 | timeout => 10, 139 | user_agent => 'Mozilla/5.0', 140 | decode_html => 1, 141 | full_plot_url => 'http://www.imdb.com/rg/title-tease/plotsummary/title/tt', 142 | _also_known_as => [], 143 | _official_sites => [], 144 | _release_dates => [], 145 | _duration => [], 146 | _top_info => [], 147 | _cast => [], 148 | ); 149 | 150 | sub _get_default_attrs { keys %_defaults } 151 | sub _get_default_value { 152 | my($self, $attr) = @_; 153 | $_defaults{$attr}; 154 | } 155 | } 156 | 157 | =head2 Constructor 158 | 159 | =over 4 160 | 161 | =item new() 162 | 163 | Object's constructor. You should pass as parameter movie title or IMDB code. 164 | 165 | my $imdb = new IMDB::Film(crit => ); 166 | 167 | or 168 | 169 | my $imdb = new IMDB::Film(crit => ); 170 | 171 | or 172 | my $imdb = new IMDB::Film(crit => ); 173 | 174 | For more infomation about base methods refer to IMDB::BaseClass. 175 | 176 | =item _init() 177 | 178 | Initialize object. 179 | 180 | =cut 181 | 182 | sub _init { 183 | my CLASS_NAME $self = shift; 184 | my %args = @_; 185 | 186 | croak "Film IMDB ID or Title should be defined!" if !$args{crit} && !$args{file}; 187 | 188 | $self->SUPER::_init(%args); 189 | 190 | $self->title(FORCED, \%args); 191 | 192 | unless($self->title) { 193 | $self->status(EMPTY_OBJECT); 194 | $self->error('Not Found'); 195 | return; 196 | } 197 | 198 | for my $prop (grep { /^_/ && 199 | !/^(_title|_code|_full_plot|_official_sites|_release_dates|_connections|_full_companies|_plot_keywords|_big_cover_url|_big_cover_page)$/ } sort keys %FIELDS) { 200 | ($prop) = $prop =~ /^_(.*)/; 201 | $self->$prop(FORCED); 202 | } 203 | } 204 | 205 | =back 206 | 207 | =head2 Options 208 | 209 | =over 4 210 | 211 | =item year 212 | 213 | Define a movie's year. It's useful to use it to get the proper movie by its title: 214 | 215 | my $imdbObj = new IMDB::Film(crit => 'Jack', year => 2003); 216 | print "Got #" . $imdbObj->code . " " . $imdbObj->title . "\n"; #0379836 217 | 218 | =item proxy 219 | 220 | defines proxy server name and port: 221 | 222 | proxy => 'http://proxy.myhost.com:80' 223 | 224 | By default object tries to get proxy from environment 225 | 226 | =item debug 227 | 228 | switches on debug mode to display useful debug messages. Can be 0 or 1 (0 by default) 229 | 230 | =item cache 231 | 232 | indicates use cache or not to store retrieved page content. Can be 0 or 1 (0 by default) 233 | 234 | =item cache_root 235 | 236 | specifies a directory to store cache data. By default it use /tmp/FileCache for *NIX OS 237 | 238 | =item cache_exp 239 | 240 | specifies an expiration time for cache. By default, it's 1 hour 241 | 242 | =item clear_cache 243 | 244 | indicates clear cached data before get request to IMDB.com or not 245 | 246 | =item timeout 247 | 248 | specifies a timeout for HTTP connection in seconds (10 sec by default) 249 | 250 | =item user_agent 251 | 252 | specifies an user agent for request ('Mozilla/5.0' by default) 253 | 254 | =item full_plot_url 255 | 256 | specifies a full plot url for specified movie 257 | 258 | =item host 259 | 260 | specifies a host name for IMDB site. By default it's www.imdb.com 261 | 262 | =item query 263 | 264 | specifies a query string to get specified movie by its ID. By defualt it's 'title/tt' 265 | 266 | =item search 267 | 268 | specifies query string to make a search movie by its title. By default it's 'find?tt=on;mx=20;q=' 269 | 270 | 271 | Example: 272 | 273 | my $imdb = new IMDB::Film( crit => 'Troy', 274 | user_agent => 'Opera/8.x', 275 | timeout => 2, 276 | debug => 1, 277 | cache => 1, 278 | cache_root => '/tmp/imdb_cache', 279 | cache_exp => '1 d', 280 | ); 281 | 282 | It'll create an object with critery 'Troy', user agent 'Opera', timeout 2 seconds, debug mode on, 283 | using cache with directory '/tmp/imdb_cache' and expiration time in 1 day. 284 | 285 | =cut 286 | 287 | sub full_plot_url { 288 | my CLASS_NAME $self = shift; 289 | if(@_) { $self->{full_plot_url} = shift } 290 | return $self->{full_plot_url} 291 | } 292 | 293 | sub fields { 294 | my CLASS_NAME $self = shift; 295 | return \%FIELDS; 296 | } 297 | 298 | =back 299 | 300 | =head2 Object Private Methods 301 | 302 | =over 4 303 | 304 | =item _search_film() 305 | 306 | Implemets functionality to search film by name. 307 | 308 | =cut 309 | 310 | sub _search_film { 311 | my CLASS_NAME $self = shift; 312 | my $args = shift || {}; 313 | 314 | return $self->SUPER::_search_results('^\/title\/tt(\d+)', '/td', $args->{year}); 315 | } 316 | 317 | =back 318 | 319 | =head2 Object Public Methods 320 | 321 | =over 4 322 | 323 | =item status() 324 | 325 | Indicates a status of IMDB object: 326 | 327 | 0 - empty object; 328 | 1 - loaded from file; 329 | 2 - loaded from internet request; 330 | 3 - loaded from cache. 331 | 332 | =item status_descr() 333 | 334 | Return a description for IMDB object status. Can be 'Empty', 'Filed', 'Fresh' and 'Cached': 335 | 336 | 337 | if($film->status) { 338 | print "This is a " . $film->status_descr . " object!"; 339 | } else { 340 | die "Cannot retrieve IMDB object!"; 341 | } 342 | 343 | =item title() 344 | 345 | Retrieve film title from film page. If was got search page instead 346 | of film page this method calls method _search_film to get list 347 | matched films and continue to process first one: 348 | 349 | my $title = $film->title(); 350 | 351 | =cut 352 | 353 | sub title { 354 | my CLASS_NAME $self = shift; 355 | my $forced = shift || 0; 356 | my $args = shift || {}; 357 | 358 | if($forced) { 359 | my $parser = $self->_parser(FORCED); 360 | 361 | $parser->get_tag('title'); 362 | my $title = $parser->get_text(); 363 | if($title =~ /imdb\s+title\s+search/i) { 364 | $self->_show_message("Go to search page ...", 'DEBUG'); 365 | $title = $self->_search_film($args); 366 | } 367 | 368 | if($title) { 369 | $self->retrieve_code($parser, 'http://www.imdb.com/title/tt(\d+)') unless $self->code; 370 | $title =~ s/\*/\\*/g; 371 | $title = $self->_decode_special_symbols($title); 372 | 373 | $self->_show_message("title: $title", 'DEBUG'); 374 | 375 | # TODO: implement parsing of TV series like ALF (TV Series 1986–1990) 376 | $title =~ s/^imdb\s+\-\s+//i; 377 | ($self->{_title}, $self->{_year}, $self->{_kind}) = $title =~ m!(.*?)\s+\((\d{4})(?:/[IVX]+)\)(?:\s\((\w*)\))?!; 378 | unless($self->{_title}) { 379 | ($self->{_title}, $self->{_kind}, $self->{_year}) = $title =~ m!(.*?)\s+\((.*?)?\s?([0-9\-]*\s?)\)!; 380 | } 381 | $self->{_kind} = 'Movie' unless $self->{_kind}; # Default kind should be movie 382 | 383 | # "The Series" An Episode (2005) 384 | # "The Series" (2005) 385 | if( $self->{_title} =~ /\"[^\"]+\"(\s+.+\s+)?/ ) { 386 | $self->{_kind} = $1 ? 'E' : 'S'; 387 | } 388 | } 389 | } 390 | 391 | return $self->{_title}; 392 | } 393 | 394 | =item kind() 395 | 396 | Get kind of movie: 397 | 398 | my $kind = $film->kind(); 399 | 400 | Possible values are: 'movie', 'tv series', 'tv mini series', 'video game', 'video movie', 'tv movie', 'episode'. 401 | 402 | =cut 403 | 404 | sub kind { 405 | my CLASS_NAME $self = shift; 406 | return exists $FILM_KIND{$self->{_kind}} ? $FILM_KIND{$self->{_kind}} : lc($self->{_kind}); 407 | } 408 | 409 | =item year() 410 | 411 | Get film year: 412 | 413 | my $year = $film->year(); 414 | 415 | =cut 416 | 417 | sub year { 418 | my CLASS_NAME $self = shift; 419 | return $self->{_year}; 420 | } 421 | 422 | =item connections() 423 | 424 | Retrieve connections for the movie as an arrays of hashes with folloeing structure 425 | 426 | { 427 | follows => [ { id => , title => , year => , ..., } ], 428 | followed_by => [ { id => , title => , year => , ..., } ], 429 | references => [ { id => , title => , year => , ..., } ], 430 | referenced_in => [ { id => , title => , year => , ..., } ], 431 | featured_in => [ { id => , title => , year => , ..., } ], 432 | spoofed_by => [ { id => , title => , year => , ..., } ], 433 | } 434 | 435 | my %connections = %{ $film->connections() }; 436 | 437 | =cut 438 | 439 | sub connections { 440 | my CLASS_NAME $self = shift; 441 | 442 | unless($self->{_connections}) { 443 | my $page; 444 | $page = $self->_cacheObj->get($self->code . '_connections') if $self->_cache; 445 | 446 | unless($page) { 447 | my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/movieconnections"; 448 | $self->_show_message("URL for movie connections is $url ...", 'DEBUG'); 449 | 450 | $page = $self->_get_page_from_internet($url); 451 | $self->_cacheObj->set($self->code.'_connections', $page, $self->_cache_exp) if $self->_cache; 452 | } 453 | 454 | my $parser = $self->_parser(FORCED, \$page); 455 | 456 | my $group = undef; 457 | my %result; 458 | my @lookFor = ('h5'); 459 | while (my $tag = $parser->get_tag(@lookFor)) { 460 | if ($tag->[0] eq 'h5') { 461 | $group = $parser->get_text; 462 | $group = lc($group); 463 | $group =~ s/\s+/_/g; 464 | $result{$group} = []; 465 | @lookFor = ('h5', 'a', 'hr', 'hr/'); 466 | } elsif ($tag->[0] eq 'a') { 467 | my($id) = $tag->[1]->{href} =~ /(\d+)/; 468 | my $name = $parser->get_trimmed_text; 469 | 470 | # Handle series episodes (usually in 'referenced' sections) 471 | my($series,$t,$s,$e) = ($name =~ /^"(.*?): *(.*?) *\(?#(\d+)\.(\d+)\)?"$/); 472 | $name = $series if defined $series; 473 | 474 | $tag = $parser->get_tag('/a'); 475 | my $next = $parser->get_trimmed_text(); 476 | my %film = ('id' => $id, 'title' => $name); 477 | if(defined $t) { 478 | $film{'series_title'} = $t; 479 | $film{'season'} = $s; 480 | $film{'episode'} = $e; 481 | } 482 | 483 | $film{'year'} = $1 if $next =~ /\((\d{4})\)/; 484 | next if ($next =~ /\(VG\)/); 485 | push @{$result{$group}}, \%film; 486 | } else { 487 | # Stop when we hit the divider 488 | last; 489 | } 490 | } 491 | 492 | $self->{_connections} = \%result; 493 | } 494 | 495 | return $self->{_connections}; 496 | } 497 | 498 | 499 | =item full_companies() 500 | 501 | Retrieve companies for the movie as an array where each item has following stucture: 502 | 503 | { 504 | production => [ { name => , url => , extra => } ], 505 | distributors => [ { name => , url => , extra => } ], 506 | special_effects => [ { name => , url => , extra => } ], 507 | other => [ { name => , url => , extra => } ], 508 | } 509 | 510 | my %full_companies = %{ $film->full_companies() }; 511 | 512 | =cut 513 | 514 | sub full_companies { 515 | my CLASS_NAME $self = shift; 516 | 517 | unless($self->{_full_companies}) { 518 | my $page; 519 | $page = $self->_cacheObj->get($self->code . '_full_companies') if $self->_cache; 520 | 521 | unless($page) { 522 | my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/companycredits"; 523 | $self->_show_message("URL for company credits is $url ...", 'DEBUG'); 524 | 525 | $page = $self->_get_page_from_internet($url); 526 | $self->_cacheObj->set($self->code.'_full_companies', $page, $self->_cache_exp) if $self->_cache; 527 | } 528 | 529 | my $parser = $self->_parser(FORCED, \$page); 530 | my $group = undef; 531 | my %result; 532 | my @lookFor = ('h2'); 533 | while (my $tag = $parser->get_tag(@lookFor)) { 534 | if ($tag->[0] eq 'h2') { 535 | $group = $parser->get_text; 536 | $group =~ s/ compan(y|ies)//i; 537 | $group =~ tr/A-Z/a-z/; 538 | $group =~ s/\s+/_/g; 539 | $result{$group} = []; 540 | @lookFor = ('h2', 'a', 'hr', 'hr/'); 541 | } elsif($tag->[0] eq 'a') { 542 | 543 | my($url) = $tag->[1]->{href}; 544 | my $name = $parser->get_trimmed_text; 545 | 546 | $tag = $parser->get_tag('/a'); 547 | my $next = $parser->get_trimmed_text(); 548 | $next =~ s/^[\t \xA0]+//; # nbsp comes out as \xA0 549 | my %company = ( 'url' => $url, 550 | 'name' => $name, 551 | 'extra' => $next ); 552 | push @{$result{$group}}, \%company; 553 | } else { 554 | # Stop when we hit the divider 555 | last; 556 | } 557 | } 558 | 559 | $self->{_full_companies} = \%result; 560 | } 561 | 562 | return $self->{_full_companies}; 563 | } 564 | 565 | =item company() 566 | 567 | Returns a list of companies given for a specified movie: 568 | 569 | my $company = $film->company(); 570 | 571 | or 572 | 573 | my @companies = $film->company(); 574 | 575 | =cut 576 | 577 | sub company { 578 | my CLASS_NAME $self = shift; 579 | 580 | unless($self->{_company}) { 581 | my @companies = split /\s?\,\s?/, $self->_get_simple_prop('Production Co'); 582 | $self->{_company} = \@companies; 583 | } 584 | 585 | return wantarray ? $self->{_company} : $self->{_company}[0]; 586 | } 587 | 588 | =item episodes() 589 | 590 | Retrieve episodes info list each element of which is hash reference for tv series - 591 | { id => , title => , season => <Season>, episode => <Episode>, date => <Date>, plot => <Plot> }: 592 | 593 | my @episodes = @{ $film->episodes() }; 594 | 595 | =cut 596 | 597 | sub episodes { 598 | my CLASS_NAME $self = shift; 599 | 600 | return if !$self->kind or $self->kind !~ /tv serie/i; 601 | 602 | unless($self->{_episodes}) { 603 | my $page; 604 | $page = $self->_cacheObj->get($self->code . '_episodes') if $self->_cache; 605 | 606 | unless($page) { 607 | my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/epcast"; 608 | $self->_show_message("URL for episodes is $url ...", 'DEBUG'); 609 | 610 | $page = $self->_get_page_from_internet($url); 611 | $self->_cacheObj->set($self->code.'_episodes', $page, $self->_cache_exp) if $self->_cache; 612 | } 613 | 614 | my $parser = $self->_parser(FORCED, \$page); 615 | while(my $tag = $parser->get_tag('h4')) { 616 | my $id; 617 | my($season, $episode); 618 | next unless(($season, $episode) = $parser->get_text =~ /Season\s+(.*?),\s+Episode\s+([^:]+)/); 619 | my $imdb_tag = $parser->get_tag('a'); 620 | ($id) = $imdb_tag->[1]->{href} =~ /(\d+)/ if $imdb_tag->[1]->{href}; 621 | my $title = $parser->get_trimmed_text; 622 | $parser->get_tag('b'); 623 | my($date) = $parser->get_trimmed_text; 624 | $parser->get_tag('br'); 625 | my $plot = $parser->get_trimmed_text; 626 | 627 | push @{ $self->{_episodes} }, { 628 | season => $season, 629 | episode => $episode, 630 | id => $id, 631 | title => $title, 632 | date => $date, 633 | plot => $plot 634 | }; 635 | } 636 | } 637 | 638 | return $self->{_episodes}; 639 | } 640 | 641 | =item episodeof() 642 | 643 | Retrieve parent tv series list each element of which is hash reference for episode - 644 | { id => <ID>, title => <Title>, year => <Year> }: 645 | 646 | my @tvseries = @{ $film->episodeof() }; 647 | 648 | =cut 649 | 650 | sub episodeof { 651 | my CLASS_NAME $self = shift; 652 | my $forced = shift || 0; 653 | 654 | return if !$self->kind or $self->kind ne "episode"; 655 | 656 | if($forced) { 657 | my($episodeof, $title, $year, $episode, $season, $id); 658 | my($parser) = $self->_parser(FORCED); 659 | 660 | while($parser->get_tag(MAIN_TAG)) { 661 | last if $parser->get_text =~ /^TV Series/i; 662 | } 663 | 664 | while(my $tag = $parser->get_tag('a')) { 665 | ($title, $year) = ($1, $2) if $parser->get_text =~ m!(.*?)\s+\(([\d\?]{4}).*?\)!; 666 | last unless $tag->[1]{href} =~ /title/i; 667 | ($id) = $tag->[1]{href} =~ /(\d+)/; 668 | } 669 | 670 | #start again 671 | $parser = $self->_parser(FORCED); 672 | while($parser->get_tag(MAIN_TAG)) { 673 | last if $parser->get_text =~ /^Original Air Date/i; 674 | } 675 | 676 | $parser->get_token; 677 | ($season, $episode) = $parser->get_text =~ /\(Season\s+(\d+),\s+Episode\s+(\d+)/; 678 | 679 | push @{ $self->{_episodeof} }, {title => $title, year => $year, id => $id, season => $season, episode => $episode}; 680 | } 681 | 682 | return $self->{_episodeof}; 683 | } 684 | 685 | =item cover() 686 | 687 | Retrieve url of film cover: 688 | 689 | my $cover = $film->cover(); 690 | 691 | =cut 692 | 693 | sub cover { 694 | my CLASS_NAME $self = shift; 695 | my $forced = shift || 0; 696 | 697 | if($forced) { 698 | my $parser = $self->_parser(FORCED); 699 | my $cover; 700 | 701 | my $title = quotemeta($self->title); 702 | while(my $img_tag = $parser->get_tag('img')) { 703 | $img_tag->[1]{alt} ||= ''; 704 | 705 | last if $img_tag->[1]{alt} =~ /^poster not submitted/i; 706 | 707 | if($self->_decode_special_symbols($img_tag->[1]{alt}) =~ /^($title Poster|Add a poster for $title)$/i) { 708 | $cover = $img_tag->[1]{src}; 709 | last; 710 | } 711 | } 712 | $self->{_cover} = $cover; 713 | } 714 | 715 | return $self->{_cover}; 716 | } 717 | 718 | sub top_info { 719 | my CLASS_NAME $self = shift; 720 | my $forced = shift || 0; 721 | if($forced or !$self->{'_top_info'}) { 722 | my $parser = $self->_parser(FORCED); 723 | while(my $tag = $parser->get_tag('div')) { 724 | last if $tag->[1]->{class} && $tag->[1]->{class} eq 'article highlighted'; 725 | } 726 | my $text = $parser->get_trimmed_text('span'); 727 | my @top_items = split /\s?\|\s?/, $text; 728 | $self->{_top_info} = \@top_items; 729 | } 730 | return $self->{_top_info}; 731 | } 732 | 733 | =item recommendation_movies() 734 | 735 | Return a list of recommended movies for specified one as a hash where each key is a movie ID in IMDB and 736 | value - movie's title: 737 | 738 | $recommendation_movies = $film->recommendation_movies(); 739 | 740 | For example, the list of recommended movies for Troy will be similar to that: 741 | 742 | __DATA__ 743 | $VAR1 = { 744 | '0416449' => '300', 745 | '0167260' => 'The Lord of the Rings: The Return of the King', 746 | '0442933' => 'Beowulf', 747 | '0320661' => 'Kingdom of Heaven', 748 | '0172495' => 'Gladiator' 749 | }; 750 | 751 | =cut 752 | 753 | sub recommendation_movies { 754 | my CLASS_NAME $self = shift; 755 | my $forced = shift || 0; 756 | 757 | if($forced) { 758 | my $parser = $self->_parser(FORCED); 759 | 760 | while(my $tag = $parser->get_tag('h2')) { 761 | my $text = $parser->get_text(); 762 | last if $text =~ /recommendations/i; 763 | } 764 | 765 | my %result = (); 766 | while(my $tag = $parser->get_tag()) { 767 | last if $tag->[0] eq '/table'; 768 | 769 | my $text = $parser->get_text(); 770 | if($tag->[0] eq 'a' && $text && $tag->[1]{href} =~ /tt(\d+)/) { 771 | $result{$1} = $text; 772 | } 773 | } 774 | 775 | $self->{_recommendation_movies} = \%result; 776 | } 777 | 778 | return $self->{_recommendation_movies}; 779 | } 780 | 781 | =item directors() 782 | 783 | Retrieve film directors list each element of which is hash reference - 784 | { id => <ID>, name => <Name> }: 785 | 786 | my @directors = @{ $film->directors() }; 787 | 788 | =cut 789 | 790 | sub directors { 791 | my CLASS_NAME $self = shift; 792 | my $forced = shift || 0; 793 | 794 | if($forced) { 795 | my ($parser) = $self->_parser(FORCED); 796 | my (@directors, $tag); 797 | 798 | while($tag = $parser->get_tag(MAIN_TAG)) { 799 | my $text = $parser->get_text; 800 | last if $text =~ /direct(?:ed|or)/i; 801 | } 802 | 803 | while ($tag = $parser->get_tag() ) { 804 | my $text = $parser->get_text(); 805 | 806 | last if $text =~ /^writ(?:ing|ers)/i or $tag->[0] eq '/div'; 807 | 808 | if($tag->[0] eq 'a' && $tag->[1]{href} && $text !~ /(img|more)/i) { 809 | my($id) = $tag->[1]{href} =~ /(\d+)/; 810 | push @directors, {id => $id, name => $text}; 811 | } 812 | } 813 | 814 | $self->{_directors} = \@directors; 815 | } 816 | 817 | return $self->{_directors}; 818 | } 819 | 820 | =item writers() 821 | 822 | Retrieve film writers list each element of which is hash reference - 823 | { id => <ID>, name => <Name> }: 824 | 825 | my @writers = @{ $film->writers() }; 826 | 827 | <I>Note: this method returns Writing credits from movie main page. It maybe not 828 | contain a full list!</I> 829 | 830 | =cut 831 | 832 | sub writers { 833 | my CLASS_NAME $self = shift; 834 | my $forced = shift || 0; 835 | 836 | if($forced) { 837 | my ($parser) = $self->_parser(FORCED); 838 | my (@writers, $tag); 839 | 840 | while($tag = $parser->get_tag(MAIN_TAG)) { 841 | last if $parser->get_text =~ /writ(?:ing|ers|er)/i; 842 | } 843 | 844 | while($tag = $parser->get_tag()) { 845 | my $text = $parser->get_text(); 846 | last if $tag->[0] eq '/div'; 847 | 848 | if($tag->[0] eq 'a' && $tag->[1]{href} && $text !~ /more/i && $text !~ /img/i) { 849 | if(my($id) = $tag->[1]{href} =~ /nm(\d+)/) { 850 | push @writers, {id => $id, name => $text}; 851 | } 852 | } 853 | } 854 | 855 | $self->{_writers} = \@writers; 856 | } 857 | 858 | return $self->{_writers}; 859 | } 860 | 861 | =item genres() 862 | 863 | Retrieve film genres list: 864 | 865 | my @genres = @{ $film->genres() }; 866 | 867 | =cut 868 | 869 | sub genres { 870 | my CLASS_NAME $self = shift; 871 | my $forced = shift || 0; 872 | 873 | if($forced) { 874 | my ($parser) = $self->_parser(FORCED); 875 | my (@genres); 876 | 877 | while(my $tag = $parser->get_tag(MAIN_TAG)) { 878 | last if $parser->get_text =~ /^genre/i; 879 | } 880 | 881 | while(my $tag = $parser->get_tag('a')) { 882 | my $genre = $parser->get_text; 883 | last unless $tag->[1]{href} =~ m!/genre/!i; 884 | last if $genre =~ /more/i; 885 | push @genres, $genre; 886 | } 887 | 888 | $self->{_genres} = \@genres; 889 | } 890 | 891 | return $self->{_genres}; 892 | } 893 | 894 | =item tagline() 895 | 896 | Retrieve film tagline: 897 | 898 | my $tagline = $film->tagline(); 899 | 900 | =cut 901 | 902 | sub tagline { 903 | my CLASS_NAME $self = shift; 904 | my $forced = shift || 0; 905 | 906 | if($forced) { 907 | my ($parser) = $self->_parser(FORCED); 908 | 909 | while(my $tag = $parser->get_tag(MAIN_TAG)) { 910 | last if($parser->get_text =~ /tagline/i); 911 | } 912 | 913 | $self->{_tagline} = $parser->get_trimmed_text(MAIN_TAG, 'a'); 914 | } 915 | 916 | return $self->{_tagline}; 917 | } 918 | 919 | =item plot() 920 | 921 | Returns a movie plot: 922 | 923 | my $plot = $film->plot; 924 | 925 | =cut 926 | 927 | sub plot { 928 | my CLASS_NAME $self = shift; 929 | 930 | return $self->{_plot}; 931 | } 932 | 933 | =item storyline() 934 | 935 | Retrieve film plot summary: 936 | 937 | my $storyline = $film->storyline(); 938 | 939 | =cut 940 | 941 | sub storyline { 942 | my CLASS_NAME $self = shift; 943 | my $forced = shift || 0; 944 | 945 | if($forced) { 946 | my $parser = $self->_parser(FORCED); 947 | 948 | while(my $tag = $parser->get_tag('h2')) { 949 | last if $parser->get_text =~ /^storyline$/i; 950 | } 951 | 952 | my $plot = $parser->get_trimmed_text(MAIN_TAG, 'em'); 953 | $self->{_storyline} = $self->_decode_special_symbols($plot); 954 | } 955 | 956 | return $self->{_storyline}; 957 | } 958 | 959 | =item rating() 960 | 961 | In scalar context returns film user rating, in array context returns 962 | film rating, number of votes and info about place in TOP 250 or some other TOP and avards: 963 | 964 | my $rating = $film->rating(); 965 | 966 | or 967 | 968 | my($rating, $vnum, $avards) = $film->rating(); 969 | print "RATING: $rating ($vnum votes)"; 970 | 971 | Note, that $avards is array reference where the first elemen is a TOP info if so, and the next element is other avards - Oscar, nominations and etc 972 | 973 | =cut 974 | 975 | sub rating { 976 | my CLASS_NAME $self = shift; 977 | my ($forced) = shift || 0; 978 | 979 | if($forced) { 980 | my $parser = $self->_parser(FORCED); 981 | 982 | while(my $tag = $parser->get_tag('div')) { 983 | last if $tag->[1]{class} && $tag->[1]{class} eq 'star-box-details'; 984 | } 985 | 986 | my $text = $parser->get_trimmed_text('/a'); 987 | 988 | my($rating, $val) = $text =~ m!(\d+\.?\d*)/10.*?(\d+,?\d*)!; 989 | $val =~ s/\,// if $val; 990 | 991 | $self->{_rating} = [$rating, $val, $self->top_info]; 992 | 993 | unless($self->{_plot}) { 994 | my $tag = $parser->get_tag('p'); 995 | my $text = $parser->get_trimmed_text('/p'); 996 | $self->{_plot} = $text; 997 | } 998 | } 999 | 1000 | return wantarray ? @{ $self->{_rating} } : $self->{_rating}[0]; 1001 | } 1002 | 1003 | =item cast() 1004 | 1005 | Retrieve film cast list each element of which is hash reference - 1006 | { id => <ID>, name => <Full Name>, role => <Role> }: 1007 | 1008 | my @cast = @{ $film->cast() }; 1009 | 1010 | <I> 1011 | Note: this method retrieves a cast list first billed only! 1012 | </I> 1013 | 1014 | =cut 1015 | 1016 | sub cast { 1017 | my CLASS_NAME $self = shift; 1018 | my ($forced) = shift || 0; 1019 | 1020 | if($forced) { 1021 | my (@cast, $tag, $person, $id, $role); 1022 | my $parser = $self->_parser(FORCED); 1023 | 1024 | while($tag = $parser->get_tag('table')) { 1025 | last if $tag->[1]->{class} && $tag->[1]->{class} =~ /^cast_list$/i; 1026 | } 1027 | while($tag = $parser->get_tag()) { 1028 | last if $tag->[0] eq 'a' && $tag->[1]{href} && $tag->[1]{href} =~ /fullcredits/i; 1029 | if($tag->[0] eq 'td' && $tag->[1]{class} && $tag->[1]{class} eq 'name') { 1030 | $tag = $parser->get_tag('a'); 1031 | if($tag->[1]{href} && $tag->[1]{href} =~ m#name/nm(\d+?)/#) { 1032 | $person = $parser->get_text; 1033 | $id = $1; 1034 | my $text = $parser->get_trimmed_text('/tr'); 1035 | ($role) = $text =~ /.*?\s+(.*)$/; 1036 | push @cast, {id => $id, name => $person, role => $role}; 1037 | } 1038 | } 1039 | } 1040 | 1041 | $self->{_cast} = \@cast; 1042 | } 1043 | 1044 | return $self->{_cast}; 1045 | } 1046 | 1047 | =item duration() 1048 | 1049 | In the scalar context it retrieves a film duration in minutes (the first record): 1050 | 1051 | my $duration = $film->duration(); 1052 | 1053 | In array context it retrieves all movie's durations: 1054 | 1055 | my @durations = $film->duration(); 1056 | 1057 | =cut 1058 | 1059 | sub duration { 1060 | my CLASS_NAME $self = shift; 1061 | my $forced = shift || 0; 1062 | 1063 | if($forced) { 1064 | 1065 | my $parser = $self->_parser(FORCED); 1066 | while(my $tag = $parser->get_tag(MAIN_TAG)) { 1067 | my $text = $parser->get_text(); 1068 | last if $text =~ /runtime:/i; 1069 | } 1070 | my $duration_str = $self->_decode_special_symbols($parser->get_trimmed_text(MAIN_TAG, '/div')); 1071 | my @runtime = split /\s+(\/|\|)\s+/, $duration_str; 1072 | 1073 | $self->{_duration} = \@runtime; 1074 | } 1075 | 1076 | return wantarray ? @{ $self->{_duration} } : $self->{_duration}->[0]; 1077 | } 1078 | 1079 | =item country() 1080 | 1081 | Retrieve film produced countries list: 1082 | 1083 | my $countries = $film->country(); 1084 | 1085 | =cut 1086 | 1087 | sub country { 1088 | my CLASS_NAME $self = shift; 1089 | my $forced = shift || 0; 1090 | 1091 | if($forced) { 1092 | my $parser = $self->_parser(FORCED); 1093 | while (my $tag = $parser->get_tag(MAIN_TAG)) { 1094 | last if $parser->get_text =~ /country/i; 1095 | } 1096 | 1097 | my (@countries); 1098 | while(my $tag = $parser->get_tag()) { 1099 | 1100 | if( $tag->[0] eq 'a' && $tag->[1]{href} && $tag->[1]{href} =~ m!/country/[a-z]{2}!i ) { 1101 | my $text = $parser->get_text(); 1102 | $text =~ s/\n//g; 1103 | push @countries, $text; 1104 | } 1105 | 1106 | last if $tag->[0] eq 'br'; 1107 | } 1108 | 1109 | $self->{_country} = \@countries; 1110 | } 1111 | 1112 | return $self->{_country} 1113 | } 1114 | 1115 | =item language() 1116 | 1117 | Retrieve film languages list: 1118 | 1119 | my $languages = $film->language(); 1120 | 1121 | =cut 1122 | 1123 | sub language { 1124 | my CLASS_NAME $self = shift; 1125 | my $forced = shift || 0; 1126 | 1127 | if($forced) { 1128 | my (@languages, $tag); 1129 | my $parser = $self->_parser(FORCED); 1130 | while ($tag = $parser->get_tag(MAIN_TAG)) { 1131 | last if $parser->get_text =~ /language/i; 1132 | } 1133 | 1134 | while($tag = $parser->get_tag()) { 1135 | if( $tag->[0] eq 'a' && $tag->[1]{href} && $tag->[1]{href} =~ m!/language/[a-z]{2}!i ) { 1136 | my $text = $parser->get_text(); 1137 | $text =~ s/\n//g; 1138 | push @languages, $text; 1139 | } 1140 | 1141 | last if $tag->[0] eq '/div'; 1142 | } 1143 | 1144 | $self->{_language} = \@languages; 1145 | } 1146 | 1147 | return $self->{_language}; 1148 | 1149 | } 1150 | 1151 | =item also_known_as() 1152 | 1153 | Retrieve AKA information as array, each element of which is string: 1154 | 1155 | my $aka = $film->also_known_as(); 1156 | 1157 | print map { "$_\n" } @$aka; 1158 | 1159 | =cut 1160 | 1161 | sub also_known_as { 1162 | my CLASS_NAME $self= shift; 1163 | unless($self->{_also_known_as}) { 1164 | my $parser = $self->_parser(FORCED); 1165 | 1166 | while(my $tag = $parser->get_tag(MAIN_TAG)) { 1167 | my $text = $parser->get_text(); 1168 | $self->_show_message("AKA: $text", 'DEBUG'); 1169 | last if $text =~ /^(aka|also known as)/i; 1170 | } 1171 | 1172 | my $aka = $parser->get_trimmed_text('span'); 1173 | 1174 | $self->_show_message("AKA: $aka", 'DEBUG'); 1175 | my @aka = ($aka); 1176 | $self->{_also_known_as} = \@aka; 1177 | } 1178 | 1179 | return $self->{_also_known_as}; 1180 | } 1181 | 1182 | =item trivia() 1183 | 1184 | Retrieve a movie trivia: 1185 | 1186 | my $trivia = $film->trivia(); 1187 | 1188 | =cut 1189 | 1190 | sub trivia { 1191 | my CLASS_NAME $self = shift; 1192 | 1193 | $self->{_trivia} = $self->_get_simple_prop('trivia') unless $self->{_trivia}; 1194 | return $self->{_trivia}; 1195 | } 1196 | 1197 | =item goofs() 1198 | 1199 | Retrieve a movie goofs: 1200 | 1201 | my $goofs = $film->goofs(); 1202 | 1203 | =cut 1204 | 1205 | sub goofs { 1206 | my CLASS_NAME $self = shift; 1207 | 1208 | $self->{_goofs} = $self->_get_simple_prop('goofs') unless($self->{_goofs}); 1209 | return $self->{_goofs}; 1210 | } 1211 | 1212 | =item awards() 1213 | 1214 | Retrieve a general information about movie awards like 1 win & 1 nomination: 1215 | 1216 | my $awards = $film->awards(); 1217 | 1218 | =cut 1219 | 1220 | sub awards { 1221 | my CLASS_NAME $self = shift; 1222 | 1223 | return $self->{_top_info}; 1224 | } 1225 | 1226 | =item mpaa_info() 1227 | 1228 | Return a MPAA for the specified move: 1229 | 1230 | my mpaa = $film->mpaa_info(); 1231 | 1232 | =cut 1233 | 1234 | sub mpaa_info { 1235 | my CLASS_NAME $self = shift; 1236 | unless($self->{_mpaa_info}) { 1237 | 1238 | my $parser = $self->_parser(FORCED); 1239 | 1240 | while(my $tag = $parser->get_tag(MAIN_TAG)) { 1241 | my $text = $parser->get_trimmed_text(MAIN_TAG, '/a'); 1242 | last if $text =~ /^Motion Picture Rating/i; 1243 | } 1244 | 1245 | my $mpaa = $parser->get_trimmed_text('/span'); 1246 | $mpaa =~ s/^\)\s//; 1247 | $self->{_mpaa_info} = $mpaa; 1248 | } 1249 | 1250 | return $self->{_mpaa_info}; 1251 | } 1252 | 1253 | =item aspect_ratio() 1254 | 1255 | Returns an aspect ratio of specified movie: 1256 | 1257 | my $aspect_ratio = $film->aspect_ratio(); 1258 | 1259 | =cut 1260 | 1261 | sub aspect_ratio { 1262 | my CLASS_NAME $self = shift; 1263 | 1264 | $self->{_aspect_ratio} = $self->_get_simple_prop('aspect ratio') unless $self->{_aspect_ratio}; 1265 | 1266 | return $self->{_aspect_ratio}; 1267 | } 1268 | 1269 | =item summary() 1270 | 1271 | Retrieve film user summary: 1272 | 1273 | my $descr = $film->summary(); 1274 | 1275 | =cut 1276 | 1277 | sub summary { 1278 | my CLASS_NAME $self = shift; 1279 | my $forced = shift || 0; 1280 | 1281 | if($forced) { 1282 | my($tag, $text); 1283 | my($parser) = $self->_parser(FORCED); 1284 | 1285 | while($tag = $parser->get_tag('b')) { 1286 | $text = $parser->get_text(); 1287 | last if $text =~ /^summary/i; 1288 | } 1289 | 1290 | $text = $parser->get_text('b', 'a'); 1291 | $self->{_summary} = $text; 1292 | } 1293 | 1294 | return $self->{_summary}; 1295 | } 1296 | 1297 | =item certifications() 1298 | 1299 | Retrieve list of film certifications each element of which is hash reference - 1300 | { country => certificate }: 1301 | 1302 | my @cert = $film->certifications(); 1303 | 1304 | =cut 1305 | 1306 | sub certifications { 1307 | my CLASS_NAME $self = shift; 1308 | my $forced = shift || 0; 1309 | my (%cert_list, $tag); 1310 | 1311 | if($forced) { 1312 | my $parser = $self->_parser(FORCED); 1313 | while($tag = $parser->get_tag(MAIN_TAG)) { 1314 | last if $parser->get_text =~ /certification/i; 1315 | } 1316 | 1317 | while($tag = $parser->get_tag()) { 1318 | 1319 | if($tag->[0] eq 'a' && $tag->[1]{href} && $tag->[1]{href} =~ /certificates/i) { 1320 | my $text = $parser->get_text(); 1321 | $text =~ s/\n//g; 1322 | my($country, $range) = split /\:/, $text; 1323 | $cert_list{$country} = $range; 1324 | } 1325 | 1326 | last if $tag->[0] eq '/td'; 1327 | } 1328 | 1329 | $self->{_certifications} = \%cert_list; 1330 | } 1331 | 1332 | return $self->{_certifications}; 1333 | } 1334 | 1335 | =item full_plot 1336 | 1337 | Return full movie plot. 1338 | 1339 | my $full_plot = $film->full_plot(); 1340 | 1341 | =cut 1342 | 1343 | sub full_plot { 1344 | my CLASS_NAME $self = shift; 1345 | 1346 | $self->_show_message("Getting full plot ".$self->code."; url=".$self->full_plot_url." ...", 'DEBUG'); 1347 | # 1348 | # TODO: move all methods which needed additional connection to the IMDB.com 1349 | # to the separate module. 1350 | # 1351 | unless($self->{_full_plot}) { 1352 | my $page; 1353 | $page = $self->_cacheObj->get($self->code.'_plot') if $self->_cache; 1354 | unless($page) { 1355 | my $url = $self->full_plot_url . $self->code() . '/plotsummary'; 1356 | 1357 | $self->_show_message("URL is $url ...", 'DEBUG'); 1358 | 1359 | $page = $self->_get_page_from_internet($url); 1360 | unless($page) { 1361 | return; 1362 | } 1363 | 1364 | $self->_cacheObj->set($self->code.'_plot', $page, $self->_cache_exp) if $self->_cache; 1365 | } 1366 | 1367 | my $parser = $self->_parser(FORCED, \$page); 1368 | 1369 | my($text); 1370 | while(my $tag = $parser->get_tag('p')) { 1371 | if(defined $tag->[1]{class} && $tag->[1]{class} =~ /plotpar/i) { 1372 | $text = $parser->get_trimmed_text(); 1373 | last; 1374 | } 1375 | } 1376 | 1377 | $self->{_full_plot} = $text; 1378 | } 1379 | 1380 | return $self->{_full_plot}; 1381 | } 1382 | 1383 | sub big_cover { 1384 | my CLASS_NAME $self = shift; 1385 | 1386 | unless($self->{'_big_cover_url'}) { 1387 | unless($self->{'_big_cover_page'}) { 1388 | my $parser = $self->_parser(FORCED); 1389 | my $regexp = '^/media/.+/tt' . $self->code . '$'; 1390 | while(my $tag = $parser->get_tag('a')) { 1391 | $self->_show_message("$regexp --> " . $tag->[1]->{href}, 'DEBUG'); 1392 | if($tag->[1]->{'href'} =~ m!$regexp!) { 1393 | $self->{'_big_cover_page'} = $tag->[1]->{'href'}; 1394 | last; 1395 | } 1396 | } 1397 | } 1398 | if($self->{'_big_cover_page'}) { 1399 | my $page = $self->_get_page_from_internet('http://' . $self->{'host'} . $self->{'_big_cover_page'}); 1400 | return unless $page; 1401 | 1402 | my $parser = $self->_parser(FORCED, \$page); 1403 | while(my $tag = $parser->get_tag('img')) { 1404 | if($tag->[1]->{'id'} && $tag->[1]->{'id'} eq 'primary-img') { 1405 | $self->{'_big_cover_url'} = $tag->[1]->{'src'}; 1406 | last; 1407 | } 1408 | } 1409 | } 1410 | } 1411 | 1412 | return $self->{_big_cover_url}; 1413 | } 1414 | 1415 | =item official_sites() 1416 | 1417 | Returns a list of official sites of specified movie as array reference which contains hashes 1418 | with site information - URL => Site Title: 1419 | 1420 | my $sites = $film->official_sites(); 1421 | for(@$sites) { 1422 | print "Site name - $_->{title}; url - $_->{url}\n"; 1423 | } 1424 | 1425 | =cut 1426 | 1427 | sub official_sites { 1428 | my CLASS_NAME $self = shift; 1429 | 1430 | unless($self->{_official_sites}) { 1431 | my $page; 1432 | $page = $self->_cacheObj->get($self->code . '_sites') if $self->_cache; 1433 | 1434 | unless($page) { 1435 | my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/officialsites"; 1436 | $self->_show_message("URL for sites is $url ...", 'DEBUG'); 1437 | 1438 | $page = $self->_get_page_from_internet($url); 1439 | 1440 | $self->_cacheObj->set($self->code.'_sites', $page, $self->_cache_exp) if $self->_cache; 1441 | } 1442 | 1443 | 1444 | my $parser = $self->_parser(FORCED, \$page); 1445 | while(my $tag = $parser->get_tag()) { 1446 | last if $tag->[0] eq 'ol'; 1447 | } 1448 | 1449 | while(my $tag = $parser->get_tag()) { 1450 | my $text = $parser->get_text(); 1451 | if($tag->[0] eq 'a' && $tag->[1]->{href} !~ /sections/i) { 1452 | push @{ $self->{_official_sites} }, { $tag->[1]->{href} => $text }; 1453 | } 1454 | 1455 | last if $tag->[0] eq '/ol' or $tag->[0] eq 'hr'; 1456 | } 1457 | } 1458 | 1459 | return $self->{_official_sites}; 1460 | } 1461 | 1462 | =item release_dates() 1463 | 1464 | Returns a list of release dates of specified movie as array reference: 1465 | 1466 | my $sites = $film->release_dates(); 1467 | for(@$sites) { 1468 | print "Country - $_->{country}; release date - $_->{date}; info - $_->{info}\n"; 1469 | } 1470 | 1471 | Option info contains additional information about release - DVD premiere, re-release, restored version etc 1472 | 1473 | =cut 1474 | 1475 | sub release_dates { 1476 | my CLASS_NAME $self = shift; 1477 | 1478 | unless($self->{_release_dates}) { 1479 | my $page; 1480 | $page = $self->_cacheObj->get($self->code . '_dates') if $self->_cache; 1481 | 1482 | unless($page) { 1483 | my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/releaseinfo"; 1484 | $self->_show_message("URL for sites is $url ...", 'DEBUG'); 1485 | 1486 | $page = $self->_get_page_from_internet($url); 1487 | $self->_cacheObj->set($self->code.'_dates', $page, $self->_cache_exp) if $self->_cache; 1488 | } 1489 | 1490 | my $parser = $self->_parser(FORCED, \$page); 1491 | # Searching header of release dates table 1492 | while(my $tag = $parser->get_tag('th')) { 1493 | last if $tag->[1]->{class} && $tag->[1]->{class} eq 'xxxx'; 1494 | } 1495 | 1496 | # 1497 | # The table has three columns. So we parse then one by one and grab their text 1498 | # 1499 | my $count = 0; 1500 | my @dates = (); 1501 | while(my $tag = $parser->get_tag()) { 1502 | last if $tag->[0] eq '/table'; 1503 | next unless $tag->[0] eq 'td'; 1504 | 1505 | $dates[$count] = $parser->get_trimmed_text('/td'); 1506 | 1507 | # When rish 3rd column we should store dates into object property 1508 | if(++$count > 2) { 1509 | $dates[2] =~ s/\)\s\(/, /g; 1510 | $dates[2] =~ s/(\(|\))//g; 1511 | push @{ $self->{_release_dates} }, {country => $dates[0], date => $dates[1], info => $dates[2]}; 1512 | $count = 0; 1513 | } 1514 | } 1515 | } 1516 | 1517 | return $self->{_release_dates}; 1518 | } 1519 | =item 1520 | 1521 | Retrieve a list of plot keywords as an array reference: 1522 | 1523 | my $plot_keywords = $film->plot_keywords(); 1524 | for my $keyword (@$plot_keywords) { 1525 | print "keyword: $keyword\n"; 1526 | } 1527 | 1528 | =cut 1529 | 1530 | sub plot_keywords { 1531 | my CLASS_NAME $self = shift; 1532 | 1533 | unless($self->{_plot_keywords}) { 1534 | my $page; 1535 | $page = $self->_cacheObj->get($self->code . '_keywords') if $self->_cache; 1536 | 1537 | unless($page) { 1538 | my $url = "http://". $self->{host} . "/" . $self->{query} . $self->code . "/keywords"; 1539 | $self->_show_message("URL for sites is $url ...", 'DEBUG'); 1540 | 1541 | $page = $self->_get_page_from_internet($url); 1542 | $self->_cacheObj->set($self->code.'_keywords', $page, $self->_cache_exp) if $self->_cache; 1543 | } 1544 | 1545 | my $parser = $self->_parser(FORCED, \$page); 1546 | 1547 | my @keywords = (); 1548 | while(my $tag = $parser->get_tag('a')) { 1549 | my $text = $parser->get_text(); 1550 | $text = $self->_decode_special_symbols($text); 1551 | #$self->_show_message("*** $tag->[1]->{href} --> $text ***", 'DEBUG'); 1552 | push @keywords, $text if $tag->[1]->{href} && $tag->[1]->{href} =~ m#/keyword/#; 1553 | } 1554 | 1555 | $self->{_plot_keywords} = \@keywords; 1556 | } 1557 | 1558 | return $self->{_plot_keywords}; 1559 | } 1560 | 1561 | =back 1562 | 1563 | =cut 1564 | 1565 | sub DESTROY { 1566 | my CLASS_NAME $self = shift; 1567 | } 1568 | 1569 | 1; 1570 | 1571 | __END__ 1572 | 1573 | =head2 Class Variables 1574 | 1575 | =over 4 1576 | 1577 | =item %FIELDS 1578 | 1579 | Contains list all object's properties. See description of pragma C<fields>. 1580 | 1581 | =item @FILM_CERT 1582 | 1583 | Matches USA film certification notation and age. 1584 | 1585 | =back 1586 | 1587 | =head1 EXPORTS 1588 | 1589 | Nothing 1590 | 1591 | =head1 HOWTO CACTH EXCEPTIONS 1592 | 1593 | If it's needed to get information from IMDB for a list of movies in some case it can be returned 1594 | critical error: 1595 | 1596 | [CRITICAL] Cannot retrieve page: 500 Can't read entity body ... 1597 | 1598 | To catch an exception can be used eval: 1599 | 1600 | for my $search_crit ("search_crit1", "search_crit2", ..., "search_critN") { 1601 | my $ret; 1602 | eval { 1603 | $ret = new IMDB::Film(crit => "$search_crit") || print "UNKNOWN ERROR\n"; 1604 | }; 1605 | 1606 | if($@) { 1607 | # Opsssss! We got an exception! 1608 | print "EXCEPTION: $@!"; 1609 | next; 1610 | } 1611 | } 1612 | 1613 | =head1 BUGS 1614 | 1615 | Please, send me any found bugs by email: stepanov.michael@gmail.com or create 1616 | a bug report: http://rt.cpan.org/NoAuth/Bugs.html?Dist=IMDB-Film 1617 | 1618 | =head1 SEE ALSO 1619 | 1620 | IMDB::Persons 1621 | IMDB::BaseClass 1622 | WWW::Yahoo::Movies 1623 | IMDB::Movie 1624 | HTML::TokeParser 1625 | 1626 | http://videoguide.sf.net 1627 | 1628 | =head1 AUTHOR 1629 | 1630 | Michael Stepanov AKA nite_man (stepanov.michael@gmail.com) 1631 | 1632 | =head1 COPYRIGHT 1633 | 1634 | Copyright (c) 2004 - 2007, Michael Stepanov. 1635 | This module is free software. It may be used, redistributed and/or 1636 | modified under the same terms as Perl itself. 1637 | 1638 | =cut 1639 | --------------------------------------------------------------------------------