package IMDB;

# Usage:
#
# use IMDB;
# 
# my $imdb = IMDB->new( 'debug' => 1 );
# 
# %res = $imdb->query('title' => 'star trek');
# print "--> $_ = $res{$_}\n" foreach sort keys %res;
# 
# %res = $imdb->query('title' => 'pulp fiction');
# print "--> $_ = $res{$_}\n" foreach sort keys %res;
#
############################################################
# Lates release & documentation available on
#
# http://www.infocopter.com/perl_corner/retos-imdb.htm
############################################################

use strict;
my $VERSION = '0.81.02';

use LWP::UserAgent;

my $package = __PACKAGE__;


#####  GLOBAL VARIABLES
my $IMDB_Server = "us.imdb.com";
my $Genre_Err   = '* Genre not available *';
my %Hash = ();
my $contentType = "";
my $debug = 0;

$| = 1;

#-----  FORWARD DECLARATIONS & PROTOTYPING
sub Error($);
sub Debug($);
sub int_req (%);
sub http_req (%);

sub new {
	my $type = shift;
	my %params = @_;
	my $self = {};

	$self->{'debug'} = $debug = $params{'debug'};
	Debug "$package V$VERSION" if $self->{'debug'};
	bless $self, $type;
}

sub query {
	my $self = shift;
	my %args = @_;

	Debug "$_ = $args{$_}" foreach keys %args;
	Debug "N1002: Expanding information for title '$args{'title'}'";

	%Hash = (); # clean-up global hash

	my $title_encoded = $args{'title'};
	   $title_encoded =~ tr/ /+/;


	#####  check for title matches
	#
	my $pagedata  = http_req url => "http://$IMDB_Server/Find?select=all&for=$title_encoded";
	my @res_lines = split /\n/, $$pagedata;
	my $check     = (grep { $_ =~ "'title' matches" } @res_lines)[0];

	$Hash{'count'} = $check || 0;
	$Hash{'count'} =~ s/.*<B>(\d+)?<\/B> 'title' matches.+/$1/;

	$Hash{'imdb_ref'} = $check || "";
	$Hash{'imdb_ref'} =~ s/.*<OL><LI><A HREF="\/Title\?(\d+)?\"\>.+/$1/;

	my $first_page_title = $$pagedata || "";
	   $first_page_title =~ s/.*\<TITLE>(.+)?\<\/TITLE>.*/$1/si;

	$Hash{'genre'} = "";

	#####  Transform common numbers to words to double the change
	my $title_num2word = "---";
	if ($args{'title'} =~ /\d+/) {
		$title_num2word = $args{'title'};
		$title_num2word =~ s/^7 /Seven /;
		$title_num2word =~ s/^12 /Twelve /;
	}
	$first_page_title =~  s/There's/There is/i;

	Debug "N1003: Comparing '$first_page_title' with '$args{'title'}' and '$title_num2word'...";

	if ($first_page_title =~ /$args{'title'}/i or $first_page_title =~ /$title_num2word/i) {
		#####  title matched at first try!!

		$Hash{'imdb_ref'} = (grep { $_ =~ /^cache=const=/ } @res_lines)[0];
		$Hash{'imdb_ref'} =~ s/^cache=const=(\d+)?/$1/;

		# leading zeros are necessary
		$Hash{'imdb_ref'} = substr("0000000", 0, 7 - length($Hash{'imdb_ref'}) ) . $Hash{'imdb_ref'};

		Debug "N1009: Matched at first try: " . $Hash{'imdb_ref'};

		#####  genre
		$Hash{'genre'} = (grep { $_ =~ /Genre/i } @res_lines)[0];
		$Hash{'genre'} =~ s/<[^>]+>//g; # remove remaining HTML tags
		$Hash{'genre'} =~ s/\(more\)//;
		$Hash{'genre'} =~ s/Genre://;
		$Hash{'genre'} =~ s/ *(.+).*/$1/; # remove leading blanks

		#####  directed by
		my $i = 0; for (0 .. $#res_lines) { last if $res_lines[$i++] =~ /directed by/i; }
		$Hash{'directed_by'} = $res_lines[$i];
		$Hash{'directed_by'} =~ s/<[^>]+>//g; # remove remaining HTML tags
		$Hash{'directed_by'} =~ s/\ //;
		$Hash{'directed_by'} =~ s/ *(.+).*/$1/; # remove leading blanks

		# optimize result
		$Hash{'imdb_title'} = $first_page_title;
		$Hash{'count'     } = 1; # result is unique!
	}
	else {
		Debug "N1008: Not matched.";
		if ($first_page_title =~ /title search/) {;
			Debug "N1005: Received a list from IMDB...";
			my $i = 0;
			for ($i = 0; $i < $#res_lines; $i++) {
				last if $res_lines[$i] =~ /found the following results/;
			} 
			my $index = 0;
			for ($i = $i; $i < $#res_lines; $i++) {
				last if $res_lines[$i] =~ /\<\/OL>/i;
				my $line = $res_lines[$i];
				   $line =~ s/.*<LI><A HREF="\/Title\?(\d+)">(.+)?<\/A>.*/$1\t$2/;

				$Hash{"best_ref_"   . ++$index} = $1;
				$Hash{"best_title_" .   $index} = $2;
				Debug "N1011: $line";
			}
			$Hash{'imdb_ref'  } = $Hash{'best_ref_1'  }; # assign best match
			$Hash{'imdb_title'} = $Hash{'best_title_1'}; # assign best match
		}
	}

	unless ($Hash{'genre'}) {
		#####  check for genre on the follow-up page
		#
		Debug "N1004: Check for genre on follow-up page...";
		$Hash{'imdb_ref'} ||= ""; # prevent warnings
		$pagedata  = http_req url => "http://$IMDB_Server/Title?$Hash{'imdb_ref'}";
		@res_lines = split /\n/, $$pagedata;

		#####  genre
		$check     = (grep { $_ =~ "Genre" } @res_lines)[0];
		$Hash{'genre'} = $check || $Genre_Err;
		$Hash{'genre'} =~ s/<[^>]+>//g; # remove remaining HTML tags
		$Hash{'genre'} =~ s/\(more\)//;
		$Hash{'genre'} =~ s/Genre://;
		$Hash{'genre'} =~ s/ *(.+).*/$1/; # remove leading blanks
		$Hash{'genre'} = "<i>$Hash{'genre'}</i>";
		Debug "N1010: $Hash{'genre'}";

		#####  directed by
		my $i = 0; for (0 .. $#res_lines) { last if $res_lines[$i++] =~ /directed by/i; }
		$Hash{'directed_by'} = $res_lines[$i] || "";
		$Hash{'directed_by'} =~ s/<[^>]+>//g; # remove remaining HTML tags
		$Hash{'directed_by'} =~ s/\ //;
		$Hash{'directed_by'} =~ s/ *(.+).*/$1/; # remove leading blanks
	}

	unless ($Hash{'imdb_title'}) {
		$Hash{'page_title_2'} = $$pagedata || "";
		$Hash{'page_title_2'} =~ s/.*\<TITLE>(.+)?\<\/TITLE>.*/$1/si;
	}

	$Hash{'rc'} = $Hash{'genre'} eq $Genre_Err ? -1 : 0;

	#####  trim values
	$Hash{'genre'      } = join(' ', grep({$_;} split(/ /, $Hash{'genre'      }, 0))); 
	$Hash{'directed_by'} = join(' ', grep({$_;} split(/ /, $Hash{'directed_by'}, 0))); 

	%Hash;
}

sub http_req (%) {
	my %args = @_;

        my $r  = HTTP::Request->new('GET', $args{'url'});
	my $ua = LWP::UserAgent->new(agent => "Mozilla/4.04 [en] (Win2K; I ;Nav)"); # ;-)

        my $resp = $ua->request($r);
        Debug "N1000: http code = " . $resp->code() . " -> $args{'url'}";

        Error $resp->error_as_HTML unless $resp->is_success;

        my $res = $resp->content(); # content without HTTP header

	return \$res;
}

sub Error ($) {
	print "Content-type: text/html\n\n" unless $contentType; $contentType = 1;
	print STDERR "*** ERROR at $package: $_[0]\n";
	# do not exit, it's a Module!
}

sub Debug ($) { print "[ $package ] $_[0]\n" if $debug; }

####  Used Warning / Error Codes  ##########################
#	Next free W Code: 1000
#	Next free E Code: 1000
#	Next free N Code: 1012

1;