#!/usr/bin/perl
#
# This perl script is intended to perform TV Show data lookups based on
# the popular www.tv.com website
#
#
# Author: Steve Adeff (steve dot adeff At gmail com)
# Based on Source by: Tim Harvey, Andrei Rjeousski and Jesse Anderson
#
# v0.1
#  - Started conversion and cleanup from original tvbatchgrab.pl
# v0.2
#  - Clean up code,
#  - Separate tv.com specific routines
# v0.3
#  - Added show selection as an option
#  - Moved Show selection to show subroutine
#  - More or less completely scrapes show info
#
# v0.4
#  - Command input changes, will take single filename as argument
#  - Allow for directory in filename to further support MythTV import
#  - Verbosity indicators for easier logging
#  - Some fixes for The O.C. and multiple Writer parsing
#  - Cleaned up help output
#
# v0.5
#  - Adding functions for MythTV Watch Recordings integration
#
# v0.6
#  - Fixed response to add to db answer, now "n" won't add to the db.
#  - Added network name parser for possible use later?
#  - Place SxxExx at the beginning of sub-title.
#
# v0.7
#  - Allow mysql.txt to be used. (Thanks Mark Spieth!)
#  - Allow multiple file name input. (Thanks Mark Spieth!)
#  - Allow "SSxEE - NAME.avi". (Thanks Christoph Holzbaur!)
#  - Double episode (ie S02E13-14) support. (Thanks Peter Kornhuld!)
#  - Fix letter S in filename after "S"eason. (Thanks Christoph Holzbaur!)
#
# v0.8
#  - Use hostname instead of MythBox. (Thanks Mark Spieth!)
#  - Replace if Insert fails. May need to make an option to turn
#    this off. (Thanks Mark Spieth!)
#
# v0.9
#  - Option to create a symlink to the recording. (Thanks Mark Spieth!)
#  - Four digit series/episode number support. (Thanks Mark Spieth!)
#  -
#
#
use strict;
use warnings;

use DBI;
use LWP::Simple;       # libwww-perl providing simple HTML get actions
use LWP::UserAgent;    # libwww-perl providing simple HTML get actions
use HTML::Entities;
use URI::Escape;
use XML::Simple;
use Getopt::Long qw(:config permute );
use Sys::Hostname;
use File::Spec;
use POSIX;
use Date::Manip;
use IO::Socket::INET;

# User settings
my $recgroup         = "Default";
my $playgroup        = "Default";
my $host             = hostname();
my $dbhost           = "192.168.1.151";
my $database         = "mythconverg";
my $user             = "mythtv";
my $pass             = "mythtv";
my $RecordFilePrefix = "/var/mythtv/recordings";
my $MasterServerIP   = "localhost";
my $MasterServerPort = 6543;

# NOTE(review): the path of the mysql.txt file that main() used to open is
# missing from this copy of the script (only `open F, "` survives).  The
# conventional MythTV locations are tried instead; files later in the list
# override earlier ones.  Confirm against the deployment.
my @mysql_txt_files = (
    "/usr/local/share/mythtv/mysql.txt",
    "/usr/share/mythtv/mysql.txt",
    "/usr/local/etc/mythtv/mysql.txt",
    "/etc/mythtv/mysql.txt",
    ( defined $ENV{'HOME'} ? "$ENV{'HOME'}/.mythtv/mysql.txt" : () ),
);

# NOTE(review): several tv.com scraping delimiters reach this copy as empty
# or truncated string literals (for example parseBetween() called with a
# single argument).  They look like HTML tags that were stripped from the
# text.  They cannot be recovered from the file itself, so every affected
# call site uses this one variable; with it empty the scraper finds nothing.
# Restore the real markup at each use before relying on the results.
my $LOST_MARKUP = "";

# Script Info
my $script_title   = "TV.com Show Info Grabber";
my $script_version = "v0.9";
my $script_author  = "Steven Adeff based on code by Tim Harvey, Andrei Rjeousski, Jesse Anderson";

# Command line state
my $opt_v = -1;    # verbosity; main() adds one, so -1 means "quiet"
my $opt_d = 0;     # debugging
my $opt_l = 0;     # symlink the file into the recordings directory
my $opt_m = 0;     # manually choose the show from the search results
my $opt_t = 0;     # test mode, do not touch the database
my $opt_i = -1;    # import: -1 ask, 0 never, 1 always
my $inputfile;
my $home     = defined $ENV{'HOME'} ? $ENV{'HOME'} : "";
my $cfg_file = "$home/.tvgrabber";

if ( @ARGV == 0 ) { help(); }

GetOptions(
    "help"        => sub { help(); },
    "debug"       => \$opt_d,
    "verbose:i"   => \$opt_v,
    "manual"      => \$opt_m,
    "test"        => \$opt_t,
    "import!"     => \$opt_i,
    "recgroup=s"  => \$recgroup,
    "playgroup=s" => \$playgroup,
    "file=s"      => \$inputfile,
    "config=s"    => \$cfg_file,
    "link"        => \$opt_l
    #'<>' => \&main
);

if ($inputfile) {
    main($inputfile);
}
else {
    main(@ARGV);
}
exit(0);

# Process every input file: look the show up, optionally symlink the file
# into the recordings directory, and add it to the MythTV database.
#
# Parameters: the list of input file names (optionally with a directory).
# Never returns: exits with status 1 when at least one record was added and
# 0 otherwise, after notifying the master backend of any addition.
sub main {
    my @inputfiles = @_;

    _loadMysqlTxt();
    getsettings();

    $opt_v = $opt_v + 1;
    if ( $opt_v > 2 )  { $opt_v = 0; }
    if ( $opt_v == 1 ) { print "(V1) Verbose Level 1\n"; }
    if ( $opt_v == 2 ) { print "(V2) Verbose Level 2\n"; $opt_d = 1; }
    if ( $opt_d == 1 ) { print "(DD) Debugging On\n"; }
    if ( $opt_t == 1 ) { print "(TT) Test Mode On\n"; }
    if ( $opt_l == 1 && $opt_v ) { print "(V1) SymLink Mode On\n"; }
    if ($opt_v) {
        # The names come from --file or from @ARGV, so report what was passed in.
        print "(V1) Input filename: " . join( ", ", @inputfiles ) . "\n";
    }

    my $addedstuff = 0;
    foreach my $path (@inputfiles) {
        my $inputfile = $path;
        my $filename  = $inputfile;

        if ( $inputfile =~ m{/} ) {
            my $foldername = substr( $inputfile, 0, rindex( $inputfile, "/" ) + 1 );
            $filename =~ s{.*/}{};
            if ($opt_v) { print "(V1) Folder: $foldername\n"; }
        }
        if ($opt_v) { print "(V1) Filename: $filename\n"; }

        if ($opt_l) {
            # link file into recorded directory
            my $rfile = $RecordFilePrefix . "/" . $filename;
            if ( -l $rfile ) { unlink($rfile); }

            # rel2abs() leaves absolute names alone and anchors relative ones
            # at the current directory.
            my $fullpath = File::Spec->rel2abs($inputfile);
            symlink( $fullpath, $rfile )
              or print "(EE) Could not symlink $fullpath to $rfile: $!\n";
            if ($opt_v) { print "(V1) Symlink in : $inputfile, $fullpath, $rfile\n"; }
            $inputfile = $filename;
        }

        my @showinfo = search($filename);
        if ( !@showinfo ) {
            # Without this guard the file name would slide into the show name
            # slot of addrecord()'s argument list.
            print "(EE) No show information found for '$filename', skipping\n";
            next;
        }

        my $wanted = $opt_i;
        if ( $opt_i == -1 ) {
            print "Do you want to add this to the Myth Database? (y/n)\n";
            my $answer = <STDIN>;
            $answer = "" unless defined $answer;
            chomp $answer;
            $wanted = ( $answer eq "y" ) ? 1 : 0;
        }
        next unless $wanted == 1;

        $addedstuff = 1 if addrecord( @showinfo, $inputfile );
    }

    send_recordinglistchanged() if $addedstuff != 0;
    exit($addedstuff);
}

# Read the database connection settings (DBHostName, DBUserName, DBName,
# DBPassword) from whichever mysql.txt files exist.  Missing files are
# skipped silently; the built-in defaults stay in place.
sub _loadMysqlTxt {
    foreach my $file (@mysql_txt_files) {
        open( my $fh, '<', $file ) or next;
        while ( my $line = <$fh> ) {
            next if $line =~ /^\s*#/;    # commented-out settings do not count
            if ( $line =~ /DBHostName=(\S+)/ ) { $dbhost   = $1; }
            if ( $line =~ /DBUserName=(\S+)/ ) { $user     = $1; }
            if ( $line =~ /DBName=(\S+)/ )     { $database = $1; }
            if ( $line =~ /DBPassword=(\S+)/ ) { $pass     = $1; }
        }
        close $fh;
    }
    return;
}

# Convert the leading integer of a string to a number, 0 when there is none.
# Stands in for int() on scraped text without "isn't numeric" warnings.
sub _toInt {
    my ($value) = @_;
    return 0 unless defined $value;
    return ( $value =~ /^\s*([-+]?\d+)/ ) ? $1 + 0 : 0;
}

# Look up the show data for one file name.
#
# Parameters: $filename (name without directory); $inputfile is accepted for
# compatibility and not used.
# Returns the 17 element show info list produced by getShowData() or
# getShowDataFake(), or an empty list when no show could be selected.
sub search {
    my ( $filename, $inputfile ) = @_;
    if ($opt_v) { print "(V1) Parseing input filename: $filename\n"; }

    # get the show title, season, episode from filename
    my ( $showname, $season, $sepisode ) = getShowDataFromName($filename);
    $season = _toInt($season);
    my $episode = _toInt($sepisode);    # $sepisode keeps the raw text for the fake lookup

    if ( !defined $showname || $showname eq "" ) {
        $showname = ( $filename =~ /([^\.]*)\./ ) ? $1 : "";
    }

    if ($opt_v) {
        printf( "(V1) Show Name: %s, Season: %s Episode: %s,\n", $showname, $season, $episode );
        print "(V1) Searching for '$showname'\n";
    }

    # get the show results from the TV.com search
    my @shows = getShowList($showname);
    return () unless @shows;

    my $selectedNum = 0;
    if ( $opt_m && @shows > 1 ) {
        # Print the matches and let the user choose
        print "\n\n\nFor \"$showname\" TV.com has the following names:\n";
        for my $h ( 0 .. $#shows ) {
            my $tempH = $h + 1;
            print "$tempH-$shows[$h][0]\n";
        }
        print "\nPlease input corresponding movie number:";
        my $answer = <STDIN>;
        $answer = "" unless defined $answer;
        chomp $answer;
        if ( $answer !~ /^\s*\d+\s*$/ || $answer < 1 || $answer > @shows ) {
            print "(EE) Invalid selection\n";
            return ();
        }
        $selectedNum = $answer - 1;
    }

    my ( $showtitle, $showid, $workingname ) = @{ $shows[$selectedNum] };

    my @showinfo;
    if ( $season >= 0 ) {
        @showinfo = getShowData( 0, $showtitle, $showid, $workingname, $season, $episode );
    }
    if ( !@showinfo ) {
        # no real data so get fake stuff
        @showinfo = getShowDataFake( 0, $showtitle, $showid, $workingname, $season, $sepisode );
    }
    return @showinfo;
}

# Print the program name, version and authors.
sub version {
    print "$script_title ($script_version) by $script_author\n";
}

# Print the one line description of the program.
sub info {
    print "Query's www.tv.com for show information.\n";
}

# Print the command line help and exit with status 1.
sub usage {
    print "usage: $0 -hvidrmlF filename\n";
    print "\tfilename format must be in:\n";
    print "\tepisode.title.format.s01e01.blah.avi\t\n\tor\n";
    print "\tepisode.title.format.101.blah.avi\n";
    print "\n\t-h\tthis help screen\n";
    print "\t-d\tturn on debugging\n";
    print "\t-v\tturn on verbose\n";
    print "\n";
    print "\t-m\tManually choose show from query list\n";
    print "\t-F\tinput_filename\n";
    print "\t-l\tcreate a symlink in the recordings directory first\n";
    print "\t\tRequires absolute path!\n";
    print "\n\tMythTV integration options\n\n";
    print "\t-import/-noimport\tforce to import or not\n";
    print "\t\t\t\tdefault is to ask\n";
    print "\t-t\tturn on test mode, won't touch database\n";
    exit(1);
}

# Print version, description and usage; usage() ends the program.
sub help {
    version();
    info();
    usage();
}

# Work out show name, season and episode from a file name.
#
# The name is first normalised to the "Show.Name.S01E01.Title.ext" form
# (double episodes, " - " delimiters, 0101 / 101 and 01x01 numbering).  When
# no SxxEyy marker results, the rules in the tvgrabber config file are tried.
#
# Returns (show name, season, episode); the list holds a single undef when
# nothing matched.
sub getShowDataFromName {
    my ($filename) = @_;
    if ($opt_d) { printf( "(DD) Show Name: '%s'\n", $filename ); }

    # if there are more than one Episode in one file (Serie.S02E13-14 or
    # Serie.02x13-14) take the first one
    if ( $filename =~ /[0-9][0-9]-[0-9][0-9]/ ) {
        if ($opt_v) { print "(V1) File seems to have more than one show\n"; }
        $filename =~ s/(\d\d)-(\d\d)/$1/g;
        if ($opt_v) { print "(V1) New show name: $filename\n"; }
    }

    # Convert " - " to "." as Delimiter
    if ( $filename =~ / - / ) {
        if ($opt_v) { print "(V1) Show name has \" - \" as delimiter\n"; }
        $filename =~ s/ - /./g;
        if ($opt_v) { print "(V1) New show name: $filename\n"; }
    }

    if ( $filename =~ /\.[0-9][0-9][0-9][0-9]\./ ) {
        # Convert 4 number format to S00E00 format
        if ($opt_v) { print "(V1) Show name is four-number format\n"; }
        $filename =~ s/\.(\d\d)(\d\d)\./.S${1}E${2}./g;
        if ($opt_v) { print "(V1) New show name: $filename\n"; }
    }
    elsif ( $filename =~ /\.[0-9][0-9][0-9]\./ ) {
        # Convert 3 number format to S00E00 format
        if ($opt_v) { print "(V1) Show name is three-number format\n"; }
        $filename =~ s/\.(\d)(\d\d)\./.S0${1}E${2}./g;
        if ($opt_v) { print "(V1) New show name: $filename\n"; }
    }

    # Convert 00x00 format to S00E00. format
    if ( $filename =~ /\.[0-9][0-9][xX][0-9][0-9]\./ ) {
        if ($opt_v) { print "(V1) Show name is 00x00 format\n"; }
        $filename =~ s/\.(\d\d)[xX](\d\d)\./.S${1}E${2}./g;
        if ($opt_v) { print "(V1) New show name: $filename\n"; }
    }

    # Decided per file: a shared flag would carry over to the next file.
    my $isSeasonEpisode = ( $filename =~ /\.[sS][0-9][0-9][eE][0-9][0-9]\./ ) ? 1 : 0;
    if ( $isSeasonEpisode && $opt_v ) { print "(V1) Show name is S00E00 format\n"; }
    if ($opt_d) {
        printf( "(DD) File name is type: '%s'\n", $isSeasonEpisode ? 1 : "" );
    }

    my @returnData =
        $isSeasonEpisode
      ? _splitSeasonEpisode($filename)
      : _matchConfigRules($filename);

    $returnData[0] =~ s/\./ /g if defined $returnData[0];
    if ($opt_d) {
        printf( "(DD) Show Name: '%s', Season: '%s' Episode: '%s',\n",
            map { defined $_ ? $_ : "" } @returnData[ 0 .. 2 ] );
    }
    return @returnData;
}

# Split "Show.Name.S01E01.Episode Name.avi" into (name, season, episode).
# The last SxxEyy marker in the name is the one used.
sub _splitSeasonEpisode {
    my ($filename) = @_;

    # Upper-case every marker and give it a second leading dot so that the
    # boundary between show name and marker is unambiguous.
    $filename =~ s/[sS]([0-9]+)[eE]([0-9]+)/.S${1}E${2}/g;

    # The OC check.  Done before the fields are cut out so that shortening
    # the name cannot shift them.
    $filename =~ s/(the)\.o\.c\./$1.oc./i;

    my ( $name, $season, $episode ) = ( "", "", "" );
    if ( $filename =~ /\A(.*)\.\.S([0-9]+)E([^.]*)\./s ) {
        ( $name, $season, $episode ) = ( $1, $2, $3 );
    }
    $name =~ tr/./ /;
    return ( $name, $season, $episode );
}

# Apply the first matching rule of the tvgrabber config file to a file name.
#
# example line in config file
#   '(Bold and the beautiful) (.*)\.' '"the ".$1' '-1' '$2'
# matches "Bold and the beautiful April 13th 2006.avi"
#
# Returns (name, season, episode) or an empty list when the file cannot be
# opened or no rule matches.
sub _matchConfigRules {
    my ($filename) = @_;
    my @fields;

    print "reading tvgrabber config\n" if $opt_v;
    open( my $cfg, '<', $cfg_file ) or return @fields;

  CFREAD: while ( my $line = <$cfg> ) {
        chomp $line;
        next CFREAD if $line =~ /^\#/;
        print "got line $line\n" if $opt_v > 1;

        next CFREAD
          unless $line =~ /\'([^\']*)\'\s+\'([^\']*)\'\s+\'([^\']*)\'\s+\'([^\']*)\'/;
        my ( $re, $nameexp, $seasonexp, $episodeexp ) = ( $1, $2, $3, $4 );
        print "got line $re $nameexp $seasonexp $episodeexp\n" if $opt_v > 1;

        next CFREAD unless $filename =~ /$re/;

        # SECURITY: the three expressions are Perl code taken from the config
        # file and run with string eval so they can use $1, $2, ... of the
        # match above.  Only use a config file you trust.
        {
            no strict 'vars';
            $fields[0] = eval($nameexp);
            $fields[1] = eval($seasonexp);
            $fields[2] = eval($episodeexp);
        }
        if ( $opt_v > 1 ) {
            my @shown = map { defined $_ ? $_ : "" } @fields[ 0 .. 2 ];
            print "params are $shown[0], $shown[1], $shown[2]\n";
        }
        last CFREAD;
    }
    close $cfg;
    return @fields;
}

# Search TV.com for a show name.
#
# Parameters: the show name to search for.
# Returns a list of array references [title, show number, working name], one
# per search result, or an empty list when nothing was found.  Choosing among
# several results is left to search().
sub getShowList {
    my ($filename) = @_;    # grab parameters
    my $query = $filename;
    if ($opt_d) { printf( "(DD) query: '%s'\n", $filename ); }

    # IMDB searches do better if any trailing ,The is left off
    # TV.com doesn't really need it except for The O.C. ...
    if ( $query =~ /(.*), The$/i ) { $query = $1; }

    # prepare the url
    $query = uri_escape($query);
    if ($opt_d) { printf( "(DD) query: '%s'\n", $query ); }

    # create the search results page URL
    my $request = "http://www.tv.com/search.php?type=11&stype=program&qs=$query&x=0&y=0";
    if ($opt_d) { printf( "(DD) Search Page: '%s'\n", $request ); }

    # get the search results, retrying a few times on an empty answer
    my $response  = "";
    my $max_tries = 5;
    for my $try ( 1 .. $max_tries ) {
        my $page = get($request);
        if ( defined $page && $page ne "" ) {
            $response = $page;
            last;
        }
        sleep(5) if $try < $max_tries;
    }
    if ( $opt_v == 2 ) { print "(V2) Search Results:\n$response\n"; }

    # extract possible matches
    my $data = parseBetween( $response, $LOST_MARKUP, $LOST_MARKUP );
    if ($opt_d) { printf( "(DD) Results \n%s", $data ); }
    if ( $data eq "" ) {
        print "response is \n$response\n";
        printf("(EE) No results found!\n");
        return ();
    }

    # parse movie list from matches
    my $beg      = $LOST_MARKUP;
    my $end      = $LOST_MARKUP;
    my $link_end = $LOST_MARKUP;
    my @movies;

    my $start = index( $data, $beg );
    while ( $start != -1 && $start < length($data) ) {
        my $finish = index( $data, $end, $start );
        last if $finish == -1;    # unterminated entry: stop instead of looping forever

        my $entryStart = $start + length($beg);
        my $entry      = substr( $data, $entryStart, $finish - $entryStart );
        if ( $opt_v == 2 ) { print "(V2) Entry: $entry\n"; }
        $start = index( $data, $beg, $finish + 1 );

        my $fl_end = rindex( $entry, $link_end ) + length($link_end);
        my $lhs    = substr( $entry, 0, $fl_end );
        my $rhs    = substr( $entry, $fl_end );
        if ( $opt_v == 2 ) {
            print("Right hand side: $rhs\n");
            printf( "Left hand side: %s\n", $lhs );
        }

        # NOTE(review): the tail of this pattern may be incomplete in this
        # copy, in which case the title capture also holds trailing markup.
        if ( $lhs =~ m|href=\"http://www.tv.com/(.+)/show/(\d+)/.*xbig\">(.+)|i ) {
            my ( $workingname, $movienum, $title ) = ( $1, $2, $3 );
            push @movies, [ $title, $movienum, $workingname ];

            # display array of values
            if ($opt_v) {
                printf( "(V1) Working Name: %s \tShowNum: %d \tTitle: %s\n",
                    $workingname, $movienum, $title );
            }
        }
        elsif ($opt_d) {
            print "(DD) Unrecognized entry format\n";
        }
    }

    return @movies;
}

# Remove HTML tags from a fragment and tidy the white space left behind:
# runs of three or more white space characters are dropped, runs of two
# become one blank.
#
# NOTE(review): the tag pattern of the original strip loop is missing from
# this copy; "<...>" is assumed.
sub _stripTags {
    my ($text) = @_;
    return "" unless defined $text;
    $text =~ s/<[^>]*>//g;
    $text =~ s/\s{3,}//g;
    $text =~ s/\s{2,}/ /g;
    return $text;
}

# Split a "month/day/year" string at its first and last slash.
# Returns (month, day, year), all empty when the string has no two slashes.
sub _splitDate {
    my ($date) = @_;
    if ( defined $date && $date =~ m{\A([^/]*)/(.*)/([^/]*)\z}s ) {
        return ( $1, $2, $3 );
    }
    return ( "", "", "" );
}

# Fetch the show summary page and pull out the scheduling data.
#
# Parameters: $workingname and $movieid as used in tv.com URLs.
# Returns (category, runtime in minutes, air time, end time, network); the
# times are "H:MM:00" strings on a 24 hour clock.
sub _getShowSummary {
    my ( $workingname, $movieid ) = @_;

    # create the Show Summary URL
    my $summaryURL = "http://www.tv.com/$workingname/show/$movieid/summary.html";
    if ($opt_d) { printf( "(DD) Show Summary URL: %s\n", $summaryURL ); }

    my $summaryHTML = get($summaryURL);
    $summaryHTML = "" unless defined $summaryHTML;
    if ( $opt_v == 2 ) { printf( "%s", $summaryHTML ); }
    $summaryHTML = parseBetween( $summaryHTML, "Airs:", $LOST_MARKUP );

    # Category: first entry of the comma separated list
    my $category = parseBetween( $summaryHTML, "Show Category:", $LOST_MARKUP );
    my $comma = index( $category, "," );
    $category = substr( $category, 0, $comma ) if $comma >= 0;
    $category = _stripTags($category);

    # Airtime.  Matched as "H:MM AM/PM" so that an earlier capital M (as in
    # "Monday") cannot be mistaken for the AM/PM marker.
    my $runtime = parseBetween( $summaryHTML, "(", " mins)" );
    my ( $hour, $minute ) = ( 0, "00" );
    if ( $summaryHTML =~ /(\d{1,2}):(\d{2})\s*([AP])M/i ) {
        my $meridiem = uc $3;
        ( $hour, $minute ) = ( $1 + 0, $2 );
        if ( $meridiem eq "P" && $hour != 12 ) { $hour = $hour + 12; }    # noon stays 12
        if ( $meridiem eq "A" && $hour == 12 ) { $hour = 0; }             # midnight
    }
    my $airtime = $hour . ":" . $minute . ":00";

    my $endminute = $minute + _toInt($runtime);
    my $endhour   = $hour;
    while ( $endminute >= 60 ) {
        $endminute = $endminute - 60;
        $endhour   = $endhour + 1;
    }
    $endhour = $endhour % 24;
    my $endtime = $endhour . ":" . sprintf( "%02d", $endminute ) . ":00";

    # Network: the text between the last f-bold"> and the first " ("
    my $network = "";
    my $paren   = index( $summaryHTML, " (" );
    my $bold    = rindex( $summaryHTML, "f-bold\">" );
    if ( $bold >= 0 && $paren >= $bold + 8 ) {
        $network = trim( substr( $summaryHTML, $bold + 8, $paren - ( $bold + 8 ) ) );
    }

    return ( $category, $runtime, $airtime, $endtime, $network );
}

# Print the collected show information, one field per line.
# Parameter: hash reference keyed by field name; "fakeepisode" is optional
# and only shown with verbose or debug output.
sub _printShowInfo {
    my ($info) = @_;
    my %f = map { $_ => ( defined $info->{$_} ? $info->{$_} : "" ) } keys %$info;

    print "Show: $f{realname}\n";
    print "Network: \"$f{network}\"\n";
    print "Season: $f{season}\n";
    print "Episode: $f{episode}\n";
    if ( ( $opt_v || $opt_d ) && defined $info->{fakeepisode} ) {
        print "FakeEp: $f{fakeepisode}\n";
    }
    print "Title:$f{title}\n";
    print "First aired:$f{firstyear} / $f{firstmonth} / $f{firstday}\n";
    print "Director:$f{director}\n";
    print "Plot:\n$f{plot}\n";
    print "UserRating:$f{userrating}\n";
    print "Record Group:$recgroup\n";
    print "Play Group:$playgroup\n";
    print "Runtime:$f{runtime}\n";
    print "Airtime:$f{airtime}\n";
    print "Endtime:$f{endtime}\n";
    print "Writers:$f{writer}\n";
    print "Category:$f{category}\n";
    return;
}

# Build show data for a file without usable season/episode numbers.
#
# The episode guide is not consulted: the episode text itself becomes the
# title and, when it parses as a date, the air date (today otherwise).
# Only the summary page is fetched, for category and schedule.
#
# Parameters: ($dbindex, $realname, $movieid, $workingname, $season, $episode);
# $dbindex is not used.
# Returns the 17 element list (show, season, episode, title, year, month,
# day, director, writer, plot, user rating, record group, play group,
# runtime, air time, end time, category).
sub getShowDataFake {
    my ( $dbindex, $realname, $movieid, $workingname, $season, $episode ) = @_;
    if ($opt_d) { printf( "(DD) looking for movie id: '%s'\n", $movieid ); }

    $episode = "" unless defined $episode;
    my $title = trim($episode);

    # parse First Aired Date into Month, Day, and Year
    my $parsed     = ParseDate($episode);
    my $firstaired = $parsed ? UnixDate( $parsed, "%m/%d/%Y" ) : undef;
    $firstaired = strftime( "%m/%d/%Y", localtime ) if !defined $firstaired;
    my ( $firstmonth, $firstday, $firstyear ) = _splitDate($firstaired);

    # Nothing to scrape these from, so they are placeholders.
    my $director   = "Unknown";
    my $writer     = "Unknown";
    my $plot       = "";
    my $userrating = "";

    my ( $category, $runtime, $airtime, $endtime, $network ) =
      _getShowSummary( $workingname, $movieid );

    _printShowInfo(
        {
            realname   => $realname,
            network    => $network,
            season     => $season,
            episode    => $episode,
            title      => $title,
            firstyear  => $firstyear,
            firstmonth => $firstmonth,
            firstday   => $firstday,
            director   => $director,
            plot       => $plot,
            userrating => $userrating,
            runtime    => $runtime,
            airtime    => $airtime,
            endtime    => $endtime,
            writer     => $writer,
            category   => $category,
        }
    );

    return (
        $realname, $season,   $episode,   $title,    $firstyear, $firstmonth,
        $firstday, $director, $writer,    $plot,     $userrating,
        $recgroup, $playgroup, $runtime,  $airtime,  $endtime,   $category
    );
}

# Scrape the TV.com episode guide and summary page for one episode.
#
# Parameters: ($dbindex, $realname, $movieid, $workingname, $season, $episode);
# $dbindex is not used.
# Returns the same 17 element list as getShowDataFake().  Fields that cannot
# be found on the page come back empty.
sub getShowData {
    my ( $dbindex, $realname, $movieid, $workingname, $season, $episode ) = @_;
    if ($opt_d) { printf( "(DD) looking for movie id: '%s'\n", $movieid ); }

    # create the season episode guide page
    my $guideURL =
      "http://www.tv.com/$workingname/show/$movieid/episode_guide.html&season=$season";

    # workaround since new battlestar galactica started with the miniseries
    if ( $workingname eq "battlestar-galactica-2003" && $season == 1 ) {
        $episode = $episode + 2;
    }

    if ($opt_d) { print "(DD) Episode Guide URL: $guideURL\n"; }
    my $response = get($guideURL);
    $response = "" unless defined $response;
    if ( $opt_v == 2 ) { print "(V2) Episode Guide HTML:\n$response\n"; }

    # Strip unneeded data
    my $startOfData = index( $response, "div id=\"episode_guide\"" );
    my $endOfData   = index( $response, "div id=\"showspace_foot\" class=\"pod\"" );
    if ( $startOfData >= 0 ) {
        $response =
            $endOfData > $startOfData
          ? substr( $response, $startOfData, $endOfData - $startOfData )
          : substr( $response, $startOfData );
    }
    if ( $opt_v == 2 ) { print "(V2) Episodes Stripped:\n$response\n"; }

    # seasons after one dont start with one, they start at the number of the
    # last season
    my $fakeepisode = $episode;
    if ( $season > 1 ) {
        if ($opt_v) { print "(V1) Not First Season!\n"; }
        my $firstRow = parseBetween( $response, $LOST_MARKUP, $LOST_MARKUP );
        $firstRow = trim( parseBetween( $firstRow, "class=\"f-big\">", $LOST_MARKUP ) );
        my $firstEpisode = ( $firstRow =~ /^([^.]*)\./ ) ? _toInt($1) : 0;

        # convert to number usable with TV.com
        if ( $firstEpisode > 0 ) {
            $fakeepisode = ( $firstEpisode - 1 ) + $episode;
        }
        if ($opt_d) { print "(DD) FakeEpisode: $fakeepisode\n"; }
    }

    # Strip just required Episode info
    my $startOfEpisodeString = index( $response, $fakeepisode . ".\n " );
    if ( $startOfEpisodeString >= 0 ) {
        if ( $opt_v == 2 ) {
            print "(V2) Start of Episode String:\n"
              . substr( $response, $startOfEpisodeString ) . "\n";
        }
        my $startOfEpisodeData = rindex( $response, $LOST_MARKUP, $startOfEpisodeString );
        my $endOfEpisodeData   = index( $response, $LOST_MARKUP, $startOfEpisodeData );

        # Narrow the results down to the part we want
        $response =
          ( $startOfEpisodeData >= 0 && $endOfEpisodeData >= $startOfEpisodeData )
          ? substr( $response, $startOfEpisodeData, $endOfEpisodeData - $startOfEpisodeData )
          : "";
    }
    else {
        $response = "";    # episode not listed; every field below comes out empty
    }
    if ( $opt_v == 2 ) { print "Response:\n$response"; }

    # parse title
    my $title = trim( parseBetween( $response, "$fakeepisode.", " " ) );

    # parse First Aired Date into Month, Day, and Year
    my $firstaired = trim( parseBetween( $response, "First aired: ", $LOST_MARKUP ) );
    my ( $firstmonth, $firstday, $firstyear ) = _splitDate($firstaired);

    # parse director
    my $director = parseBetween( $response, "Director:", "/a>" );
    $director = stripParensFromName( parseBetween( $director, ">", "<" ) );

    # parse writer
    # (a comma means several writers: keep them all, just drop the markup)
    my $writer = parseBetween( $response, "Writer:", "Director:" );
    if ( $writer =~ /,/ ) {
        $writer = _stripTags($writer);
    }
    else {
        $writer = parseBetween( $writer, ">", "<" );
        $writer =~ s/\s{2,}/ /;
        $writer =~ s/\A\s//;
    }

    # parse plot
    my $plot = parseBetween( $response, $LOST_MARKUP, $LOST_MARKUP );

    # parse user rating
    my $userrating = parseBetween( $response, $LOST_MARKUP, "/a>" );
    $userrating = parseBetween( $userrating, ">", "<" );

    # air time and runtime come from the show's main summary.html page
    my ( $category, $runtime, $airtime, $endtime, $network ) =
      _getShowSummary( $workingname, $movieid );

    _printShowInfo(
        {
            realname    => $realname,
            network     => $network,
            season      => $season,
            episode     => $episode,
            fakeepisode => $fakeepisode,
            title       => $title,
            firstyear   => $firstyear,
            firstmonth  => $firstmonth,
            firstday    => $firstday,
            director    => $director,
            plot        => $plot,
            userrating  => $userrating,
            runtime     => $runtime,
            airtime     => $airtime,
            endtime     => $endtime,
            writer      => $writer,
            category    => $category,
        }
    );

    return (
        $realname, $season,   $episode,   $title,    $firstyear, $firstmonth,
        $firstday, $director, $writer,    $plot,     $userrating,
        $recgroup, $playgroup, $runtime,  $airtime,  $endtime,   $category
    );
}

#
#
# MythTV integration Functions
#
#

# Offer to rebuild the seek table of a file with mythcommflag.
#
# Parameter: the file to process.
# On "y" (outside test mode) the process is replaced by mythcommflag and
# this call does not return.
sub commercialflag {
    my ($inputfile) = @_;
    print("Building a seek table should improve FF/RW and JUMP functions when watching this video\n");
    print("Do you want to build a seek table for this file? (y/n): ");
    my $do_commflag = <STDIN>;
    $do_commflag = "" unless defined $do_commflag;
    chomp $do_commflag;

    if ( $do_commflag ne "y" ) {
        print("Skipping seek table rebuild for: $inputfile\n");
        return;
    }
    if ($opt_t) {
        print("Test mode: exec would have done\n");
        print(" Exec: 'mythcommflag --file $inputfile --rebuild'\n");
        return;
    }

    # List form: the file name is passed as one argument, never through a shell.
    exec( "mythcommflag", "--file", $inputfile, "--rebuild" )
      or print STDERR "(EE) Could not run mythcommflag: $!\n";
    return;
}

# Open the MythTV database, or report the DBI error and exit with -1.
# Returns the database handle with PrintError switched off.
sub _dbConnect {
    my $dbh = DBI->connect( "dbi:mysql:database=$database:host=$dbhost", "$user", "$pass" );
    if ( !$dbh ) {
        my $reason = defined $DBI::errstr ? $DBI::errstr : "unknown error";
        print STDERR "(EE) Cannot connect to database ($reason)\n";
        exit(-1);
    }
    $dbh->{PrintError} = 0;
    return $dbh;
}

# Read one value from the MythTV settings table.
#
# Parameters: $dbh database handle; $ref reference to the scalar to update;
# $val setting name; $host optional host name the setting must belong to.
# The referenced scalar is left untouched when no value is found.
sub getsetting {
    my ( $dbh, $ref, $val, $host ) = @_;

    my $sql  = "SELECT data FROM settings WHERE value = ?";
    my @bind = ($val);
    if ( defined $host ) {
        $sql .= " AND hostname = ?";
        push @bind, $host;
    }

    my $sth = $dbh->prepare($sql) or return;
    if ( $sth->execute(@bind) ) {
        my @row = $sth->fetchrow_array;
        $$ref = $row[0] if @row && defined $row[0];
    }
    $sth->finish;
    return;
}

# Load RecordFilePrefix (for this host), MasterServerIP and MasterServerPort
# from the MythTV database, keeping the defaults for anything not stored.
sub getsettings {
    my $dbh = _dbConnect();
    getsetting( $dbh, \$RecordFilePrefix, "RecordFilePrefix", $host );
    getsetting( $dbh, \$MasterServerIP,   "MasterServerIP" );
    getsetting( $dbh, \$MasterServerPort, "MasterServerPort" );
    $dbh->disconnect;
    return;
}

# Read up to $wanted bytes from a socket, stopping early at end of stream
# or on error.  Returns what was read.
sub _recvExactly {
    my ( $sock, $wanted ) = @_;
    my $buffer = "";
    while ( length($buffer) < $wanted ) {
        my $chunk = "";
        my $ok = $sock->recv( $chunk, $wanted - length($buffer), 0 );
        last if !defined $ok || length($chunk) == 0;
        $buffer .= $chunk;
    }
    return $buffer;
}

# Send one command to the backend and return its reply.
#
# Both directions use the same framing: the payload length as a number left
# justified in 8 characters, followed by the payload.
sub sendcmd {
    my ( $sock, $cmd ) = @_;

    my $packet = sprintf( "%-8d", length($cmd) ) . $cmd;
    print {$sock} $packet;
    $sock->flush();

    my $length = _toInt( _recvExactly( $sock, 8 ) );
    return "" if $length <= 0;
    return _recvExactly( $sock, $length );
}

# Tell the master backend that the list of recordings has changed.
# Returns 0 when the backend cannot be reached, 1 otherwise.
sub send_recordinglistchanged {
    my $sock = IO::Socket::INET->new(
        Proto    => 'tcp',
        PeerAddr => $MasterServerIP,
        PeerPort => $MasterServerPort
    ) or return 0;
    $sock->autoflush(1);

    # Plain bytes: recv() on a handle with a :utf8 layer is a fatal error in
    # current perls, and every command sent here is ASCII.
    binmode($sock);

    sendcmd( $sock, "ANN Playback tvgrabber 0" );
    sendcmd( $sock, "MESSAGE[]:[]RECORDING_LIST_CHANGE" );
    sendcmd( $sock, "DONE" );
    close($sock);
    return 1;
}

## add records to db
#
# Parameters: the 17 element show info list followed by the file name to
# store as basename.
# Returns 1 when the row was written (or test mode is on), 0 when both the
# INSERT and the REPLACE failed.
sub addrecord {
    my (
        $showname, $season,   $episode,   $title,   $firstyear, $firstmonth,
        $firstday, $director, $writer,    $plot,    $userrating,
        $recgroup, $playgroup, $runtime,  $airtime, $endtime,   $category,
        $inputfile
    ) = @_;
    if ($opt_v) { print "(V1) Going to add '$showname - $title' to database!\n"; }

    my $chanid          = 0;
    my $originalairdate = $firstyear . "-" . $firstmonth . "-" . $firstday;
    my $starttime       = $originalairdate . " " . $airtime;
    my $stoptime        = $originalairdate . " " . $endtime;
    my $autoexpire      = 0;

    my $programid;
    if ( _toInt($season) >= 0 ) {
        $season    = sprintf( "%02d", _toInt($season) );
        $episode   = sprintf( "%02d", _toInt($episode) );
        $programid = "S" . $season . "E" . $episode;
        $title     = "s" . $season . "e" . $episode . " " . $title;
    }
    else {
        $programid = $title;
    }

    my $insert =
"INSERT INTO recorded (chanid, starttime, endtime, title, subtitle, description, category, hostname, autoexpire, recgroup, playgroup, programid, originalairdate, basename, progstart, progend) VALUES ((?), (?), (?), (?), (?), (?), (?), (?), (?), (?), (?), (?), (?), (?), (?), (?))";
    my $replace =
"REPLACE recorded SET chanid = (?), starttime = (?), endtime = (?), title = (?), subtitle = (?), description = (?), category = (?), hostname = (?), autoexpire = (?), recgroup = (?), playgroup = (?), programid = (?), originalairdate = (?), basename = (?), progstart = (?), progend = (?)";
    my @values = (
        $chanid,   $starttime, $stoptime,   $showname,  $title,     $plot,
        $category, $host,      $autoexpire, $recgroup,  $playgroup, $programid,
        $originalairdate, $inputfile, $starttime, $stoptime
    );

    if ( $opt_t != 0 ) {
        print("Test mode: insert would have been done\n");
        print(" Query: '$insert'\n");
        print(" Query params: $plot, $inputfile\n");
        return 1;
    }

    print "Inserting \'$showname - $title\' into MythTV Database\n";
    my $dbh = _dbConnect();

    my $sql = $insert;
    my $sth = $dbh->prepare($sql);
    my $rv  = $sth ? $sth->execute(@values) : undef;
    if ( !defined $rv || $rv == 0 ) {
        print "Found, Replacing instead\n";
        $sql = $replace;
        $sth = $dbh->prepare($sql);
        $rv  = $sth ? $sth->execute(@values) : undef;
    }
    if ( !defined $rv || $rv == 0 ) {
        my $reason = $sth ? $sth->errstr : $dbh->errstr;
        $reason = "" unless defined $reason;
        print "Could not execute ($sql) ", $reason, "\n";
        $dbh->disconnect;
        return 0;
    }

    $dbh->disconnect;
    print "Database updated!\n";
    return 1;
}

#
#
# Generic Functions
#
#

# Return the text found between two markers, with HTML entities decoded.
#
# Parameters: $data text to search; $beg start marker; $end end marker.
# Markers are matched case-insensitively.
# Returns "" when either marker is missing.
sub parseBetween {
    my ( $data, $beg, $end ) = @_;    # grab parameters
    return "" unless defined $data;
    $beg = "" unless defined $beg;
    $end = "" unless defined $end;

    my $ldata  = lc($data);
    my $start  = index( $ldata, lc($beg) ) + length($beg);
    my $finish = index( $ldata, lc($end), $start );

    # index() gave -1 for a missing start marker, which leaves $start at
    # length($beg) - 1.
    if ( $start != ( length($beg) - 1 ) && $finish != -1 ) {
        my $result = substr( $data, $start, $finish - $start );

        # return w/ decoded numeric character references
        # (see http://www.w3.org/TR/html4/charset.html#h-5.3.1)
        decode_entities($result);
        return $result;
    }
    return "";
}

# Strip leading and trailing white space from a string.
# An undefined argument is handed back unchanged.
sub trim {
    my $string = shift;
    return $string unless defined $string;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}

# Drop everything from the last "(" onwards and trim the remainder.
sub stripParensFromName {
    my ($parensName) = @_;    # grab parameters
    return "" unless defined $parensName;

    my $paren = rindex( $parensName, "(" );
    if ( $paren != -1 ) {
        return trim( substr( $parensName, 0, $paren ) );
    }
    return trim($parensName);
}