Perl Internet Rake

This is my solution to raking the internet with Perl. Just populate the array with the extensions you want. I use the dark art of Google hacking to get the information back.

There is a two-part loop running off this. Part one builds a hash-of-arrays table of URLs where the data can be collected, keyed by extension. Part two walks that table, builds the full link for each file, and then starts downloading.

#!/usr/bin/perl -w

use Google::Search;
use LWP::Simple;
use HTML::LinkExtor;
use Cwd;

# File extensions to search for and download.
my @extentions    = qw(xls xlsx doc docx jpeg gif png);    # ext to get
my %searchResults = ();    # placeholder for raw search results (currently unused)
my %downloadlist  = ();    # {extension}{page uri} => [ links found on that page ]
my @linklist      = ();    # scratch list filled by the links() callback

#file rename properties

my $dir = '/Users/Robert/workspace/perltest/dir1';
my $cwd = getcwd();
print $cwd;

# BUG FIX: pass a CODE ref (\&links) so HTML::LinkExtor invokes our
# callback; the original \%links was a hash ref and the callback would
# never have been called from this object.
my $LinkExtor = HTML::LinkExtor->new( \&links );

# Stage one: for each extension, run a Google search and harvest matching
# links from every result page into %downloadlist.
# NOTE: $ext deliberately stays a package global (no `my`) — the links()
# callback at the bottom of the file reads it dynamically.
foreach $ext (@extentions) {

	my $search = Google::Search->Web( query => "indexof $ext" );
	while ( my $result = $search->next ) {

		# Show which result page we are scraping.
		print "working with extension: $ext " . $result->rank,
		  " the search address ", $result->uri, "\n";
		my $page = get( $result->uri );

		# BUG FIX: get() returns undef on failure; don't parse nothing.
		next unless defined $page;

		# Fresh extractor wired to the links() callback below.
		$LinkExtor = HTML::LinkExtor->new( \&links );

		# BUG FIX: reset the scratch list so links collected from
		# previous pages do not leak into this page's entry.
		@linklist = ();

		# Parse the page; links() pushes matching hrefs onto @linklist.
		$LinkExtor->parse($page);

		# Record this page's links, keyed by extension and page URI.
		# Only store pages that actually yielded links.
		$downloadlist{$ext}{ $result->uri } = [@linklist] if @linklist;
	}

	# Create a per-extension download directory named after the extension.
	mkdir $ext, 0777 unless -d $ext;

}
print "-----------------------------------------------------\n";
print "finish building table of links now for the down load \n";
print "-----------------------------------------------------\n";

# Stage two: walk the %downloadlist table and download each file into the
# directory named after its extension.
#   {extension} -> {page uri} -> [ links ]
foreach $exten ( keys(%downloadlist) ) {

	# $exten is the extension we are working with, e.g. doc
	print "working on extention $exten";
	foreach $url ( keys( %{ $downloadlist{$exten} } ) ) {
		print "\t|-- $url\n";

		# $elm is one link extracted from the page at $url
		foreach $elm ( @{ $downloadlist{$exten}{$url} } ) {

			# BUG FIX: extracted hrefs are frequently already absolute;
			# only prepend the page URL for relative links, otherwise
			# $url . $elm produces a garbage URI.
			my $downloadURI = ( $elm =~ m{^https?://}i ) ? $elm : $url . $elm;

			# BUG FIX: use only the last path segment as the local file
			# name — the raw link can contain '/' characters, which made
			# "$exten/$elm" an invalid (or path-escaping) file name.
			my ($file) = $downloadURI =~ m{([^/]+)$};
			next unless defined $file;

			# Skip files we have already fetched on a previous run.
			if ( -e "$exten/$file" ) {
				print "got the file: $file already\n";
			}
			else {
				my $status = getstore( $downloadURI, "$exten/$file" );

				if ( is_success($status) ) {
					print "got a file from \t\t|-- $downloadURI\n";
				}
				else {

					# e.g. HTTP status code 404
					print "Couldn't retrieve page: $status\n $downloadURI\n";
				}    #end of download

			}    #end of file check
		}    #end of loop
	}
}

print "Finish!!!!!!!!!!!";
# HTML::LinkExtor callback: collect links from <a> tags whose target ends
# in ".$ext", where $ext is the extension currently being processed by the
# main search loop (read dynamically as a package global). Matching link
# values are appended to the shared @linklist scratch array.
sub links {
	my ( $tag, %attrs ) = @_;

	# Only anchor tags can point at downloadable documents here.
	return unless $tag eq "a";

	foreach my $attr ( keys %attrs ) {

		# BUG FIX: the original pattern m/.$ext$/ used an unescaped '.'
		# that matched ANY character, so e.g. "notadoc" matched for
		# ext "doc". Require a literal dot and quote the extension.
		if ( $attrs{$attr} =~ m/\.\Q$ext\E$/ ) {
			print "\t\t---->Link Found at ", $attrs{$attr}, "\n  ";
			push( @linklist, $attrs{$attr} );
		}    #end if
	}    #end foreach
	return;
}    #end sub