# Perl: script_finder.pl
# Wednesday, 23 September 2009 21:44
#!/usr/bin/perl
use HTML::TreeBuilder;
use URI;

use strict;
use warnings;

# script_finder: report the hosts of external <script src="..."> references.
#
# Usage: script_finder.pl FILE [FILE ...]
#
# Every HTML file named on the command line is parsed, and for each
# <script> tag whose SRC attribute names a host that is NOT in @domains,
# that host is printed on its own line on STDOUT. Files that cannot be
# opened are reported on STDERR and skipped.

# Specify valid hosts here. A script whose SRC host equals one of these
# (compared case-insensitively, whole host name) is skipped.
my @domains = ( "example.com",
                "img.example.com",
                "js.example.com" );

# Lower-cased lookup table so each host is checked with an exact match.
# The previous regex test interpolated the (untrusted) host as a pattern and
# matched it as a substring of the trusted names, so hosts such as
# "example.co" or "com" were wrongly treated as trusted.
my %is_trusted_host = map { lc($_) => 1 } @domains;

# Parse each file passed via the command line:
FILE:
foreach my $file_name (@ARGV) {
    my $tree = HTML::TreeBuilder->new;

    # parse_file returns undef (with the reason in $!) when the file
    # cannot be opened; skip it rather than scanning an empty tree.
    unless ( $tree->parse_file($file_name) ) {
        warn "Cannot parse '$file_name': $!\n";
        $tree->delete;
        next FILE;
    }
    $tree->elementify();

    # Visit each instance of the "script" tag.
    SCRIPT:
    foreach my $element ( $tree->find("script") ) {
        my $src = $element->attr("src");    # value of the SRC attribute
        next SCRIPT unless defined $src && length $src;

        # Supplying "http" as the fallback scheme makes schemeless values
        # ("/js/app.js", "//cdn.host/x.js") parse as server-based URIs, so
        # ->host is available instead of dying on a generic URI object.
        my $url = URI->new( $src, "http" );

        # Schemes such as data: or javascript: have no host component.
        next SCRIPT unless $url->can("host");

        my $host = $url->host;
        next SCRIPT unless defined $host && length $host;    # relative path

        # Print just the SRC URL's host, one per line.
        print "$host\n" unless $is_trusted_host{ lc $host };
    }

    $tree->delete;    # release the tree before starting the next file
}