|
Wednesday, 23 September 2009 21:44 |
#!/usr/bin/perl
#
# Report the hosts of external <script src="..."> references found in the
# HTML files named on the command line.
#
# Usage: perl <this-script> file1.html [file2.html ...]
#
# For every <script> tag whose src attribute points at a host that is not
# listed in @domains, the host is printed on its own line to STDOUT.
# Files that cannot be opened are reported on STDERR and skipped.
use strict;
use warnings;

use HTML::TreeBuilder;
use URI;

# Specify valid hosts and domains here. The script will skip these.
my @domains = ( "example.com", "img.example.com", "js.example.com" );

# Exact, case-insensitive lookup table built from @domains. A hash lookup
# is used instead of a regex match so that a host is only skipped when it
# is literally one of the listed names (no substring or metacharacter
# surprises such as "com" matching "example.com").
my %is_known_host = map { lc($_) => 1 } @domains;

# Parse each file passed via the command line:
foreach my $file_name (@ARGV) {
    my $tree = HTML::TreeBuilder->new;

    # parse_file returns undef (with $! set) when the file cannot be opened.
    unless ( $tree->parse_file($file_name) ) {
        warn "Cannot parse '$file_name': $!\n";
        $tree->delete;
        next;
    }
    $tree->elementify();

    # Find each instance of the "script" tag
    foreach my $element ( $tree->find("script") ) {

        # Inline scripts have no SRC attribute; nothing to report for them.
        my $src = $element->attr("src");
        next unless $src;

        # The 'http' scheme hint is only applied to relative references; it
        # lets protocol-relative URLs ("//host/path") expose their host.
        my $url = URI->new( $src, 'http' );

        # Schemes such as data: or javascript: have no host accessor.
        next unless $url->can('host');

        # Plain relative paths ("js/app.js") have no host at all.
        my $host = $url->host;
        next unless defined $host && length $host;

        # Print just the SRC URL's host, one per line.
        print "$host\n" unless $is_known_host{ lc $host };
    }

    # Delete the tree to release its memory before the next file.
    $tree->delete;
}
|