Perl: recipe_8.11.pl PDF Print E-mail
Wednesday, 23 September 2009 21:39
#!/usr/bin/perl
use LWP::UserAgent;
use HTTP::Request::Common qw(GET POST);
use HTML::Parser; use URI;
use HTML::Entities;

# URLs for the Wikipedia tutorial sandbox: MAINPAGE is the rendered page,
# EDITPAGE is the index.php form of the same title (the main script appends
# an '&action=...' parameter to it).
use constant {
    MAINPAGE =>
      'http://en.wikipedia.org/wiki/Wikipedia:Tutorial_%28Keep_in_mind%29/sandbox',
    EDITPAGE => 'http://en.wikipedia.org/w/index.php'
      . '?title=Wikipedia:Tutorial_%28Keep_in_mind%29/sandbox',
};

# Names of the edit-form inputs whose values are copied into the submission.
my @wpTags = (
    'wpEditToken',
    'wpAutoSummary',
    'wpStarttime',
    'wpEdittime',
    'wpSave',
);

# Start-tag handler for HTML::Parser.
#
# Arguments: the parser object, the tag name, and a hash ref of the tag's
# attributes.
#
# Side effects:
#   * sets $main::wpTextboxFound when the tag is named "wpTextbox1", so that
#     endHandler knows to capture the text that follows;
#   * copies the value of any tag whose name is listed in @wpTags into
#     %main::parms under that name.
#
# Tags without a name attribute, and names not listed in @wpTags, are ignored.
sub findPageData {
    my ( $self, $tag, $attr ) = @_;

    my $name = $attr->{name};

    # Nameless tags can never be one of the fields we want.
    return unless defined $name;

    # signal to the endHandler handler if we find the text
    if ( $name eq "wpTextbox1" ) {
        $main::wpTextboxFound = 1;
        return;
    }

    # Compare names exactly. Treating the attribute name as a regex would
    # accept substrings of the wanted names and choke on metacharacters.
    if ( grep { $_ eq $name } @wpTags ) {

        # if it's one of the form parameters we care about,
        # record the parameter's value for use in our submission later.
        $main::parms{$name} = $attr->{value};
    }
    return;
}

# End-tag handler for HTML::Parser; called on closing tags like </textarea>.
#
# Arguments: the parser object, the tag name, the attribute slot (unused
# here), and the text the parser skipped since its previous reported event.
#
# Does nothing until findPageData has set $main::wpTextboxFound. After that,
# the first closing "textarea" tag stores the skipped text in
# $main::parms{wpTextbox1} and clears the flag.
sub endHandler {
    my ( $self, $tag, $attr, $skipped ) = @_;

    # Leave with return: this is a sub body, not a loop, so "next" would
    # have no enclosing loop to continue.
    return unless $main::wpTextboxFound;

    if ( $tag eq "textarea" ) {
        $main::parms{"wpTextbox1"} = $skipped;
        undef $main::wpTextboxFound;
    }
    return;
}

# Abort the script when a response's status code is outside 200..399.
#
# Takes a response object offering code() and status_line(). On a status
# below 200 or at/above 400 it prints "Error: <status line>" and exits with
# status 1; otherwise it simply returns.
sub checkError {
    my ($response) = @_;

    # 2xx and 3xx responses are accepted as-is.
    unless ( ( $response->code() < 200 ) || ( $response->code() >= 400 ) ) {
        return;
    }

    print "Error: " . $response->status_line . "\n";
    exit 1;
}

###
### MAIN
###

# First, fetch the main wikipedia sandbox page. This just confirms
# our connectivity and makes sure it really works.
my $UA   = LWP::UserAgent->new();
my $req  = HTTP::Request->new( GET => MAINPAGE );
my $resp = $UA->request($req);

checkError($resp);

# Now fetch the edit version of that page (same GET request, new URI).
$req->uri( EDITPAGE . '&action=edit' );
$resp = $UA->request($req);

checkError($resp);

# Build a parser to parse the edit page and find the text on it.
# Only <textarea> and <input> tags are reported; findPageData collects the
# form fields and endHandler collects the textarea's text.
my $p = HTML::Parser->new(
    api_version   => 3,
    start_h       => [ \&findPageData, "self,tagname,attr" ],
    end_h         => [ \&endHandler, "self,tagname,attr,skipped_text" ],
    unbroken_text => 1,
    attr_encoded  => 0,
    report_tags   => [qw(textarea input)]
);
$p->parse( $resp->content );
$p->eof;

# Refuse to submit if the handlers never saw the edit token or the page
# text. Posting anyway would send only the text appended below, without
# the page's existing content.
for my $required (qw(wpEditToken wpTextbox1)) {
    next if exists $main::parms{$required};
    print "Error: edit form field '$required' not found on the edit page\n";
    exit 1;
}

# The text will have entities encoded (e.g., &lt; instead of <)
# We have to decode them and submit raw characters.
$main::parms{wpTextbox1} = decode_entities( $main::parms{wpTextbox1} );

# make our trivial edit. append text to whatever was already there.
$main::parms{wpTextbox1} .= "\r\n\r\n===Test 1===\r\n\r\n"
  . "ISBN: 9780596514839\r\n\r\nThis is a test.\r\n\r\n";

# POST our edit as multipart form data, then point the request at the
# submit action.
$req = HTTP::Request::Common::POST(
    EDITPAGE,
    Content_Type => 'form-data',
    Content      => \%main::parms
);
$req->uri( EDITPAGE . '&action=submit' );

$resp = $UA->request($req);
checkError($resp);

# We expect a 302 redirection if it is successful.