#!/usr/bin/perl use LWP::UserAgent; use HTTP::Request::Common qw(GET POST); use HTML::Parser; use URI; use HTML::Entities;
# URLs of the Wikipedia sandbox page this script works against.
#   MAINPAGE - the rendered sandbox page.
#   EDITPAGE - the index.php entry point for the same page.  It already
#              carries the "?title=..." query, so further parameters are
#              appended with '&'.
use constant {
    MAINPAGE =>
      'http://en.wikipedia.org/wiki/Wikipedia:Tutorial_%28Keep_in_mind%29/sandbox',
    EDITPAGE => 'http://en.wikipedia.org/w/index.php'
      . '?title=Wikipedia:Tutorial_%28Keep_in_mind%29/sandbox',
};
# Names of the form inputs on the edit page whose values we care about.
# findPageData records the value of each of these so it can be sent back
# with our submission.
my @wpTags = (
    'wpEditToken',
    'wpAutoSummary',
    'wpStarttime',
    'wpEdittime',
    'wpSave',
);
# Start-tag handler for HTML::Parser.
#
# Arguments (matching the "self,tagname,attr" argspec it is registered
# with):
#   $self - the parser object (unused)
#   $tag  - name of the tag being opened (unused)
#   $attr - hash ref of the tag's attributes
#
# Side effects:
#   - sets $main::wpTextboxFound when the element is named "wpTextbox1",
#     which tells endHandler to capture the text up to the closing tag;
#   - stores the "value" attribute of any element whose name is listed in
#     @wpTags into %main::parms, for use in our submission later.
sub findPageData {
    my ( undef, undef, $attr ) = @_;

    # Elements without a name can never be ones we want; returning early
    # also avoids comparing against an undefined value.
    my $name = $attr->{name};
    return unless defined $name;

    # Signal to the endHandler handler that we found the text box.
    if ( $name eq 'wpTextbox1' ) {
        $main::wpTextboxFound = 1;
        return;
    }

    # Compare names exactly.  Interpolating the name into a regex would
    # accept substrings (e.g. "Token" matching "wpEditToken"), die on
    # names containing regex metacharacters, and treat an empty name as
    # the empty pattern.
    if ( grep { $_ eq $name } @wpTags ) {
        $main::parms{$name} = $attr->{value};
    }

    return;
}
# End-tag handler for HTML::Parser; called on closing tags such as
# </textarea>.
#
# Arguments (matching the "self,tagname,attr,skipped_text" argspec it is
# registered with):
#   $self    - the parser object (unused)
#   $tag     - name of the tag being closed
#   $attr    - attribute hash ref (unused)
#   $skipped - text the parser skipped since the previously reported event
#
# When findPageData has flagged the wpTextbox1 element and the tag being
# closed is a textarea, the skipped text is stored in
# $main::parms{wpTextbox1} and the flag is cleared.
sub endHandler {
    my ( undef, $tag, undef, $skipped ) = @_;

    # Nothing to capture until the text box start tag has been seen.
    # This has to be "return": "next" is a loop-control statement and,
    # inside a sub with no enclosing loop, is a runtime error (or silently
    # jumps into whatever loop the caller happens to be running).
    return unless $main::wpTextboxFound;

    if ( $tag eq 'textarea' ) {
        $main::parms{wpTextbox1} = $skipped;
        undef $main::wpTextboxFound;
    }

    return;
}
# Stop the script when an HTTP response is neither a success nor a
# redirect.
#
# Takes a response object offering code() and status_line().  A status
# code from 200 up to (but not including) 400 is accepted and the sub
# just falls through.  Any other code prints "Error: <status line>" to
# standard output and ends the program with exit status 1.
sub checkError {
    my ($response) = @_;

    my $status     = $response->code();
    my $acceptable = $status >= 200 && $status < 400;

    if ( !$acceptable ) {
        printf "Error: %s\n", $response->status_line;
        exit 1;
    }
}
### ### MAIN ###
# First, fetch the main wikipedia sandbox page. This just confirms
# our connectivity and makes sure it really works.
my $UA   = LWP::UserAgent->new();
my $req  = HTTP::Request->new( GET => MAINPAGE );
my $resp = $UA->request($req);
checkError($resp);

# Now fetch the edit version of that page
$req->uri( EDITPAGE . '&action=edit' );
$resp = $UA->request($req);
checkError($resp);

# Build a parser to parse the edit page and find the text on it.
# Only textarea and input tags are reported; findPageData and endHandler
# collect the form fields into %main::parms.
my $p = HTML::Parser->new(
    api_version   => 3,
    start_h       => [ \&findPageData, "self,tagname,attr" ],
    end_h         => [ \&endHandler, "self,tagname,attr,skipped_text" ],
    unbroken_text => 1,
    attr_encoded  => 0,
    report_tags   => [qw(textarea input)]
);
$p->parse( $resp->content );
$p->eof;

# If the parser never captured the edit box, stop here: continuing would
# submit only the appended test text in place of the page's contents.
unless ( exists $main::parms{wpTextbox1} ) {
    print "Error: edit form text box not found on the edit page\n";
    exit 1;
}

# The text will have entities encoded (e.g., &lt; instead of <)
# We have to decode them and submit raw characters.
$main::parms{wpTextbox1} = decode_entities( $main::parms{wpTextbox1} );

# make our trivial edit. append text to whatever was already there.
$main::parms{wpTextbox1} .= "\r\n\r\n===Test 1===\r\n\r\n"
  . "ISBN: 9780596514839\r\n\r\nThis is a test.\r\n\r\n";

# POST our edit as multipart form data, straight to the submit URL.
$req = HTTP::Request::Common::POST(
    EDITPAGE . '&action=submit',
    Content_Type => 'form-data',
    Content      => \%main::parms
);

$resp = $UA->request($req);
checkError($resp);

# We expect a 302 redirection if it is successful.
# NOTE(review): this is not verified here; checkError only rejects status
# codes outside 200-399.
|