#!/usr/bin/perl
# Streetwikibot V1.1
# Author: Philippe Schelté
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php

use warnings;
use strict;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Cookies;
use MediaWiki::API;
use Getopt::Std;
use Data::Dumper;    # handy when debugging API responses

# command-line options
getopts("hl:n:p:t:u:");
our ($opt_h, $opt_l, $opt_n, $opt_p, $opt_t, $opt_u);
display_help() if $opt_h || !$opt_u;

# configuration
our $BASE_DIR = "/var/mercurial/metagov/streetwiki";
our $EXPORT   = '';    # newline-separated (%0A) list of page titles to export
unless (-e $BASE_DIR) {
    mkdir($BASE_DIR, 0700) || die "Can't create $BASE_DIR : $!";
}
our $BASE_URL = $opt_u;
our ($NUM_PAGES, $NUM_TURNS, $NS_NUMS, $LIMIT);
$NUM_PAGES = $opt_p ? $opt_p : 500;          # pages fetched per API call
$NUM_TURNS = $opt_t ? $opt_t : 20;           # maximum number of API calls per namespace
$NS_NUMS   = $opt_n ? $opt_n : 10;           # comma-separated namespace numbers
$LIMIT     = defined $opt_l ? $opt_l : 0;    # revision limit (parsed but not yet applied to the export request)
# end of configuration

open(LOGS, ">>$BASE_DIR/$BASE_URL.log") || die "Can't open $BASE_DIR/$BASE_URL.log : $!";
binmode(LOGS, ':encoding(UTF-8)');
select((select(LOGS), $| = 1)[0]);    # autoflush the log

my $mw = MediaWiki::API->new();
$mw->{config}->{api_url} = "http://$BASE_URL/w/api.php";

# collect the page titles of every requested namespace
my @ns_nums = split(/,/, $NS_NUMS);
foreach (@ns_nums) {
    get_pages($mw, $_);
}

# export all collected pages in one request and save the XML dump
my $xml_content = send_request(
    "w/index.php?title=Special:Export&pages=$EXPORT&action=submit",
    "Everything saved into $BASE_URL.xml",
    0
);
if ($xml_content) {
    open(my $xml_file, '>', "$BASE_DIR/$BASE_URL.xml")
        || die "Can't open : $BASE_DIR/$BASE_URL.xml $!";
    binmode($xml_file, ':encoding(UTF-8)');
    print $xml_file $xml_content;
    close($xml_file);
} else {
    exit 0;
}

sub display_help {
    print 'StreetWikiBot Version 1.1
usage : ./streetwikibot.pl -u okidoke.referata.com -n 10,102 -p 500 -t 20 -l 0
options:
    -u hostname          # hostname or IP of the streetwiki
    -n namespace         # number of the namespace to back up. To back up several namespaces, separate the numbers with commas, without spaces (default 10)
    -p number_of_pages   # number of pages to retrieve on each iteration (default 500)
    -t number_of_turns   # number of iterations to do (default 20)
    -l limit             # limit the revision fetch (default 0, only the last revision)
    -h                   # display this help
';
    exit 0;
}

# list all pages of one namespace through the MediaWiki API
sub get_pages {
    my ($mw, $ns_num) = @_;
    log_it("Using namespace number : " . $ns_num);
    $mw->list(
        {   action      => 'query',
            list        => 'allpages',
            apnamespace => $ns_num,
            aplimit     => $NUM_PAGES
        },
        {   max            => $NUM_TURNS,
            hook           => \&loop_pages,
            skip_encodings => 1
        }
    ) || do {
        log_it($mw->{error}->{code} . ': ' . $mw->{error}->{details});
        die "MediaWiki API query failed for namespace $ns_num\n";
    };
}

# hook called by $mw->list for each batch of results
sub loop_pages {
    my ($ref) = @_;
    foreach (@$ref) {
        if ($_->{title} ne '%s') {    # skip the '%s' placeholder title
            $EXPORT .= $_->{title} . '%0A';
            log_it("fetching " . $_->{title});
        }
    }
}

sub log_it {
    my ($comment) = @_;
    my $date_log  = localtime(time());
    print LOGS "$date_log: $comment\n";
}

# perform a GET (or POST when $post is given) against the wiki and return the body
sub send_request {
    my ($path, $comment, $post) = @_;
    my $ua = LWP::UserAgent->new;
    $path =~ s/ /_/g;    # page titles may contain spaces
    $ua->agent("Streetwikibot/1.1 " . $ua->agent);
    $ua->cookie_jar(HTTP::Cookies->new(file => "$BASE_DIR/$BASE_URL.cookies", autosave => 1));
    push @{ $ua->requests_redirectable }, 'POST';    # follow redirects on POST as well as GET/HEAD
    my ($req, $res);
    if ($post) {
        $req = HTTP::Request->new(POST => "http://$BASE_URL/$path");
        $req->content_type('application/x-www-form-urlencoded');
        $req->content($post);
    } else {
        $req = HTTP::Request->new(GET => "http://$BASE_URL/$path");
    }
    $res = $ua->request($req);
    if ($res->is_success) {
        log_it($comment) unless ($comment eq "");
        return $res->decoded_content;
    } else {
        log_it("Error on $comment : " . $res->status_line);
        return 0;
    }
}

close(LOGS);
exit 0;