#!/usr/bin/perl
# Streetwikibot V1.1
# Author Philippe Schelté <pschelte@migratis.net>
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
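#
# Backs up a MediaWiki ("streetwiki"): lists the pages of the requested
# namespaces through the API, then saves them as XML via Special:Export.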
use warnings;
use strict;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Cookies;
use MediaWiki::API;
use Getopt::Std;
use Data::Dumper;
# options
getopts("hl:n:p:t:u:");
our ($opt_h, $opt_l, $opt_n, $opt_p, $opt_t, $opt_u);
if ($opt_h || !$opt_u)
{
    display_help();
}
# configuration
our $BASE_DIR = "/var/mercurial/metagov/streetwiki";
our $EXPORT   = "";    # %0A-separated list of page titles passed to Special:Export
unless (-e $BASE_DIR)
{
    mkdir($BASE_DIR, 0700) or die "Can't create $BASE_DIR : $!";
}
our $BASE_URL  = $opt_u;
our $NUM_PAGES = $opt_p || 500;    # pages listed per API request
our $NUM_TURNS = $opt_t || 20;     # maximum number of list iterations
our $NS_NUMS   = $opt_n || 10;     # comma-separated namespace numbers
our $LIMIT     = $opt_l || 0;      # revision limit from -l (0 = only the last revision); not yet applied to the export request
# end of configuration
open(LOGS, '>>', "$BASE_DIR/$BASE_URL.log") or die "Can't open $BASE_DIR/$BASE_URL.log : $!";
select((select(LOGS), $| = 1)[0]);    # autoflush the log filehandle
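# walk every requested namespace and collect the titles of its pages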
my $mw = MediaWiki::API->new();
$mw->{config}->{api_url} = "http://$BASE_URL/w/api.php";
my @ns_nums = split(/,/, $NS_NUMS);
foreach my $ns_num (@ns_nums)
{
    get_pages($mw, $ns_num);
}
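# ask Special:Export for all collected titles and save the result as XML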
my $xml_content = send_request("w/index.php?title=Special:Export&pages=$EXPORT&action=submit", "Everything saved into $BASE_URL.xml", 0);
if ($xml_content)
{
    open(my $xml_file, '>', "$BASE_DIR/$BASE_URL.xml") or die "Can't open : $BASE_DIR/$BASE_URL.xml $!";
    print $xml_file $xml_content;
    close($xml_file);
} else {
    exit 1;    # the export request failed; the error is already in the log
}
sub display_help
{
    print 'StreetWikiBot Version 1.1
usage :
./streetwikibot.pl -u okidoke.referata.com -n 10,102 -p 500 -t 20 -l 0
options:
-u hostname          # hostname or IP of the streetwiki
-n namespace         # number of the namespace to back up; to back up several namespaces, separate the numbers with commas, without spaces (default 10)
-p number_of_pages   # number of pages to retrieve in each iteration (default 500)
-t number_of_turns   # number of iterations to perform (default 20)
-l limit             # limit the number of revisions fetched (default 0: only the last revision)
-h                   # display this help
';
    exit 0;
}
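# get_pages: list every page of one namespace through the MediaWiki API;
# loop_pages is called as a hook for each batch of results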
sub get_pages
{
    my ($mw, $ns_num) = @_;
    log_it("Using namespace number : " . $ns_num);
    $mw->list(
        { action      => 'query',
          list        => 'allpages',
          apnamespace => $ns_num,
          aplimit     => $NUM_PAGES },
        { max => $NUM_TURNS, hook => \&loop_pages, skip_encodings => 1 }
    ) || do {
        log_it($mw->{error}->{code} . ': ' . $mw->{error}->{details});
        die "MediaWiki API error on namespace $ns_num\n";
    };
}
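# loop_pages: hook receiving each batch of page entries; accumulates their titles in $EXPORT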
sub loop_pages
{
    my ($ref) = @_;
    foreach (@$ref) {
        # skip entries whose title is the literal placeholder '%s'
        if ($_->{title} ne '%s')
        {
            # Special:Export expects one title per line, so join titles with %0A (URL-encoded newline)
            $EXPORT .= $_->{title} . '%0A';
            log_it("fetching " . $_->{title});
        }
    }
}
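# log_it: append a timestamped line to the log file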
sub log_it
{
    my ($comment) = @_;
    my $date_log  = localtime(time());
    my $log_line  = "$date_log: $comment\n";
    print LOGS $log_line;
}
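# send_request: GET (or POST when $post is set) http://$BASE_URL/$path,
# log $comment on success and return the decoded body, or 0 on failure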
sub send_request
{
    my ($path, $comment, $post) = @_;
    my $ua = LWP::UserAgent->new;
    $path =~ s/ /_/g;    # page titles use underscores instead of spaces in URLs
    $ua->agent("Streetwikibot/1.1 " . $ua->agent);
    $ua->cookie_jar(HTTP::Cookies->new(file => "$BASE_DIR/$BASE_URL.cookies", autosave => 1));
    # GET and HEAD requests already follow redirects by default
    my ($req, $res);
    if ($post)
    {
        $req = HTTP::Request->new(POST => "http://$BASE_URL/$path");
        $req->content_type('application/x-www-form-urlencoded');
        $req->content($post);
    }
    else
    {
        $req = HTTP::Request->new(GET => "http://$BASE_URL/$path");
    }
    $res = $ua->request($req);
    if ($res->is_success)
    {
        log_it($comment) unless ($comment eq "");
        return $res->decoded_content;
    }
    else
    {
        log_it("Error on $comment : " . $res->status_line);
        return 0;
    }
}
close(LOGS);
exit 0;