#!/usr/bin/perl
# Streetwikibot V1.1
# Author Philippe Schelté <pschelte@migratis.net> 
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php 

use warnings;
use strict;
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Cookies;
use MediaWiki::API;
use Getopt::Std;
use Data::Dumper;

# options

# -h is a plain flag; the other options all take a value.  Getopt::Std
# fills the corresponding $opt_* package variables.  The original spec
# lacked "h" even though the help text advertises -h.
getopts("hl:n:p:t:u:");

our ($opt_h, $opt_l, $opt_n, $opt_p, $opt_t, $opt_u);

# The wiki hostname (-u) is mandatory; -h explicitly requests the usage
# text.  Either way display_help() prints the usage and exits.
if ($opt_h || !$opt_u)
{
    display_help();
}

# configuration

# Directory where the log, cookie jar and XML dump are stored.
our $BASE_DIR = "/var/mercurial/metagov/streetwiki";

# Newline-separated (%0A-encoded) list of page titles collected by
# loop_pages() for the final Special:Export request.  Initialised to ""
# so interpolating it never raises an "uninitialized value" warning
# when no page was found.
our $EXPORT = "";

unless (-e $BASE_DIR)
{
	mkdir($BASE_DIR, 0700) || die "Can't create $BASE_DIR : $!";
}

our $BASE_URL = $opt_u;    # wiki hostname, from the mandatory -u option

# Tunables with their command-line overrides and defaults.
# NOTE(review): $LIMIT (the -l option) is declared but never assigned
# or read anywhere in this file - the revision limit advertised in the
# help text is currently ignored.
our ($NUM_PAGES, $NUM_TURNS, $NS_NUMS, $LIMIT);
$NUM_PAGES = $opt_p ? $opt_p : 500;    # pages per API request (-p)
$NUM_TURNS = $opt_t ? $opt_t : 20;     # max list() iterations (-t)
$NS_NUMS   = $opt_n ? $opt_n : 10;     # comma-separated namespaces (-n)

# end of configuration

# Per-wiki append-mode log file; autoflush so lines appear immediately.
# LOGS stays a package filehandle because log_it() prints to it by name.
open(LOGS, '>>', "$BASE_DIR/$BASE_URL.log")
    || die "Can't open $BASE_DIR/$BASE_URL.log : $!";
select((select(LOGS), $| = 1)[0]);

# Connect to the wiki's MediaWiki API and collect the page titles of
# every requested namespace into $EXPORT.
my $mw = MediaWiki::API->new();
$mw->{config}->{api_url} = "http://$BASE_URL/w/api.php";

# -n may hold several namespace numbers separated by commas.
foreach my $ns (split(/,/, $NS_NUMS))
{
    get_pages($mw, $ns);
}

# Ask Special:Export for all collected titles in one request and save
# the returned XML dump next to the log file.  send_request() returns 0
# on HTTP failure, so a falsy result means there is nothing to save.
my $xml_content = send_request(
    "w/index.php?title=Special:Export&pages=$EXPORT&action=submit",
    "Everything saved into $BASE_URL.xml",
    0);
if ($xml_content)
{
    open(my $xml_file, '>', "$BASE_DIR/$BASE_URL.xml")
        || die "Can't open : $BASE_DIR/$BASE_URL.xml $!";
    print $xml_file $xml_content;
    # Check close on a write handle: buffered write errors surface here.
    close($xml_file) || die "Can't close $BASE_DIR/$BASE_URL.xml : $!";
} else {
    exit 0;
}

# Print usage information and terminate.  Called when -h is given or
# when the mandatory -u option is missing.
sub display_help
{
    print 'StreetWikiBot Version 1.1
usage : 
    ./streetwikibot.pl -u okidoke.referata.com -n 10,102 -p 500 -t 20 -l 0
options:
    -u hostname # hostname or ip of the streetwiki
    -n namespace # number of the namespace to backup. To backup several namespaces separate the numbers by a comma without spaces (default 10)
    -p number_of_pages # the number of pages to retrieve by each iteration (default 500)
    -t number_of_turns # numbers of iteration to do (default 20)
    -l limit # limit the revision fetch (default 0 only the last revision)
    -h # display this help
';
    exit 0;
}

# Query the MediaWiki "allpages" list for one namespace.  Each batch of
# up to $NUM_PAGES results (at most $NUM_TURNS batches) is handed to
# loop_pages(), which records the titles for the later export request.
sub get_pages
{
    my ($mw, $ns_num) = @_;
    log_it("Using namespace number : " . $ns_num);
    my $ok = $mw->list(
        { action      => 'query',
          list        => 'allpages',
          apnamespace => $ns_num,
          aplimit     => $NUM_PAGES },
        { max => $NUM_TURNS, hook => \&loop_pages, skip_encodings => 1 });
    unless ($ok)
    {
        # Log the API error, then die with the same text.  The original
        # "die log_it(...)" died with log_it's return value (print's 1)
        # instead of the actual error message.
        my $msg = $mw->{error}->{code} . ': ' . $mw->{error}->{details};
        log_it($msg);
        die $msg;
    }
}

# Hook for MediaWiki::API->list(): append every returned page title to
# the global $EXPORT string, separated by the URL-encoded newline %0A
# that Special:Export expects, and log each title as it is collected.
sub loop_pages
{
    my ($page_list) = @_;
    for my $page (@$page_list) {
        my $title = $page->{title};
        next if $title eq '%s';    # skip placeholder entries
        $EXPORT .= $title . '%0A';
        log_it("fetching " . $title);
    }
}

# Write one timestamped line to the LOGS filehandle (opened at startup,
# autoflushed so entries appear immediately).
sub log_it
{
	my ($message) = @_;
	my $stamp = localtime(time());
	print LOGS "$stamp: $message\n";
}

# Perform one HTTP request against the wiki.
#
#   $path    - URL path relative to http://$BASE_URL/ (spaces become _)
#   $comment - message logged on success (also embedded in the error
#              line on failure); skipped when empty
#   $post    - when true, used as the urlencoded POST body; when false
#              a plain GET request is sent
#
# Returns the decoded response body on success, 0 on any HTTP error.
sub send_request
{
	my ($path, $comment, $post) = @_;
	my $ua = LWP::UserAgent->new;
	$path =~ s/ /_/g;    # page titles may contain spaces
	# The trailing space was missing in the original, which glued the
	# two product tokens together ("Streetwikibot/1.1libwww-perl/...").
	$ua->agent("Streetwikibot/1.1 " . $ua->agent);
	$ua->cookie_jar(HTTP::Cookies->new(
		file => "$BASE_DIR/$BASE_URL.cookies", autosave => 1));
	# Also follow redirects after POST.  The original called the
	# requests_redirectable getter in void context, which was a no-op;
	# presumably this was the intent - TODO confirm.
	push @{ $ua->requests_redirectable }, 'POST';

	my $req;
	if ($post)
	{
		$req = HTTP::Request->new(POST => "http://$BASE_URL/$path");
		$req->content_type('application/x-www-form-urlencoded');
		$req->content($post);
	}
	else
	{
		$req = HTTP::Request->new(GET => "http://$BASE_URL/$path");
	}
	my $res = $ua->request($req);
	if ($res->is_success)
	{
		log_it($comment) unless ($comment eq "");
		return $res->decoded_content;
	}
	else
	{
		log_it("Error on $comment : " . $res->status_line);
		return 0;
	}
}

# Close the log; a failed close means buffered log lines were lost,
# which is worth a warning but not a fatal error at shutdown.
close(LOGS) || warn "Can't close log file : $!";

exit 0;