package votorola.g; // Copyright 2010-2013, Michael Allan. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Votorola Software"), to deal in the Votorola Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicence, and/or sell copies of the Votorola Software, and to permit persons to whom the Votorola Software is furnished to do so, subject to the following conditions: The preceding copyright notice and this permission notice shall be included in all copies or substantial portions of the Votorola Software. THE VOTOROLA SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE VOTOROLA SOFTWARE OR THE USE OR OTHER DEALINGS IN THE VOTOROLA SOFTWARE. import com.google.gson.stream.*; import java.io.*; import java.net.*; import java.nio.charset.*; import java.util.HashMap; import java.util.logging.*; import votorola.g.logging.*; import java.util.regex.*; import javax.ws.rs.core.UriBuilder; import javax.xml.stream.*; import votorola.g.hold.*; import votorola.g.lang.*; import votorola.g.logging.*; import votorola.g.net.*; import votorola.g.xml.stream.*; /** Utilities for communicating with the MediaWiki wiki system. */ public @ThreadSafe final class MediaWiki { private MediaWiki() {} /** The character set for posting to the MediaWiki API. */ public static final String API_POST_CHARSET = "UTF-8"; // presumeably /** Appends an already encoded page name to a wiki base URL. The encoded page name * (e.g. "Main_page") is appended as a simple path (/Main_page) if that * is safe, otherwise as a title query (?title=Main_page). * * @param b a String builder containing only the wiki base URL, without a * trailing slash (/). * @param maybeUgly whether the base URL might be the standard access URL ending * in "index.php" for example, or is definitely a pretty alias per * $wgUsePathInfo. * * @return the same string builder with the page name appended. * * @see #encodePageSpecifier(UriBuilder,boolean,String) * @see #MAYBE_UGLY_URL_PATTERN */ public static StringBuilder appendPageSpecifier( final StringBuilder b, final boolean maybeUgly, final String encodedPageName ) { // We used to simply append the path even to index.php, but that fails with some // wikis. It fails with Metagov's 1.16 (quite new) wiki for example: // http://metagovernment.org/w/index.php5/Help:Contents // // cf. encodePageSpecifier // // changing? change also in g/web/gwt/super/ if( maybeUgly ) { b.append( "?title=" ); b.append( encodedPageName ); } else { b.append( '/' ); b.append( encodedPageName ); } return b; } /** Completes the URL-decoding of a page name by substituting spaces for underscores. * * @see Help:Magic_words#Page_names */// per INLDOC public static String demiDecodedPageName( final String demiEncodedPageName ) { // changing? change also in g/web/gwt/super/ return demiEncodedPageName.replace( '_', ' ' ); } /** Partially encodes a page name prior to full URL-encoding, either by substituting * underscores for spaces, or by doing the opposite if the wiki URL is ugly. In that * case, the page name will be used as a 'title' parameter and the removal of all * demi-encoding is necessary for 'view' actions to be automatically redirected by * the wiki to its pretty alias where one is actually available. (This despite the * fact that the wiki itself demi-encodes the title parameter for actions such as * 'edit' and 'history'.) * * @param maybeUgly whether the base URL might be the standard access * URL ending in "index.php" for example, or is definitely a pretty alias per * $wgUsePathInfo. * * @see Help:Magic_words#Page_names */// per INLDOC public static String demiEncodedPageName( final String unEncodedPageName, final boolean maybeUgly ) { // changing? change also in g/web/gwt/super/ return maybeUgly? demiDecodedPageName( unEncodedPageName ): unEncodedPageName.replace( ' ', '_' ); } /** Encodes a page name and appends it to a wiki base URL. The page name (e.g. "Main * page") is encoded and appended as a simple path (/Main_page) if that * is safe, otherwise as a title query (?title=Main+page). * * @param ub a URI builder containing only the wiki base URL. * @param maybeUgly whether the base URL might be the standard access * URL ending in "index.php" for example, or is definitely a pretty alias per * $wgUsePathInfo. * * @return the same URI builder with the page name encoded and appended. * * @see #appendPageSpecifier(StringBuilder,boolean,String) * @see #MAYBE_UGLY_URL_PATTERN */ public static UriBuilder encodePageSpecifier( final UriBuilder ub, final boolean maybeUgly, String pageName ) { // cf. appendPageSpecifier pageName = demiEncodedPageName( pageName, maybeUgly ); // will be fully encoded here: if( maybeUgly ) ub.queryParam( "title", pageName ); else ub.path( pageName ); return ub; } /** Downloads the wikitext source of the specified page into a temporary file. * * @param idType one of "curid" or "oldid". * @param id the page identifier (curid) or revision identifier (oldid). * @param s the base URL for script execution in the wiki, without a trailing * slash (/). * @param prefix the {@linkplain File#createTempFile(String,String) prefix} * for the temporary file. * * @see www.mediawiki.org/wiki/Manual:Parameters_to_index.php */// per INLDOC public static File fetchPageAsFile( final URI s, final String idType, final int id, final String prefix ) throws IOException { final File file = File.createTempFile( prefix, "." + id ); final HttpURLConnection http; try { final URI uri = new URI( s + "/index.php?action=raw&" + idType + "=" + id ); logger.fine( "querying wiki " + uri ); http = (HttpURLConnection)( uri.toURL().openConnection() ); } catch( final URISyntaxException x ) { throw new RuntimeException( x ); } URLConnectionX.connect( http ); try { final BufferedReader in = new BufferedReader( new InputStreamReader( http.getInputStream(), "UTF-8" )); // assuming UTF-8, maybe FIX by reading the HTTP header try { final BufferedWriter out = new BufferedWriter( new OutputStreamWriter( new FileOutputStream(file), Charset.defaultCharset() )); // to OS charset try { // ReaderX.appendTo( out, in ); /// or, less efficiently, but giving proper line endings regardless of wiki host: for( ;; ) { final String l = in.readLine(); if( l == null ) break; out.write( l ); out.newLine(); } } finally{ out.close(); } } finally{ in.close(); } } finally{ http.disconnect(); } return file; } /** Logs into a wiki. * * @param api the URL of the wiki's api.php script. * * @return error message for any failure that might be user actionable, such as * an incorrect password; or null if login succeeds. */ public static String login( final URI api, final CookieHandler cookieHandler, final String username, final String password ) throws IOException { final HashMap responseMap = new HashMap(); // Request login // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - final boolean toHandshake; { final HttpURLConnection http = (HttpURLConnection)api.toURL().openConnection(); http.setDoOutput( true ); // automatically does setRequestMethod( "POST" ) http.setRequestProperty( "Content-Type", "application/x-www-form-urlencoded;charset=" + API_POST_CHARSET ); URLConnectionX.setRequestCookies( api, http, cookieHandler ); // after other req headers final Spool spool = new Spool1(); try { URLConnectionX.connect( http ); spool.add( new Hold() { public void release() { http.disconnect(); }} ); // write // ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` { final BufferedWriter out = new BufferedWriter( new OutputStreamWriter( http.getOutputStream(), API_POST_CHARSET )); try { out.append( "format=xml&action=login&lgname=" ); out.append( URLEncoder.encode( username, API_POST_CHARSET )); out.append( "&lgpassword=" ); out.append( URLEncoder.encode( password, API_POST_CHARSET )); } finally{ out.close(); } } // read // ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` cookieHandler.put( api, http.getHeaderFields() ); final InputStream in = http.getInputStream(); spool.add( new Hold() { public void release() { try{ in.close(); } catch( final IOException x ) { throw new RuntimeException( x ); }} }); final XMLStreamReader xml = newXMLStreamReader( in, spool ); while( xml.hasNext() ) { xml.next(); if( !xml.isStartElement() ) continue; if( "login".equals( xml.getLocalName() )) { for( int a = 0, aN = xml.getAttributeCount(); a < aN; ++a ) { responseMap.put( /*key*/xml.getAttributeLocalName(a), xml.getAttributeValue( a )); } } test_error( xml ); } } catch( final XMLStreamException x ) { throw new IOException( x ); } finally{ spool.unwind(); } final String result = responseMap.get( "result" ); if( "Success".equals( result )) toHandshake = false; // MediaWiki < 1.15.3 else if( "NeedToken".equals( result )) toHandshake = true; // >= 1.15.3 else return "login call failed with result: " + result; } // Handshake to complete login // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if( toHandshake ) { logger.finer( "handshaking to complete login" ); final HttpURLConnection http = (HttpURLConnection)api.toURL().openConnection(); http.setDoOutput( true ); // automatically does setRequestMethod( "POST" ) http.setRequestProperty( "Content-Type", "application/x-www-form-urlencoded;charset=" + API_POST_CHARSET ); URLConnectionX.setRequestCookies( api, http, cookieHandler ); // after other req headers final Spool spool = new Spool1(); try { URLConnectionX.connect( http ); spool.add( new Hold() { public void release() { http.disconnect(); }} ); // write // ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` { final BufferedWriter out = new BufferedWriter( new OutputStreamWriter( http.getOutputStream(), API_POST_CHARSET )); try { out.append( "format=xml&action=login&lgname=" ); out.append( URLEncoder.encode( username, API_POST_CHARSET )); out.append( "&lgpassword=" ); out.append( URLEncoder.encode( password, API_POST_CHARSET )); out.append( "&lgtoken=" ); // echo it back: out.append( URLEncoder.encode( responseMap.get( "token" ), API_POST_CHARSET )); } finally{ out.close(); } } // read // ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` ` cookieHandler.put( api, http.getHeaderFields() ); final InputStream in = http.getInputStream(); spool.add( new Hold() { public void release() { try{ in.close(); } catch( final IOException x ) { throw new RuntimeException( x ); }} }); final XMLStreamReader xml = newXMLStreamReader( in, spool ); responseMap.clear(); // clear previous response while( xml.hasNext() ) { xml.next(); if( !xml.isStartElement() ) continue; if( "login".equals( xml.getLocalName() )) { for( int a = 0, aN = xml.getAttributeCount(); a < aN; ++a ) { responseMap.put( /*key*/xml.getAttributeLocalName(a), xml.getAttributeValue( a )); } } test_error( xml ); } } catch( final XMLStreamException x ) { throw new IOException( x ); } finally{ spool.unwind(); } final String result = responseMap.get( "result" ); if( !"Success".equals( result )) return "handshake failed with result: " + result; } return null; } /** A pattern that detects whether a wiki URL might be based on the standard access * URL ending in "index.php" for example, or is definitely based on a pretty alias * per $wgUsePathInfo. */ public static final Pattern MAYBE_UGLY_URL_PATTERN = Pattern.compile( ".+/index\\.php[0-9]*.*" ); // OK to err on side of false inclusion // changing? change also in s.gwt.web.PollwikiG /** Constructs a new stream reader suitable for reading a MediaWiki API response, and * reads just to the 'api' element. * * @param spool an optional spool for the release of associated holds. When * unwound it releases the holds of the reader and thereby disables it. * * @see #requestXML(URLConnection,Spool) */ public static @ThreadSafe XMLStreamReader newXMLStreamReader( final InputStream in, final Spool spool ) throws IOException, XMLStreamException { final XMLStreamReader xml; synchronized( XMLInputFactoryX.class ) { xml = XMLInputFactoryX.SIMPLE_INPUT_FACTORY.createXMLStreamReader( in ); } if( spool != null ) { spool.add( new Hold() { public void release() { try{ xml.close(); } catch( final XMLStreamException x ) { throw new RuntimeException( x ); } } }); } while( xml.hasNext() ) { xml.next(); if( xml.isStartElement() ) { final String name = xml.getLocalName(); if( "api".equals( name )) return xml; throw new MalformedResponse( "expected 'api' element, found '" + name + "'" ); } } throw new MalformedResponse( "response missing 'api' element" ); } /** Translates the username to normal form by shifting the first letter to uppercase * and substituting spaces for underscores. * * @return the translated name, which may be the same name; or null if the name * is null. */ public static String normalUsername( String name ) { // changing? change also in g/web/gwt/super/ if( name != null ) { name = name.replace( '_', ' ' ); final char ch = name.charAt( 0 ); if( Character.isLowerCase( ch )) name = Character.toUpperCase(ch) + name.substring(1); } return name; } /** Parses a page name (example "Ns:Root/sub/path") into two groups: (1) namespace * "Ns", and (2) local name "Root/sub/path". The namespace group will be null if not * present. Note that group values may have underscores from {@linkplain * #demiEncodedPageName(String,boolean) demi-encoding}, depending on where the * provided name string was constructed. * * @see Help:Magic_word#Page_names */// per INLDOC public static MatchResult parsePageName( final String pageName ) { final Matcher m = PAGE_NAME_PATTERN.matcher( pageName ); return m.matches()? m: null; } private static final Pattern PAGE_NAME_PATTERN = Pattern.compile( "(?:(.+?):)?(.+)" ); // NS : LOCAL /** Parses a page name (example "Ns:Root/sub/path") into three groups: (1) namespace * "Ns", (2) local root name "Root" and (3) subpage path "sub/path". The namespace * group will be null if not present, likewise for the subpage path. Note that group * values may have underscores from {@linkplain #demiEncodedPageName(String,boolean) * demi-encoding}, depending on where the provided name string was constructed. * * @see Help:Magic_word#Page_names */// per INLDOC public static MatchResult parsePageNameS( final String pageName ) { final Matcher m = PAGE_NAME_S_PATTERN.matcher( pageName ); return m.matches()? m: null; } private static final Pattern PAGE_NAME_S_PATTERN = Pattern.compile( "(?:(.+?):)?([^/]+)(?:/(.*))?" ); // changing? change also in g/web/gwt/super/ // NS : ROOT / SUB /** Establishes an HTTP connection and returns a JSON reader for the response. * * @param _http the connector, which must be of type HttpURLConnection. The base * class is accepted only as a convenience to save clients having to cast the * result of URL.openConnection(). * @param spool a spool for the release of associated holds. When unwound it * releases the holds of the reader and thereby disables it. */ public static JsonReader requestJSON( URLConnection _http, final Spool spool ) throws IOException { final HttpURLConnection http = (HttpURLConnection)_http; URLConnectionX.connect( http ); spool.add( new Hold() { public void release() { http.disconnect(); }} ); final JsonReader in = new JsonReader( new BufferedReader( new InputStreamReader( http.getInputStream(), "UTF-8" ))); spool.add( new Hold() { public void release() { try{ in.close(); } catch( IOException x ) { throw new RuntimeException( x ); } } }); return in; } /** Establishes an HTTP connection and returns an XML reader pre-situated on the 'api' * element of the response body. * * @param _http the connector, which must be of type HttpURLConnection. The base * class is accepted only as a convenience to save clients having to cast the * result of URL.openConnection(). * @param spool a spool for the release of associated holds. When unwound it * releases the holds of the reader and thereby disables it. */ public static XMLStreamReader requestXML( URLConnection _http, final Spool spool ) throws IOException, XMLStreamException { final HttpURLConnection http = (HttpURLConnection)_http; URLConnectionX.connect( http ); spool.add( new Hold() { public void release() { http.disconnect(); }} ); final InputStream in = http.getInputStream(); spool.add( new Hold() { public void release() { try{ in.close(); } // because not closed by closing the XMLStreamReader catch( IOException x ) { throw new RuntimeException( x ); } } }); return newXMLStreamReader( in, spool ); } /** Constructs the URL for a page revision. * * @param scriptLoc the base location for script execution in the wiki, without a * trailing slash (/). */ public static String revLoc( final String scriptLoc, final int rev ) { return scriptLoc + "/index.php?oldid=" + rev; } /** Constructs the URL to a page revision. * * @param scriptURI the base location for script execution in the wiki, without a * trailing slash (/). */ public static String revLoc( final URI scriptURI, int _rev ) { return revLoc( scriptURI.toASCIIString(), _rev ); } /** Tests the current element of an API response and throws NoSuchRev if the element * is named 'badrevids'. The 'badrevids' element is undocumented in the API * (2010-11). * * @param r a reader positioned at an element start tag. */ public static void test_badrevids( final XMLStreamReader r ) throws NoSuchRev, XMLStreamException { if( !"badrevids".equals( r.getLocalName() )) return; final StringBuilder b = new StringBuilder(); b.append( "No such page revision(s):" ); while( r.hasNext() ) { r.next(); if( r.isStartElement() && "rev".equals( r.getLocalName() )) { b.append( ' ' ); b.append( r.getAttributeValue( /*ns*/null, "revid" )); } else if( r.isEndElement() && "badrevids".equals( r.getLocalName() )) break; } throw new NoSuchRev( b.toString() ); } /** Tests the current element of an API response and throws an APIError if the element * is named 'error'. * * @param r a reader positioned at an element start tag. * * @see API:Errors_and_warnings#Errors */// per INLDOC public static void test_error( final XMLStreamReader r ) throws APIError { if( "error".equals( r.getLocalName() )) throw new APIError( r ); } /** Tests the 'page' element of an API 'info' query response and throws NoSuchPage if * it encodes a 'missing' attribute. Works for queries that specify a 'titles' * parameter, but not a 'revids' parameter (MediaWiki 1.15.1). For 'revids', use * instead {@linkplain #test_badrevids(XMLStreamReader) test_badrevids}. * * @param r a reader positioned at a 'page' element start tag. * * @throws NoSuchPage if the response indicates a missing page. * @throws MalformedRequest if the response indicates an invalid page name. * * @see API:Query#Missing_and_invalid_titles */// per INLDOC public static void testPage_missing( final XMLStreamReader r ) throws NoSuchPage { if( r.getAttributeValue(/*ns*/null,"invalid") != null ) { throw new MalformedRequest( "invalid page name: " + r.getAttributeValue(/*ns*/null,"title") ); } final String missing = r.getAttributeValue( /*ns*/null, "missing" ); if( missing == null ) return; final StringBuilder b = new StringBuilder(); b.append( "No such page:" ); String pageName = null; // till found for( int a = r.getAttributeCount() - 1; a >= 0; --a ) { final String name = r.getAttributeLocalName( a ); if( "missing".equals( name )) continue; final String value = r.getAttributeValue( a ); b.append( ' ' ); b.append( name ); b.append( "=\"" ); b.append( value ); b.append( '"' ); if( "title".equals( name )) pageName = value; } if( pageName == null ) { assert false; pageName = "NAMELESS_PAGE"; } throw new NoSuchPage( b.toString(), pageName ); } // ==================================================================================== /** Thrown when a MediaWiki API call returns an explicit error response. */ public static @ThreadSafe final class APIError extends IOException { /** @param r the response reader having just read the 'error' start element. */ public APIError( final String message, final XMLStreamReader r ) { super( (message == null? "": message + " " ) + "(" + r.getAttributeValue( /*ns*/null, "code" ) + "): " + r.getAttributeValue( /*ns*/null, "info" )); } /** @param r the response reader having just read the 'error' start element. */ public APIError( final XMLStreamReader r ) { this( null, r ); } } // ==================================================================================== /** Thrown when an improperly formed API call is detected. */ public static @ThreadSafe final class MalformedRequest extends RuntimeException { public MalformedRequest( String _message ) { super( _message ); } } // ==================================================================================== /** Thrown when the response to an API call is improperly formed. */ public static @ThreadSafe final class MalformedResponse extends RuntimeException { public MalformedResponse( String _message ) { super( _message ); } } // ==================================================================================== /** Thrown when a request cannot be met because an item does not exist. * * @see java.util.NoSuchElementException */ public static abstract @ThreadSafe class NoSuchItem extends IOException { NoSuchItem( String _message ) { super( _message ); } } // ==================================================================================== /** Thrown when a request cannot be met because a page does not exist. */ public static @ThreadSafe final class NoSuchPage extends NoSuchItem implements UserInformative { /** @see #pageName() */ public NoSuchPage( String _message, String _pageName ) { super( _message ); pageName = _pageName; if( pageName == null ) throw new NullPointerException(); // fail fast } /** The full name of the non-existent page including any namespace. Note that the * page name is not automatically added to the message. */ public String pageName() { return pageName; } private final String pageName; } // ==================================================================================== /** Thrown when a request cannot be met because a page revision does not exist. */ public static @ThreadSafe final class NoSuchRev extends NoSuchItem implements UserInformative { public NoSuchRev( String _message ) { super( _message ); } } //// P r i v a t e /////////////////////////////////////////////////////////////////////// private static final Logger logger = LoggerX.i( MediaWiki.class ); }