package votorola.a; // Copyright 2012, Michael Allan. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Votorola Software"), to deal in the Votorola Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicence, and/or sell copies of the Votorola Software, and to permit persons to whom the Votorola Software is furnished to do so, subject to the following conditions: The preceding copyright notice and this permission notice shall be included in all copies or substantial portions of the Votorola Software. THE VOTOROLA SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE VOTOROLA SOFTWARE OR THE USE OR OTHER DEALINGS IN THE VOTOROLA SOFTWARE.

import com.google.gson.stream.*;
import java.io.*;
import java.net.*;
import java.util.*;
import votorola.g.lang.*;


/** A reader of Semantic MediaWiki properties for a cached pollwiki page.  It reads the
  * ordinary properties of the page, as well as the special "URL" property (undocumented
  * as of Semantic MediaWiki 1.7.1).
  */
public final class PagePropertyReader implements Closeable
{

    // FIX add constructor for cacheless query via SMW 1.7 query API, per WikiCache.churn


    /** Creates a PagePropertyReader.  The RDF/JSON export of the page is opened from the
      * cache and read forward to the first binding that matches a requested property.
      * If that initial read fails, the underlying reader is closed before the failure
      * is propagated, because the caller then has no instance on which to call close.
      *
      *     @param wikiCache the cache from which the RDF/JSON export of the page is
      *       opened.
      *     @param _fullPageName the full name of the page, including any namespace.
      *     @param _properties the properties to read.
      *
      *     @throws NullPointerException if _fullPageName is null.
      *     @throws IOException if the export cannot be opened or read.
      *
      *     @see #properties()
      */
    public PagePropertyReader( final WikiCache wikiCache, String _fullPageName,
      final PageProperty... _properties ) throws IOException
    {
        fullPageName = _fullPageName;
        properties = _properties;
        if( fullPageName == null ) throw new NullPointerException(); // fail fast

        in = new JsonReader( new BufferedReader( new InputStreamReader(
          wikiCache.openRDF_JSON(fullPageName), "UTF-8" )));
        boolean isPositioned = false; // till the reader is safely at the first match
        try
        {
            in.beginObject();
            init: while( in.hasNext() )
            {
                String name = in.nextName();
                if( "results".equals( name ))
                {
                    in.beginObject();
                    while( in.hasNext() )
                    {
                        name = in.nextName();
                        if( "bindings".equals( name ))
                        {
                            in.beginArray();
                            next();
                            break init;
                        }
                        else in.skipValue();
                    }
                }
                else in.skipValue();
            }
            isPositioned = true;
        }
        finally
        {
            if( !isPositioned ) // then construction is failing, so release the reader
            {
                try{ in.close(); }
                catch( IOException ignored ) {} // let the original failure propagate
            }
        }
    }



   // ------------------------------------------------------------------------------------


    /** Answers whether a property remains to be read.  Returns true if the page has an
      * unread property that matches one of {@linkplain #properties() those requested}.
      */
    public boolean hasNext() { return hasNext; }


        private boolean hasNext;



    /** The properties to read.
      */
    private PageProperty[] properties() { return properties; }


        private final PageProperty[] properties;



    /** Reads the next property from the page that matches one of {@linkplain
      * #properties() those requested}.  Returns the property with the value filled in
      * from the page.
      *
      *     @throws NoSuchElementException if no property remains to be read.
      */
    public PageProperty read() throws IOException
    {
        if( !hasNext ) throw new NoSuchElementException();

        final PageProperty p = nextProperty;
        p.setValue( nextValue );
        next(); // advance to the following match, if any
        return p;
    }



    /** Reads the page properties until either each of {@linkplain #properties() those
      * requested} has a value, or no more properties remain in the page.  Use this method
      * when all requested properties are single valued, or when you intend to ignore any
      * additional values they might have.  In the latter case, which of the multiple
      * values is filled in by this method is undefined; viz. it is not guaranteed to be
      * the first value.
      */
    public void readAllRequested() throws IOException
    {
        if( !hasNext ) return;

        readAll: for( ;; )
        {
            int valuelessCount = 0;
            for( PageProperty p: properties ) if( p.getValue() == null ) ++valuelessCount;
            if( valuelessCount == 0 ) break readAll; // all have values filled in

            while( valuelessCount > 0 ) // read the minimum that *might* fill all values
            {
                read();
                if( !hasNext ) break readAll; // no more properties in page

                --valuelessCount;
            }
            // loop back to recount; a property may have been read twice, leaving
            // another still without a value
        }
    }



   // - C l o s e a b l e ----------------------------------------------------------------


    /** Does nothing but close the underlying reader.
      */
    public void close() throws IOException { in.close(); }



//// P r i v a t e ///////////////////////////////////////////////////////////////////////


    /** Attempts to decode a Semantic MediaWiki URIResolver subject value in the form
      * Special:URIResolver/SMW_ENCODED_FULLPAGE_NAME.  The formatting and encoding is
      * apparently done in includes/export/SMWExporter.
      *
      *     @return the full page name, or null if none can be decoded.
      */
    private static String decodedFullPageName( final String subjectValue )
    {
        return decodedPageName( subjectValue, "/Special:URIResolver/" );
    }



    /** Decodes the part of the value that follows the marker.  Each '-' is taken as the
      * escape character '%' and each '_' as a space, and the result is then URL decoded
      * as UTF-8.
      *
      *     @param value the encoded value, typically a URI.
      *     @param marker the text that immediately precedes the encoded name.
      *
      *     @return the decoded name; or null if the value does not contain the marker,
      *       or if the text after the marker is not validly encoded.
      */
    private static String decodedPageName( final String value, final String marker )
    {
        int c = value.indexOf( marker );
        if( c == -1 ) return null; // not so encoded

        c += marker.length();
        final int cN = value.length();
        final StringBuilder b = new StringBuilder( cN - c );
        for(; c < cN; ++c )
        {
            char ch = value.charAt( c );
            if( ch == '-' ) ch = '%';
            else if( ch == '_' ) ch = ' ';
            b.append( ch );
        }
        try{ return URLDecoder.decode( b.toString(), "UTF-8" ); }
        catch( IllegalArgumentException x ) { return null; } // malformed escape sequence,
          // so not decodeable; callers already treat null as "leave undecoded"
        catch( UnsupportedEncodingException x ) { throw new RuntimeException( x ); }
    }



    /** Attempts to decode a Semantic MediaWiki URIResolver predicate value in the form
      * Special:URIResolver/Property-3ASMW_ENCODED_PROPERTY_NAME.  A value ending in
      * "#specialProperty_uri" is decoded as the property "URL".  The formatting and
      * encoding is apparently done in includes/export/SMWExporter.
      *
      *     @return the short property name, or null if none can be decoded.
      */
    private static String decodedPropertyName( final String predicateValue )
    {
        final String name;
        if( predicateValue.endsWith( "#specialProperty_uri" ))
        {
            // ensure this robust, short encoding^ maps 1:1 with full encoding:
            assert predicateValue.equals(
              "http://semantic-mediawiki.org/swivt/1.0#specialProperty_uri" );
            name = "URL";
              /* for some undocumented reason, what was formerly an ordinary property
                 "URL" received a special encoding at or before SMW 1.7.1 */
        }
        else name = decodedPageName( predicateValue, "/Special:URIResolver/Property-3A" );
        return name;
    }



    /** Reads forward in the current JSON object to the member named "value", skipping
      * any members that precede it.
      *
      *     @return the string of the "value" member.
      *
      *     @throws IllegalStateException if the object has no remaining "value" member.
      */
    private String findValue() throws IOException
    {
        while( in.hasNext() )
        {
            final String name = in.nextName();
            if( "value".equals( name )) return in.nextString();

            in.skipValue();
        }
        throw new IllegalStateException();
    }



    private final String fullPageName;



    private final JsonReader in;



    /** Advances through the bindings array to the next binding whose subject is this
      * page and whose predicate names one of the requested properties.  On finding one
      * it sets nextProperty and nextValue, and sets hasNext true; otherwise it consumes
      * the remaining bindings and sets hasNext false.
      */
    private void next() throws IOException
    {
        while( in.hasNext() )
        {
            in.beginObject(); // single binding
            try
            {
                String name;
                String value;

              // Subject.
              // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                name = in.nextName();
                assert "s".equals( name );
                in.beginObject();
                value = decodedFullPageName( findValue() );
                skipAndEndObject(); // s
                if( fullPageName.equals( value ))
                {
                  // Predicate.
                  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                    name = in.nextName();
                    assert "p".equals( name );
                    in.beginObject();
                    value = decodedPropertyName( findValue() );
                    skipAndEndObject(); // p
                    for( PageProperty property: properties )
                    {
                        if( property.name().equals( value ))
                        {
                            nextProperty = property;

                          // Object.
                          // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                            name = in.nextName();
                            assert "o".equals( name );
                            in.beginObject();
                            name = in.nextName();
                            assert "type".equals( name );
                            final String type = in.nextString();
                            value = findValue();
                            skipAndEndObject(); // o
                            if( "uri".equals( type ))
                            {
                                nextValue = decodedFullPageName( value );
                                if( nextValue == null ) nextValue = value; // undecodeable, leave as such
                            }
                            else nextValue = value;
                            hasNext = true;
                            return; // the finally clause below still ends the binding
                        }
                    }
                }
            }
            finally{ skipAndEndObject(); } // single binding
        }
        hasNext = false;
    }


        private PageProperty nextProperty;


        private String nextValue;



    /** Skips whatever members remain in the current JSON object, and ends the object.
      */
    private void skipAndEndObject() throws IOException
    {
        while( in.hasNext() ) in.skipValue();
        in.endObject();
    }


}