|
18 | 18 | */ |
19 | 19 | package ubic.basecode.ontology.jena; |
20 | 20 |
|
| 21 | +import com.hp.hpl.jena.datatypes.xsd.XSDDateTime; |
21 | 22 | import com.hp.hpl.jena.ontology.OntModel; |
22 | 23 | import com.hp.hpl.jena.ontology.OntResource; |
23 | | -import com.hp.hpl.jena.rdf.model.Property; |
24 | | -import com.hp.hpl.jena.rdf.model.Resource; |
25 | | -import com.hp.hpl.jena.rdf.model.Statement; |
26 | | -import com.hp.hpl.jena.rdf.model.StmtIterator; |
| 24 | +import com.hp.hpl.jena.rdf.model.*; |
27 | 25 | import com.hp.hpl.jena.shared.JenaException; |
28 | 26 | import com.hp.hpl.jena.util.iterator.ExtendedIterator; |
29 | 27 | import com.hp.hpl.jena.util.iterator.WrappedIterator; |
|
35 | 33 | import org.apache.lucene.analysis.standard.StandardAnalyzer; |
36 | 34 | import org.apache.lucene.document.Document; |
37 | 35 | import org.apache.lucene.document.Field; |
| 36 | +import org.apache.lucene.document.Fieldable; |
38 | 37 | import org.apache.lucene.document.NumericField; |
39 | 38 | import org.apache.lucene.index.IndexReader; |
40 | 39 | import org.apache.lucene.index.IndexWriter; |
@@ -221,8 +220,46 @@ private static Directory index( String name, OntModel model, Analyzer analyzer, |
221 | 220 | while ( listStatements.hasNext() ) { |
222 | 221 | Statement s = listStatements.next(); |
223 | 222 | String field = s.getPredicate().getURI(); |
224 | | - String value = JenaUtils.asString( s.getObject() ); |
225 | | - doc.add( new Field( field, value, Field.Store.NO, indexablePropertiesByField.get( field ).isAnalyzed() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED ) ); |
| 223 | + Fieldable f; |
| 224 | + if ( s.getObject().isLiteral() ) { |
| 225 | + Literal l = s.getObject().asLiteral(); |
| 226 | + if ( l.getValue() instanceof String ) { |
| 227 | + f = new Field( field, l.getString(), Field.Store.NO, indexablePropertiesByField.get( field ).isAnalyzed() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED ); |
| 228 | + } else if ( l.getValue() instanceof Number ) { |
| 229 | + NumericField nf = new NumericField( field ); |
| 230 | + if ( l.getValue() instanceof Integer ) { |
| 231 | + nf.setIntValue( s.getInt() ); |
| 232 | + } else if ( l.getValue() instanceof Long ) { |
| 233 | + nf.setLongValue( s.getLong() ); |
| 234 | + } else if ( l.getValue() instanceof Float ) { |
| 235 | + nf.setFloatValue( s.getFloat() ); |
| 236 | + } else if ( l.getValue() instanceof Double ) { |
| 237 | + nf.setDoubleValue( s.getDouble() ); |
| 238 | + } else { |
| 239 | + log.warn( "Skipping numeric literal of unsupported type: {}", l ); |
| 240 | + continue; |
| 241 | + } |
| 242 | + f = nf; |
| 243 | + } else if ( l.getValue() instanceof XSDDateTime ) { |
| 244 | + f = new NumericField( field ) |
| 245 | + .setLongValue( ( ( XSDDateTime ) l.getValue() ).asCalendar().getTime().getTime() ); |
| 246 | + } else if ( l.getValue() instanceof Boolean ) { |
| 247 | + f = new NumericField( field ).setIntValue( Boolean.TRUE.equals( l.getValue() ) ? 1 : 0 ); |
| 248 | + } else { |
| 249 | + log.warn( "Skipping literal of unsupported type: {}", l ); |
| 250 | + continue; |
| 251 | + } |
| 252 | + } else if ( s.getObject().isURIResource() ) { |
| 253 | + // index the URI |
| 254 | + f = new Field( field, s.getObject().asResource().getURI(), Field.Store.NO, Field.Index.NOT_ANALYZED ); |
| 255 | + } else { |
| 256 | + // could be a blank node |
| 257 | + continue; |
| 258 | + } |
| 259 | + if ( isIndividual ) { |
| 260 | + System.out.println( doc ); |
| 261 | + } |
| 262 | + doc.add( f ); |
226 | 263 | } |
227 | 264 | } |
228 | 265 | indexWriter.addDocument( doc ); |
@@ -278,7 +315,7 @@ private ExtendedIterator<JenaSearchResult> search( OntModel model, String queryS |
278 | 315 | Query query = new MultiFieldQueryParser( Version.LUCENE_36, searchableFields, analyzer ).parse( queryString ); |
279 | 316 | // in general, results are found in both the regular and std index, so we divide the initial capacity by 2 |
280 | 317 | // we also have to double the number of requested hits to account for duplicates |
281 | | - TopDocs hits = new IndexSearcher( index ).search( query, filter, maxResults * 3 ); |
| 318 | + TopDocs hits = new IndexSearcher( index ).search( query, filter, maxResults * 2 ); |
282 | 319 | Set<String> seenIds = new HashSet<>( hits.totalHits / 2 ); |
283 | 320 | List<JenaSearchResult> resources = new ArrayList<>( hits.totalHits / 2 ); |
284 | 321 | for ( int i = 0; i < hits.scoreDocs.length; i++ ) { |
|
0 commit comments