|
18 | 18 | */ |
19 | 19 | package ubic.basecode.ontology.jena; |
20 | 20 |
|
21 | | -import com.hp.hpl.jena.graph.NodeFactory; |
22 | 21 | import com.hp.hpl.jena.ontology.OntModel; |
23 | | -import com.hp.hpl.jena.rdf.model.*; |
| 22 | +import com.hp.hpl.jena.ontology.OntResource; |
| 23 | +import com.hp.hpl.jena.rdf.model.Property; |
| 24 | +import com.hp.hpl.jena.rdf.model.Resource; |
| 25 | +import com.hp.hpl.jena.rdf.model.Statement; |
| 26 | +import com.hp.hpl.jena.rdf.model.StmtIterator; |
24 | 27 | import com.hp.hpl.jena.shared.JenaException; |
25 | 28 | import com.hp.hpl.jena.util.iterator.ExtendedIterator; |
26 | 29 | import com.hp.hpl.jena.util.iterator.WrappedIterator; |
|
32 | 35 | import org.apache.lucene.analysis.standard.StandardAnalyzer; |
33 | 36 | import org.apache.lucene.document.Document; |
34 | 37 | import org.apache.lucene.document.Field; |
| 38 | +import org.apache.lucene.document.NumericField; |
35 | 39 | import org.apache.lucene.index.IndexReader; |
36 | 40 | import org.apache.lucene.index.IndexWriter; |
37 | 41 | import org.apache.lucene.index.IndexWriterConfig; |
38 | 42 | import org.apache.lucene.index.MultiReader; |
39 | 43 | import org.apache.lucene.queryParser.MultiFieldQueryParser; |
40 | 44 | import org.apache.lucene.queryParser.ParseException; |
41 | | -import org.apache.lucene.search.IndexSearcher; |
42 | | -import org.apache.lucene.search.Query; |
43 | | -import org.apache.lucene.search.TopDocs; |
| 45 | +import org.apache.lucene.search.*; |
44 | 46 | import org.apache.lucene.store.Directory; |
45 | 47 | import org.apache.lucene.store.FSDirectory; |
46 | 48 | import org.apache.lucene.util.Version; |
@@ -71,7 +73,9 @@ class OntologyIndexer { |
71 | 73 | */ |
72 | 74 | private static final String |
73 | 75 | ID_FIELD = "_ID", |
74 | | - LOCAL_NAME_FIELD = "_LOCAL_NAME"; |
| 76 | + LOCAL_NAME_FIELD = "_LOCAL_NAME", |
| 77 | + IS_CLASS_FIELD = "_IS_CLASS", |
| 78 | + IS_INDIVIDUAL_FIELD = "_IS_INDIVIDUAL"; |
75 | 79 |
|
76 | 80 | public static class IndexableProperty { |
77 | 81 | private final Property property; |
@@ -203,6 +207,16 @@ private static Directory index( String name, OntModel model, Analyzer analyzer, |
203 | 207 | Document doc = new Document(); |
204 | 208 | doc.add( new Field( ID_FIELD, id, Field.Store.YES, Field.Index.NOT_ANALYZED ) ); |
205 | 209 | doc.add( new Field( LOCAL_NAME_FIELD, subject.getLocalName(), Field.Store.NO, Field.Index.NOT_ANALYZED ) ); |
| 210 | + boolean isClass, isIndividual; |
| 211 | + if ( subject.canAs( OntResource.class ) ) { |
| 212 | + isClass = subject.as( OntResource.class ).isClass(); |
| 213 | + isIndividual = subject.as( OntResource.class ).isIndividual(); |
| 214 | + } else { |
| 215 | + isClass = false; |
| 216 | + isIndividual = false; |
| 217 | + } |
| 218 | + doc.add( new NumericField( IS_CLASS_FIELD ).setIntValue( isClass ? 1 : 0 ) ); |
| 219 | + doc.add( new NumericField( IS_INDIVIDUAL_FIELD ).setIntValue( isIndividual ? 1 : 0 ) ); |
206 | 220 | for ( IndexableProperty prop : indexableProperties ) { |
207 | 221 | StmtIterator listStatements = subject.listProperties( prop.property ); |
208 | 222 | while ( listStatements.hasNext() ) { |
@@ -242,26 +256,44 @@ public LuceneSearchIndex( String[] searchableFields, IndexReader index, Analyzer |
242 | 256 | } |
243 | 257 |
|
244 | 258 | @Override |
245 | | - public ExtendedIterator<JenaSearchResult> search( OntModel model, String queryString ) throws OntologySearchException { |
| 259 | + public ExtendedIterator<JenaSearchResult> search( OntModel model, String queryString, int maxResults ) throws OntologySearchException { |
| 260 | + return search( model, queryString, null, maxResults ); |
| 261 | + } |
| 262 | + |
| 263 | + @Override |
| 264 | + public ExtendedIterator<JenaSearchResult> searchClasses( OntModel model, String queryString, int maxResults ) throws OntologySearchException { |
| 265 | + return search( model, queryString, NumericRangeFilter.newIntRange( IS_CLASS_FIELD, 1, 1, true, true ), maxResults ); |
| 266 | + } |
| 267 | + |
| 268 | + @Override |
| 269 | + public ExtendedIterator<JenaSearchResult> searchIndividuals( OntModel model, String queryString, int maxResults ) throws OntologySearchException { |
| 270 | + return search( model, queryString, NumericRangeFilter.newIntRange( IS_INDIVIDUAL_FIELD, 1, 1, true, true ), maxResults ); |
| 271 | + } |
| 272 | + |
| 273 | + private ExtendedIterator<JenaSearchResult> search( OntModel model, String queryString, @Nullable Filter filter, int maxResults ) throws OntologySearchException { |
246 | 274 | if ( StringUtils.isBlank( queryString ) ) { |
247 | 275 | throw new IllegalArgumentException( "Query cannot be blank" ); |
248 | 276 | } |
249 | 277 | StopWatch timer = StopWatch.createStarted(); |
250 | 278 | try { |
251 | 279 | Query query = new MultiFieldQueryParser( Version.LUCENE_36, searchableFields, analyzer ).parse( queryString ); |
252 | | - TopDocs hits = new IndexSearcher( index ).search( query, 500 ); |
253 | 280 | // in general, results are found in both regular and std index, so we divide by 2 the initial capacity |
| 281 | + // we also have to over-fetch hits (three times maxResults) to account for duplicates
| 282 | + TopDocs hits = new IndexSearcher( index ).search( query, filter, maxResults * 3 ); |
254 | 283 | Set<String> seenIds = new HashSet<>( hits.totalHits / 2 ); |
255 | 284 | List<JenaSearchResult> resources = new ArrayList<>( hits.totalHits / 2 ); |
256 | | - for ( int i = 0; i < hits.totalHits; i++ ) { |
| 285 | + for ( int i = 0; i < hits.scoreDocs.length; i++ ) { |
257 | 286 | Document doc = index.document( hits.scoreDocs[i].doc ); |
258 | 287 | String id = doc.get( ID_FIELD ); |
259 | 288 | if ( seenIds.contains( id ) ) { |
260 | 289 | continue; |
261 | 290 | } |
262 | | - RDFNode node = model.getRDFNode( NodeFactory.createURI( id ) ); |
263 | | - resources.add( new JenaSearchResult( node, hits.scoreDocs[i].score ) ); |
| 291 | + Resource res = model.getResource( id ); |
| 292 | + resources.add( new JenaSearchResult( res, hits.scoreDocs[i].score ) ); |
264 | 293 | seenIds.add( id ); |
| 294 | + if ( seenIds.size() >= maxResults ) { |
| 295 | + break; |
| 296 | + } |
265 | 297 | } |
266 | 298 | return WrappedIterator.create( resources.iterator() ); |
267 | 299 | } catch ( ParseException e ) { |
|
0 commit comments