Skip to content

Commit 975cd61

Browse files
committed
ontology: Add an option to specify the maximum number of search results
1 parent 1afc021 commit 975cd61

10 files changed

Lines changed: 99 additions & 94 deletions

src/ubic/basecode/ontology/jena/AbstractOntologyService.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ private static boolean hasCauseMatching( Throwable t, Predicate<Throwable> predi
303303
}
304304

305305
@Override
306-
public Set<OntologySearchResult<OntologyIndividual>> findIndividuals( String search, boolean keepObsoletes ) throws
306+
public Set<OntologySearchResult<OntologyIndividual>> findIndividuals( String search, int maxResults, boolean keepObsoletes ) throws
307307
OntologySearchException {
308308
State state = this.state;
309309
if ( state == null ) {
@@ -314,14 +314,14 @@ public Set<OntologySearchResult<OntologyIndividual>> findIndividuals( String sea
314314
log.warn( "Attempt to search {} when index is null, no results will be returned.", this );
315315
return Collections.emptySet();
316316
}
317-
return state.index.searchIndividuals( state.model, search )
317+
return state.index.searchIndividuals( state.model, search, maxResults )
318318
.mapWith( i -> new OntologySearchResult<>( ( OntologyIndividual ) new OntologyIndividualImpl( i.result.as( Individual.class ), state.additionalRestrictions ), i.score ) )
319319
.filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.getResult().isObsolete() ) )
320320
.toSet();
321321
}
322322

323323
@Override
324-
public Collection<OntologySearchResult<OntologyResource>> findResources( String searchString, boolean keepObsoletes ) throws
324+
public Collection<OntologySearchResult<OntologyResource>> findResources( String searchString, int maxResults, boolean keepObsoletes ) throws
325325
OntologySearchException {
326326
State state = this.state;
327327
if ( state == null ) {
@@ -332,7 +332,7 @@ public Collection<OntologySearchResult<OntologyResource>> findResources( String
332332
log.warn( "Attempt to search {} when index is null, no results will be returned.", this );
333333
return Collections.emptySet();
334334
}
335-
return state.index.searchResources( state.model, searchString )
335+
return state.index.search( state.model, searchString, maxResults )
336336
.filterKeep( where( r -> r.result.canAs( OntClass.class ) || r.result.canAs( Individual.class ) ) )
337337
.mapWith( r -> {
338338
try {
@@ -354,7 +354,7 @@ public Collection<OntologySearchResult<OntologyResource>> findResources( String
354354
}
355355

356356
@Override
357-
public Collection<OntologySearchResult<OntologyTerm>> findTerm( String search, boolean keepObsoletes ) throws OntologySearchException {
357+
public Collection<OntologySearchResult<OntologyTerm>> findTerm( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException {
358358
State state = this.state;
359359
if ( state == null ) {
360360
log.warn( "Ontology {} is not ready, no terms will be returned.", this );
@@ -364,7 +364,7 @@ public Collection<OntologySearchResult<OntologyTerm>> findTerm( String search, b
364364
log.warn( "Attempt to search {} when index is null, no results will be returned.", this );
365365
return Collections.emptySet();
366366
}
367-
return state.index.searchClasses( state.model, search )
367+
return state.index.searchClasses( state.model, search, maxResults )
368368
.mapWith( r -> new OntologySearchResult<>( ( OntologyTerm ) new OntologyTermImpl( r.result.as( OntClass.class ), state.additionalRestrictions ), r.score ) )
369369
.filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.getResult().isObsolete() ) )
370370
.toSet();

src/ubic/basecode/ontology/jena/OntologyIndexer.java

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@
1818
*/
1919
package ubic.basecode.ontology.jena;
2020

21-
import com.hp.hpl.jena.graph.NodeFactory;
2221
import com.hp.hpl.jena.ontology.OntModel;
23-
import com.hp.hpl.jena.rdf.model.*;
22+
import com.hp.hpl.jena.ontology.OntResource;
23+
import com.hp.hpl.jena.rdf.model.Property;
24+
import com.hp.hpl.jena.rdf.model.Resource;
25+
import com.hp.hpl.jena.rdf.model.Statement;
26+
import com.hp.hpl.jena.rdf.model.StmtIterator;
2427
import com.hp.hpl.jena.shared.JenaException;
2528
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
2629
import com.hp.hpl.jena.util.iterator.WrappedIterator;
@@ -32,15 +35,14 @@
3235
import org.apache.lucene.analysis.standard.StandardAnalyzer;
3336
import org.apache.lucene.document.Document;
3437
import org.apache.lucene.document.Field;
38+
import org.apache.lucene.document.NumericField;
3539
import org.apache.lucene.index.IndexReader;
3640
import org.apache.lucene.index.IndexWriter;
3741
import org.apache.lucene.index.IndexWriterConfig;
3842
import org.apache.lucene.index.MultiReader;
3943
import org.apache.lucene.queryParser.MultiFieldQueryParser;
4044
import org.apache.lucene.queryParser.ParseException;
41-
import org.apache.lucene.search.IndexSearcher;
42-
import org.apache.lucene.search.Query;
43-
import org.apache.lucene.search.TopDocs;
45+
import org.apache.lucene.search.*;
4446
import org.apache.lucene.store.Directory;
4547
import org.apache.lucene.store.FSDirectory;
4648
import org.apache.lucene.util.Version;
@@ -71,7 +73,9 @@ class OntologyIndexer {
7173
*/
7274
private static final String
7375
ID_FIELD = "_ID",
74-
LOCAL_NAME_FIELD = "_LOCAL_NAME";
76+
LOCAL_NAME_FIELD = "_LOCAL_NAME",
77+
IS_CLASS_FIELD = "_IS_CLASS",
78+
IS_INDIVIDUAL_FIELD = "_IS_INDIVIDUAL";
7579

7680
public static class IndexableProperty {
7781
private final Property property;
@@ -203,6 +207,16 @@ private static Directory index( String name, OntModel model, Analyzer analyzer,
203207
Document doc = new Document();
204208
doc.add( new Field( ID_FIELD, id, Field.Store.YES, Field.Index.NOT_ANALYZED ) );
205209
doc.add( new Field( LOCAL_NAME_FIELD, subject.getLocalName(), Field.Store.NO, Field.Index.NOT_ANALYZED ) );
210+
boolean isClass, isIndividual;
211+
if ( subject.canAs( OntResource.class ) ) {
212+
isClass = subject.as( OntResource.class ).isClass();
213+
isIndividual = subject.as( OntResource.class ).isIndividual();
214+
} else {
215+
isClass = false;
216+
isIndividual = false;
217+
}
218+
doc.add( new NumericField( IS_CLASS_FIELD ).setIntValue( isClass ? 1 : 0 ) );
219+
doc.add( new NumericField( IS_INDIVIDUAL_FIELD ).setIntValue( isIndividual ? 1 : 0 ) );
206220
for ( IndexableProperty prop : indexableProperties ) {
207221
StmtIterator listStatements = subject.listProperties( prop.property );
208222
while ( listStatements.hasNext() ) {
@@ -242,26 +256,44 @@ public LuceneSearchIndex( String[] searchableFields, IndexReader index, Analyzer
242256
}
243257

244258
@Override
245-
public ExtendedIterator<JenaSearchResult> search( OntModel model, String queryString ) throws OntologySearchException {
259+
public ExtendedIterator<JenaSearchResult> search( OntModel model, String queryString, int maxResults ) throws OntologySearchException {
260+
return search( model, queryString, null, maxResults );
261+
}
262+
263+
@Override
264+
public ExtendedIterator<JenaSearchResult> searchClasses( OntModel model, String queryString, int maxResults ) throws OntologySearchException {
265+
return search( model, queryString, NumericRangeFilter.newIntRange( IS_CLASS_FIELD, 1, 1, true, true ), maxResults );
266+
}
267+
268+
@Override
269+
public ExtendedIterator<JenaSearchResult> searchIndividuals( OntModel model, String queryString, int maxResults ) throws OntologySearchException {
270+
return search( model, queryString, NumericRangeFilter.newIntRange( IS_INDIVIDUAL_FIELD, 1, 1, true, true ), maxResults );
271+
}
272+
273+
private ExtendedIterator<JenaSearchResult> search( OntModel model, String queryString, @Nullable Filter filter, int maxResults ) throws OntologySearchException {
246274
if ( StringUtils.isBlank( queryString ) ) {
247275
throw new IllegalArgumentException( "Query cannot be blank" );
248276
}
249277
StopWatch timer = StopWatch.createStarted();
250278
try {
251279
Query query = new MultiFieldQueryParser( Version.LUCENE_36, searchableFields, analyzer ).parse( queryString );
252-
TopDocs hits = new IndexSearcher( index ).search( query, 500 );
253280
// in general, results are found in both the regular and std index, so we halve the initial capacity
281+
// we also have to request more hits than needed (three times maxResults) to account for duplicates
282+
TopDocs hits = new IndexSearcher( index ).search( query, filter, maxResults * 3 );
254283
Set<String> seenIds = new HashSet<>( hits.totalHits / 2 );
255284
List<JenaSearchResult> resources = new ArrayList<>( hits.totalHits / 2 );
256-
for ( int i = 0; i < hits.totalHits; i++ ) {
285+
for ( int i = 0; i < hits.scoreDocs.length; i++ ) {
257286
Document doc = index.document( hits.scoreDocs[i].doc );
258287
String id = doc.get( ID_FIELD );
259288
if ( seenIds.contains( id ) ) {
260289
continue;
261290
}
262-
RDFNode node = model.getRDFNode( NodeFactory.createURI( id ) );
263-
resources.add( new JenaSearchResult( node, hits.scoreDocs[i].score ) );
291+
Resource res = model.getResource( id );
292+
resources.add( new JenaSearchResult( res, hits.scoreDocs[i].score ) );
264293
seenIds.add( id );
294+
if ( seenIds.size() >= maxResults ) {
295+
break;
296+
}
265297
}
266298
return WrappedIterator.create( resources.iterator() );
267299
} catch ( ParseException e ) {

src/ubic/basecode/ontology/jena/SearchIndex.java

Lines changed: 6 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,39 @@
11
package ubic.basecode.ontology.jena;
22

3-
import com.hp.hpl.jena.ontology.Individual;
4-
import com.hp.hpl.jena.ontology.OntClass;
53
import com.hp.hpl.jena.ontology.OntModel;
6-
import com.hp.hpl.jena.rdf.model.RDFNode;
4+
import com.hp.hpl.jena.rdf.model.Resource;
75
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
86
import ubic.basecode.ontology.search.OntologySearchException;
97

10-
import java.util.Objects;
11-
12-
import static ubic.basecode.ontology.jena.JenaUtils.where;
13-
148
interface SearchIndex extends AutoCloseable {
159

1610
/**
1711
* Find RDF nodes matching the given query string.
1812
*/
19-
ExtendedIterator<JenaSearchResult> search( OntModel model, String queryString ) throws OntologySearchException;
13+
ExtendedIterator<JenaSearchResult> search( OntModel model, String queryString, int maxResults ) throws OntologySearchException;
2014

2115
/**
2216
* Find classes that match the query string.
2317
*
2418
* @param model that goes with the index
2519
* @return iterator over the search results for matching classes
2620
*/
27-
default ExtendedIterator<JenaSearchResult> searchClasses( OntModel model, String queryString ) throws OntologySearchException {
28-
return search( model, queryString )
29-
.filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( OntClass.class ) ) )
30-
.filterKeep( where( Objects::nonNull ) );
31-
}
21+
ExtendedIterator<JenaSearchResult> searchClasses( OntModel model, String queryString, int maxResults ) throws OntologySearchException;
3222

3323
/**
3424
* Find individuals that match the query string
3525
*
3626
* @param model that goes with the index
3727
* @return iterator over the search results for matching individuals
3828
*/
39-
default ExtendedIterator<JenaSearchResult> searchIndividuals( OntModel model, String queryString ) throws OntologySearchException {
40-
return search( model, queryString )
41-
.filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( Individual.class ) ) )
42-
.filterKeep( where( Objects::nonNull ) );
43-
}
44-
45-
/**
46-
* Find OntologyIndividuals and OntologyTerms that match the query string. Search with a wildcard is attempted
47-
* whenever possible.
48-
*
49-
* @param model that goes with the index
50-
* @return Collection of OntologyResource objects
51-
*/
52-
default ExtendedIterator<JenaSearchResult> searchResources( OntModel model, String queryString ) throws OntologySearchException {
53-
return search( model, queryString )
54-
.filterKeep( where( o -> o.result.isURIResource() && o.result.isResource() ) )
55-
.filterKeep( where( Objects::nonNull ) );
56-
}
29+
ExtendedIterator<JenaSearchResult> searchIndividuals( OntModel model, String queryString, int maxResults ) throws OntologySearchException;
5730

5831
class JenaSearchResult {
5932

60-
public final RDFNode result;
33+
public final Resource result;
6134
public final double score;
6235

63-
JenaSearchResult( RDFNode result, double score ) {
36+
JenaSearchResult( Resource result, double score ) {
6437
this.result = result;
6538
this.score = score;
6639
}

src/ubic/basecode/ontology/providers/OntologyService.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,8 @@ enum InferenceMode {
183183
* <p>
184184
* Obsolete terms are filtered out.
185185
*/
186-
default Collection<OntologySearchResult<OntologyIndividual>> findIndividuals( String search ) throws OntologySearchException {
187-
return findIndividuals( search, false );
186+
default Collection<OntologySearchResult<OntologyIndividual>> findIndividuals( String search, int maxResults ) throws OntologySearchException {
187+
return findIndividuals( search, maxResults, false );
188188
}
189189

190190
/**
@@ -193,7 +193,7 @@ default Collection<OntologySearchResult<OntologyIndividual>> findIndividuals( St
193193
* @param search search query
194194
* @param keepObsoletes retain obsolete terms
195195
*/
196-
Set<OntologySearchResult<OntologyIndividual>> findIndividuals( String search, boolean keepObsoletes ) throws OntologySearchException;
196+
Set<OntologySearchResult<OntologyIndividual>> findIndividuals( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException;
197197

198198
/**
199199
* Looks for any resources (terms or individuals) that match the given search string
@@ -203,8 +203,8 @@ default Collection<OntologySearchResult<OntologyIndividual>> findIndividuals( St
203203
* @return results, or an empty collection if the results are empty OR the ontology is not available to be
204204
* searched.
205205
*/
206-
default Collection<OntologySearchResult<OntologyResource>> findResources( String searchString ) throws OntologySearchException {
207-
return findResources( searchString, false );
206+
default Collection<OntologySearchResult<OntologyResource>> findResources( String searchString, int maxResults ) throws OntologySearchException {
207+
return findResources( searchString, maxResults, false );
208208
}
209209

210210
/**
@@ -213,15 +213,15 @@ default Collection<OntologySearchResult<OntologyResource>> findResources( String
213213
* @param search search query
214214
* @param keepObsoletes retain obsolete terms
215215
*/
216-
Collection<OntologySearchResult<OntologyResource>> findResources( String search, boolean keepObsoletes ) throws OntologySearchException;
216+
Collection<OntologySearchResult<OntologyResource>> findResources( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException;
217217

218218
/**
219219
* Looks for any terms that match the given search string.
220220
* <p>
221221
* Obsolete terms are filtered out.
222222
*/
223-
default Collection<OntologySearchResult<OntologyTerm>> findTerm( String search ) throws OntologySearchException {
224-
return findTerm( search, false );
223+
default Collection<OntologySearchResult<OntologyTerm>> findTerm( String search, int maxResults ) throws OntologySearchException {
224+
return findTerm( search, maxResults, false );
225225
}
226226

227227

@@ -231,7 +231,7 @@ default Collection<OntologySearchResult<OntologyTerm>> findTerm( String search )
231231
* @param search search query
232232
* @param keepObsoletes retain obsolete terms
233233
*/
234-
Collection<OntologySearchResult<OntologyTerm>> findTerm( String search, boolean keepObsoletes ) throws OntologySearchException;
234+
Collection<OntologySearchResult<OntologyTerm>> findTerm( String search, int maxResults, boolean keepObsoletes ) throws OntologySearchException;
235235

236236
/**
237237
* Find a term using an alternative ID.

test/ubic/basecode/ontology/jena/OntologyLoaderTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ public void testCacheOntologyToDisk() throws Exception {
6363
assertFalse( OntologyLoader.getTmpDiskCachePath( name ).exists() );
6464
assertFalse( OntologyLoader.getOldDiskCachePath( name ).exists() );
6565

66-
Collection<OntologySearchResult<OntologyTerm>> r = s.findTerm( "Mouse" );
66+
Collection<OntologySearchResult<OntologyTerm>> r = s.findTerm( "Mouse", 500 );
6767
assertFalse( r.isEmpty() );
6868

6969
// Recreate OntologyService using this cache file
@@ -74,7 +74,7 @@ public void testCacheOntologyToDisk() throws Exception {
7474
assertFalse( OntologyLoader.getTmpDiskCachePath( name ).exists() );
7575
assertFalse( OntologyLoader.getOldDiskCachePath( name ).exists() );
7676

77-
r = s.findTerm( "Mouse" );
77+
r = s.findTerm( "Mouse", 500 );
7878
assertFalse( r.isEmpty() );
7979

8080
// Recreate OntologyService with bad URL and no cache

0 commit comments

Comments
 (0)