Skip to content

Commit a28e91f

Browse files
committed
Include full-text scores in search results
1 parent ddbad5d commit a28e91f

10 files changed

Lines changed: 211 additions & 130 deletions

File tree

src/ubic/basecode/ontology/jena/AbstractOntologyResource.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,12 @@
2424
import org.slf4j.LoggerFactory;
2525
import ubic.basecode.ontology.model.OntologyResource;
2626

27+
import javax.annotation.Nullable;
2728
import java.util.Comparator;
2829
import java.util.Objects;
2930

31+
import static java.util.Comparator.*;
32+
3033
/**
3134
* @author pavlidis
3235
*/
@@ -36,10 +39,22 @@ public abstract class AbstractOntologyResource implements OntologyResource {
3639

3740
private static final long serialVersionUID = 1L;
3841

42+
private static final Comparator<OntologyResource> comparator = Comparator
43+
.comparing( OntologyResource::getScore, nullsLast( reverseOrder() ) )
44+
.thenComparing( OntologyResource::getUri, nullsLast( naturalOrder() ) );
45+
3946
private transient final OntResource res;
47+
@Nullable
48+
private final Double score;
4049

4150
protected AbstractOntologyResource( OntResource resource ) {
4251
this.res = resource;
52+
this.score = null;
53+
}
54+
55+
public AbstractOntologyResource( OntResource resource, double score ) {
56+
this.res = resource;
57+
this.score = score;
4358
}
4459

4560
@Override
@@ -66,9 +81,15 @@ public boolean isObsolete() {
6681
return res.hasLiteral( OWL2.deprecated, true );
6782
}
6883

84+
@Override
85+
@Nullable
86+
public Double getScore() {
87+
return score;
88+
}
89+
6990
@Override
7091
public int compareTo( OntologyResource other ) {
71-
return Objects.compare( getUri(), other.getUri(), Comparator.nullsLast( Comparator.naturalOrder() ) );
92+
return Objects.compare( this, other, comparator );
7293
}
7394

7495
@Override

src/ubic/basecode/ontology/jena/AbstractOntologyService.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ public Collection<OntologyIndividual> findIndividuals( String search, boolean ke
224224
return Collections.emptySet();
225225
}
226226
return OntologySearch.matchIndividuals( model, index, search )
227-
.mapWith( i -> ( OntologyIndividual ) new OntologyIndividualImpl( i, additionalRestrictions ) )
227+
.mapWith( i -> ( OntologyIndividual ) new OntologyIndividualImpl( i.result, additionalRestrictions, i.score ) )
228228
.filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) )
229229
.toSet();
230230
} finally {
@@ -246,13 +246,13 @@ public Collection<OntologyResource> findResources( String searchString, boolean
246246
return Collections.emptySet();
247247
}
248248
return OntologySearch.matchResources( model, index, searchString )
249-
.filterKeep( where( r -> r.canAs( OntClass.class ) || r.canAs( Individual.class ) ) )
249+
.filterKeep( where( r -> r.result.canAs( OntClass.class ) || r.result.canAs( Individual.class ) ) )
250250
.mapWith( r -> {
251251
OntologyResource res;
252-
if ( r.canAs( OntClass.class ) ) {
253-
res = new OntologyTermImpl( r.as( OntClass.class ), additionalRestrictions );
252+
if ( r.result.canAs( OntClass.class ) ) {
253+
res = new OntologyTermImpl( r.result.as( OntClass.class ), additionalRestrictions, r.score );
254254
} else {
255-
res = new OntologyIndividualImpl( r.as( Individual.class ), additionalRestrictions );
255+
res = new OntologyIndividualImpl( r.result.as( Individual.class ), additionalRestrictions, r.score );
256256
}
257257
return res;
258258
} )
@@ -278,7 +278,7 @@ public Collection<OntologyTerm> findTerm( String search, boolean keepObsoletes )
278278
return Collections.emptySet();
279279
}
280280
return OntologySearch.matchClasses( model, index, search )
281-
.mapWith( r -> ( OntologyTerm ) new OntologyTermImpl( r, additionalRestrictions ) )
281+
.mapWith( r -> ( OntologyTerm ) new OntologyTermImpl( r.result, additionalRestrictions, r.score ) )
282282
.filterKeep( where( ontologyTerm -> keepObsoletes || !ontologyTerm.isObsolete() ) )
283283
.toSet();
284284
} finally {

src/ubic/basecode/ontology/jena/OntologyIndividualImpl.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ public OntologyIndividualImpl( Individual ind, Set<Restriction> additionalRestri
4646
this.additionalRestrictions = additionalRestrictions;
4747
}
4848

49+
public OntologyIndividualImpl( Individual ind, Set<Restriction> additionalRestrictions, double score ) {
50+
super( ind, score );
51+
this.ind = ind;
52+
this.additionalRestrictions = additionalRestrictions;
53+
}
54+
4955
@Override
5056
public OntologyTerm getInstanceOf() {
5157
Resource type = ind.getRDFType();

src/ubic/basecode/ontology/jena/OntologyTermImpl.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import ubic.basecode.ontology.model.OntologyRestriction;
2626
import ubic.basecode.ontology.model.OntologyTerm;
2727

28+
import javax.annotation.Nullable;
2829
import java.util.*;
2930
import java.util.stream.Collectors;
3031

@@ -49,12 +50,18 @@ public class OntologyTermImpl extends AbstractOntologyResource implements Ontolo
4950
*/
5051
private final transient Set<Restriction> additionalRestrictions;
5152

52-
public OntologyTermImpl( OntClass resource, Set<Restriction> additionalRestrictions ) {
53+
public OntologyTermImpl( OntClass resource, @Nullable Set<Restriction> additionalRestrictions ) {
5354
super( resource );
5455
this.ontResource = resource;
5556
this.additionalRestrictions = additionalRestrictions;
5657
}
5758

59+
public OntologyTermImpl( OntClass resource, Set<Restriction> additionalRestrictions, double score ) {
60+
super( resource, score );
61+
this.ontResource = resource;
62+
this.additionalRestrictions = additionalRestrictions;
63+
}
64+
5865
@Override
5966
public Collection<String> getAlternativeIds() {
6067
Collection<String> results = new HashSet<>();

src/ubic/basecode/ontology/jena/search/OntologySearch.java

Lines changed: 65 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,12 @@
2121
import com.hp.hpl.jena.ontology.Individual;
2222
import com.hp.hpl.jena.ontology.OntClass;
2323
import com.hp.hpl.jena.ontology.OntModel;
24-
import com.hp.hpl.jena.rdf.model.NodeIterator;
24+
import com.hp.hpl.jena.rdf.model.Model;
2525
import com.hp.hpl.jena.rdf.model.RDFNode;
2626
import com.hp.hpl.jena.rdf.model.Resource;
2727
import com.hp.hpl.jena.shared.JenaException;
2828
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
29+
import com.hp.hpl.jena.util.iterator.Map1Iterator;
2930
import org.apache.commons.lang3.StringUtils;
3031
import org.apache.commons.lang3.time.StopWatch;
3132
import org.apache.lucene.queryParser.QueryParser.Operator;
@@ -35,9 +36,11 @@
3536

3637
import java.util.ArrayList;
3738
import java.util.List;
39+
import java.util.Objects;
3840
import java.util.regex.Matcher;
3941
import java.util.regex.Pattern;
4042

43+
import static com.hp.hpl.jena.sparql.util.ModelUtils.convertGraphNodeToRDFNode;
4144
import static ubic.basecode.ontology.jena.JenaUtils.where;
4245

4346
/**
@@ -50,65 +53,45 @@ public class OntologySearch {
5053
/**
5154
* Find classes that match the query string.
5255
*
53-
* @param model that goes with the index
54-
* @param index to search
56+
* @param model that goes with the index
57+
* @param index to search
5558
* @return iterator over the matching classes, each paired with its search score
5659
*/
57-
public static ExtendedIterator<OntClass> matchClasses( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException {
58-
NodeIterator iterator = runSearch( index, queryString );
59-
return iterator
60-
.mapWith( r -> r.inModel( model ) )
61-
.filterKeep( where( r -> r.isURIResource() && r.canAs( OntClass.class ) ) )
60+
public static ExtendedIterator<SearchResult<OntClass>> matchClasses( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException {
61+
return runSearch( model, index, queryString )
62+
.filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( OntClass.class ) ) )
6263
.mapWith( r -> r.as( OntClass.class ) );
6364
}
6465

6566
/**
6667
* Find individuals that match the query string
6768
*
68-
* @param model that goes with the index
69-
* @param index to search
69+
* @param model that goes with the index
70+
* @param index to search
7071
* @return iterator over the matching individuals, each paired with its search score
7172
*/
72-
public static ExtendedIterator<Individual> matchIndividuals( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException {
73-
NodeIterator iterator;
74-
75-
queryString = queryString.trim();
76-
77-
// Add wildcard only if the last word is longer than one character. This is to prevent lucene from
78-
// blowing up. See bug#1145
79-
String[] words = queryString.split( "\\s+" );
80-
int lastWordLength = words[words.length - 1].length();
81-
if ( lastWordLength > 1 ) {
82-
try { // Use wildcard search.
83-
iterator = runSearch( index, queryString + "*" );
84-
} catch ( OntologySearchJenaException e ) { // retry without wildcard
85-
log.warn( "Failed to perform search with wildcard. Retrying search without wildcard.", e );
86-
try {
87-
iterator = runSearch( index, queryString );
88-
} catch ( OntologySearchJenaException e1 ) {
89-
throw new RetryWithoutWildcardFailedException( "Failed to search while retrying without wildcard.", queryString, e.getCause(), e1.getCause() );
90-
}
91-
}
92-
} else {
93-
iterator = runSearch( index, queryString );
94-
}
95-
96-
return iterator
97-
.mapWith( r -> r.inModel( model ) )
98-
.filterKeep( where( r -> r.isURIResource() && r.canAs( Individual.class ) ) )
73+
public static ExtendedIterator<SearchResult<Individual>> matchIndividuals( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException {
74+
return runSearchWithWildcard( model, index, queryString )
75+
.filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( Individual.class ) ) )
9976
.mapWith( r -> r.as( Individual.class ) );
10077
}
10178

10279
/**
10380
* Find OntologyIndividuals and OntologyTerms that match the query string. Search with a wildcard is attempted
10481
* whenever possible.
10582
*
106-
* @param model that goes with the index
107-
* @param index to search
83+
* @param model that goes with the index
84+
* @param index to search
10885
* @return iterator over the matching resources, each paired with its search score
10986
*/
110-
public static ExtendedIterator<Resource> matchResources( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException {
111-
NodeIterator iterator;
87+
public static ExtendedIterator<SearchResult<Resource>> matchResources( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException {
88+
return runSearchWithWildcard( model, index, queryString )
89+
.filterKeep( where( o -> o.result.isURIResource() && o.result.isResource() ) )
90+
.mapWith( r -> r.as( Resource.class ) );
91+
}
92+
93+
private static ExtendedIterator<SearchResult<RDFNode>> runSearchWithWildcard( Model model, SearchIndex index, String queryString ) throws OntologySearchException {
94+
ExtendedIterator<SearchResult<RDFNode>> iterator;
11295

11396
queryString = queryString.trim();
11497

@@ -118,27 +101,24 @@ public static ExtendedIterator<Resource> matchResources( OntModel model, SearchI
118101
int lastWordLength = words[words.length - 1].length();
119102
if ( lastWordLength > 1 ) {
120103
try { // Use wildcard search.
121-
iterator = runSearch( index, queryString + "*" );
104+
iterator = runSearch( model, index, queryString + "*" );
122105
} catch ( OntologySearchJenaException e ) { // retry without wildcard
123106
// retry without wildcard
124107
log.warn( "Failed to search in {}. Retrying search without wildcard.", model, e );
125108
try {
126-
iterator = runSearch( index, queryString );
109+
iterator = runSearch( model, index, queryString );
127110
} catch ( OntologySearchJenaException e1 ) {
128111
throw new RetryWithoutWildcardFailedException( "Failed to search while retrying without wildcard.", queryString, e.getCause(), e1.getCause() );
129112
}
130113
}
131114
} else {
132-
iterator = runSearch( index, queryString );
115+
iterator = runSearch( model, index, queryString );
133116
}
134117

135-
return iterator
136-
.mapWith( r -> r.inModel( model ) )
137-
.filterKeep( where( o -> o.isURIResource() && o.isResource() ) )
138-
.mapWith( RDFNode::asResource );
118+
return iterator;
139119
}
140120

141-
private static NodeIterator runSearch( SearchIndex index, String queryString ) throws OntologySearchJenaException {
121+
private static ExtendedIterator<SearchResult<RDFNode>> runSearch( Model model, SearchIndex index, String queryString ) throws OntologySearchJenaException {
142122
String strippedQuery = StringUtils.strip( queryString );
143123

144124
if ( StringUtils.isBlank( strippedQuery ) ) {
@@ -158,7 +138,8 @@ private static NodeIterator runSearch( SearchIndex index, String queryString ) t
158138

159139
StopWatch timer = StopWatch.createStarted();
160140
try {
161-
return index.searchModelByIndex( enhancedQuery );
141+
return new Map1Iterator<>( o -> new SearchResult<>( o.getLuceneDocId(), convertGraphNodeToRDFNode( o.getNode(), model ), o.getScore() ),
142+
index.search( enhancedQuery ) );
162143
} catch ( JenaException e ) {
163144
throw new OntologySearchJenaException( "Failed to search with enhanced query.", enhancedQuery, e );
164145
} finally {
@@ -168,4 +149,38 @@ private static NodeIterator runSearch( SearchIndex index, String queryString ) t
168149
}
169150
}
170151
}
152+
153+
public static class SearchResult<T extends RDFNode> {
154+
public final int docId;
155+
public final T result;
156+
public final double score;
157+
158+
private SearchResult( int docId, T result, double score ) {
159+
this.docId = docId;
160+
this.result = result;
161+
this.score = score;
162+
}
163+
164+
@Override
165+
public boolean equals( Object obj ) {
166+
if ( obj instanceof SearchResult ) {
167+
return Objects.equals( result, ( ( SearchResult<?> ) obj ).result );
168+
}
169+
return false;
170+
}
171+
172+
@Override
173+
public int hashCode() {
174+
return Objects.hash( result );
175+
}
176+
177+
@Override
178+
public String toString() {
179+
return String.format( "%s [docId = %d, score = %f]", result, docId, score );
180+
}
181+
182+
private <U extends Resource> SearchResult<U> as( Class<U> clazz ) {
183+
return new SearchResult<>( docId, result.as( clazz ), score );
184+
}
185+
}
171186
}

src/ubic/basecode/ontology/jena/vocabulary/OBO.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,5 @@ private static Property property( String name ) {
2121
public static final Property hasRelatedSynonym = property( "hasRelatedSynonym" );
2222
public static final Property alternativeLabel = ResourceFactory.createProperty( "http://purl.obolibrary.org/obo/IAO_0000118" );
2323
public static final Resource ObsoleteClass = ResourceFactory.createResource( "http://www.geneontology.org/formats/oboInOwl#ObsoleteClass" );
24-
public static final Property ObsoleteProperty = ResourceFactory.createProperty( "http://www.geneontology.org/formats/oboInOwl#ObsoleteProperty" );
24+
public static final Property ObsoleteProperty = property( "ObsoleteProperty" );
2525
}

src/ubic/basecode/ontology/model/OntologyResource.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package ubic.basecode.ontology.model;
2020

21+
import javax.annotation.Nullable;
2122
import java.io.Serializable;
2223

2324
/**
@@ -39,4 +40,10 @@ public interface OntologyResource extends Serializable, Comparable<OntologyResou
3940
* Whether the resource is marked as obsolete.
4041
*/
4142
boolean isObsolete();
43+
44+
/**
45+
* If this is a result from a free-text search, the corresponding score; otherwise null.
46+
*/
47+
@Nullable
48+
Double getScore();
4249
}

src/ubic/basecode/ontology/model/OntologyTermSimple.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
*/
1515
package ubic.basecode.ontology.model;
1616

17+
import javax.annotation.Nullable;
1718
import java.util.Collection;
1819
import java.util.Comparator;
1920
import java.util.Objects;
@@ -123,6 +124,12 @@ public boolean isObsolete() {
123124
return obsolete;
124125
}
125126

127+
@Nullable
128+
@Override
129+
public Double getScore() {
130+
return null;
131+
}
132+
126133
@Override
127134
public int compareTo( OntologyResource other ) {
128135
return Objects.compare( getUri(), other.getUri(), Comparator.nullsLast( Comparator.naturalOrder() ) );

test/ubic/basecode/ontology/OntologyTermTest.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,5 +308,8 @@ public void testGetChildrenFromMultipleTermsWithSearch() throws OntologySearchEx
308308
public void testFindTerm() throws OntologySearchException {
309309
assertEquals( 123, uberon.findTerm( "brain" ).size() );
310310
assertEquals( 128, uberon.findTerm( "brain", true ).size() );
311+
OntologyTerm firstResult = uberon.findTerm( "brain" ).iterator().next();
312+
assertNotNull( firstResult.getScore() );
313+
assertEquals( 2.8577, firstResult.getScore(), 0.0001 );
311314
}
312315
}

0 commit comments

Comments
 (0)