Skip to content

Commit bb271e2

Browse files
committed
Let Lucene parse the search query with the default settings
1 parent fbb3877 commit bb271e2

3 files changed

Lines changed: 8 additions & 80 deletions

File tree

src/ubic/basecode/ontology/jena/JenaUtils.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@ public static <T> Filter<T> where( Predicate<T> predicate ) {
221221
return new PredicateFilter<>( predicate );
222222
}
223223

224-
public static <T extends RDFNode> Optional<T> as( Resource resource, Class<T> clazz ) {
224+
public static <T extends RDFNode> Optional<T> as( RDFNode resource, Class<T> clazz ) {
225225
if ( !resource.canAs( clazz ) ) {
226226
return Optional.empty();
227227
}

src/ubic/basecode/ontology/jena/OntologySearch.java

Lines changed: 7 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -30,16 +30,12 @@
3030
import com.hp.hpl.jena.util.iterator.Map1Iterator;
3131
import org.apache.commons.lang3.StringUtils;
3232
import org.apache.commons.lang3.time.StopWatch;
33-
import org.apache.lucene.queryParser.QueryParser.Operator;
3433
import org.slf4j.Logger;
3534
import org.slf4j.LoggerFactory;
3635
import ubic.basecode.ontology.search.OntologySearchException;
3736

38-
import java.util.ArrayList;
39-
import java.util.List;
4037
import java.util.Objects;
41-
import java.util.regex.Matcher;
42-
import java.util.regex.Pattern;
38+
import java.util.Optional;
4339

4440
import static com.hp.hpl.jena.sparql.util.ModelUtils.convertGraphNodeToRDFNode;
4541
import static ubic.basecode.ontology.jena.JenaUtils.where;
@@ -73,7 +69,7 @@ public static ExtendedIterator<SearchResult<OntClass>> matchClasses( OntModel mo
7369
* @return Collection of OntologyTerm objects
7470
*/
7571
public static ExtendedIterator<SearchResult<Individual>> matchIndividuals( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException {
76-
return runSearchWithWildcard( model, index, queryString )
72+
return runSearch( model, index, queryString )
7773
.filterKeep( where( r -> r.result.isURIResource() && r.result.canAs( Individual.class ) ) )
7874
.mapWith( r -> r.as( Individual.class ) )
7975
.filterKeep( where( Objects::nonNull ) );
@@ -88,68 +84,25 @@ public static ExtendedIterator<SearchResult<Individual>> matchIndividuals( OntMo
8884
* @return Collection of OntologyResource objects
8985
*/
9086
public static ExtendedIterator<SearchResult<Resource>> matchResources( OntModel model, SearchIndex index, String queryString ) throws OntologySearchException {
91-
return runSearchWithWildcard( model, index, queryString )
87+
return runSearch( model, index, queryString )
9288
.filterKeep( where( o -> o.result.isURIResource() && o.result.isResource() ) )
9389
.mapWith( r -> r.as( Resource.class ) )
9490
.filterKeep( where( Objects::nonNull ) );
9591
}
9692

97-
private static ExtendedIterator<SearchResult<RDFNode>> runSearchWithWildcard( Model model, SearchIndex index, String queryString ) throws OntologySearchException {
98-
ExtendedIterator<SearchResult<RDFNode>> iterator;
99-
100-
queryString = queryString.trim();
101-
102-
// Add wildcard only if the last word is longer than one character. This is to prevent lucene from
103-
// blowing up. See bug#1145
104-
String[] words = queryString.split( "\\s+" );
105-
int lastWordLength = words[words.length - 1].length();
106-
if ( lastWordLength > 1 ) {
107-
try { // Use wildcard search.
108-
iterator = runSearch( model, index, queryString + "*" );
109-
} catch ( OntologySearchJenaException e ) { // retry without wildcard
110-
// retry without wildcard
111-
log.warn( "Failed to search in {}. Retrying search without wildcard.", model, e );
112-
try {
113-
iterator = runSearch( model, index, queryString );
114-
} catch ( OntologySearchJenaException e1 ) {
115-
throw new RetryWithoutWildcardFailedException( "Failed to search while retrying without wildcard.", queryString, e.getCause(), e1.getCause() );
116-
}
117-
}
118-
} else {
119-
iterator = runSearch( model, index, queryString );
120-
}
121-
122-
return iterator;
123-
}
124-
12593
private static ExtendedIterator<SearchResult<RDFNode>> runSearch( Model model, SearchIndex index, String queryString ) throws OntologySearchJenaException {
126-
String strippedQuery = StringUtils.strip( queryString );
127-
128-
if ( StringUtils.isBlank( strippedQuery ) ) {
94+
if ( StringUtils.isBlank( queryString ) ) {
12995
throw new IllegalArgumentException( "Query cannot be blank" );
13096
}
131-
132-
String query = queryString.replaceAll( " AND ", " " );
133-
List<String> list = new ArrayList<>();
134-
Matcher m = Pattern.compile( "([^\"]\\S*|\".+?\")\\s*" ).matcher( query );
135-
while ( m.find() ) {
136-
list.add( m.group( 1 ) );
137-
}
138-
String enhancedQuery = StringUtils.join( list, " " + Operator.AND + " " );
139-
140-
// Note: LARQ does not allow you to change the default operator without making it non-thread-safe.
141-
index.getLuceneQueryParser().setDefaultOperator( Operator.AND );
142-
14397
StopWatch timer = StopWatch.createStarted();
14498
try {
145-
return new Map1Iterator<>( o -> new SearchResult<>( o.getLuceneDocId(), convertGraphNodeToRDFNode( o.getNode(), model ), o.getScore() ),
146-
index.search( enhancedQuery ) );
99+
return new Map1Iterator<>( o -> new SearchResult<>( o.getLuceneDocId(), convertGraphNodeToRDFNode( o.getNode(), model ), o.getScore() ), index.search( queryString ) );
147100
} catch ( JenaException e ) {
148-
throw new OntologySearchJenaException( "Failed to search with enhanced query.", enhancedQuery, e );
101+
throw new OntologySearchJenaException( "Failed to search with query.", queryString, e );
149102
} finally {
150103
timer.stop();
151104
if ( timer.getTime() > 100 ) {
152-
log.warn( "Ontology resource search for: {} (parsed to: {}) took {} ms.", queryString, enhancedQuery, timer.getTime() );
105+
log.warn( "Ontology resource search for: {} took {} ms.", queryString, timer.getTime() );
153106
}
154107
}
155108
}

src/ubic/basecode/ontology/jena/RetryWithoutWildcardFailedException.java

Lines changed: 0 additions & 25 deletions
This file was deleted.

0 commit comments

Comments
 (0)