2121import com .hp .hpl .jena .ontology .Individual ;
2222import com .hp .hpl .jena .ontology .OntClass ;
2323import com .hp .hpl .jena .ontology .OntModel ;
24- import com .hp .hpl .jena .rdf .model .NodeIterator ;
24+ import com .hp .hpl .jena .rdf .model .Model ;
2525import com .hp .hpl .jena .rdf .model .RDFNode ;
2626import com .hp .hpl .jena .rdf .model .Resource ;
2727import com .hp .hpl .jena .shared .JenaException ;
2828import com .hp .hpl .jena .util .iterator .ExtendedIterator ;
29+ import com .hp .hpl .jena .util .iterator .Map1Iterator ;
2930import org .apache .commons .lang3 .StringUtils ;
3031import org .apache .commons .lang3 .time .StopWatch ;
3132import org .apache .lucene .queryParser .QueryParser .Operator ;
3536
3637import java .util .ArrayList ;
3738import java .util .List ;
39+ import java .util .Objects ;
3840import java .util .regex .Matcher ;
3941import java .util .regex .Pattern ;
4042
43+ import static com .hp .hpl .jena .sparql .util .ModelUtils .convertGraphNodeToRDFNode ;
4144import static ubic .basecode .ontology .jena .JenaUtils .where ;
4245
4346/**
@@ -50,65 +53,45 @@ public class OntologySearch {
5053 /**
5154 * Find classes that match the query string.
5255 *
53- * @param model that goes with the index
54- * @param index to search
56+ * @param model that goes with the index
57+ * @param index to search
5558 * @return Collection of OntologyTerm objects
5659 */
57- public static ExtendedIterator <OntClass > matchClasses ( OntModel model , SearchIndex index , String queryString ) throws OntologySearchException {
58- NodeIterator iterator = runSearch ( index , queryString );
59- return iterator
60- .mapWith ( r -> r .inModel ( model ) )
61- .filterKeep ( where ( r -> r .isURIResource () && r .canAs ( OntClass .class ) ) )
60+ public static ExtendedIterator <SearchResult <OntClass >> matchClasses ( OntModel model , SearchIndex index , String queryString ) throws OntologySearchException {
61+ return runSearch ( model , index , queryString )
62+ .filterKeep ( where ( r -> r .result .isURIResource () && r .result .canAs ( OntClass .class ) ) )
6263 .mapWith ( r -> r .as ( OntClass .class ) );
6364 }
6465
6566 /**
6667 * Find individuals that match the query string
6768 *
68- * @param model that goes with the index
69- * @param index to search
69+ * @param model that goes with the index
70+ * @param index to search
7071 * @return Collection of OntologyTerm objects
7172 */
72- public static ExtendedIterator <Individual > matchIndividuals ( OntModel model , SearchIndex index , String queryString ) throws OntologySearchException {
73- NodeIterator iterator ;
74-
75- queryString = queryString .trim ();
76-
77- // Add wildcard only if the last word is longer than one character. This is to prevent lucene from
78- // blowing up. See bug#1145
79- String [] words = queryString .split ( "\\ s+" );
80- int lastWordLength = words [words .length - 1 ].length ();
81- if ( lastWordLength > 1 ) {
82- try { // Use wildcard search.
83- iterator = runSearch ( index , queryString + "*" );
84- } catch ( OntologySearchJenaException e ) { // retry without wildcard
85- log .warn ( "Failed to perform search with wildcard. Retrying search without wildcard." , e );
86- try {
87- iterator = runSearch ( index , queryString );
88- } catch ( OntologySearchJenaException e1 ) {
89- throw new RetryWithoutWildcardFailedException ( "Failed to search while retrying without wildcard." , queryString , e .getCause (), e1 .getCause () );
90- }
91- }
92- } else {
93- iterator = runSearch ( index , queryString );
94- }
95-
96- return iterator
97- .mapWith ( r -> r .inModel ( model ) )
98- .filterKeep ( where ( r -> r .isURIResource () && r .canAs ( Individual .class ) ) )
73+ public static ExtendedIterator <SearchResult <Individual >> matchIndividuals ( OntModel model , SearchIndex index , String queryString ) throws OntologySearchException {
74+ return runSearchWithWildcard ( model , index , queryString )
75+ .filterKeep ( where ( r -> r .result .isURIResource () && r .result .canAs ( Individual .class ) ) )
9976 .mapWith ( r -> r .as ( Individual .class ) );
10077 }
10178
10279 /**
10380 * Find OntologyIndividuals and OntologyTerms that match the query string. Search with a wildcard is attempted
10481 * whenever possible.
10582 *
106- * @param model that goes with the index
107- * @param index to search
83+ * @param model that goes with the index
84+ * @param index to search
10885 * @return Collection of OntologyResource objects
10986 */
110- public static ExtendedIterator <Resource > matchResources ( OntModel model , SearchIndex index , String queryString ) throws OntologySearchException {
111- NodeIterator iterator ;
87+ public static ExtendedIterator <SearchResult <Resource >> matchResources ( OntModel model , SearchIndex index , String queryString ) throws OntologySearchException {
88+ return runSearchWithWildcard ( model , index , queryString )
89+ .filterKeep ( where ( o -> o .result .isURIResource () && o .result .isResource () ) )
90+ .mapWith ( r -> r .as ( Resource .class ) );
91+ }
92+
93+ private static ExtendedIterator <SearchResult <RDFNode >> runSearchWithWildcard ( Model model , SearchIndex index , String queryString ) throws OntologySearchException {
94+ ExtendedIterator <SearchResult <RDFNode >> iterator ;
11295
11396 queryString = queryString .trim ();
11497
@@ -118,27 +101,24 @@ public static ExtendedIterator<Resource> matchResources( OntModel model, SearchI
118101 int lastWordLength = words [words .length - 1 ].length ();
119102 if ( lastWordLength > 1 ) {
120103 try { // Use wildcard search.
121- iterator = runSearch ( index , queryString + "*" );
104+ iterator = runSearch ( model , index , queryString + "*" );
122105 } catch ( OntologySearchJenaException e ) { // retry without wildcard
123106 // retry without wildcard
124107 log .warn ( "Failed to search in {}. Retrying search without wildcard." , model , e );
125108 try {
126- iterator = runSearch ( index , queryString );
109+ iterator = runSearch ( model , index , queryString );
127110 } catch ( OntologySearchJenaException e1 ) {
128111 throw new RetryWithoutWildcardFailedException ( "Failed to search while retrying without wildcard." , queryString , e .getCause (), e1 .getCause () );
129112 }
130113 }
131114 } else {
132- iterator = runSearch ( index , queryString );
115+ iterator = runSearch ( model , index , queryString );
133116 }
134117
135- return iterator
136- .mapWith ( r -> r .inModel ( model ) )
137- .filterKeep ( where ( o -> o .isURIResource () && o .isResource () ) )
138- .mapWith ( RDFNode ::asResource );
118+ return iterator ;
139119 }
140120
141- private static NodeIterator runSearch ( SearchIndex index , String queryString ) throws OntologySearchJenaException {
121+ private static ExtendedIterator < SearchResult < RDFNode >> runSearch ( Model model , SearchIndex index , String queryString ) throws OntologySearchJenaException {
142122 String strippedQuery = StringUtils .strip ( queryString );
143123
144124 if ( StringUtils .isBlank ( strippedQuery ) ) {
@@ -158,7 +138,8 @@ private static NodeIterator runSearch( SearchIndex index, String queryString ) t
158138
159139 StopWatch timer = StopWatch .createStarted ();
160140 try {
161- return index .searchModelByIndex ( enhancedQuery );
141+ return new Map1Iterator <>( o -> new SearchResult <>( o .getLuceneDocId (), convertGraphNodeToRDFNode ( o .getNode (), model ), o .getScore () ),
142+ index .search ( enhancedQuery ) );
162143 } catch ( JenaException e ) {
163144 throw new OntologySearchJenaException ( "Failed to search with enhanced query." , enhancedQuery , e );
164145 } finally {
@@ -168,4 +149,38 @@ private static NodeIterator runSearch( SearchIndex index, String queryString ) t
168149 }
169150 }
170151 }
152+
153+ public static class SearchResult <T extends RDFNode > {
154+ public final int docId ;
155+ public final T result ;
156+ public final double score ;
157+
158+ private SearchResult ( int docId , T result , double score ) {
159+ this .docId = docId ;
160+ this .result = result ;
161+ this .score = score ;
162+ }
163+
164+ @ Override
165+ public boolean equals ( Object obj ) {
166+ if ( obj instanceof SearchResult ) {
167+ return Objects .equals ( result , ( ( SearchResult <?> ) obj ).result );
168+ }
169+ return false ;
170+ }
171+
172+ @ Override
173+ public int hashCode () {
174+ return Objects .hash ( result );
175+ }
176+
177+ @ Override
178+ public String toString () {
179+ return String .format ( "%s [docId = %d, score = %f]" , result , docId , score );
180+ }
181+
182+ private <U extends Resource > SearchResult <U > as ( Class <U > clazz ) {
183+ return new SearchResult <>( docId , result .as ( clazz ), score );
184+ }
185+ }
171186}
0 commit comments