Skip to content

Commit 5e6a22b

Browse files
committed
Better handle indexing of literals and URI resources
Parse numeric literals and ignore blank nodes.
1 parent ce38406 commit 5e6a22b

3 files changed

Lines changed: 61 additions & 25 deletions

File tree

src/ubic/basecode/ontology/jena/JenaUtils.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,6 @@ public static ExtendedIterator<Restriction> listRestrictionsOnProperties( OntMod
244244
while ( it.hasNext() ) {
245245
OntProperty sp = it.next();
246246
allProps.add( sp );
247-
log.info( "Inferred {} from {}", sp, property );
248247
}
249248
}
250249
}

src/ubic/basecode/ontology/jena/OntologyIndexer.java

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,10 @@
1818
*/
1919
package ubic.basecode.ontology.jena;
2020

21+
import com.hp.hpl.jena.datatypes.xsd.XSDDateTime;
2122
import com.hp.hpl.jena.ontology.OntModel;
2223
import com.hp.hpl.jena.ontology.OntResource;
23-
import com.hp.hpl.jena.rdf.model.Property;
24-
import com.hp.hpl.jena.rdf.model.Resource;
25-
import com.hp.hpl.jena.rdf.model.Statement;
26-
import com.hp.hpl.jena.rdf.model.StmtIterator;
24+
import com.hp.hpl.jena.rdf.model.*;
2725
import com.hp.hpl.jena.shared.JenaException;
2826
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
2927
import com.hp.hpl.jena.util.iterator.WrappedIterator;
@@ -35,6 +33,7 @@
3533
import org.apache.lucene.analysis.standard.StandardAnalyzer;
3634
import org.apache.lucene.document.Document;
3735
import org.apache.lucene.document.Field;
36+
import org.apache.lucene.document.Fieldable;
3837
import org.apache.lucene.document.NumericField;
3938
import org.apache.lucene.index.IndexReader;
4039
import org.apache.lucene.index.IndexWriter;
@@ -221,8 +220,46 @@ private static Directory index( String name, OntModel model, Analyzer analyzer,
221220
while ( listStatements.hasNext() ) {
222221
Statement s = listStatements.next();
223222
String field = s.getPredicate().getURI();
224-
String value = JenaUtils.asString( s.getObject() );
225-
doc.add( new Field( field, value, Field.Store.NO, indexablePropertiesByField.get( field ).isAnalyzed() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED ) );
223+
Fieldable f;
224+
if ( s.getObject().isLiteral() ) {
225+
Literal l = s.getObject().asLiteral();
226+
if ( l.getValue() instanceof String ) {
227+
f = new Field( field, l.getString(), Field.Store.NO, indexablePropertiesByField.get( field ).isAnalyzed() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED );
228+
} else if ( l.getValue() instanceof Number ) {
229+
NumericField nf = new NumericField( field );
230+
if ( l.getValue() instanceof Integer ) {
231+
nf.setIntValue( s.getInt() );
232+
} else if ( l.getValue() instanceof Long ) {
233+
nf.setLongValue( s.getLong() );
234+
} else if ( l.getValue() instanceof Float ) {
235+
nf.setFloatValue( s.getFloat() );
236+
} else if ( l.getValue() instanceof Double ) {
237+
nf.setDoubleValue( s.getDouble() );
238+
} else {
239+
log.warn( "Skipping numeric literal of unsupported type: {}", l );
240+
continue;
241+
}
242+
f = nf;
243+
} else if ( l.getValue() instanceof XSDDateTime ) {
244+
f = new NumericField( field )
245+
.setLongValue( ( ( XSDDateTime ) l.getValue() ).asCalendar().getTime().getTime() );
246+
} else if ( l.getValue() instanceof Boolean ) {
247+
f = new NumericField( field ).setIntValue( Boolean.TRUE.equals( l.getValue() ) ? 1 : 0 );
248+
} else {
249+
log.warn( "Skipping literal of unsupported type: {}", l );
250+
continue;
251+
}
252+
} else if ( s.getObject().isURIResource() ) {
253+
// index the URI
254+
f = new Field( field, s.getObject().asResource().getURI(), Field.Store.NO, Field.Index.NOT_ANALYZED );
255+
} else {
256+
// could be a blank node
257+
continue;
258+
}
259+
if ( isIndividual ) {
260+
System.out.println( doc );
261+
}
262+
doc.add( f );
226263
}
227264
}
228265
indexWriter.addDocument( doc );
@@ -278,7 +315,7 @@ private ExtendedIterator<JenaSearchResult> search( OntModel model, String queryS
278315
Query query = new MultiFieldQueryParser( Version.LUCENE_36, searchableFields, analyzer ).parse( queryString );
279316
// in general, results are found in both the regular and std index, so we halve the initial capacity
280317
// we also have to double the number of hits to account for duplicates
281-
TopDocs hits = new IndexSearcher( index ).search( query, filter, maxResults * 3 );
318+
TopDocs hits = new IndexSearcher( index ).search( query, filter, maxResults * 2 );
282319
Set<String> seenIds = new HashSet<>( hits.totalHits / 2 );
283320
List<JenaSearchResult> resources = new ArrayList<>( hits.totalHits / 2 );
284321
for ( int i = 0; i < hits.scoreDocs.length; i++ ) {

test/ubic/basecode/ontology/providers/ObiServiceTest.java

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,10 @@
1616

1717
import org.junit.Test;
1818
import ubic.basecode.ontology.AbstractOntologyTest;
19-
import ubic.basecode.ontology.model.OntologyIndividual;
2019
import ubic.basecode.ontology.model.OntologyResource;
21-
import ubic.basecode.ontology.model.OntologyTerm;
2220
import ubic.basecode.ontology.search.OntologySearchResult;
2321

24-
import java.util.Collection;
25-
26-
import static org.junit.Assert.assertFalse;
27-
import static org.junit.Assert.assertTrue;
22+
import static org.assertj.core.api.Assertions.assertThat;
2823

2924
/**
3025
* @author paul
@@ -36,16 +31,21 @@ public void testLoadAndSearch() throws Exception {
3631
ObiService m = new ObiService();
3732
m.setInferenceMode( OntologyService.InferenceMode.NONE );
3833
m.initialize( true, false );
39-
40-
assertTrue( m.isOntologyLoaded() );
41-
42-
Collection<OntologySearchResult<OntologyTerm>> hits = m.findTerm( "batch", 500 );
43-
assertFalse( hits.isEmpty() );
44-
45-
Collection<OntologySearchResult<OntologyIndividual>> ihits = m.findIndividuals( "batch", 500 );
46-
assertFalse( ihits.isEmpty() );
47-
48-
Collection<OntologySearchResult<OntologyResource>> rhits = m.findResources( "batch", 500 );
49-
assertFalse( rhits.isEmpty() );
34+
assertThat( m.isOntologyLoaded() ).isTrue();
35+
36+
assertThat( m.findTerm( "batch", 500 ) )
37+
.extracting( OntologySearchResult::getResult )
38+
.extracting( OntologyResource::getUri )
39+
.contains( "http://purl.obolibrary.org/obo/IAO_0000132" );
40+
41+
assertThat( m.findIndividuals( "failed exploratory term", 500 ) )
42+
.extracting( OntologySearchResult::getResult )
43+
.extracting( OntologyResource::getUri )
44+
.contains( "http://purl.obolibrary.org/obo/IAO_0000103" );
45+
46+
assertThat( m.findResources( "batch", 500 ) )
47+
.extracting( OntologySearchResult::getResult )
48+
.extracting( OntologyResource::getUri )
49+
.contains( "http://purl.obolibrary.org/obo/IAO_0000132" );
5050
}
5151
}

0 commit comments

Comments
 (0)