Skip to content

Commit d8e8f5c

Browse files
committed
Add proper support for initializing ontologies from stream
Deprecate loadTermsInNameSpace and use the improved initialize() method from a stream. Improve documentation for the ontology service interface. Add more checks for the current thread interruption in initialize().
1 parent 87c8f5f commit d8e8f5c

3 files changed

Lines changed: 120 additions & 60 deletions

File tree

src/ubic/basecode/ontology/jena/AbstractOntologyService.java

Lines changed: 60 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,15 @@ public abstract class AbstractOntologyService implements OntologyService {
8888

8989
private boolean isInitialized = false;
9090

91-
92-
@Override
9391
public void initialize( boolean forceLoad, boolean forceIndexing ) {
92+
initialize( null, forceLoad, forceIndexing );
93+
}
94+
95+
public void initialize( InputStream stream, boolean forceIndexing ) {
96+
initialize( stream, true, forceIndexing );
97+
}
98+
99+
private void initialize( InputStream stream, boolean forceLoad, boolean forceIndexing ) {
94100
if ( !forceLoad && isInitialized ) {
95101
log.warn( "{} is already loaded, and force=false, not restarting", this );
96102
return;
@@ -118,20 +124,21 @@ public void initialize( boolean forceLoad, boolean forceIndexing ) {
118124
log.info( "Loading ontology: {}...", this );
119125
StopWatch loadTime = StopWatch.createStarted();
120126

121-
// use temporary variables so we can minimize the critical region for replacing the service's state
122-
Map<String, OntologyTerm> terms = new HashMap<>();
123-
Map<String, OntologyIndividual> individuals = new HashMap<>();
127+
// use temporary variables, so that we can minimize the critical region for replacing the service's state
124128
OntModel model;
125129
SearchIndex index;
126130

127-
if ( Thread.currentThread().isInterrupted() ) {
128-
log.warn( "The current thread is interrupted, initialization of {} will be stop.", this );
131+
// loading the model from disk or URL is lengthy
132+
if ( checkIfInterrupted() )
129133
return;
130-
}
131134

132-
model = loadModel(); // can take a while.
135+
model = stream != null ? loadModelFromStream( stream ) : loadModel(); // can take a while.
133136
assert model != null;
134137

138+
// retrieving restrictions is lengthy
139+
if ( checkIfInterrupted() )
140+
return;
141+
135142
// compute additional restrictions
136143
Set<Restriction> additionalRestrictions = model.listRestrictions()
137144
.filterKeep( new RestrictionWithOnPropertyFilter( additionalProperties ) )
@@ -147,12 +154,15 @@ public void initialize( boolean forceLoad, boolean forceIndexing ) {
147154
*/
148155
boolean force = forceReindexing || changed || !indexExists;
149156

157+
// indexing is lengthy, don't bother if we're interrupted
158+
if ( checkIfInterrupted() )
159+
return;
160+
150161
index = OntologyIndexer.indexOntology( getCacheName(), model, force );
151162

152-
if ( Thread.currentThread().isInterrupted() ) {
153-
log.warn( "The current thread is interrupted, initialization of {} will be stop.", this );
163+
// if interrupted, we don't need to replace the model and clear the *old* cache
164+
if ( checkIfInterrupted() )
154165
return;
155-
}
156166

157167
Lock lock = rwLock.writeLock();
158168
try {
@@ -161,18 +171,25 @@ public void initialize( boolean forceLoad, boolean forceIndexing ) {
161171
this.additionalRestrictions = additionalRestrictions;
162172
this.index = index;
163173
this.isInitialized = true;
174+
// now that the terms have been replaced, we can clear old caches
175+
OntologyLoader.deleteOldCache( getCacheName() );
164176
} finally {
165177
lock.unlock();
166178
}
167179

168-
// now that the terms have been replaced, we can clear old caches
169-
OntologyLoader.deleteOldCache( getCacheName() );
170-
171180
loadTime.stop();
172181

173182
log.info( "Finished loading {} in {}s", this, String.format( "%.2f", loadTime.getTime() / 1000.0 ) );
174183
}
175184

185+
private boolean checkIfInterrupted() {
186+
if ( Thread.interrupted() ) {
187+
log.warn( "The current thread is interrupted, initialization of {} will be stop.", this );
188+
return true;
189+
}
190+
return false;
191+
}
192+
176193
/**
177194
* Do not do this except before re-indexing.
178195
*/
@@ -496,6 +513,14 @@ public void waitForInitializationThread() throws InterruptedException {
496513
*/
497514
protected abstract OntModel loadModel();
498515

516+
517+
/**
518+
* Load a model from a given input stream.
519+
*/
520+
protected OntModel loadModelFromStream( InputStream is ) {
521+
return OntologyLoader.loadMemoryModel( is, this.getOntologyUrl() );
522+
}
523+
499524
protected String getCacheName() {
500525
return getOntologyName();
501526
}
@@ -559,48 +584,29 @@ private void initSearchByAlternativeId() {
559584

560585
@Override
561586
public void loadTermsInNameSpace( InputStream is, boolean forceIndex ) {
562-
Lock lock = rwLock.writeLock();
563-
try {
564-
lock.lock();
565-
this.isInitialized = false;
566-
567-
if ( initializationThread != null && initializationThread.isAlive() ) {
568-
log.warn( "{} initialization is already running, trying to cancel ...", this );
569-
initializationThread.interrupt();
570-
// wait for the thread to die.
571-
int maxWait = 10;
572-
int wait = 0;
573-
while ( initializationThread.isAlive() ) {
574-
try {
575-
initializationThread.join( 5000 );
576-
log.warn( "Waiting for auto-initialization to stop so manual initialization can begin ..." );
577-
} catch ( InterruptedException e ) {
578-
Thread.currentThread().interrupt();
579-
log.warn( "Got interrupted while waiting for the initialization thread of {} to finish.", this );
580-
return;
581-
}
582-
++wait;
583-
if ( wait >= maxWait && !initializationThread.isAlive() ) {
584-
throw new RuntimeException( String.format( "Got tired of waiting for %s's initialization thread.", this ) );
585-
}
587+
// wait for the initialization thread to finish
588+
if ( initializationThread != null && initializationThread.isAlive() ) {
589+
log.warn( "{} initialization is already running, trying to cancel ...", this );
590+
initializationThread.interrupt();
591+
// wait for the thread to die.
592+
int maxWait = 10;
593+
int wait = 0;
594+
while ( initializationThread.isAlive() ) {
595+
try {
596+
initializationThread.join( 5000 );
597+
log.warn( "Waiting for auto-initialization to stop so manual initialization can begin ..." );
598+
} catch ( InterruptedException e ) {
599+
Thread.currentThread().interrupt();
600+
log.warn( "Got interrupted while waiting for the initialization thread of {} to finish.", this );
601+
return;
602+
}
603+
++wait;
604+
if ( wait >= maxWait && !initializationThread.isAlive() ) {
605+
throw new RuntimeException( String.format( "Got tired of waiting for %s's initialization thread.", this ) );
586606
}
587607
}
588-
589-
this.model = OntologyLoader.loadMemoryModel( is, this.getOntologyUrl() );
590-
this.additionalRestrictions = model.listRestrictions()
591-
.filterKeep( new RestrictionWithOnPropertyFilter( additionalProperties ) )
592-
.toSet();
593-
this.index = OntologyIndexer.getSubjectIndex( getCacheName() );
594-
if ( index == null || forceIndex ) {
595-
this.index = OntologyIndexer.indexOntology( getCacheName(), model, true /* force */ );
596-
}
597-
598-
isInitialized = true;
599-
} finally {
600-
lock.unlock();
601608
}
602-
603-
log.info( "Ontology {} is ready!", this );
609+
initialize( is, forceIndex );
604610
}
605611

606612
@Override

src/ubic/basecode/ontology/providers/OntologyService.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,13 @@ public interface OntologyService {
2121
*/
2222
void initialize( boolean forceLoad, boolean forceIndexing );
2323

24+
/**
25+
* Initialize this ontology service from a stream.
26+
* <p>
27+
* Note that when this method of initialization is used, the ontology cache is not created on disk.
28+
*/
29+
void initialize( InputStream stream, boolean forceIndexing );
30+
2431
/**
2532
* Looks for any individuals that match the given search string.
2633
* <p>
@@ -97,21 +104,55 @@ default Collection<OntologyTerm> findTerm( String search ) throws OntologySearch
97104
*/
98105
OntologyTerm getTerm( String uri );
99106

107+
/**
108+
* Obtain all the individuals for a given term URI.
109+
*/
100110
Collection<OntologyIndividual> getTermIndividuals( String uri );
101111

112+
/**
113+
* Obtain all the parents of a given set of terms, excluding obsolete terms.
114+
*
115+
* @see #getParents(Collection, boolean, boolean, boolean)
116+
*/
102117
default Set<OntologyTerm> getParents( Collection<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties ) {
103118
return getParents( terms, direct, includeAdditionalProperties, false );
104119
}
105120

121+
/**
122+
* Obtain all the parents of a given set of terms.
123+
*
124+
* @param terms set of terms whose parents are retrieved
125+
* @param direct only retain direct parents
126+
* @param includeAdditionalProperties also include parents matched via additional properties
127+
* @param keepObsoletes retain obsolete terms
128+
* @return a set of parent terms
129+
*/
106130
Set<OntologyTerm> getParents( Collection<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties, boolean keepObsoletes );
107131

132+
/**
133+
* Obtain all the children of a given set of terms, excluding obsolete terms.
134+
*
135+
* @see #getChildren(Collection, boolean, boolean, boolean)
136+
*/
108137
default Set<OntologyTerm> getChildren( Collection<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties ) {
109138
return getChildren( terms, direct, includeAdditionalProperties, false );
110139
}
111140

141+
/**
142+
* Obtain all the children of a given set of terms.
143+
*
144+
* @param terms set of terms whose children are retrieved
145+
* @param direct only retain direct children
146+
* @param includeAdditionalProperties also include children matched via additional properties
147+
* @param keepObsoletes retain obsolete terms
148+
* @return a set of child terms
149+
*/
112150
Set<OntologyTerm> getChildren( Collection<OntologyTerm> terms, boolean direct, boolean includeAdditionalProperties, boolean keepObsoletes );
113151

114152

153+
/**
154+
* Check if this ontology is enabled.
155+
*/
115156
boolean isEnabled();
116157

117158
/**
@@ -133,10 +174,19 @@ default Set<OntologyTerm> getChildren( Collection<OntologyTerm> terms, boolean d
133174
*/
134175
void startInitializationThread( boolean forceLoad, boolean forceIndexing );
135176

177+
/**
178+
* Check if the initialization thread is alive.
179+
*/
136180
boolean isInitializationThreadAlive();
137181

182+
/**
183+
* Check if the initialization thread is cancelled.
184+
*/
138185
boolean isInitializationThreadCancelled();
139186

187+
/**
188+
* Cancel a running initialization thread.
189+
*/
140190
void cancelInitializationThread();
141191

142192
/**
@@ -147,6 +197,7 @@ default Set<OntologyTerm> getChildren( Collection<OntologyTerm> terms, boolean d
147197
/**
148198
* Index the ontology for performing full-text searches.
149199
*
200+
* @param force if true, perform indexing even if an index already exists
150201
* @see #findIndividuals(String)
151202
* @see #findTerm(String)
152203
* @see #findResources(String)
@@ -159,6 +210,9 @@ default Set<OntologyTerm> getChildren( Collection<OntologyTerm> terms, boolean d
159210
*
160211
* @param is input stream from which the ontology model is loaded
161212
* @param forceIndex initialize the index. Otherwise it will only be initialized if it doesn't exist.
213+
* @deprecated use {@link #initialize(InputStream, boolean)} instead, possibly calling {@link #cancelInitializationThread()}
214+
* beforehand to get any running initialization thread out of the way
162215
*/
216+
@Deprecated
163217
void loadTermsInNameSpace( InputStream is, boolean forceIndex );
164218
}

test/ubic/basecode/ontology/OntologyTermTest.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ protected String getCacheName() {
5252
};
5353
try ( InputStream is = new GZIPInputStream( requireNonNull( OntologyTermTest.class.getResourceAsStream( "/data/uberon.owl.gz" ) ) ) ) {
5454
// FIXME: indexing Uberon is very slow, so it is disabled here; if the tests are breaking, try force-indexing
55-
uberon.loadTermsInNameSpace( is, false );
55+
uberon.initialize( is, false );
5656
}
5757
}
5858

@@ -61,7 +61,7 @@ public void testGetChildren() throws Exception {
6161
// DOID:4159
6262
DiseaseOntologyService s = new DiseaseOntologyService();
6363
InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream( "/data/doid.short.owl.gz" ) ) );
64-
s.loadTermsInNameSpace( is, false );
64+
s.initialize( is, false );
6565

6666
OntologyTerm t = s.getTerm( "http://purl.obolibrary.org/obo/DOID_4159" );
6767

@@ -105,7 +105,7 @@ public void testGetChildrenHasProperPart() throws Exception {
105105
NIFSTDOntologyService s = new NIFSTDOntologyService();
106106
InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream(
107107
"/data/NIF-GrossAnatomy.small.owl.xml.gz" ) ) );
108-
s.loadTermsInNameSpace( is, false );
108+
s.initialize( is, false );
109109

110110
OntologyTerm t = s.getTerm( "http://ontology.neuinfo.org/NIF/BiomaterialEntities/NIF-GrossAnatomy.owl#birnlex_734" );
111111
assertNotNull( t );
@@ -138,7 +138,7 @@ public void testGetParents() throws Exception {
138138
DiseaseOntologyService s = new DiseaseOntologyService();
139139
InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream( "/data/doid.short.owl.gz" ) ) );
140140

141-
s.loadTermsInNameSpace( is, false );
141+
s.initialize( is, false );
142142

143143
/*
144144
* Note that this test uses the 'new style' URIs for DO, but at this writing we actually use purl.org not
@@ -202,7 +202,7 @@ public void testGetParentsHasProperPart() throws Exception {
202202
InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream(
203203
"/data/NIF-GrossAnatomy.small.owl.xml.gz" ) ) );
204204
assertNotNull( is );
205-
s.loadTermsInNameSpace( is, false );
205+
s.initialize( is, false );
206206

207207
// Mammillary princeps fasciculus: part of white matter, hypothalamus, etc.
208208
OntologyTerm t = s
@@ -245,7 +245,7 @@ public void testGetParentsHasProperPart() throws Exception {
245245
public void testRejectNonEnglish() throws Exception {
246246
CellLineOntologyService s = new CellLineOntologyService();
247247
InputStream is = new GZIPInputStream( requireNonNull( this.getClass().getResourceAsStream( "/data/clo_merged.sample.owl.xml.gz" ) ) );
248-
s.loadTermsInNameSpace( is, false );
248+
s.initialize( is, false );
249249

250250
OntologyTerm t = s.getTerm( "http://purl.obolibrary.org/obo/CLO_0000292" );
251251
assertEquals( "immortal larynx-derived cell line cell", t.getLabel() );

0 commit comments

Comments
 (0)