-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Build SNOMED CT index within 1GB memory 🚀
- Loading branch information
Showing
7 changed files
with
242 additions
and
100 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
67 changes: 0 additions & 67 deletions
67
src/main/java/org/snomed/snowstormmicro/loading/ImportService.java
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
58 changes: 58 additions & 0 deletions
58
...ain/java/org/snomed/snowstormmicro/snomedimport/ComponentFactoryWithDescriptionBatch.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
package org.snomed.snowstormmicro.snomedimport; | ||
|
||
import org.ihtsdo.otf.snomedboot.factory.ImpotentComponentFactory; | ||
import org.snomed.snowstormmicro.domain.Concept; | ||
import org.snomed.snowstormmicro.domain.Concepts; | ||
import org.snomed.snowstormmicro.domain.Description; | ||
|
||
import java.util.Collection; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
public class ComponentFactoryWithDescriptionBatch extends ImpotentComponentFactory { | ||
|
||
private final ComponentFactoryWithoutDescriptions baseComponents; | ||
private final Collection<Long> conceptIdBatch; | ||
private final Map<Long, Description> descriptionSynonymMap; | ||
private final Concept dummyConcept; | ||
|
||
public ComponentFactoryWithDescriptionBatch(ComponentFactoryWithoutDescriptions baseComponents, Collection<Long> conceptIdBatch) { | ||
this.baseComponents = baseComponents; | ||
descriptionSynonymMap = new HashMap<>(); | ||
dummyConcept = new Concept(); | ||
this.conceptIdBatch = conceptIdBatch; | ||
} | ||
|
||
@Override | ||
public void newDescriptionState(String id, String effectiveTime, String active, String moduleId, String conceptId, String languageCode, String typeId, String term, String caseSignificanceId) { | ||
if (active.equals("1") && (typeId.equals(Concepts.FSN) || typeId.equals(Concepts.SYNONYM)) && conceptIdBatch.contains(Long.parseLong(conceptId))) { | ||
Description description = new Description(id, languageCode, typeId.equals(Concepts.FSN), term); | ||
baseComponents.getConceptMap().getOrDefault(Long.parseLong(conceptId), dummyConcept).addDescription(description); | ||
if (typeId.equals(Concepts.SYNONYM)) { | ||
descriptionSynonymMap.put(Long.parseLong(id), description); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public void newReferenceSetMemberState(String[] fieldNames, String id, String effectiveTime, String active, String moduleId, String refsetId, String referencedComponentId, String... otherValues) { | ||
if (active.equals("1")) { | ||
if (fieldNames.length == 7 && fieldNames[6].equals("acceptabilityId") && otherValues[0].equals(Concepts.PREFERRED)) { | ||
// Active lang refset member | ||
Description description = descriptionSynonymMap.get(Long.parseLong(referencedComponentId)); | ||
if (description != null) { | ||
description.getPreferredLangRefsets().add(refsetId); | ||
} | ||
} | ||
} | ||
} | ||
|
||
public Map<Long, Concept> getConceptMap() { | ||
return baseComponents.getConceptMap(); | ||
} | ||
|
||
public Collection<Long> getConceptIdBatch() { | ||
return conceptIdBatch; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
124 changes: 124 additions & 0 deletions
124
src/main/java/org/snomed/snowstormmicro/snomedimport/ImportService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
package org.snomed.snowstormmicro.snomedimport; | ||
|
||
import it.unimi.dsi.fastutil.longs.LongOpenHashSet; | ||
import org.ihtsdo.otf.snomedboot.ReleaseImportException; | ||
import org.ihtsdo.otf.snomedboot.ReleaseImporter; | ||
import org.ihtsdo.otf.snomedboot.factory.ComponentFactory; | ||
import org.ihtsdo.otf.snomedboot.factory.ComponentFactoryProvider; | ||
import org.ihtsdo.otf.snomedboot.factory.LoadingProfile; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.snomed.snowstormmicro.domain.Concept; | ||
import org.snomed.snowstormmicro.service.CodeSystemRepository; | ||
import org.snomed.snowstormmicro.util.TimerUtil; | ||
import org.springframework.beans.factory.annotation.Autowired; | ||
import org.springframework.beans.factory.annotation.Value; | ||
import org.springframework.stereotype.Service; | ||
|
||
import java.io.File; | ||
import java.io.FileInputStream; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.util.*; | ||
|
||
import static com.google.common.collect.Iterables.partition; | ||
import static java.lang.String.format; | ||
|
||
@Service | ||
public class ImportService { | ||
|
||
@Autowired | ||
private CodeSystemRepository codeSystemRepository; | ||
|
||
@Value("${index.path}") | ||
private String indexPath; | ||
|
||
private final Logger logger = LoggerFactory.getLogger(getClass()); | ||
|
||
private final LoadingProfile loadingProfile = LoadingProfile.light | ||
.withAllRefsets() | ||
.withInactiveConcepts() | ||
.withIncludedReferenceSetFilenamePattern(".*der2_Refset.*|.*der2_cRefset.*"); | ||
|
||
public void importRelease(Set<String> releaseArchivePaths, String versionUri) throws IOException, ReleaseImportException { | ||
Set<InputStream> archiveInputStream = new HashSet<>(); | ||
for (String filePath : releaseArchivePaths) { | ||
File file = new File(filePath); | ||
if (!file.isFile()) { | ||
throw new IOException(format("File not found %s", file.getAbsolutePath())); | ||
} | ||
archiveInputStream.add(new FileInputStream(file)); | ||
} | ||
|
||
ReleaseImporter releaseImporter = new ReleaseImporter(); | ||
try (IndexCreator indexCreator = new IndexCreator(indexPath, codeSystemRepository)) { | ||
|
||
indexCreator.recreateIndex(); | ||
indexCreator.createCodeSystem(versionUri); | ||
|
||
ComponentFactoryWithoutDescriptions componentFactoryBase = new ComponentFactoryWithoutDescriptions(); | ||
ComponentFactoryProvider componentFactoryProvider = new ComponentFactoryProvider() { | ||
|
||
private boolean firstFactoryProvided; | ||
private Iterator<List<Long>> conceptIdBatchIterator; | ||
private final TimerUtil timer = new TimerUtil("Loading"); | ||
private Integer batchNumber = 0; | ||
|
||
@Override | ||
public ComponentFactory getNextComponentFactory() { | ||
if (!firstFactoryProvided) { | ||
firstFactoryProvided = true; | ||
return componentFactoryBase; | ||
} | ||
|
||
if (conceptIdBatchIterator == null) { | ||
timer.checkpoint("Loaded concepts"); | ||
Set<Long> conceptIds = componentFactoryBase.getConceptMap().keySet(); | ||
conceptIdBatchIterator = partition(conceptIds, 50_000).iterator(); | ||
} | ||
if (conceptIdBatchIterator.hasNext()) { | ||
batchNumber++; | ||
componentFactoryBase.clearDescriptions(); | ||
return new ComponentFactoryWithDescriptionBatch(componentFactoryBase, new LongOpenHashSet(conceptIdBatchIterator.next())) { | ||
@Override | ||
public LoadingProfile getLoadingProfile() { | ||
return LoadingProfile.light | ||
.withoutConcepts() | ||
.withoutTextDefinitions() | ||
.withoutRelationships() | ||
.withoutIdentifiers() | ||
.withIncludedReferenceSetFilenamePattern(".?der2_cRefset_Language.*"); | ||
} | ||
|
||
@Override | ||
public void loadingComponentsCompleted() throws ReleaseImportException { | ||
timer.checkpoint("Loaded description batch " + batchNumber); | ||
try { | ||
Collection<Long> conceptIdBatch = getConceptIdBatch(); | ||
List<Concept> conceptBatch = getConceptMap().values().stream() | ||
.filter(concept -> conceptIdBatch.contains(Long.parseLong(concept.getConceptId()))) | ||
.toList(); | ||
indexCreator.createConceptBatch(conceptBatch); | ||
timer.checkpoint("Written to index, batch " + batchNumber); | ||
} catch (IOException e) { | ||
throw new ReleaseImportException("Failed to write concept batch to index.", e); | ||
} | ||
} | ||
}; | ||
} else { | ||
timer.finish(); | ||
return null; | ||
} | ||
} | ||
}; | ||
|
||
logger.info("Reading release files"); | ||
releaseImporter.loadEffectiveSnapshotReleaseFileStreams(archiveInputStream, loadingProfile, componentFactoryProvider, false); | ||
|
||
// logger.info("Writing lucene index"); | ||
// try (IndexCreator indexCreator = new IndexCreator(directory, codeSystemRepository)) { | ||
// indexCreator.createIndex(componentFactory, versionUri); | ||
// } | ||
} | ||
} | ||
} |
Oops, something went wrong.