Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HAPI FHIR Dependency Bumps #5576

Merged
merged 28 commits into from
Jan 14, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
1691b50
HAPI FHIR Dependency Bumps
jamesagnew Jan 2, 2024
6d34af6
Hibernate search 6
jamesagnew Jan 2, 2024
284f7db
Dep bumps
jamesagnew Jan 2, 2024
76b9de5
Bump mockito
jamesagnew Jan 3, 2024
b50d736
Fix memory leak
jamesagnew Jan 3, 2024
fc9c32b
Test cleanup
jamesagnew Jan 3, 2024
8f82c60
Test fix
jamesagnew Jan 3, 2024
c57faa2
Test fixes
jamesagnew Jan 4, 2024
6eb812f
Test fix
jamesagnew Jan 4, 2024
ab19b08
Test fix
jamesagnew Jan 4, 2024
20daf25
Test fix
jamesagnew Jan 4, 2024
ac8f83c
Test fix
jamesagnew Jan 4, 2024
9cbb6e7
Prepare to try and rework max clauses
jamesagnew Jan 5, 2024
00278d1
Refactor large VS expansion
jamesagnew Jan 5, 2024
80e9c57
Spotless
jamesagnew Jan 5, 2024
47c9bf8
Test fix
jamesagnew Jan 5, 2024
0f22b5a
Merge branch 'ja_20240102_dep_bumps' of github.com:hapifhir/hapi-fhir…
jamesagnew Jan 5, 2024
cb81e27
Version bump
jamesagnew Jan 5, 2024
dad66f7
Merge branch 'master' into ja_20240102_dep_bumps
jamesagnew Jan 5, 2024
d79b6fa
Test fixes
jamesagnew Jan 5, 2024
5777652
Bump maven container
jamesagnew Jan 7, 2024
b80f122
Build fix
jamesagnew Jan 7, 2024
fda79e3
Add license headers
jamesagnew Jan 8, 2024
d7b900f
Merge branch 'master' into ja_20240102_dep_bumps
jamesagnew Jan 9, 2024
296e49a
Merge branch 'ja_20240102_dep_bumps' of github.com:jamesagnew/hapi-fh…
jamesagnew Jan 9, 2024
28e7ad6
Merge branch 'master' into ja_20240102_dep_bumps
jamesagnew Jan 12, 2024
f328f91
Version bump
jamesagnew Jan 12, 2024
7046e0c
License headers
jamesagnew Jan 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refactor large VS expansion
  • Loading branch information
jamesagnew committed Jan 5, 2024
commit 00278d10610338e853273a29e4f5c53780616fd2
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import static ca.uhn.fhir.jpa.entity.TermConceptPropertyBinder.CONCEPT_PROPERTY_PREFIX_NAME;
Expand Down Expand Up @@ -1145,68 +1146,70 @@ private void expandValueSetHandleIncludeOrExcludeUsingDatabase(
includeOrExcludeVersion);

int accumulatedBatchesSoFar = 0;
try (SearchScroll<EntityReference> scroll = searchProps.getSearchScroll()) {
for (var next : searchProps.getSearchScroll()) {
try (SearchScroll<EntityReference> scroll = next.get()) {

ourLog.debug(
ourLog.debug(
"Beginning batch expansion for {} with max results per batch: {}",
(theAdd ? "inclusion" : "exclusion"),
chunkSize);
for (SearchScrollResult<EntityReference> chunk = scroll.next(); chunk.hasHits(); chunk = scroll.next()) {
int countForBatch = 0;
for (SearchScrollResult<EntityReference> chunk = scroll.next(); chunk.hasHits(); chunk = scroll.next()) {
int countForBatch = 0;

List<Long> pids = chunk.hits().stream().map(t -> (Long) t.id()).collect(Collectors.toList());
List<Long> pids = chunk.hits().stream().map(t -> (Long) t.id()).collect(Collectors.toList());

List<TermConcept> termConcepts = myTermConceptDao.fetchConceptsAndDesignationsByPid(pids);
List<TermConcept> termConcepts = myTermConceptDao.fetchConceptsAndDesignationsByPid(pids);

// If the include section had multiple codes, return the codes in the same order
termConcepts = sortTermConcepts(searchProps, termConcepts);
// If the include section had multiple codes, return the codes in the same order
termConcepts = sortTermConcepts(searchProps, termConcepts);

// int firstResult = theQueryIndex * maxResultsPerBatch;// TODO GGG HS we lose the ability to check the
// index of the first result, so just best-guessing it here.
Optional<PredicateFinalStep> expansionStepOpt = searchProps.getExpansionStepOpt();
int delta = 0;
for (TermConcept concept : termConcepts) {
count++;
countForBatch++;
if (theAdd && expansionStepOpt.isPresent()) {
ValueSet.ConceptReferenceComponent theIncludeConcept =
// int firstResult = theQueryIndex * maxResultsPerBatch;
// TODO GGG HS we lose the ability to check the
// index of the first result, so just best-guessing it here.
int delta = 0;
for (TermConcept concept : termConcepts) {
count++;
countForBatch++;
if (theAdd && searchProps.hasIncludeOrExcludeCodes()) {
ValueSet.ConceptReferenceComponent theIncludeConcept =
getMatchedConceptIncludedInValueSet(theIncludeOrExclude, concept);
if (theIncludeConcept != null && isNotBlank(theIncludeConcept.getDisplay())) {
concept.setDisplay(theIncludeConcept.getDisplay());
if (theIncludeConcept != null && isNotBlank(theIncludeConcept.getDisplay())) {
concept.setDisplay(theIncludeConcept.getDisplay());
}
}
}
boolean added = addCodeIfNotAlreadyAdded(
boolean added = addCodeIfNotAlreadyAdded(
theExpansionOptions,
theValueSetCodeAccumulator,
theAddedCodes,
concept,
theAdd,
includeOrExcludeVersion);
if (added) {
delta++;
if (added) {
delta++;
}
}
}

ourLog.debug(
ourLog.debug(
"Batch expansion scroll for {} with offset {} produced {} results in {}ms",
(theAdd ? "inclusion" : "exclusion"),
accumulatedBatchesSoFar,
chunk.hits().size(),
chunk.took().toMillis());

theValueSetCodeAccumulator.incrementOrDecrementTotalConcepts(theAdd, delta);
accumulatedBatchesSoFar += countForBatch;
theValueSetCodeAccumulator.incrementOrDecrementTotalConcepts(theAdd, delta);
accumulatedBatchesSoFar += countForBatch;

// keep session bounded
myEntityManager.flush();
myEntityManager.clear();
}
// keep session bounded
myEntityManager.flush();
myEntityManager.clear();
}

ourLog.debug(
ourLog.debug(
"Expansion for {} produced {} results in {}ms",
(theAdd ? "inclusion" : "exclusion"),
count,
fullOperationSw.getMillis());
}
}
}

Expand Down Expand Up @@ -1256,54 +1259,74 @@ private SearchProperties buildSearchScroll(
SearchPredicateFactory predicate =
searchSession.scope(TermConcept.class).predicate();

// Build the top-level expansion on filters.
PredicateFinalStep step = predicate.bool(b -> {
b.must(predicate.match().field("myCodeSystemVersionPid").matching(theTermCodeSystemVersion.getPid()));

if (theExpansionFilter.hasCode()) {
b.must(predicate.match().field("myCode").matching(theExpansionFilter.getCode()));
}

String codeSystemUrlAndVersion = buildCodeSystemUrlAndVersion(theSystem, theIncludeOrExcludeVersion);
for (ValueSet.ConceptSetFilterComponent nextFilter : theIncludeOrExclude.getFilter()) {
handleFilter(codeSystemUrlAndVersion, predicate, b, nextFilter);
}
for (ValueSet.ConceptSetFilterComponent nextFilter : theExpansionFilter.getFilters()) {
handleFilter(codeSystemUrlAndVersion, predicate, b, nextFilter);
}
});

SearchProperties returnProps = new SearchProperties();

List<String> codes = theIncludeOrExclude.getConcept().stream()
List<String> allCodes = theIncludeOrExclude.getConcept().stream()
.filter(Objects::nonNull)
.map(ValueSet.ConceptReferenceComponent::getCode)
.filter(StringUtils::isNotBlank)
.collect(Collectors.toList());
returnProps.setIncludeOrExcludeCodes(codes);

Optional<PredicateFinalStep> expansionStepOpt = buildExpansionPredicate(codes, predicate);
final PredicateFinalStep finishedQuery =
expansionStepOpt.isPresent() ? predicate.bool().must(step).must(expansionStepOpt.get()) : step;
returnProps.setExpansionStepOpt(expansionStepOpt);
SearchProperties returnProps = new SearchProperties();
returnProps.setIncludeOrExcludeCodes(allCodes);

/*
* DM 2019-08-21 - Processing slows after any ValueSets with many codes explicitly identified. This might
* be due to the dark arts that is memory management. Will monitor but not do anything about this right now.
* Lucene/ES can't typically handle more than 1024 clauses per search, so if
* we have more than that number (e.g. because of a ValueSet that explicitly
* includes thousands of codes), we break this up into multiple searches.
*/
List<List<String>> partitionedCodes = ListUtils.partition(allCodes, IndexSearcher.getMaxClauseCount() - 10);
if (partitionedCodes.isEmpty()) {
partitionedCodes = List.of(List.of());
}

// BooleanQuery.setMaxClauseCount(SearchBuilder.getMaximumPageSize());
// TODO GGG HS looks like we can't set max clause count, but it can be set server side.
// BooleanQuery.setMaxClauseCount(10000);
// JM 22-02-15 - Hopefully increasing maxClauseCount should not be needed anymore
for (List<String> nextCodePartition : partitionedCodes) {
Supplier<SearchScroll<EntityReference>> nextScroll = () -> {
// Build the top-level expansion on filters.
PredicateFinalStep step = predicate.bool(b -> {
b.must(predicate.match().field("myCodeSystemVersionPid").matching(theTermCodeSystemVersion.getPid()));

SearchQuery<EntityReference> termConceptsQuery = searchSession
.search(TermConcept.class)
.selectEntityReference()
.where(f -> finishedQuery)
.toQuery();
if (theExpansionFilter.hasCode()) {
b.must(predicate.match().field("myCode").matching(theExpansionFilter.getCode()));
}

String codeSystemUrlAndVersion = buildCodeSystemUrlAndVersion(theSystem, theIncludeOrExcludeVersion);
for (ValueSet.ConceptSetFilterComponent nextFilter : theIncludeOrExclude.getFilter()) {
handleFilter(codeSystemUrlAndVersion, predicate, b, nextFilter);
}
for (ValueSet.ConceptSetFilterComponent nextFilter : theExpansionFilter.getFilters()) {
handleFilter(codeSystemUrlAndVersion, predicate, b, nextFilter);
}
});

// Add a selector on any explicitly enumerated codes in the VS component
final PredicateFinalStep finishedQuery;
if (nextCodePartition.isEmpty()) {
finishedQuery = step;
} else {
PredicateFinalStep expansionStep = buildExpansionPredicate(nextCodePartition, predicate);
finishedQuery = predicate.bool().must(step).must(expansionStep);
}

/*
* DM 2019-08-21 - Processing slows after any ValueSets with many codes explicitly identified. This might
* be due to the dark arts that is memory management. Will monitor but not do anything about this right now.
*/

// BooleanQuery.setMaxClauseCount(SearchBuilder.getMaximumPageSize());
// TODO GGG HS looks like we can't set max clause count, but it can be set server side.
// BooleanQuery.setMaxClauseCount(10000);
// JM 22-02-15 - Hopefully increasing maxClauseCount should not be needed anymore

SearchQuery<EntityReference> termConceptsQuery = searchSession
.search(TermConcept.class)
.selectEntityReference()
.where(f -> finishedQuery)
.toQuery();

return termConceptsQuery.scroll(theScrollChunkSize);
};

returnProps.addSearchScroll(nextScroll);
}

returnProps.setSearchScroll(termConceptsQuery.scroll(theScrollChunkSize));
return returnProps;
}

Expand All @@ -1318,29 +1341,9 @@ private ValueSet.ConceptReferenceComponent getMatchedConceptIncludedInValueSet(
/**
* Helper method which builds a predicate for the expansion
*/
private Optional<PredicateFinalStep> buildExpansionPredicate(
private PredicateFinalStep buildExpansionPredicate(
List<String> theCodes, SearchPredicateFactory thePredicate) {
if (CollectionUtils.isEmpty(theCodes)) {
return Optional.empty();
}

if (theCodes.size() < IndexSearcher.getMaxClauseCount()) {
return Optional.of(thePredicate.simpleQueryString().field("myCode").matching(String.join(" | ", theCodes)));
}

// Number of codes is larger than maxClauseCount, so we split the query in several clauses

// partition codes in lists of BooleanQuery.getMaxClauseCount() size
List<List<String>> listOfLists = ListUtils.partition(theCodes, IndexSearcher.getMaxClauseCount() - 1);

PredicateFinalStep step = thePredicate.bool().with(b -> {
b.minimumShouldMatchNumber(1);
for (List<String> codeList : listOfLists) {
b.should(p -> p.simpleQueryString().field("myCode").matching(String.join(" | ", codeList)));
}
});

return Optional.of(step);
return thePredicate.simpleQueryString().field("myCode").matching(String.join(" | ", theCodes));
}

private String buildCodeSystemUrlAndVersion(String theSystem, String theIncludeOrExcludeVersion) {
Expand Down Expand Up @@ -3151,24 +3154,15 @@ public void execute(JobExecutionContext theContext) {
* Properties returned from method buildSearchScroll
*/
private static final class SearchProperties {
private SearchScroll<EntityReference> mySearchScroll;
private Optional<PredicateFinalStep> myExpansionStepOpt;
private List<Supplier<SearchScroll<EntityReference>>> mySearchScroll = new ArrayList<>();
private List<String> myIncludeOrExcludeCodes;

public SearchScroll<EntityReference> getSearchScroll() {
public List<Supplier<SearchScroll<EntityReference>>> getSearchScroll() {
return mySearchScroll;
}

public void setSearchScroll(SearchScroll<EntityReference> theSearchScroll) {
mySearchScroll = theSearchScroll;
}

public Optional<PredicateFinalStep> getExpansionStepOpt() {
return myExpansionStepOpt;
}

public void setExpansionStepOpt(Optional<PredicateFinalStep> theExpansionStepOpt) {
myExpansionStepOpt = theExpansionStepOpt;
public void addSearchScroll(Supplier<SearchScroll<EntityReference>> theSearchScrollSupplier) {
mySearchScroll.add(theSearchScrollSupplier);
}

public List<String> getIncludeOrExcludeCodes() {
Expand All @@ -3178,6 +3172,10 @@ public List<String> getIncludeOrExcludeCodes() {
public void setIncludeOrExcludeCodes(List<String> theIncludeOrExcludeCodes) {
myIncludeOrExcludeCodes = theIncludeOrExcludeCodes;
}

public boolean hasIncludeOrExcludeCodes() {
return !myIncludeOrExcludeCodes.isEmpty();
}
}

static boolean isValueSetDisplayLanguageMatch(ValueSetExpansionOptions theExpansionOptions, String theStoredLang) {
Expand Down
Loading
Loading