Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rematch optimising #308

Closed
wants to merge 10 commits into from
Prev Previous commit
#288 fixed the last record log save issue
  • Loading branch information
qifeng-bai committed Jun 24, 2024
commit 0c005919d7243a6980ec22d1ed12828a84e7a2f1
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,14 @@ grails run-app
`id` bigint NOT NULL AUTO_INCREMENT,
`version` bigint NOT NULL,
`end_time` datetime(6) DEFAULT NULL,
`processing` varchar(255) DEFAULT NULL,
`processing` varchar(255) NOT NULL,
`start_time` datetime(6) NOT NULL,
`by_whom` varchar(255) NOT NULL,
`status` varchar(255) DEFAULT NULL,
`latest_processing_time` datetime(6) DEFAULT NULL,
`latest_processing_time` datetime(6) NOT NULL,
`history` longtext,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4;

```

After the database change, set 'dataSource.dbCreate=none' in the local config file to speed up application startup.
Original file line number Diff line number Diff line change
Expand Up @@ -560,15 +560,12 @@ class SpeciesListController {
def speciesList = speciesLists[i]

msg = helperService.rematchList(speciesList,params.reset?.toBoolean() == true)
if (msg['status'] == 0) {
rematchLog.latestProcessingTime = new Date()
rematchLog.appendLog("${msg['message']}")
rematchLog.save()
} else {
rematchLog.latestProcessingTime = new Date()
rematchLog.status = 'Failed'
rematchLog.logs.add("${msg['message']}")
rematchLog.save()

rematchLog.latestProcessingTime = new Date()
rematchLog.appendLog("${msg['message']}")
rematchLog.save(failOnError: true)

if (msg['status'] != 0) {
break
}
}
Expand All @@ -577,13 +574,18 @@ class SpeciesListController {
} else {
rematchLog.status = "Failed"
}
def finalMsg = "Total time to complete ${speciesLists.itemsCount} lists : ${TimeCategory.minus(new Date(), startProcessing)}"
def finalMsg = "Total time to complete ${total} lists : ${TimeCategory.minus(new Date(), startProcessing)}"
rematchLog.endTime = new Date()
rematchLog.appendLog(finalMsg)
rematchLog.save()

log.info("Total time to complete ${rematchLog.processing} lists : ${TimeCategory.minus(rematchLog.endTime, startProcessing)}")
/**
* For unknown reasons, the live session is closed after the last list is completed,
* and the log for the last list is not saved. We have to use a new transaction to update the log.
*/
RematchLog.withTransaction {
rematchLog.save(failOnError: true, flush: true )
}

log.info(finalMsg)
render(rematchLog.toMap() as JSON)
}

Expand Down
146 changes: 0 additions & 146 deletions grails-app/services/au/org/ala/specieslist/HelperService.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -964,9 +964,7 @@ class HelperService {
session.close()
}
}

message

}

/**
Expand Down Expand Up @@ -1102,150 +1100,6 @@ class HelperService {
log.debug("Saving to DB took ${ TimeCategory.minus(new Date(), updatingDB)}")
}

/**
 * Rematches species list items in batches of BATCH_SIZE and records the progress in a RematchLog.
 *
 * @param id data resource uid (dr_id) of a species list; when given, only the items of that list are rematched
 *
 * @param beforeId used only when id is not given: rematch the items whose id is less than or equal to beforeId
 *
 * if neither id nor beforeId is given, every item is rematched and the MatchedSpecies table is cleared first
 * @return the RematchLog describing this run; its status is COMPLETED on success and FAILED when an exception was caught
 */
@NotTransactional
def rematch(id, beforeId) {
    Integer totalRows, offset = 0;
    // Save to DB if no id is given.
    boolean saveToDB = id ? false:true
    def rematchLog = new RematchLog(byWhom: authService?.userDetails()?.email ?: "Developer", startTime: new Date(), recentProcessTime: new Date(), status: Status.RUNNING);
    rematchLog.saveToDB = saveToDB

    // Work out how many items are going to be processed
    if (id) {
        totalRows = SpeciesListItem.countByDataResourceUid(id)
    } else {
        if (beforeId && beforeId > 0) {
            def c = SpeciesListItem.createCriteria()
            // Only the total count is needed here, so fetch a single row
            totalRows = c.list(max:1, offset: 0) {
                order "id", "desc"
                le("id", beforeId)
            }.totalCount
        } else {
            totalRows = SpeciesListItem.count();
            //Total rematch - Clean matchedSpecies table
            MatchedSpecies.withTransaction {
                MatchedSpecies.executeUpdate("delete from MatchedSpecies")
                SpeciesListItem.executeUpdate("update SpeciesListItem set matched_species_id = null")
            }
        }
    }
    RematchLog.withTransaction {
        rematchLog.total = totalRows
        rematchLog.remaining = totalRows
        rematchLog.persist()
    }

    try {
        while (true) {
            List items
            List guidBatch = [], sliBatch = []
            // Items of the current page, grouped by the species list they belong to
            Map<SpeciesList, List<SpeciesListItem>> batches = new HashMap<>()
            List<SpeciesListItem> searchBatch = new ArrayList<SpeciesListItem>()

            if (id) {
                items = SpeciesListItem.findAllByDataResourceUid(id, [max: BATCH_SIZE, offset: offset])
            } else {
                //items = SpeciesListItem.list(max: BATCH_SIZE, offset: offset, sort: "id", order: "desc")
                if (beforeId) {
                    def c = SpeciesListItem.createCriteria()
                    items = c.list(max:BATCH_SIZE, offset: offset) {
                        order "id", "desc"
                        le("id", beforeId)
                    }
                } else {
                    items = SpeciesListItem.list(max: BATCH_SIZE, offset: offset, sort: "id", order: "desc")
                }
                //Update
                totalRows = items.totalCount
                rematchLog.total = items.totalCount
            }

            // No more pages: the iteration is finished
            if ( items.size() <=0 ){
                break;
            }

            def start = new Date()
            SpeciesListItem.withTransaction {
                items.eachWithIndex { SpeciesListItem item, Integer i ->
                    SpeciesList speciesList = item.mylist
                    List<SpeciesListItem> batch = batches.get(speciesList)
                    if (batch == null) {
                        batch = new ArrayList<>();
                        batches.put(speciesList, batch)
                    }
                    // Persist the cleaned name when removeHtmlTag changed it
                    String rawName = removeHtmlTag(item.rawScientificName)
                    if (rawName != item.rawScientificName) {
                        item.rawScientificName = rawName
                        if (!item.save(flush: true)) {
                            // 'errors' is a GORM property, not a method; calling errors() would throw here
                            log.error("Error saving item with updated rawScientificName: " + item.errors)
                        }
                    }
                    log.debug i + ". Rematching: " + rawName + "/" + speciesList.dataResourceUid
                    if (rawName && rawName.length() > 0) {
                        batch.add(item)
                    } else {
                        // Nothing to match against: clear the guid
                        item.guid = null
                        if (!item.save(flush: true)) {
                            log.error "Error saving item (" + rawName + "): " + item.errors
                        }
                    }
                }
                batches.each { list, batch ->
                    matchAll(batch, list)
                    // Collect the items that ended up with a guid for the common-name update
                    batch.each { SpeciesListItem item ->
                        if (item.guid) {
                            guidBatch.push(item.guid)
                            sliBatch.push(item)
                        }
                    }
                }

                if (!guidBatch.isEmpty()) {
                    getCommonNamesAndUpdateRecords(sliBatch, guidBatch)
                }
            } // End transaction

            offset += BATCH_SIZE;
            if (totalRows < offset) {
                log.info("Rematched the last ${totalRows} species, time elapsed:" + TimeCategory.minus(new Date(), start))
                rematchLog.remaining = 0
            } else {
                log.info("Rematched ${offset} of ${totalRows} completed, time elapsed:" + TimeCategory.minus(new Date(), start))
                // NOTE(review): this compares a TimeDuration with the bare number 30; the intended
                // unit and whether the comparison behaves as intended are not evident here - confirm.
                if (TimeCategory.minus(new Date(), start) > 30) {
                    break
                }
                rematchLog.remaining = totalRows - offset
            }
            // Record the progress of this page in its own transaction
            RematchLog.withTransaction {
                rematchLog.logs = "ID: ${items.last().id} was rematched!"
                rematchLog.currentRecordId = items.last().id
                rematchLog.recentProcessTime = new Date()
                rematchLog.persist()
            }
        }// end full iteration
        rematchLog.status = Status.COMPLETED
        rematchLog.endTime = new Date()
        rematchLog.logs = "Rematch completed"
        log.info("Rematching is completed")
    } catch (Exception e) {
        // Pass the exception itself so the stack trace is not lost
        log.error("Error in rematching:" + e.message, e)
        rematchLog.status = Status.FAILED
        rematchLog.endTime = new Date()
    } finally {
        // Persist the final state of the log whether the run succeeded or failed
        RematchLog.withTransaction{
            rematchLog.persist()
        }
    } // end try
    return rematchLog
}

String removeHtmlTag(String value) {
Pattern pattern = Pattern.compile("<a[^>]*>(.*?)</a>")
Expand Down
1 change: 1 addition & 0 deletions grails-app/views/admin/specieslists.gsp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
type: 'DELETE',
success: function(result) {
jQuery.fancybox.close();
window.location.reload();
}
});
}
Expand Down