Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rematch optimising #308

Closed
wants to merge 10 commits into from
Prev Previous commit
Next Next commit
#288 manage rematchLogs
  • Loading branch information
qifeng-bai committed Jun 20, 2024
commit bf04d1f4a3a12683aac7fbc7005ac09aa3a02874
Original file line number Diff line number Diff line change
Expand Up @@ -517,15 +517,68 @@ class SpeciesListController {
file?.getInputStream().withReader { r -> helperService.getSeparator(r.readLine()) }
}

/**
 * Controller action that starts a rematch of one species list or of all of them.
 *
 * params.id (optional):
 *   - starts with "dr": used unchanged as the data resource uid of the list to rematch
 *   - anything else: treated as the sequence id of a SpeciesList and resolved to that
 *     list's dataResourceUid before rematching
 *   - absent: every species list is rematched via rematchAll()
 *
 * params.reset (optional) is not read here; the delegated actions read it and treat
 * a missing value as false.
 */
def rematch() {
    // No id at all means "rematch everything".
    if (!params.id) {
        return rematchAll()
    }

    // Resolve a sequence id to its data resource uid; a "dr..." id is already one.
    def drid = params.id.startsWith("dr") ? params.id : SpeciesList.get(params.id)?.dataResourceUid
    rematchList(drid)
}

/**
 * Rematches every species list and records the progress of the run in a RematchLog.
 *
 * Request parameters read:
 *   order - "asc" (case-insensitive) sorts the lists by itemsCount ascending;
 *           any other value, or none, sorts descending
 *   reset - converted to a boolean and passed to helperService.rematchList;
 *           false when absent
 *
 * The run stops at the first list whose rematch result has a non-zero status; the log is
 * then marked 'Failed'. Otherwise it is marked 'Completed'. In both cases the elapsed time
 * is appended to the log and written to the application log.
 */
def rematchAll() {
    def order = params.order?.equalsIgnoreCase("asc") ? 'asc' : 'desc'
    def speciesLists = SpeciesList.list(sort: 'itemsCount', order: order)
    // Parsed once: the request parameter cannot change while the loop runs.
    boolean reset = params.reset?.toBoolean() == true

    def total = speciesLists.size()
    def startProcessing = new Date()
    def msg = [status: 0, message: "Rematch all species lists [${total}]"]
    // processing and latestProcessingTime are not declared nullable on RematchLog, so they
    // get initial values here; otherwise a run over zero lists could never be saved.
    def rematchLog = new RematchLog(
            byWhom: authService?.userDetails()?.email ?: "Developer",
            startTime: startProcessing,
            latestProcessingTime: startProcessing,
            processing: "0/${total}",
            status: 'Running',
            logs: [msg.message])

    for (int i = 0; i < total; i++) {
        rematchLog.processing = "${i + 1}/${total}"

        msg = helperService.rematchList(speciesLists[i], reset)
        rematchLog.latestProcessingTime = new Date()
        rematchLog.appendLog("${msg['message']}")
        if (msg['status'] != 0) {
            // Persist the failure straight away, then stop processing further lists.
            rematchLog.status = 'Failed'
            rematchLog.save()
            break
        }
        rematchLog.save()
    }

    rematchLog.status = (msg.status == 0) ? 'Completed' : 'Failed'
    // Report the number of lists; speciesLists.itemsCount would instead interpolate a
    // list holding the item count of every species list.
    def finalMsg = "Total time to complete ${total} lists : ${TimeCategory.minus(new Date(), startProcessing)}"
    rematchLog.endTime = new Date()
    rematchLog.appendLog(finalMsg)
    rematchLog.save()

    log.info(finalMsg)
}

/**
Expand All @@ -541,64 +594,11 @@ class SpeciesListController {
response.sendError(401, "Not authorised.")
return
}
boolean reset = params.reset?.toBoolean() == true
helperService.rematchList(speciesList, reset)
render([status: 0, message: "Rematching the species list ${id} is completed" ] as JSON)
def msg = helperService.rematchList(speciesList, params.reset?.toBoolean() == true)
render(msg as JSON)
} else {
response.sendError(200, "No species list found for data resource id: ${id}")
}
}


/**
* todo: decouple the rematching all lists with rematching a single list
* todo: decouple resume rematching process
*
* Rematch the species list of the given data resource id (drid), or the sequence id (id)
*
* param id: data resource id of a species list, starting with 'dr'
* if the id is NOT started with "dr", it is assumed it is a sequence id of a species list
* the params.id will be recalculated to the data resource id of this species list
*
* param beforeId: the id of the last matched species. It only works when the dr/sequence id of a species list is not given,
*
* If the sequence id and the data resource id is not provided, it will rematch all species lists
*
* If the sequence id and the data resource id is provided, and the before id is given,
* the rematch ( Species sorted in descending order by sequence ID) will start from the next species BEFORE this id
*
*/
def rematch() {
long beforeId = 0
if (!params.id) {
String msg = "Rematching for ALL"
if (params.beforeId) {
try {
beforeId = Long.parseLong(params.beforeId)
if (beforeId > 0) {
msg = "Continue to rematch the rest of species before id: " + beforeId
}
} catch (Exception e) {
}
}
log.warn(msg)
} else if ( !params.id.startsWith("dr")) {
//Get speciesList Id from species id
params.id = SpeciesList.get(params.id)?.dataResourceUid
log.info("Rematching for data resource:" + params.id)
render([status: 0, message: "No species list found for data resource id: ${id}" ] as JSON)
}

Integer totalRows, offset = 0;
String id = params.id
def splist = SpeciesList.findByDataResourceUid(params.id)
if (splist && !isCurrentUserEditorForList(splist)) {
response.sendError(401, "Not authorised.")
return
}

helperService.rematch(id,beforeId)

render(text: "${message(code: 'admin.lists.page.button.rematch.messages', default: 'Rematch complete')}")
}

private parseDataFromCSV(CSVReader csvReader, String separator) {
Expand Down
11 changes: 10 additions & 1 deletion grails-app/controllers/au/org/ala/specieslist/UrlMappings.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,16 @@ class UrlMappings {
action = [GET: 'getSpeciesListItemKvp']
}

"/ws/rematchStatus" (controller: "webService", action: "rematchStatus")
"/ws/rematchLogs" (controller: "webService", action: "rematchLogs")

"/ws/rematchLog/$id"(controller: 'webService') {
action = [GET: 'rematchLog', DELETE: 'deleteRematchLog']
}

// "/ws/deleteRematchLog/$id" (controller: "webService", action: "deleteRematchLog")
//
// "/ws/downloadRematchLog/$id" (controller: "webService", action: "downloadRematchLog")

"/"(controller: 'public' ,action: 'index')
"500"(view:'/error')
"404"(view:'/404')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import io.swagger.v3.oas.annotations.parameters.RequestBody
import io.swagger.v3.oas.annotations.responses.ApiResponse
import io.swagger.v3.oas.annotations.security.SecurityRequirement
import org.apache.http.HttpStatus
import grails.web.mime.MimeType
hamzajaved-csiro marked this conversation as resolved.
Show resolved Hide resolved

import static io.swagger.v3.oas.annotations.enums.ParameterIn.HEADER
import static io.swagger.v3.oas.annotations.enums.ParameterIn.PATH
Expand Down Expand Up @@ -1814,21 +1815,32 @@ class WebServiceController {
}


/**
 * Rematches existing SpeciesListItem records.
 *
 * Calls helperService.rematchSpeciesInList with the fixed user name "developer" and the
 * "matchAll" request parameter, then renders the result's toMap() output as JSON.
 */
def rematchSpecies() {
    def outcome = helperService.rematchSpeciesInList("developer", params.matchAll)
    render(outcome.toMap() as JSON)
}

/**
 * Renders the result of helperService.queryRematchingProcess() as JSON.
 * Reached through the /ws/rematchLogs URL mapping.
 */
def rematchLogs() {
    render(helperService.queryRematchingProcess() as JSON)
}

/**
 * Earlier name of rematchLogs(), kept so that the /ws/rematchStatus URL mapping
 * still resolves to an action.
 */
def rematchStatus() {
    rematchLogs()
}

/**
 * Deletes the rematch log with the given id through helperService.deleteRematchLog.
 *
 * Responds with:
 *   400 - the id is missing or is not a valid long
 *   200 - the delete call returned
 *
 * @param id identifier of the rematch log, taken from the request
 */
def deleteRematchLog(String id) {
    if (!id) {
        // A missing id is a client error; reporting it as 200 hid the failure from callers.
        render(text: "ID is required!", status: 400)
        return
    }
    if (!id.isLong()) {
        // Reject non-numeric ids here so toLong() cannot throw and surface as a 500.
        // The id is deliberately not echoed back in the response body.
        render(text: "ID must be a number!", status: 400)
        return
    }

    helperService.deleteRematchLog(id.toLong())
    render(status: 200)
}

/**
 * Renders a single rematch log as JSON.
 *
 * Looks the log up with RematchLog.get(id); responds with status 404 and the text
 * "Log not found" when there is no such log, otherwise renders its toMap() output.
 *
 * @param id identifier of the rematch log
 */
def rematchLog(Long id) {
    def entry = RematchLog.get(id)
    if (entry) {
        render(entry.toMap() as JSON)
    } else {
        render(status: 404, text: "Log not found")
    }
}


def handleException(final Exception e ) {
log.error(e.message)
return {error: e.message}
Expand Down
61 changes: 44 additions & 17 deletions grails-app/domain/au/org/ala/specieslist/RematchLog.groovy
Original file line number Diff line number Diff line change
@@ -1,37 +1,64 @@
package au.org.ala.specieslist

/**
 * Record of one rematch run: who started it, when, how far it has got and what was logged.
 *
 * Log lines are kept in the transient list "logs" while the object is in memory and are
 * stored in the single text column "history", joined with '|'. A log line that itself
 * contains '|' will therefore come back as several lines after a reload.
 */
class RematchLog {
    String byWhom
    Date startTime
    Date endTime
    Date latestProcessingTime
    String status
    String processing // e.g. "2/3000" the 2nd list of 3000 lists
    String history    // persisted form of logs: the lines joined with '|'

    List logs = []

    // logs is rebuilt from history on load and is never mapped to a column itself.
    static transients = ['logs']

    static constraints = {
        endTime(nullable: true)
        status(nullable: true)
        history(nullable: true)
    }

    static mapping = {
        history type: 'text'
    }

    /** Copies the in-memory log lines into the persisted history column. */
    private void syncHistory() {
        history = logs.join('|')
    }

    def beforeInsert() {
        syncHistory()
    }

    def beforeUpdate() {
        syncHistory()
    }

    def afterLoad() {
        if (history) {
            // toList() yields a growable list, so appendLog keeps working after a reload.
            logs = history.split(/\|/).toList()
        }
    }

    /**
     * Appends one line to the log.
     *
     * history is refreshed as well: logs is transient, so changing only the list would
     * not mark the object as modified and a later save() could skip the update.
     */
    void appendLog(String log) {
        logs << log
        syncHistory()
    }

    /**
     * Returns the fields exposed to clients as a map.
     * NOTE(review): the key "endtime" is lower-case, unlike the endTime property. It is
     * kept as-is because the consumers of this map are not visible here.
     */
    def toMap() {
        return [
                id                  : id,
                byWhom              : byWhom,
                startTime           : startTime,
                latestProcessingTime: latestProcessingTime,
                endtime             : endTime,
                status              : status,
                processing          : processing,
                logs                : logs,
        ]
    }
}
Loading