Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rematch optimising #308

Closed
wants to merge 10 commits into from
Next Next commit
#288 using a live session to process a large list
  • Loading branch information
qifeng-bai committed Jun 17, 2024
commit 29f87885b4f97ad596e1950ae8d0cf1686b8694f
2 changes: 2 additions & 0 deletions grails-app/conf/application.yml
hamzajaved-csiro marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
---
logging:
config: /data/specieslist-webapp/config/logback.xml

grails:
profile: web
Expand Down
2 changes: 1 addition & 1 deletion grails-app/conf/logback.xml
hamzajaved-csiro marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
<logger name="org.hibernate.orm.deprecation" level="OFF"/>
<logger name="org.grails.config.NavigableMap" level="OFF"/>

<root level="warn">
<root level="WARN">
<appender-ref ref="STDOUT" />
</root>
</configuration>
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@ import au.org.ala.web.AuthService
import com.opencsv.CSVReader
import grails.converters.JSON
import grails.gorm.transactions.Transactional
import groovy.time.TimeCategory
import org.apache.commons.io.filefilter.FalseFileFilter
import org.grails.web.json.JSONObject
import org.hibernate.criterion.DetachedCriteria
import org.springframework.web.multipart.MultipartHttpServletRequest

import javax.annotation.PostConstruct
import java.util.concurrent.Executors
import java.util.concurrent.TimeUnit
import java.lang.management.ManagementFactory
import com.sun.management.OperatingSystemMXBean

class SpeciesListController {

Expand Down Expand Up @@ -512,8 +517,56 @@ class SpeciesListController {
file?.getInputStream().withReader { r -> helperService.getSeparator(r.readLine()) }
}

/**
 * Rematches every species list.
 *
 * All lists are loaded sorted by itemsCount in descending order and handed,
 * one at a time, to helperService.rematchList. The optional request parameter
 * "reset" is forwarded to each call as true only when it parses to boolean true.
 * The elapsed time for the whole run is logged at info level.
 */
def rematchAll() {
    // The flag is identical for every list in this request, so parse it once.
    boolean reset = params.reset?.toBoolean() == true
    def speciesLists = SpeciesList.list(sort: 'itemsCount', order: 'desc')
    def startProcessing = new Date()
    speciesLists.each { speciesList ->
        helperService.rematchList(speciesList, reset)
    }
    // size() is the number of lists processed; speciesLists.itemsCount would
    // spread over the collection and print every list's item count instead.
    log.info("Total time cost to complete ${speciesLists.size()} lists : ${TimeCategory.minus(new Date(), startProcessing)}")
}

/**
 * Rematch the species list of the given data resource id (drid).
 *
 * Request parameter "reset" is optional and defaults to false; remove all
 * existing matched species if true.
 *
 * @param id dataResource id of a species list, starting with 'dr'
 * @return renders a JSON body with status 0 and a completion message; sends 401
 *         when the current user is not an editor of the list, and 404 when no
 *         species list has the given data resource id
 */
def rematchList(String id) {
    def speciesList = SpeciesList.findByDataResourceUid(id)
    if (!speciesList) {
        // sendError is meant for error statuses; reporting "not found" with 200
        // made the failure look like a success to HTTP clients.
        response.sendError(404, "No species list found for data resource id: ${id}")
        return
    }
    if (!isCurrentUserEditorForList(speciesList)) {
        response.sendError(401, "Not authorised.")
        return
    }
    boolean reset = params.reset?.toBoolean() == true
    helperService.rematchList(speciesList, reset)
    render([status: 0, message: "Rematching the species list ${id} is completed" ] as JSON)
}


/**
* Rematches the scientific names in the supplied list
* todo: decouple the rematching all lists with rematching a single list
* todo: decouple resume rematching process
*
* Rematch the species list of the given data resource id (drid), or the sequence id (id)
*
* param id: data resource id of a species list, starting with 'dr'
* if the id is NOT started with "dr", it is assumed it is a sequence id of a species list
* the params.id will be recalculated to the data resource id of this species list
*
* param beforeId: the id of the last matched species. It only works when the dr/sequence id of a species list is not given,
*
* If the sequence id and the data resource id is not provided, it will rematch all species lists
*
* If the sequence id and the data resource id is provided, and the before id is given,
* the rematch ( Species sorted in descending order by sequence ID) will start from the next species BEFORE this id
*
*/
def rematch() {
long beforeId = 0
Expand All @@ -530,8 +583,9 @@ class SpeciesListController {
}
log.warn(msg)
} else if ( !params.id.startsWith("dr")) {
//Get speciesList Id from species id
params.id = SpeciesList.get(params.id)?.dataResourceUid
log.info("Rematching for " + params.id)
log.info("Rematching for data resource:" + params.id)
}

Integer totalRows, offset = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class SpeciesListItemController {

def fqs = requestParams.fq ? [requestParams.fq].flatten().findAll { it != null } : null

def baseQueryAndParams = requestParams.fq ? queryService.constructWithFacets(" from SpeciesListItem sli ", fqs, requestParams.id, requestParams.q) : null
def baseQueryAndParams = requestParams.fq ? queryService.constructWithFacets(" from SpeciesListItem sli left join fetch sli.matchedSpecies ", fqs, requestParams.id, requestParams.q) : null
log.debug(baseQueryAndParams?.toString())

// to sort on a column 'order by' clause has to be added explicitly since executeQuery function does
Expand All @@ -147,7 +147,7 @@ class SpeciesListItemController {
distinctCount: queryService.getDistinctCountByParams(requestParams, baseQueryAndParams),
hasUnrecognised: noMatchCount > 0,
keys: queryService.getSpeciesListKVPKeysByDataResourceUid(requestParams.id),
downloadReasons: loggerService.getReasons(),
downloadReasons: null,
users: queryService.getUsersForList(),
userId: authService.getUserId(),
facets: queryService.generateFacetValues(fqs, baseQueryAndParams, requestParams.id, requestParams.q, maxLengthForFacet),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1818,7 +1818,7 @@ class WebServiceController {
* rematch existing SpeciesListItem
*/
def rematchSpecies() {
def result = helperService.rematchSpecies("developer", params.matchAll)
def result = helperService.rematchSpeciesInList("developer", params.matchAll)
def resp = result.toMap()
render resp as JSON
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class MatchedSpecies {
String genus
Date lastUpdated

static belongsTo = [speciesListItem:SpeciesListItem]
hamzajaved-csiro marked this conversation as resolved.
Show resolved Hide resolved
//static belongsTo = [speciesListItem:SpeciesListItem]

static constraints = {
vernacularName(nullable: true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,17 @@ class SpeciesListItem {
Integer itemOrder

MatchedSpecies matchedSpecies
//Impose hasOne relation for fixing session writing errors in rematch process
//But it affects the reading speed significantly

//static hasOne = [matchedSpecies: MatchedSpecies]
static hasMany = [kvpValues: SpeciesListKVP]
//allows the items to be sorted before they are extracted.
SortedSet kvpValues
//NC 2013-05-09: Changed the name for the list relationship because this is a reserved word in certain situations. This causes
//issues when being used in a "criteria"
static belongsTo = [mylist:SpeciesList]
static embedded = ['homeAddress', 'workAddress']

static constraints = {
//guid unique: 'name' //AK for the table
Expand Down Expand Up @@ -66,7 +70,7 @@ class SpeciesListItem {
mylist index: 'idx_list_id'
//kvpValues cascade: "all-delete-orphan"
//kvpValues lazy: false
matchedSpecies (ignoreNotFound: true)
matchedSpecies (column: 'matched_species_id', ignoreNotFound: true, nullable: true, lazy:true)
}

def toMap() {
Expand Down
Loading