Skip to content

Commit

Permalink
Add wordforms/search_suggest
Browse files Browse the repository at this point in the history
Results of lexemes/search_suggest are wrapped in 'lexeme' for consistency.
  • Loading branch information
johnjcamilleri committed Jul 4, 2020
1 parent 1c31cf8 commit 5fb002f
Show file tree
Hide file tree
Showing 8 changed files with 222 additions and 40 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@ Run all tests with `npm test`.
Run an individual testsuite with `npx mocha --exit test/schema.js` or use the `--grep` flag.
To stop on first failure, use `--bail`

### Using test data

1. Set DB URL in `server-config.js` to `...gabra-test` (or something else)
2. ```
node scripts/node/populate.js test/data/*.json
node scripts/node/resolve-lexeme-ids.js
node scripts/node/create-indexes.js
(cd scripts/node && ./run.js update-glosses-collection.js)
```

## Repository

- `master` branch is used for development.
Expand Down
24 changes: 20 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "gabra-api",
"version": "2.12.0",
"version": "2.13.0",
"description": "Ġabra: an open lexicon for Maltese",
"author": "John J. Camilleri <[email protected]> (http:https://johnjcamilleri.com/)",
"license": "MIT",
Expand Down
6 changes: 4 additions & 2 deletions public/markdown/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,14 @@ The results are sorted by part of speech and derived form, and will not include
|:-----------------|:------------|:---------------------------|
| `:id` (required) | Lexeme ID | `5200a366e36f237975000f26` |

### Search suggest
### Search suggest <small>Changed in v2.13</small>

List variations in spelling (diacritics, character case) of a search term, from lemmas:
Find lemmas or wordforms that differ from the search term only in spelling (diacritics, character case):

> [/lexemes/search_suggest?s=Hareg](#{baseURL}/lexemes/search_suggest?s=Hareg)
> [/wordforms/search_suggest?s=ohorgu](#{baseURL}/wordforms/search_suggest?s=ohorgu)
| Argument | Description | Example |
|:---------------|:-------------|:--------|
| `s` (required) | Search query | `Hareg` |
Expand Down
62 changes: 31 additions & 31 deletions routes/lexemes.js
Original file line number Diff line number Diff line change
Expand Up @@ -178,24 +178,24 @@ router.get('/search', function (req, res) {
collection.find(conds_l, opts),
collection.count(conds_l)
])
.then(values => {
var docs = values[0]
var count = values[1]
queryObj.result_count = count
res.json({
'results': docs.map(doc => {
return {
'lexeme': doc
}
}),
'query': queryObj
})
})
.catch(err => {
console.error(err)
res.status(500).end()
.then(values => {
var docs = values[0]
var count = values[1]
queryObj.result_count = count
res.json({
'results': docs.map(doc => {
return {
'lexeme': doc
}
}),
'query': queryObj
})
})
.catch(err => {
console.error(err)
res.status(500).end()
})
})
})
})

/*
Expand Down Expand Up @@ -312,17 +312,16 @@ router.get('/search_suggest', function (req, res) {
// s = s.replace(/^([^\[])/, function (m,c,o,s) { return '[' + c.toUpperCase() + ']'})

// Handle diacritics
s = s.replace(/^\^/, '')
s = s.replace(/\$$/, '')
s = s.replace(/c/g, 'ċ')
s = s.replace(/g/g, '[gġ]')
s = s.replace(/h/g, '[hħ]')
s = s.replace(/z/g, '[zż]')

// No substrings
s = s.replace(/^\^/, '')
s = s.replace(/\$$/, '')
s = '^' + s + '$'

var collection = db.get('lexemes')
var query = {
'$or': [
{
Expand All @@ -334,23 +333,24 @@ router.get('/search_suggest', function (req, res) {
],
'pending': {'$ne': true}
}

var opts = {
'projection': {'lemma': true}
}
collection.find(query, opts, function (err, docs) {
if (err) {
db.get('lexemes').find(query, opts)
.catch(function (err) {
console.error(err)
res.status(500).end()
return
}
res.json({
'results': docs,
'query': {
'term': orig,
'result_count': docs.length
}
})
})
.then(function (data) {
res.json({
'results': data.map((l) => { return {'lexeme': l} }),
'query': {
'term': orig,
'result_count': data.length
}
})
})
})

/*
Expand Down
52 changes: 52 additions & 0 deletions routes/wordforms.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ var express = require('express')
var router = express.Router()
var passport = require('passport')
var async = require('async')
var regexquote = require('regexp-quote')
var monk = require('monk')

var log = require('./helpers/logger').makeLogger('wordforms')
Expand Down Expand Up @@ -154,6 +155,57 @@ router.post('/replace/:lexeme_id',
})
})

/*
 * GET search suggest
 *
 * Looks up wordforms whose surface form is a spelling variant of the query
 * parameter `s`: the term is lower-cased, `c` is replaced by `ċ`, and each of
 * `g`, `h`, `z` is widened to also accept `ġ`, `ħ`, `ż`. The term itself and
 * pending wordforms are excluded.
 *
 * Responds with:
 *   { results: [ { wordform: <doc> }, ... ],
 *     query: { term: <original s>, result_count: <number of results> } }
 * Responds 400 when `s` is not a single string, 500 when the lookup fails.
 */
router.get('/search_suggest', function (req, res) {
  var db = req.db

  var orig = req.query.s
  // `s` is required. When it is missing (undefined) or repeated (not a plain
  // string) there is nothing sensible to build a pattern from.
  if (typeof orig !== 'string') {
    res.status(400).end()
    return
  }
  var s = regexquote(orig)

  // Handle capitalisation
  s = s.toLowerCase()

  // Handle diacritics
  s = s.replace(/c/g, 'ċ')
  s = s.replace(/g/g, '[gġ]')
  s = s.replace(/h/g, '[hħ]')
  s = s.replace(/z/g, '[zż]')

  // No substrings: anchor the pattern at both ends
  s = s.replace(/^\^/, '')
  s = s.replace(/\$$/, '')
  s = '^' + s + '$'

  var query = {
    'surface_form': {'$regex': s, '$ne': orig},
    'pending': {'$ne': true}
  }
  var opts = {
    'projection': {
      'surface_form': true,
      'lexeme_id': true
    }
  }
  // The success handler must come before the error handler: with `.catch()`
  // first, a failed lookup would send the 500 and then still run the success
  // handler with `undefined` data.
  db.get('wordforms').find(query, opts)
    .then(function (data) {
      res.json({
        'results': data.map((l) => { return {'wordform': l} }),
        'query': {
          'term': orig,
          'result_count': data.length
        }
      })
    })
    .catch(function (err) {
      console.error(err)
      res.status(500).end()
    })
})

/*
* GET count
*/
Expand Down
74 changes: 74 additions & 0 deletions test/data/wordforms.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,5 +123,79 @@
"Camilleri2013"
],
"pending" : true
},
{
"aspect": "perf",
"dir_obj": null,
"generated": true,
"ind_obj": null,
"lexeme": {
"lemma": "ħareġ",
"pos": "VERB"
},
"phonetic": "hrɪʧt",
"polarity": "pos",
"sources": ["Camilleri2013"],
"subject": {
"person": "p1",
"number": "sg"
},
"surface_form": "ħriġt"
},
{
"aspect": "perf",
"dir_obj": null,
"generated": true,
"ind_obj": null,
"lexeme": {
"lemma": "ħareġ",
"pos": "VERB"
},
"phonetic": "hrɪʧt",
"polarity": "pos",
"sources": ["Camilleri2013"],
"subject": {
"person": "p2",
"number": "sg"
},
"surface_form": "ħriġt"
},
{
"aspect": "perf",
"dir_obj": null,
"generated": true,
"ind_obj": null,
"lexeme": {
"lemma": "ħareġ",
"pos": "VERB"
},
"phonetic": "hɐrɛʧ",
"polarity": "pos",
"sources": ["Camilleri2013"],
"subject": {
"person": "p3",
"number": "sg",
"gender": "m"
},
"surface_form": "ħareġ"
},
{
"aspect": "perf",
"dir_obj": null,
"generated": true,
"ind_obj": null,
"lexeme": {
"lemma": "ħareġ",
"pos": "VERB"
},
"phonetic": "hɐrʤɛt",
"polarity": "pos",
"sources": ["Camilleri2013"],
"subject": {
"person": "p3",
"number": "sg",
"gender": "f"
},
"surface_form": "ħarġet"
}
]
32 changes: 30 additions & 2 deletions test/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,23 @@ describe('Search', function () {
res.body.query.result_count.should.be.greaterThanOrEqual(opts.result_count)
}

// Results should contain these lemmas (in any order)
// Lexeme results should contain these lemmas (in any order)
if (opts.lemmas) {
for (let i in opts.lemmas) {
let lemma = opts.lemmas[i]
res.body.results.should.matchAny(function (value) {
value.lexeme.lemma.should.equal(lemma)
}, 'lemma "' + lemma + '" not found in results')
}, `lemma "${lemma}" not found in results`)
}
}

// Wordform results should contain these surface forms (in any order)
if (opts.surface_forms) {
for (let i in opts.surface_forms) {
let sf = opts.surface_forms[i]
res.body.results.should.matchAny(function (value) {
value.wordform.surface_form.should.equal(sf)
}, `surface form "${sf}" not found in results`)
}
}

Expand Down Expand Up @@ -93,6 +103,24 @@ describe('Search', function () {

// -------------------------------------------------------------------------

describe('Search suggest', function () {
  // One entry per suggest endpoint: test title, URL to request, and the
  // expectations handed to checkResponse.
  const cases = [
    {
      title: 'suggest lexeme',
      url: '/lexemes/search_suggest?s=Hareg',
      expected: {lemmas: ['ħareġ']}
    },
    {
      title: 'suggest wordform',
      url: '/wordforms/search_suggest?s=harget',
      expected: {surface_forms: ['ħarġet']}
    }
  ]

  cases.forEach(function (testCase) {
    it(testCase.title, function (done) {
      const verify = checkResponse(testCase.expected, done)
      request(server)
        .get(testCase.url)
        .expect(200)
        .end(verify)
    })
  })
})

// -------------------------------------------------------------------------

describe('Load stuff', function () {
var lexeme_id

Expand Down

0 comments on commit 5fb002f

Please sign in to comment.