Skip to content

Commit

Permalink
feat: improve taxons search (unaccent, better display)
Browse files Browse the repository at this point in the history
  • Loading branch information
jpm-cbna committed May 7, 2024
1 parent 3978263 commit 787a279
Show file tree
Hide file tree
Showing 5 changed files with 140 additions and 42 deletions.
2 changes: 1 addition & 1 deletion atlas/atlasAPI.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def searchTaxonAPI():
session = db.session
search = request.args.get("search", "")
limit = request.args.get("limit", 50)
results = vmSearchTaxonRepository.listeTaxonsSearch(session, search, limit)
results = vmSearchTaxonRepository.searchTaxons(session, search, limit)
session.close()
return jsonify(results)

Expand Down
1 change: 1 addition & 0 deletions atlas/modeles/entities/vmSearchTaxon.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class VmSearchTaxon(Base):
metadata,
Column("cd_ref", Integer, primary_key=True, unique=True),
Column("cd_nom", Integer),
Column("display_name", String),
Column("search_name", String),
schema="atlas",
autoload=True,
Expand Down
26 changes: 13 additions & 13 deletions atlas/modeles/repositories/vmSearchTaxonRepository.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def listeTaxons(session):
return taxonList


def listeTaxonsSearch(session, search, limit=50):
def searchTaxons(session, search, limit=50):
"""
Recherche dans la VmSearchTaxon en ilike
Utilisé pour l'autocomplétion de la recherche de taxon
Expand All @@ -34,20 +34,20 @@ def listeTaxonsSearch(session, search, limit=50):
label = search_name
value = cd_ref
"""

req = session.query(
VmSearchTaxon.search_name,
VmSearchTaxon.cd_ref,
func.similarity(VmSearchTaxon.search_name, search).label("idx_trgm"),
).distinct()

search = search.replace(" ", "%")
req = (
req.filter(VmSearchTaxon.search_name.ilike("%" + search + "%"))
like_search = "%" + search.replace(" ", "%") + "%"

query = (
session.query(
VmSearchTaxon.display_name,
VmSearchTaxon.cd_ref,
func.similarity(VmSearchTaxon.search_name, search).label("idx_trgm"),
)
.distinct()
.filter(func.unaccent(VmSearchTaxon.search_name).ilike(func.unaccent(like_search)))
.order_by(desc("idx_trgm"))
.order_by(VmSearchTaxon.cd_ref == VmSearchTaxon.cd_nom)
.limit(limit)
)
data = req.all()
results = query.all()

return [{"label": d[0], "value": d[1]} for d in data]
return [{"label": r[0], "value": r[1]} for r in results]
84 changes: 56 additions & 28 deletions data/atlas/5.atlas.vm_search_taxon.sql
Original file line number Diff line number Diff line change
@@ -1,34 +1,62 @@
-- Taxons observés et de tous leurs synonymes (utilisés pour la recherche d'une espèce)
CREATE MATERIALIZED VIEW atlas.vm_search_taxon AS
WITH verna_names AS (
SELECT DISTINCT
cd_nom,
lb_nom,
cd_ref,
STRING_TO_TABLE(nom_vern, ', ') AS nom_vern
FROM atlas.vm_taxref
WHERE nom_vern IS NOT NULL
AND cd_nom = cd_ref
AND nom_vern <> lb_nom
),
names AS (
-- Chosen scinames
SELECT
cd_nom,
cd_ref,
lb_nom AS search_name,
CONCAT('<b>', REPLACE(nom_complet_html, lb_auteur, ''), '</b> ', lb_auteur) AS display_name
FROM atlas.vm_taxref
WHERE cd_nom = cd_ref

CREATE MATERIALIZED VIEW atlas.vm_search_taxon AS
SELECT row_number() OVER (ORDER BY t.cd_nom,t.cd_ref,t.search_name)::integer AS fid,
t.cd_nom,
t.cd_ref,
t.search_name,
t.nom_valide,
t.lb_nom
FROM (
SELECT t_1.cd_nom,
t_1.cd_ref,
concat(t_1.lb_nom, ' = <i> ', t_1.nom_valide, '</i>') AS search_name,
t_1.nom_valide,
t_1.lb_nom
FROM atlas.vm_taxref t_1
UNION

UNION
SELECT t_1.cd_nom,
t_1.cd_ref,
concat(t_1.nom_vern, ' = <i> ', t_1.nom_valide, '</i>' ) AS search_name,
t_1.nom_valide,
t_1.lb_nom
FROM atlas.vm_taxref t_1
WHERE t_1.nom_vern IS NOT NULL AND t_1.cd_nom = t_1.cd_ref
) t
JOIN atlas.vm_taxons taxons ON taxons.cd_ref = t.cd_ref;
-- Synonym scinames
SELECT
t1.cd_nom,
t1.cd_ref,
t1.lb_nom AS search_name,
CONCAT(REPLACE(t1.nom_complet_html, t1.lb_auteur, ''), ' = <b> ', REPLACE(t2.nom_complet_html, t2.lb_auteur, ''), '</b> ', t2.lb_auteur) AS display_name
FROM atlas.vm_taxref AS t1
JOIN atlas.vm_taxref AS t2
ON t1.cd_ref = t2.cd_nom
WHERE t1.cd_nom <> t1.cd_ref

UNION

-- Vernacular names
SELECT
v.cd_nom,
v.cd_ref,
v.nom_vern AS search_name,
CONCAT(v.nom_vern, ' = <b> ', REPLACE(t.nom_complet_html, t.lb_auteur, ''), '</b> ', t.lb_auteur) AS display_name
FROM verna_names AS v
JOIN atlas.vm_taxref AS t
ON t.cd_nom = v.cd_ref
WHERE v.nom_vern <> v.lb_nom
)
SELECT ROW_NUMBER() OVER (ORDER BY n.cd_nom, n.cd_ref, n.search_name)::integer AS fid,
n.cd_nom,
n.cd_ref,
n.search_name,
n.display_name
FROM atlas.vm_taxons AS t
JOIN names AS n
ON t.cd_ref = n.cd_ref ;

CREATE UNIQUE INDEX ON atlas.vm_search_taxon(fid);
CREATE INDEX ON atlas.vm_search_taxon(cd_nom);
create INDEX ON atlas.vm_search_taxon(cd_ref);

CREATE INDEX ON atlas.vm_search_taxon(cd_ref);
CREATE INDEX trgm_idx ON atlas.vm_search_taxon USING GIST (search_name gist_trgm_ops);
CREATE UNIQUE INDEX ON atlas.vm_search_taxon (cd_nom, search_name);
CREATE UNIQUE INDEX ON atlas.vm_search_taxon (cd_nom, search_name);
69 changes: 69 additions & 0 deletions data/update/update_1.6.1to1.6.2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,72 @@ CREATE INDEX ON atlas.vm_observations_mailles

CREATE INDEX ON atlas.vm_observations_mailles
USING btree (id_maille, cd_ref);


-- ISSUE #531 & #532
CREATE EXTENSION IF NOT EXISTS unaccent SCHEMA "public";

-- ISSUE #532
DROP MATERIALIZED VIEW IF EXISTS atlas.vm_search_taxon ;
CREATE MATERIALIZED VIEW atlas.vm_search_taxon AS
WITH verna_names AS (
SELECT DISTINCT
cd_nom,
lb_nom,
cd_ref,
STRING_TO_TABLE(nom_vern, ', ') AS nom_vern
FROM atlas.vm_taxref
WHERE nom_vern IS NOT NULL
AND cd_nom = cd_ref
AND nom_vern <> lb_nom
),
names AS (
-- Chosen scinames
SELECT
cd_nom,
cd_ref,
lb_nom AS search_name,
CONCAT('<b>', REPLACE(nom_complet_html, lb_auteur, ''), '</b> ', lb_auteur) AS display_name
FROM atlas.vm_taxref
WHERE cd_nom = cd_ref

UNION

-- Synonym scinames
SELECT
t1.cd_nom,
t1.cd_ref,
t1.lb_nom AS search_name,
CONCAT(REPLACE(t1.nom_complet_html, t1.lb_auteur, ''), ' = <b> ', REPLACE(t2.nom_complet_html, t2.lb_auteur, ''), '</b> ', t2.lb_auteur) AS display_name
FROM atlas.vm_taxref AS t1
JOIN atlas.vm_taxref AS t2
ON t1.cd_ref = t2.cd_nom
WHERE t1.cd_nom <> t1.cd_ref

UNION

-- Vernacular names
SELECT
v.cd_nom,
v.cd_ref,
v.nom_vern AS search_name,
CONCAT(v.nom_vern, ' = <b> ', REPLACE(t.nom_complet_html, t.lb_auteur, ''), '</b> ', t.lb_auteur) AS display_name
FROM verna_names AS v
JOIN atlas.vm_taxref AS t
ON t.cd_nom = v.cd_ref
WHERE v.nom_vern <> v.lb_nom
)
SELECT ROW_NUMBER() OVER (ORDER BY n.cd_nom, n.cd_ref, n.search_name)::integer AS fid,
n.cd_nom,
n.cd_ref,
n.search_name,
n.display_name
FROM atlas.vm_taxons AS t
JOIN names AS n
ON t.cd_ref = n.cd_ref ;

CREATE UNIQUE INDEX ON atlas.vm_search_taxon(fid);
CREATE INDEX ON atlas.vm_search_taxon(cd_nom);
CREATE INDEX ON atlas.vm_search_taxon(cd_ref);
CREATE INDEX trgm_idx ON atlas.vm_search_taxon USING GIST (search_name gist_trgm_ops);
CREATE UNIQUE INDEX ON atlas.vm_search_taxon (cd_nom, search_name);

0 comments on commit 787a279

Please sign in to comment.