-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
162 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
occurrenceID,format,creator,license,type,identifier,documentId,rights | ||
014826,jpg,Joyce TJ,CC BY 4.0,image,https://www.somelinks.come/image?image=d68f8c06,"The rights to all uploaded images are held under the specified Creative Commons license, by the contributor of the image and the primary organisation responsible for the project to which they are contributed." | ||
014825,jpg,Joyce TJ,CC BY 4.0,image,https://www.somelinks.come/image?image=a5923083,"The rights to all uploaded images are held under the specified Creative Commons license, by the contributor of the image and the primary organisation responsible for the project to which they are contributed." | ||
014824,jpg,Joyce TJ,CC BY 4.0,image,https://www.somelinks.come/image?image=7fab23e4,"The rights to all uploaded images are held under the specified Creative Commons license, by the contributor of the image and the primary organisation responsible for the project to which they are contributed." |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
occurrenceID,basisOfRecord,scientificName,license,decimalLatitude,decimalLongitude | ||
014826,Human Observation,Ageratina adenophora,CC-BY 4.0 (Int),-30.0000,144.0000 | ||
014825,Human Observation,Ageratina adenophora,CC-BY 4.0 (Int),-31.1111,145.0000 | ||
014824,Human Observation,Delairea odorata,CC-BY 4.0 (Int),-32.085431,100.828059 | ||
014823,Human Observation,Delairea odorata,CC-BY 4.0 (Int),-33.097233,101.820888 | ||
014822,Human Observation,Delairea odorata,CC-BY 4.0 (Int),-34.099936,102.821654 | ||
014821,Human Observation,Delairea odorata,CC-BY 4.0 (Int),-35.893671,104.999974 | ||
014802,Human Observation,Alectryon coriaceus,CC-BY 4.0 (Int),-34.113747,120.889354 | ||
014801,Human Observation,Eucalyptus robusta,CC-BY 4.0 (Int),-36.0000,144.308848 | ||
014800,Human Observation,Arundo donax,CC-BY 4.0 (Int),-30.440251,146.240159 | ||
014799,Human Observation,Arundo donax,CC-BY 4.0 (Int),-31.547195,150.783246 | ||
014798,Human Observation,Arundo donax,CC-BY 4.0 (Int),-40.481117,150.823468 | ||
014792,Human Observation,Euphorbia paralias,CC-BY 4.0 (Int),-28.0000,115.0000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
from dwcahandler import DwcaHandler, CsvFileType, CoreOrExtType, Eml | ||
from zipfile import ZipFile | ||
from pathlib import Path | ||
import xml.etree.ElementTree as ET | ||
import re | ||
import pandas as pd | ||
|
||
|
||
def _get_namespace(element): | ||
"""Get the namespace from a `{namespace}tag` formatted URI | ||
param: element | ||
"return: The namespace for the element | ||
""" | ||
m = re.match("\\{.*\\}", element.tag) | ||
return m.group(0) if m else '' | ||
|
||
|
||
def _get_eml_content():
    """Build the Eml metadata object passed to create_dwca by the tests.

    return: An Eml instance populated with fixed sample values
    """
    sample_metadata = {
        'dataset_name': 'Sample Dataset',
        'description': 'A dataset sample',
        'license': 'sample license',
        'citation': 'sample citation',
        'rights': 'sample rights',
    }
    return Eml(**sample_metadata)
|
||
|
||
# Sample csv inputs, loaded once at import time; the tests compare the
# generated archive content against these data frames.
occurrence_sample_file = "./input_files/sample/occurrence.csv"
multimedia_sample_file = "./input_files/sample/multimedia.csv"
sample_occ_df, sample_multimedia_df = (
    pd.read_csv(csv_path)
    for csv_path in (occurrence_sample_file, multimedia_sample_file)
)
|
||
|
||
class TestPublish:
    """
    Test that archives written by DwcaHandler.create_dwca contain meta.xml,
    eml.xml and data files that match the sample csv inputs
    """

    @staticmethod
    def _dwca_output_path(file_name):
        """Return the absolute path of `file_name` under ./temp, creating the folder if needed

        param: file_name: Name of the archive file to be written
        return: Absolute path as a string
        """
        p = Path("temp")
        p.mkdir(parents=True, exist_ok=True)
        return str(Path(p / file_name).absolute())

    @staticmethod
    def _assert_meta_node(root, ns, node_type, sample_df):
        """Check one core/extension node of meta.xml against a sample data frame

        param: root: Root element of the parsed meta.xml
        param: ns: Namespace prefix (`{uri}` form) used to qualify child tags
        param: node_type: Value interpolated after the namespace to locate the node
        param: sample_df: Data frame whose columns must each appear in a field term
        return: The text of the node's files/location element
        """
        node = root.find(f'{ns}{node_type}')
        # Compare with None explicitly: the truth value of an Element depends on
        # whether it has children, and truth-testing elements is deprecated.
        assert node is not None
        fields = node.findall(f'{ns}field')
        term_fields = [f.attrib.get('term') for f in fields]
        assert len(term_fields) == len(sample_df.columns)
        for sample_col in sample_df.columns:
            assert any(sample_col in f for f in term_fields)
        return node.find(f'{ns}files').find(f'{ns}location').text

    def test_generate_dwca_without_ext(self):
        """
        Test that generated dwca is valid with core occ data
        """
        core_csv = CsvFileType(files=["./input_files/sample/occurrence.csv"], keys=['occurrenceID'],
                               type=CoreOrExtType.CORE)
        dwca_output_path = self._dwca_output_path("dwca.zip")
        DwcaHandler.create_dwca(core_csv=core_csv, output_dwca_path=dwca_output_path,
                                eml_content=_get_eml_content())
        # The context manager closes the archive; no explicit close() is needed.
        with ZipFile(dwca_output_path, 'r') as zf:
            files = zf.namelist()
            assert 'meta.xml' in files
            assert 'eml.xml' in files
            with zf.open('meta.xml') as meta_xml_file:
                root = ET.parse(meta_xml_file).getroot()
            ns = _get_namespace(root)
            assert ns == "{http://rs.tdwg.org/dwc/text/}"
            core_file = self._assert_meta_node(root, ns, CoreOrExtType.CORE, sample_occ_df)

            assert core_file
            with zf.open(core_file) as occ_file:
                df = pd.read_csv(occ_file)
            assert 'id' in df.columns
            pd.testing.assert_frame_equal(df.drop(columns=['id']), sample_occ_df)

    def test_generate_dwca_with_ext(self):
        """
        Test that generated dwca is valid with core occ and multimedia data
        """
        core_csv = CsvFileType(files=["./input_files/sample/occurrence.csv"], keys=['occurrenceID'],
                               type=CoreOrExtType.CORE)
        ext_csv = CsvFileType(files=["./input_files/sample/multimedia.csv"], keys=['occurrenceID'],
                              type=CoreOrExtType.EXTENSION)
        dwca_output_path = self._dwca_output_path("dwca_with_ext.zip")
        DwcaHandler.create_dwca(core_csv=core_csv, ext_csv_list=[ext_csv], output_dwca_path=dwca_output_path,
                                eml_content=_get_eml_content())
        # The context manager closes the archive; no explicit close() is needed.
        with ZipFile(dwca_output_path, 'r') as zf:
            files = zf.namelist()
            assert 'meta.xml' in files
            assert 'eml.xml' in files
            with zf.open('meta.xml') as meta_xml_file:
                root = ET.parse(meta_xml_file).getroot()
            ns = _get_namespace(root)
            assert ns == "{http://rs.tdwg.org/dwc/text/}"
            core_file = self._assert_meta_node(root, ns, CoreOrExtType.CORE, sample_occ_df)
            ext_file = self._assert_meta_node(root, ns, CoreOrExtType.EXTENSION, sample_multimedia_df)

            assert core_file
            assert ext_file

            with zf.open(core_file) as occ_file:
                df = pd.read_csv(occ_file)
            assert 'id' in df.columns
            pd.testing.assert_frame_equal(df.drop(columns=['id']), sample_occ_df)

            with zf.open(ext_file) as image_file:
                df = pd.read_csv(image_file)
            assert 'coreid' in df.columns
            pd.testing.assert_frame_equal(df.drop(columns=['coreid']), sample_multimedia_df)