Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MCR-3133 Add fallback value to xpath based classification mapping #2190

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand All @@ -49,14 +50,16 @@
* Maps classifications in Mods-Documents.
* <p>You can define a label <b><code>x-mapping</code></b> in a classification with space-separated categoryIds
* to which the classification will be mapped. You can further define a label <b><code>x-mapping-xpath</code></b> in a
* classification which creates a classification mapping when the xPath inside the x-mapping-xpath attribute
* is matched.</p>
* classification which creates a classification mapping when the XPath inside the x-mapping-xpath attribute
* is matched. You can also define a label <b><code>x-mapping-xpathfb</code></b> which contains a fallback XPath
* that is only evaluated if none of the x-mapping-xpath expressions inside the same classification match.</p>
* <code>
* &lt;category ID=&quot;article&quot; counter=&quot;1&quot;&gt;<br>
* &nbsp;&lt;label xml:lang=&quot;en&quot; text=&quot;Article / Chapter&quot; /&gt;<br>
* &nbsp;&lt;label xml:lang=&quot;de&quot; text=&quot;Artikel / Aufsatz&quot; /&gt;<br>
* &nbsp;&lt;label xml:lang=&quot;x-mapping&quot; text=&quot;diniPublType:article&quot; /&gt;<br>
* &nbsp;&lt;label xml:lang=&quot;x-mapping-xpath&quot; text=&quot;mods:genre[text()='article']&quot; /&gt;<br>
* &nbsp;&lt;label xml:lang=&quot;x-mapping-xpathfb&quot; text=&quot;mods:genre[text()='other']&quot; /&gt;<br>
* &lt;/category&gt;
* </code>
*
Expand All @@ -68,6 +71,8 @@ public class MCRClassificationMappingEventHandler extends MCREventHandlerBase {

public static final String LABEL_LANG_XPATH_MAPPING = "x-mapping-xpath";

public static final String LABEL_LANG_XPATH_MAPPING_FALLBACK = "x-mapping-xpathfb";

public static final String LABEL_LANG_X_MAPPING = "x-mapping";

public static final String XPATH_GENERATOR_NAME = "xpathmapping";
Expand Down Expand Up @@ -104,16 +109,22 @@ private static List<Map.Entry<MCRCategoryID, MCRCategoryID>> getXMappings(MCRCat
/**
* Searches all configured classifications
* (see {@link MCRClassificationMappingEventHandler#X_PATH_MAPPING_CLASSIFICATIONS}) for categories
* with language label {@link MCRClassificationMappingEventHandler#LABEL_LANG_XPATH_MAPPING}.
* All categories with said label and present in database are returned in a list.
* @return a list of {@link MCRCategory categories} with the XPath-Mapping label
* with language labels {@link MCRClassificationMappingEventHandler#LABEL_LANG_XPATH_MAPPING} or
* {@link MCRClassificationMappingEventHandler#LABEL_LANG_XPATH_MAPPING_FALLBACK}.
* All categories with said label and present in database are returned in a Map of Sets,
* separated by their classification.
* @return a Map with classification-IDs as key and Sets of {@link MCRCategory categories}
* with the XPath-Mapping label as value
*/
private static List<MCRCategory> loadAllXPathMappings() {
private static Map<String, Set<MCRCategory>> loadAllXPathMappings() {
final MCRCategoryDAO dao = MCRCategoryDAOFactory.getInstance();
return Arrays.stream(X_PATH_MAPPING_CLASSIFICATIONS.trim().split(","))
.flatMap(
relevantClass -> dao.getCategoriesByClassAndLang(relevantClass, LABEL_LANG_XPATH_MAPPING).stream())
.collect(Collectors.toList());
.collect(Collectors.toMap(
relevantClass -> relevantClass,
relevantClass -> Stream.concat(
dao.getCategoriesByClassAndLang(relevantClass, LABEL_LANG_XPATH_MAPPING).stream(),
dao.getCategoriesByClassAndLang(relevantClass, LABEL_LANG_XPATH_MAPPING_FALLBACK).stream())
.collect(Collectors.toSet())));
}

@Override
Expand Down Expand Up @@ -173,21 +184,47 @@ private void createMapping(MCRObject obj) {
mappedClassification.setAttribute("generator", generator);
MCRClassMapper.assignCategory(mappedClassification, mapping.getValue());
});
// check x-mapping-xpath-mappings
for (MCRCategory category : loadAllXPathMappings()) {
if (category.getLabel(LABEL_LANG_XPATH_MAPPING).isPresent()) {

String xPath = category.getLabel(LABEL_LANG_XPATH_MAPPING).get().getText();
MCRXPathEvaluator evaluator = new MCRXPathEvaluator(new HashMap<>(), mcrmodsWrapper.getMODS());

if (evaluator.test(xPath)) {
String taskMessage = String.format(Locale.ROOT, "add x-path-mapping from '%s'",
category.getId().toString());
LOGGER.info(taskMessage);
Element mappedClassification = mcrmodsWrapper.addElement("classification");
String generator = getXPathMappingGenerator(category.getId());
mappedClassification.setAttribute("generator", generator);
MCRClassMapper.assignCategory(mappedClassification, category.getId());

final Map<String, Set<MCRCategory>> xPathMappings = loadAllXPathMappings();
for (Set<MCRCategory> categoriesPerClass : xPathMappings.values()) {
boolean isXPathMatched = false;
// check x-mapping-xpath-mappings
for (MCRCategory category : categoriesPerClass) {
if (category.getLabel(LABEL_LANG_XPATH_MAPPING).isPresent()) {

String xPath = category.getLabel(LABEL_LANG_XPATH_MAPPING).get().getText();
MCRXPathEvaluator evaluator = new MCRXPathEvaluator(new HashMap<>(), mcrmodsWrapper.getMODS());

if (evaluator.test(xPath)) {
String taskMessage = String.format(Locale.ROOT, "add x-path-mapping from '%s'",
category.getId().toString());
LOGGER.info(taskMessage);
Element mappedClassification = mcrmodsWrapper.addElement("classification");
String generator = getXPathMappingGenerator(category.getId());
mappedClassification.setAttribute("generator", generator);
MCRClassMapper.assignCategory(mappedClassification, category.getId());
isXPathMatched = true;
}
}
}
//check x-mapping-xpath-fallback-mappings
if (!isXPathMatched) {
for (MCRCategory category : categoriesPerClass) {
if (category.getLabel(LABEL_LANG_XPATH_MAPPING_FALLBACK).isPresent()) {

String xPath = category.getLabel(LABEL_LANG_XPATH_MAPPING_FALLBACK).get().getText();
MCRXPathEvaluator evaluator = new MCRXPathEvaluator(new HashMap<>(), mcrmodsWrapper.getMODS());

if (evaluator.test(xPath)) {
String taskMessage = String.format(Locale.ROOT, "add x-path-mapping-fallback from '%s'",
category.getId().toString());
LOGGER.info(taskMessage);
Element mappedClassification = mcrmodsWrapper.addElement("classification");
String generator = getXPathMappingGenerator(category.getId());
mappedClassification.setAttribute("generator", generator);
MCRClassMapper.assignCategory(mappedClassification, category.getId());
}
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ public void testMapping() throws IOException, JDOMException, URISyntaxException
}

/**
* Tests if an XPath-mapping is properly added into a Mods-Document.
* Tests if an XPath-mapping is properly added into a Mods-Document. Also tests whether multiple fallbacks per
* classification are considered.
* @throws IOException in case of error
* @throws JDOMException in case of error
* @throws URISyntaxException in case of error
Expand All @@ -109,29 +110,103 @@ public void testXPathMapping() throws IOException, JDOMException, URISyntaxExcep

loadCategory("genre.xml");
loadCategory("orcidWorkType.xml");
loadCategory("dummyClassification.xml");

Document document = saxBuilder.build(classLoader.getResourceAsStream(TEST_DIRECTORY + "testMods2.xml"));
MCRObject mcro = new MCRObject();

MCRMODSWrapper mw = new MCRMODSWrapper(mcro);
mw.setMODS(document.getRootElement().detach());
mw.setID("junit", 1);
mw.setID("junit", 2);

MCRClassificationMappingEventHandler mapper = new MCRClassificationMappingEventHandler();
mapper.handleObjectUpdated(null, mcro);

Document xml = mcro.createXML();

String expression = "//mods:classification[contains(@generator,'-mycore') and "
+ "contains(@generator,'xpathmapping2orcidWorkType') and contains(@valueURI, 'journal-article')]";
XPathExpression<Element> expressionObject = XPathFactory.instance()
.compile(expression, Filters.element(), null, MCRConstants.MODS_NAMESPACE, MCRConstants.XLINK_NAMESPACE);

Assert.assertNotNull("The mapped classification should be in the MyCoReObject now!",
expressionObject.evaluateFirst(
xml));

expression = "//mods:classification[contains(@generator,'-mycore') and "
+ "contains(@generator,'xpathmapping2dummyClassification') and contains(@valueURI, 'dummy-text')]";
expressionObject = XPathFactory.instance()
.compile(expression, Filters.element(), null, MCRConstants.MODS_NAMESPACE, MCRConstants.XLINK_NAMESPACE);

Assert.assertNotNull("The mapped dummy classification should be in the MyCoReObject now!",
expressionObject.evaluateFirst(xml));

expression = "//mods:classification[contains(@generator,'-mycore') and "
+ "contains(@generator,'xpathmapping2dummyClassification') and contains(@valueURI, 'dummy-fbonly')]";
expressionObject = XPathFactory.instance()
.compile(expression, Filters.element(), null, MCRConstants.MODS_NAMESPACE, MCRConstants.XLINK_NAMESPACE);

Assert.assertNotNull("The mapped dummy classification should be in the MyCoReObject now!",
expressionObject.evaluateFirst(xml));

LOGGER.info(new XMLOutputter(Format.getPrettyFormat()).outputString(xml));
}

/**
* Tests if a fallback XPath-mapping is properly added into a Mods-Document if the original mapping doesn't match.
* @throws IOException in case of error
* @throws JDOMException in case of error
* @throws URISyntaxException in case of error
*/
@Test
public void testXPathMappingFallback() throws IOException, JDOMException, URISyntaxException {
MCRSessionMgr.getCurrentSession();
MCRTransactionHelper.isTransactionActive();
ClassLoader classLoader = getClass().getClassLoader();
SAXBuilder saxBuilder = new SAXBuilder();

loadCategory("genre.xml");
loadCategory("orcidWorkType.xml");
loadCategory("dummyClassification.xml");

Document document = saxBuilder.build(classLoader.getResourceAsStream(TEST_DIRECTORY + "testMods3.xml"));
MCRObject mcro = new MCRObject();

MCRMODSWrapper mw = new MCRMODSWrapper(mcro);
mw.setMODS(document.getRootElement().detach());
mw.setID("junit", 3);

MCRClassificationMappingEventHandler mapper = new MCRClassificationMappingEventHandler();
mapper.handleObjectUpdated(null, mcro);

Document xml = mcro.createXML();

String expression = "//mods:classification[contains(@generator,'-mycore') and "
+ "contains(@generator,'xpathmapping2orcidWorkType') and contains(@valueURI, 'journal-article')]";
+ "contains(@generator,'xpathmapping2orcidWorkType') and contains(@valueURI, 'online-resource')]";
XPathExpression<Element> expressionObject = XPathFactory.instance()
.compile(expression, Filters.element(), null, MCRConstants.MODS_NAMESPACE, MCRConstants.XLINK_NAMESPACE);

Assert.assertNotNull("The mapped classification should be in the MyCoReObject now!",
expressionObject.evaluateFirst(
xml));

expression = "//mods:classification[contains(@generator,'-mycore') and "
+ "contains(@generator,'xpathmapping2dummyClassification') and contains(@valueURI, 'dummy-text')]";
expressionObject = XPathFactory.instance()
.compile(expression, Filters.element(), null, MCRConstants.MODS_NAMESPACE, MCRConstants.XLINK_NAMESPACE);

Assert.assertNotNull("The mapped dummy classification should be in the MyCoReObject now!",
expressionObject.evaluateFirst(xml));

expression = "//mods:classification[contains(@generator,'-mycore') and "
+ "contains(@generator,'xpathmapping2dummyClassification') and contains(@valueURI, 'dummy-fbonly')]";
expressionObject = XPathFactory.instance()
.compile(expression, Filters.element(), null, MCRConstants.MODS_NAMESPACE, MCRConstants.XLINK_NAMESPACE);

Assert.assertNotNull("The mapped dummy classification should be in the MyCoReObject now!",
expressionObject.evaluateFirst(xml));

LOGGER.info(new XMLOutputter(Format.getPrettyFormat()).outputString(xml));
}

private void loadCategory(String categoryFileName) throws URISyntaxException, JDOMException, IOException {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version="1.0"?>
<mycoreclass ID="dummyClassification" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="MCRClassification.xsd">
<label xml:lang="de" text="Dummy Classification"/>
<label xml:lang="en" text="dummy classification"/>
<categories>
<category ID="dummy-text">
<label xml:lang="de" text="Text"/>
<label xml:lang="en" text="text"
description="Dummy category for a test."/>
<label xml:lang="x-mapping-xpathfb" text="mods:typeOfResource['text']" />
</category>
<category ID="dummy-fbonly">
<label xml:lang="de" text="Immer Fallback"/>
<label xml:lang="en" text="always fallback"
description="Dummy category for a test."/>
<label xml:lang="x-mapping-xpath" text="mods:notInSpec" />
<label xml:lang="x-mapping-xpathfb" text="mods:titleInfo" />
</category>
</categories>
</mycoreclass>
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,23 @@
description="Articles in peer-reviewed publications that disseminate the results of original research and scholarship."/>
<label xml:lang="x-mapping-xpath" text="mods:genre[substring-after(@valueURI,'#')='article'] and mods:relatedItem[@type='host']/mods:genre[substring-after(@valueURI,'#')='journal']" />
</category>
<category ID="newspaper-article">
<label xml:lang="de" text="Zeitungsartikel"/>
<label xml:lang="en" text="Newspaper article"
description="Articles in a daily, weekly or monthly publication reporting on news and social issues aimed at the public."/>
<label xml:lang="x-mapping-xpath" text="mods:genre[substring-after(@valueURI,'#')='article'] and mods:relatedItem[@type='host']/mods:genre[substring-after(@valueURI,'#')='newspaper']" />
</category>
<category ID="online-resource">
<label xml:lang="de" text="Online Ressource"/>
<label xml:lang="en" text="Online resource"
description="Information accessible only on the web via traditional technical methods"/>
<label xml:lang="x-mapping-xpath" text="mods:genre[substring-after(@valueURI,'#')='article'] and mods:relatedItem[@type='host']/mods:genre[substring-after(@valueURI,'#')='blog']" />
<label xml:lang="x-mapping-xpathfb" text="mods:genre[substring-after(@valueURI,'#')='article']" />
</category>
<category ID="no-xpath-mapping">
<label xml:lang="de" text="Test Ressource"/>
<label xml:lang="en" text="test resource"
description="category only for test purposes"/>
</category>
</categories>
</mycoreclass>
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3" xmlns:xlink="http://www.w3.org/1999/xlink">
<mods:typeOfResource>text</mods:typeOfResource>
<mods:genre type="intern" authorityURI="http://www.mycore.org/classifications/mir_genres" valueURI="http://www.mycore.org/classifications/mir_genres#article" />
<mods:titleInfo xml:lang="de" xlink:type="simple">
<mods:title>Test-Titel</mods:title>
</mods:titleInfo>
<mods:language>
<mods:languageTerm authority="rfc5646" type="code">de</mods:languageTerm>
</mods:language>
<mods:accessCondition type="use and reproduction" xlink:type="simple">cc_by-nc</mods:accessCondition>
</mods:mods>
2 changes: 1 addition & 1 deletion mycore-mods/src/test/resources/mycore.properties
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,4 @@ MCR.MODS.EnrichmentResolver.DataSource.GBV2.StopOnFirstResult=false
MCR.MODS.EnrichmentResolver.DataSource.GBV2.IdentifierTypes=isbn
MCR.MODS.EnrichmentResolver.DataSource.GBV2.isbn.URI=resource:%TestFolder%/gbv-{0}.xml

MCR.Category.XPathMapping.ClassIDs=orcidWorkType
MCR.Category.XPathMapping.ClassIDs=orcidWorkType,dummyClassification
Loading
Loading