Commit ae03c51c authored by Jonas Waeber
Browse files

Add single date fixes.

Add additional stuff
Add trim for qualifier and certainty values.
parent 7bd25fc4
Pipeline #43324 passed with stages
in 3 minutes and 13 seconds
......@@ -21,7 +21,6 @@ import ch.memobase.rdf.RDF
import ch.memobase.rdf.RICO
import ch.memobase.rdf.RicoResource
import org.apache.logging.log4j.LogManager
import kotlin.math.sin
class DateNormalizationTransform(
private val singleDateMatchers: List<Regex>,
......@@ -48,10 +47,24 @@ class DateNormalizationTransform(
val match = it.matchEntire(value)
if (match != null) {
// The presence of these fields is validated when compiling the regex.
val day = match.groups["day"]!!.value
val month = match.groups["month"]!!.value
val year = match.groups["year"]!!.value
val day = match.groups["day"]?.value.let { day ->
if (day != null && day.length == 1)
"0$day"
else day
}
val month = match.groups["month"]?.value.let { month ->
if (month != null)
Date.validateMonthValue(issues, month)
else
null
}
val year = match.groups["year"]?.value
if (day != null && month != null && year != null) {
getSingleDate(day, month, year)
} else {
issues.add("Failed to parse matched single date: $value.")
null
}
} else {
null
}
......@@ -134,7 +147,16 @@ class DateNormalizationTransform(
} catch (ex: IllegalArgumentException) {
null
}
getDateRange(fromDay, untilDay, fromMonth, untilMonth, fromYear, untilYear, singleMonth, singleYear)
getDateRange(
fromDay,
untilDay,
fromMonth,
untilMonth,
fromYear,
untilYear,
singleMonth,
singleYear
)
} else {
null
}
......@@ -170,7 +192,7 @@ class DateNormalizationTransform(
val matchedGroup = matchResult.groups[group]?.value
if (matchedGroup != null) {
value = value.replace(matchResult.value, "")
matchedGroup
matchedGroup.trim()
} else {
null
}
......
......@@ -22,27 +22,27 @@
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
_:b0 a rico:Language ;
schema:sameAs "http://www.wikidata.org/entity/Q188" ;
rico:name "tedesco"@it , "allemand"@fr , "Deutsch"@de ;
rico:resultsFrom _:b1 ;
rico:type "caption" .
_:b2 a rico:Language ;
rico:name "Deutsch" ;
rico:type "caption" .
_:b1 a rico:Activity ;
rico:affects _:b2 ;
rico:beginningDate "2022-02-17T13:37:45Z"^^xsd:dateTime ;
rico:endDate "2022-02-17T13:37:45Z"^^xsd:dateTime ;
_:b0 a rico:Activity ;
rico:affects _:b1 ;
rico:beginningDate "2022-02-21T15:39:10Z"^^xsd:dateTime ;
rico:endDate "2022-02-21T15:39:10Z"^^xsd:dateTime ;
rico:performedBy [ a rico:Mechanism ;
rico:name "LanguagesNormalizer" ;
rico:performs _:b1
rico:performs _:b0
] ;
rico:resultsIn _:b0 ;
rico:resultsIn _:b2 ;
rico:type "enrichment" .
[ a rico:Record ;
rico:hasLanguage _:b0 , _:b2
rico:hasLanguage _:b2 , _:b1
] .
_:b1 a rico:Language ;
rico:name "Deutsch" ;
rico:type "caption" .
_:b2 a rico:Language ;
schema:sameAs "http://www.wikidata.org/entity/Q188" ;
rico:name "tedesco"@it , "allemand"@fr , "Deutsch"@de ;
rico:resultsFrom _:b0 ;
rico:type "caption" .
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment