Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
memoriav
Memobase 2020
libraries
Normalizer Service Configuration
Commits
ae03c51c
Commit
ae03c51c
authored
Feb 21, 2022
by
Jonas Waeber
Browse files
Add single date fixes.
Add additional stuff Add trim for qualifier and certainty values.
parent
7bd25fc4
Pipeline
#43324
passed with stages
in 3 minutes and 13 seconds
Changes
2
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
src/main/kotlin/ch/memobase/transform/DateNormalizationTransform.kt
View file @
ae03c51c
...
...
@@ -21,7 +21,6 @@ import ch.memobase.rdf.RDF
import
ch.memobase.rdf.RICO
import
ch.memobase.rdf.RicoResource
import
org.apache.logging.log4j.LogManager
import
kotlin.math.sin
class
DateNormalizationTransform
(
private
val
singleDateMatchers
:
List
<
Regex
>,
...
...
@@ -48,10 +47,24 @@ class DateNormalizationTransform(
val
match
=
it
.
matchEntire
(
value
)
if
(
match
!=
null
)
{
// The presence of these fields is validated when compiling the regex.
val
day
=
match
.
groups
[
"day"
]
!!
.
value
val
month
=
match
.
groups
[
"month"
]
!!
.
value
val
year
=
match
.
groups
[
"year"
]
!!
.
value
getSingleDate
(
day
,
month
,
year
)
val
day
=
match
.
groups
[
"day"
]
?.
value
.
let
{
day
->
if
(
day
!=
null
&&
day
.
length
==
1
)
"0$day"
else
day
}
val
month
=
match
.
groups
[
"month"
]
?.
value
.
let
{
month
->
if
(
month
!=
null
)
Date
.
validateMonthValue
(
issues
,
month
)
else
null
}
val
year
=
match
.
groups
[
"year"
]
?.
value
if
(
day
!=
null
&&
month
!=
null
&&
year
!=
null
)
{
getSingleDate
(
day
,
month
,
year
)
}
else
{
issues
.
add
(
"Failed to parse matched single date: $value."
)
null
}
}
else
{
null
}
...
...
@@ -134,7 +147,16 @@ class DateNormalizationTransform(
}
catch
(
ex
:
IllegalArgumentException
)
{
null
}
getDateRange
(
fromDay
,
untilDay
,
fromMonth
,
untilMonth
,
fromYear
,
untilYear
,
singleMonth
,
singleYear
)
getDateRange
(
fromDay
,
untilDay
,
fromMonth
,
untilMonth
,
fromYear
,
untilYear
,
singleMonth
,
singleYear
)
}
else
{
null
}
...
...
@@ -170,7 +192,7 @@ class DateNormalizationTransform(
val
matchedGroup
=
matchResult
.
groups
[
group
]
?.
value
if
(
matchedGroup
!=
null
)
{
value
=
value
.
replace
(
matchResult
.
value
,
""
)
matchedGroup
matchedGroup
.
trim
()
}
else
{
null
}
...
...
@@ -196,11 +218,11 @@ class DateNormalizationTransform(
singleYear
!=
null
->
when
{
singleMonth
!=
null
->
when
{
dayFrom
!=
null
&&
dayUntil
!=
null
->
"$singleYear-$singleMonth-$dayFrom/$dayUntil"
// the case where only a single day is set is covered by the single date normalizer.
else
->
"$singleYear-$singleMonth"
}
when
{
dayFrom
!=
null
&&
dayUntil
!=
null
->
"$singleYear-$singleMonth-$dayFrom/$dayUntil"
// the case where only a single day is set is covered by the single date normalizer.
else
->
"$singleYear-$singleMonth"
}
monthFrom
!=
null
&&
monthUntil
!=
null
->
when
{
dayFrom
!=
null
&&
dayUntil
!=
null
->
"$singleYear-$monthFrom-$dayFrom/$monthUntil-$dayUntil"
...
...
src/test/resources/tmp/turtle-output-language-normalization.ttl
View file @
ae03c51c
...
...
@@ -22,27 +22,27 @@
@prefix
foaf:
<http://xmlns.com/foaf/0.1/>
.
@prefix
dc:
<http://purl.org/dc/elements/1.1/>
.
_:
b0
a
rico:
Language
;
schema:
sameAs
"http://www.wikidata.org/entity/Q188"
;
rico:
name
"tedesco"
@it
,
"allemand"
@fr
,
"Deutsch"
@de
;
rico:
resultsFrom
_:
b1
;
rico:
type
"caption"
.
_:
b2
a
rico:
Language
;
rico:
name
"Deutsch"
;
rico:
type
"caption"
.
_:
b1
a
rico:
Activity
;
rico:
affects
_:
b2
;
rico:
beginningDate
"2022-02-17T13:37:45Z"
^^
xsd:
dateTime
;
rico:
endDate
"2022-02-17T13:37:45Z"
^^
xsd:
dateTime
;
_:
b0
a
rico:
Activity
;
rico:
affects
_:
b1
;
rico:
beginningDate
"2022-02-21T15:39:10Z"
^^
xsd:
dateTime
;
rico:
endDate
"2022-02-21T15:39:10Z"
^^
xsd:
dateTime
;
rico:
performedBy
[
a
rico:
Mechanism
;
rico:
name
"LanguagesNormalizer"
;
rico:
performs
_:
b
1
rico:
performs
_:
b
0
]
;
rico:
resultsIn
_:
b
0
;
rico:
resultsIn
_:
b
2
;
rico:
type
"enrichment"
.
[
a
rico:
Record
;
rico:
hasLanguage
_:
b
0
,
_:
b
2
rico:
hasLanguage
_:
b
2
,
_:
b
1
]
.
_:
b1
a
rico:
Language
;
rico:
name
"Deutsch"
;
rico:
type
"caption"
.
_:
b2
a
rico:
Language
;
schema:
sameAs
"http://www.wikidata.org/entity/Q188"
;
rico:
name
"tedesco"
@it
,
"allemand"
@fr
,
"Deutsch"
@de
;
rico:
resultsFrom
_:
b0
;
rico:
type
"caption"
.
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment