Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
🚀
This server has been upgraded to GitLab release
15.7
.
🚀
Open sidebar
ub-unibas
Go OCFL
Commits
05ae1e2d
Commit
05ae1e2d
authored
Nov 23, 2022
by
Juergen Enge
Browse files
cli create, validate
parent
8a55aaeb
Changes
12
Hide whitespace changes
Inline
Side-by-side
ocflmain/cmd/create.go
0 → 100644
View file @
05ae1e2d
package
cmd
import
(
"context"
"fmt"
lm
"github.com/je4/utils/v2/pkg/logger"
"github.com/spf13/cobra"
"github.com/thediveo/enumflag"
"go.ub.unibas.ch/gocfl/v2/pkg/ocfl"
"os"
"path/filepath"
"strings"
)
var
createCmd
=
&
cobra
.
Command
{
Use
:
"create [path to ocfl structure]"
,
Aliases
:
[]
string
{
"check"
},
Short
:
"creates an empty ocfl structure"
,
Long
:
"creates an ocfl object "
,
Example
:
"gocfl create ./archive.zip /tmp/testdata"
,
Args
:
cobra
.
ExactArgs
(
1
),
Run
:
create
,
}
func
initCreate
()
{
createCmd
.
PersistentFlags
()
.
StringVarP
(
&
flagExtensionFolder
,
"extensions"
,
"e"
,
""
,
"folder with extension configurations"
)
createCmd
.
PersistentFlags
()
.
VarP
(
enumflag
.
New
(
&
flagVersion
,
"ocfl-version"
,
VersionIds
,
enumflag
.
EnumCaseInsensitive
),
"ocfl-version"
,
"v"
,
"ocfl version for new storage root"
)
}
func
create
(
cmd
*
cobra
.
Command
,
args
[]
string
)
{
ocflPath
:=
filepath
.
ToSlash
(
filepath
.
Clean
(
args
[
0
]))
fmt
.
Printf
(
"creating '%s'
\n
"
,
ocflPath
)
logger
,
lf
:=
lm
.
CreateLogger
(
"ocfl"
,
persistentFlagLogfile
,
nil
,
LogLevelIds
[
persistentFlagLoglevel
][
0
],
LOGFORMAT
)
defer
lf
.
Close
()
logger
.
Infof
(
"creating '%s'"
,
ocflPath
)
finfo
,
err
:=
os
.
Stat
(
ocflPath
)
if
err
!=
nil
{
if
!
(
os
.
IsNotExist
(
err
)
&&
strings
.
HasSuffix
(
strings
.
ToLower
(
ocflPath
),
".zip"
))
{
logger
.
Errorf
(
"cannot stat '%s': %v"
,
ocflPath
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
}
else
{
if
strings
.
HasSuffix
(
strings
.
ToLower
(
ocflPath
),
".zip"
)
{
logger
.
Errorf
(
"path '%s' already exists"
,
ocflPath
)
fmt
.
Printf
(
"path '%s' already exists
\n
"
,
ocflPath
)
return
}
if
!
finfo
.
IsDir
()
{
logger
.
Errorf
(
"'%s' is not a directory"
,
ocflPath
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
}
extensionFactory
,
err
:=
ocfl
.
NewExtensionFactory
(
logger
)
if
err
!=
nil
{
logger
.
Errorf
(
"cannot instantiate extension factory: %v"
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
if
err
:=
initExtensionFactory
(
extensionFactory
);
err
!=
nil
{
logger
.
Errorf
(
"cannot initialize extension factory: %v"
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
storageRootExtensions
,
_
,
err
:=
initDefaultExtensions
(
extensionFactory
,
flagExtensionFolder
,
logger
)
if
err
!=
nil
{
logger
.
Errorf
(
"cannot initialize default extensions: %v"
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
ocfs
,
err
:=
OpenRW
(
ocflPath
,
logger
)
if
err
!=
nil
{
logger
.
Errorf
(
"cannot create target filesystem: %v"
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
ctx
:=
ocfl
.
NewContextValidation
(
context
.
TODO
())
defer
showStatus
(
ctx
)
if
_
,
err
=
ocfl
.
CreateStorageRoot
(
ctx
,
ocfs
,
VersionIdsVersion
[
flagVersion
],
extensionFactory
,
storageRootExtensions
,
""
,
logger
);
err
!=
nil
{
ocfs
.
Discard
()
logger
.
Errorf
(
"cannot create new storageroot: %v"
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
if
err
:=
ocfs
.
Close
();
err
!=
nil
{
logger
.
Errorf
(
"error closing filesystem '%s': %v"
,
ocfs
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
}
}
ocflmain/cmd/helper.go
View file @
05ae1e2d
...
...
@@ -14,6 +14,7 @@ import (
"io/fs"
"log"
"os"
"path/filepath"
"strings"
)
...
...
@@ -121,6 +122,47 @@ func OpenRO(ocflPath string, logger *logging.Logger) (ocfl.OCFLFS, error) {
return
ocfs
,
nil
}
func
OpenRW
(
ocflPath
string
,
logger
*
logging
.
Logger
)
(
ocfl
.
OCFLFS
,
error
)
{
var
ocfs
ocfl
.
OCFLFS
var
err
error
var
zipSize
int64
var
zipReader
*
os
.
File
var
zipWriter
*
os
.
File
ocflPath
=
filepath
.
ToSlash
(
filepath
.
Clean
(
ocflPath
))
if
strings
.
HasSuffix
(
strings
.
ToLower
(
ocflPath
),
".zip"
)
{
stat
,
err
:=
os
.
Stat
(
ocflPath
)
if
err
!=
nil
{
if
!
os
.
IsNotExist
(
err
)
{
log
.
Print
(
errors
.
Wrapf
(
err
,
"%s does not exist. creating new file"
,
ocflPath
))
}
}
else
{
zipSize
=
stat
.
Size
()
if
zipReader
,
err
=
os
.
Open
(
ocflPath
);
err
!=
nil
{
return
nil
,
errors
.
Wrapf
(
err
,
"cannot open zipfile %s"
,
ocflPath
)
}
}
tempFile
:=
fmt
.
Sprintf
(
"%s.tmp"
,
ocflPath
)
if
zipWriter
,
err
=
os
.
Create
(
tempFile
);
err
!=
nil
{
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
panic
(
err
)
}
ocfs
,
err
=
zipfs
.
NewFSIO
(
zipReader
,
zipSize
,
zipWriter
,
"."
,
logger
)
if
err
!=
nil
{
return
nil
,
errors
.
Wrapf
(
err
,
"cannot create zipfs"
)
}
}
else
{
ocfs
,
err
=
osfs
.
NewFSIO
(
ocflPath
,
logger
)
if
err
!=
nil
{
return
nil
,
errors
.
Wrapf
(
err
,
"cannot create osfs"
)
}
}
return
ocfs
,
nil
}
func
showStatus
(
ctx
context
.
Context
)
error
{
status
,
err
:=
ocfl
.
GetValidationStatus
(
ctx
)
if
err
!=
nil
{
...
...
@@ -128,7 +170,11 @@ func showStatus(ctx context.Context) error {
}
status
.
Compact
()
context
:=
""
errs
:=
0
for
_
,
err
:=
range
status
.
Errors
{
if
err
.
Code
[
0
]
==
'E'
{
errs
++
}
if
err
.
Context
!=
context
{
fmt
.
Printf
(
"
\n
[%s]
\n
"
,
err
.
Context
)
context
=
err
.
Context
...
...
@@ -136,6 +182,11 @@ func showStatus(ctx context.Context) error {
fmt
.
Printf
(
" #%s - %s [%s]
\n
"
,
err
.
Code
,
err
.
Description
,
err
.
Description2
)
//logger.Infof("ERROR: %v", err)
}
if
errs
>
0
{
fmt
.
Printf
(
"
\n
%d errors found
\n
"
,
errs
)
}
else
{
fmt
.
Printf
(
"
\n
no errors found
\n
"
)
}
/*
for _, err := range status.Warnings {
if err.Context != context {
...
...
ocflmain/cmd/root.go
View file @
05ae1e2d
...
...
@@ -4,21 +4,24 @@ import (
"fmt"
"github.com/spf13/cobra"
"github.com/thediveo/enumflag"
"go.ub.unibas.ch/gocfl/v2/pkg/ocfl"
"os"
)
type
LogLevel
enumflag
.
Flag
const
LOGFORMAT
=
`%{time:2006-01-02T15:04:05.000} %{shortpkg}::%{longfunc} [%{shortfile}] > %{level:.5s} - %{message}`
type
LogLevelFlag
enumflag
.
Flag
const
(
LOGLEVEL
DEBUG
=
iota
LOGLEVEL
ERROR
=
iota
LOGLEVELINFO
LOGLEVELNOTICE
LOGLEVELWARNING
LOGLEVEL
ERROR
LOGLEVEL
DEBUG
LOGLEVELCRITICAL
)
var
LogLevelIds
=
map
[
LogLevel
][]
string
{
var
LogLevelIds
=
map
[
LogLevel
Flag
][]
string
{
LOGLEVELDEBUG
:
{
"DEBUG"
},
LOGLEVELINFO
:
{
"INFO"
},
LOGLEVELNOTICE
:
{
"NOTICE"
},
...
...
@@ -27,9 +30,53 @@ var LogLevelIds = map[LogLevel][]string{
LOGLEVELCRITICAL
:
{
"CRITICAL"
},
}
var
logfile
string
var
loglevel
LogLevel
var
extensionFolder
string
// VersionFlag is the enumflag type of the OCFL version selection; the
// package-level flagVersion variable has this type.
type VersionFlag enumflag.Flag

// OCFL versions that can be selected. VERSION1_1 is iota 0 and is therefore
// the value of a VersionFlag that was never set.
const (
	VERSION1_1 = iota
	VERSION1_0
)

// VersionIds lists, per version, the spellings accepted on the command line.
// It is handed to enumflag.New when the "ocfl-version" flag is registered.
var VersionIds = map[VersionFlag][]string{
	VERSION1_1: {"1.1", "v1.1"},
	VERSION1_0: {"1.0", "v1.0"},
}

// VersionIdsVersion translates a VersionFlag into the ocfl.OCFLVersion value
// passed to the ocfl package.
var VersionIdsVersion = map[VersionFlag]ocfl.OCFLVersion{
	VERSION1_1: ocfl.Version1_1,
	VERSION1_0: ocfl.Version1_0,
}
// DigestFlag is the enumflag type of the digest selection; the package-level
// flagDigest variable has this type.
type DigestFlag enumflag.Flag

// Selectable digests. DIGESTSHA512 is iota 0 and is therefore the value of a
// DigestFlag that was never set.
const (
	DIGESTSHA512 = iota
	DIGESTSHA256
	DIGESTMD5
	DIGESTSHA1
	DIGESTBlake2b160
	DIGESTBlake2b256
	DIGESTBlake2b384
	DIGESTBlake2b512
)

// DigestIds lists, per digest, the name associated with it.
// NOTE(review): presumably handed to enumflag.New like VersionIds — no flag
// registration using it is visible here; confirm.
var DigestIds = map[DigestFlag][]string{
	DIGESTSHA512:     {"sha512"},
	DIGESTSHA256:     {"sha256"},
	DIGESTMD5:        {"md5"},
	DIGESTSHA1:       {"sha1"},
	DIGESTBlake2b160: {"blake2b160"},
	DIGESTBlake2b256: {"blake2b256"},
	DIGESTBlake2b384: {"blake2b384"},
	DIGESTBlake2b512: {"blake2b512"},
}
// all possible flags of all modules go here
var
persistentFlagLogfile
string
var
persistentFlagLoglevel
LogLevelFlag
var
flagDigest
DigestFlag
var
flagExtensionFolder
string
var
flagVersion
VersionFlag
var
rootCmd
=
&
cobra
.
Command
{
Use
:
"gocfl"
,
...
...
@@ -42,14 +89,17 @@ var rootCmd = &cobra.Command{
}
func
init
()
{
rootCmd
.
PersistentFlags
()
.
StringVar
(
&
logfile
,
"log-file"
,
""
,
"log output file (default is console)"
)
rootCmd
.
PersistentFlags
()
.
StringVar
(
&
persistentFlagLogfile
,
"log-file"
,
""
,
"log output file (default is console)"
)
rootCmd
.
PersistentFlags
()
.
Var
(
enumflag
.
New
(
&
l
oglevel
,
"log-level"
,
LogLevelIds
,
enumflag
.
EnumCaseInsensitive
),
enumflag
.
New
(
&
persistentFlagL
oglevel
,
"log-level"
,
LogLevelIds
,
enumflag
.
EnumCaseInsensitive
),
"log-level"
,
"log level (CRITICAL|ERROR|WARNING|NOTICE|INFO|DEBUG)"
)
rootCmd
.
PersistentFlags
()
.
StringVar
(
&
e
xtensionFolder
,
"extensions"
,
""
,
"folder with default extension configurations"
)
rootCmd
.
PersistentFlags
()
.
StringVar
(
&
flagE
xtensionFolder
,
"extensions"
,
""
,
"folder with default extension configurations"
)
initValidate
()
rootCmd
.
AddCommand
(
validateCmd
)
initCreate
()
rootCmd
.
AddCommand
(
createCmd
)
}
func
Execute
()
{
...
...
ocflmain/cmd/validate.go
View file @
05ae1e2d
...
...
@@ -2,13 +2,13 @@ package cmd
import
(
"context"
"fmt"
lm
"github.com/je4/utils/v2/pkg/logger"
"github.com/spf13/cobra"
"go.ub.unibas.ch/gocfl/v2/pkg/ocfl"
"path/filepath"
)
const
LOGFORMAT
=
`%{time:2006-01-02T15:04:05.000} %{shortpkg}::%{longfunc} [%{shortfile}] > %{level:.5s} - %{message}`
var
validateCmd
=
&
cobra
.
Command
{
Use
:
"validate [path to ocfl structure]"
,
Aliases
:
[]
string
{
"check"
},
...
...
@@ -16,28 +16,36 @@ var validateCmd = &cobra.Command{
//Long: "an utterly useless command for testing",
Example
:
"gocfl validate ./archive.zip"
,
Args
:
cobra
.
ExactArgs
(
1
),
Run
:
func
(
cmd
*
cobra
.
Command
,
args
[]
string
)
{
validate
(
cmd
,
args
)
},
Run
:
validate
,
}
func
initValidate
()
{
validateCmd
.
Flags
()
.
StringVarP
(
&
objectPath
,
"object-path"
,
"o"
,
""
,
"validate only the selected object in storage root"
)
}
var
objectPath
string
func
validate
(
cmd
*
cobra
.
Command
,
args
[]
string
)
{
ocflPath
:=
args
[
0
]
ocflPath
:=
filepath
.
ToSlash
(
filepath
.
Clean
(
args
[
0
]
))
logger
,
lf
:=
lm
.
CreateLogger
(
"ocfl"
,
logfile
,
nil
,
LogLevelIds
[
loglevel
][
0
],
LOGFORMAT
)
fmt
.
Printf
(
"validating '%s'
\n
"
,
ocflPath
)
logger
,
lf
:=
lm
.
CreateLogger
(
"ocfl"
,
persistentFlagLogfile
,
nil
,
LogLevelIds
[
persistentFlagLoglevel
][
0
],
LOGFORMAT
)
defer
lf
.
Close
()
extensionFactory
,
err
:=
ocfl
.
NewExtensionFactory
(
logger
)
if
err
!=
nil
{
logger
.
Errorf
(
"cannot instantiate extension factory: %v"
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
if
err
:=
initExtensionFactory
(
extensionFactory
);
err
!=
nil
{
logger
.
Errorf
(
"cannot initialize extension factory: %v"
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
/*
storageRootExtensions, objectExtensions, err := initDefaultExtensions(extensionFactory,
e
xtensionFolder, logger)
storageRootExtensions, objectExtensions, err := initDefaultExtensions(extensionFactory,
flagE
xtensionFolder, logger)
if err != nil {
logger.Errorf("cannot initialize default extensions: %v", err)
return
...
...
@@ -46,6 +54,7 @@ func validate(cmd *cobra.Command, args []string) {
ocfs
,
err
:=
OpenRO
(
ocflPath
,
logger
)
if
err
!=
nil
{
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
...
...
@@ -53,14 +62,22 @@ func validate(cmd *cobra.Command, args []string) {
ctx
:=
ocfl
.
NewContextValidation
(
context
.
TODO
())
defer
showStatus
(
ctx
)
storageRoot
,
err
:=
ocfl
.
LoadStorageRoot
(
ctx
,
ocfs
,
extensionFactory
,
logger
)
if
err
!=
nil
{
logger
.
Errorf
(
"cannot create new storageroot: %v"
,
err
)
logger
.
Errorf
(
"cannot load storageroot: %v"
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
if
err
:=
storageRoot
.
Check
();
err
!=
nil
{
logger
.
Errorf
(
"ocfl not valid: %v"
,
err
)
return
if
objectPath
==
""
{
if
err
:=
storageRoot
.
Check
();
err
!=
nil
{
logger
.
Errorf
(
"ocfl not valid: %v"
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
}
else
{
if
err
:=
storageRoot
.
CheckObject
(
objectPath
);
err
!=
nil
{
logger
.
Errorf
(
"ocfl object '%s' not valid: %v"
,
objectPath
,
err
)
logger
.
Errorf
(
"%v%+v"
,
err
,
ocfl
.
GetErrorStacktrace
(
err
))
return
}
}
}
pkg/extension/NNNN-direct-clean-path-layout.md
View file @
05ae1e2d
...
...
@@ -107,7 +107,8 @@ is a list of UTF characters mentioned below.
1.
Replace all non-UTF8 characters with
`replacementString`
2.
Split the string at path separator
`/`
3.
For each part do the following
1.
Replace any character from this list with its utf code in the form
`=uXXXX`
1.
Replace
`=`
with
`=u003D`
if it is followed by
`u`
and four hex digits
2.
Replace any character from this list with its utf code in the form
`=uXXXX`
where
`XXXX`
is the code:
`U+0000-U+001F`
`U+007F`
`U+0020`
`U+0085`
`U+00A0`
`U+1680`
`U+2000-U+200F`
`U+2028`
`U+2029`
`U+202F`
`U+205F`
`U+3000`
`\n`
`\t`
`*`
`?`
`:`
`[`
`]`
`"`
...
...
@@ -211,14 +212,16 @@ import (
[
...
]
)
var
flatDirectCleanRuleAll
=
regexp
.
MustCompile
(
"[
\u0000
-
\u001f\u007f\u0020\u0085\u00a0\u1680\u2000
-
\u200f\u2028\u2029\u202f\u205f\u3000\n\n\t
*?:
\\
[
\\
]
\"
<>|(){}&'!
\\
;#@]"
)
var
flatDirectCleanRuleWhitespace
=
regexp
.
MustCompile
(
"[
\u0009\u000a
-
\u000d\u0020\u0085\u00a0\u1680\u2000
-
\u200f\u2028\u2029\u202f\u205f\u3000
]"
)
var
flatDirectCleanRule_1_5
=
regexp
.
MustCompile
(
"[
\u0000
-
\u001F\u007F\n\r\t
*?:
\\
[
\\
]
\"
<>|(){}&'!
\\
;#@]"
)
var
flatDirectCleanRule_2_4_6
=
regexp
.
MustCompile
(
"^[
\\
-~
\u0009\u000a
-
\u000d\u0020\u0085\u00a0\u1680\u2000
-
\u200f\u2028\u2029\u202f\u205f\u3000
]*(.*?)[
\u0009\u000a
-
\u000d\u0020\u0085\u00a0\u1680\u2000
-
\u20a0\u2028\u2029\u202f\u205f\u3000
]*$"
)
var
flatDirectCleanRulePeriods
=
regexp
.
MustCompile
(
"^
\\
.+$"
)
var
flatDirectCleanErrFilenameTooLong
=
errors
.
New
(
"filename too long"
)
var
flatDirectCleanErrPathnameTooLong
=
errors
.
New
(
"pathname too long"
)
var
directCleanRuleAll
=
regexp
.
MustCompile
(
"[
\u0000
-
\u001f\u007f\u0020\u0085\u00a0\u1680\u2000
-
\u200f\u2028\u2029\u202f\u205f\u3000\n\t
*?:
\\
[
\\
]
\"
<>|(){}&'!
\\
;#@]"
)
var
directCleanRuleWhitespace
=
regexp
.
MustCompile
(
"[
\u0009\u000a
-
\u000d\u0020\u0085\u00a0\u1680\u2000
-
\u200f\u2028\u2029\u202f\u205f\u3000
]"
)
var
directCleanRuleEqual
=
regexp
.
MustCompile
(
"=(u[a-zA-Z0-9]{4})"
)
var
directCleanRule_1_5
=
regexp
.
MustCompile
(
"[
\u0000
-
\u001F\u007F\n\r\t
*?:
\\
[
\\
]
\"
<>|(){}&'!
\\
;#@]"
)
var
directCleanRule_2_4_6
=
regexp
.
MustCompile
(
"^[
\\
-~
\u0009\u000a
-
\u000d\u0020\u0085\u00a0\u1680\u2000
-
\u200f\u2028\u2029\u202f\u205f\u3000
]*(.*?)[
\u0009\u000a
-
\u000d\u0020\u0085\u00a0\u1680\u2000
-
\u20a0\u2028\u2029\u202f\u205f\u3000
]*$"
)
var
directCleanRulePeriods
=
regexp
.
MustCompile
(
"^
\\
.+$"
)
var
directCleanErrFilenameTooLong
=
errors
.
New
(
"filename too long"
)
var
directCleanErrPathnameTooLong
=
errors
.
New
(
"pathname too long"
)
[
...
]
...
...
@@ -227,7 +230,6 @@ func encodeUTFCode(s string) string {
}
func
(
sl
*
DirectClean
)
ExecutePath
(
fname
string
)
(
string
,
error
)
{
fname
=
strings
.
ToValidUTF8
(
fname
,
sl
.
ReplacementString
)
names
:=
strings
.
Split
(
fname
,
"/"
)
...
...
@@ -238,22 +240,23 @@ func (sl *DirectClean) ExecutePath(fname string) (string, error) {
continue
}
if
sl
.
UTFEncode
{
n
=
flatDirectCleanRuleAll
.
ReplaceAllStringFunc
(
n
,
encodeUTFCode
)
if
n
[
0
]
==
'~'
||
flatDirectCleanRulePeriods
.
MatchString
(
n
)
{
n
=
directCleanRuleEqual
.
ReplaceAllString
(
n
,
"=u003D$1"
)
n
=
directCleanRuleAll
.
ReplaceAllStringFunc
(
n
,
encodeUTFCode
)
if
n
[
0
]
==
'~'
||
directCleanRulePeriods
.
MatchString
(
n
)
{
n
=
encodeUTFCode
(
string
(
n
[
0
]))
+
n
[
1
:
]
}
}
else
{
n
=
flatD
irectCleanRuleWhitespace
.
ReplaceAllString
(
n
,
sl
.
WhitespaceReplacementString
)
n
=
flatD
irectCleanRule_1_5
.
ReplaceAllString
(
n
,
sl
.
ReplacementString
)
n
=
flatD
irectCleanRule_2_4_6
.
ReplaceAllString
(
n
,
"$1"
)
if
flatD
irectCleanRulePeriods
.
MatchString
(
n
)
{
n
=
d
irectCleanRuleWhitespace
.
ReplaceAllString
(
n
,
sl
.
WhitespaceReplacementString
)
n
=
d
irectCleanRule_1_5
.
ReplaceAllString
(
n
,
sl
.
ReplacementString
)
n
=
d
irectCleanRule_2_4_6
.
ReplaceAllString
(
n
,
"$1"
)
if
d
irectCleanRulePeriods
.
MatchString
(
n
)
{
n
=
sl
.
ReplacementString
+
n
[
1
:
]
}
}
lenN
:=
len
(
n
)
if
lenN
>
sl
.
MaxFilenameLen
{
return
""
,
errors
.
Wrapf
(
flatD
irectCleanErrFilenameTooLong
,
"filename: %s"
,
n
)
return
""
,
errors
.
Wrapf
(
d
irectCleanErrFilenameTooLong
,
"filename: %s"
,
n
)
}
if
lenN
>
0
{
...
...
@@ -264,7 +267,7 @@ func (sl *DirectClean) ExecutePath(fname string) (string, error) {
fname
=
strings
.
Join
(
result
,
"/"
)
if
len
(
fname
)
>
sl
.
MaxPathnameLen
{
return
""
,
errors
.
Wrapf
(
flatD
irectCleanErrPathnameTooLong
,
"pathname: %s"
,
fname
)
return
""
,
errors
.
Wrapf
(
d
irectCleanErrPathnameTooLong
,
"pathname: %s"
,
fname
)
}
return
fname
,
nil
...
...
pkg/extension/gocfldirectclean.go
View file @
05ae1e2d
...
...
@@ -15,6 +15,7 @@ const DirectCleanDescription = "Maps OCFL object identifiers to storage paths or
var
directCleanRuleAll
=
regexp
.
MustCompile
(
"[
\u0000
-
\u001f\u007f\u0020\u0085\u00a0\u1680\u2000
-
\u200f\u2028\u2029\u202f\u205f\u3000\n\t
*?:
\\
[
\\
]
\"
<>|(){}&'!
\\
;#@]"
)
var
directCleanRuleWhitespace
=
regexp
.
MustCompile
(
"[
\u0009\u000a
-
\u000d\u0020\u0085\u00a0\u1680\u2000
-
\u200f\u2028\u2029\u202f\u205f\u3000
]"
)
var
directCleanRuleEqual
=
regexp
.
MustCompile
(
"=(u[a-zA-Z0-9]{4})"
)
var
directCleanRule_1_5
=
regexp
.
MustCompile
(
"[
\u0000
-
\u001F\u007F\n\r\t
*?:
\\
[
\\
]
\"
<>|(){}&'!
\\
;#@]"
)
var
directCleanRule_2_4_6
=
regexp
.
MustCompile
(
"^[
\\
-~
\u0009\u000a
-
\u000d\u0020\u0085\u00a0\u1680\u2000
-
\u200f\u2028\u2029\u202f\u205f\u3000
]*(.*?)[
\u0009\u000a
-
\u000d\u0020\u0085\u00a0\u1680\u2000
-
\u20a0\u2028\u2029\u202f\u205f\u3000
]*$"
)
var
directCleanRulePeriods
=
regexp
.
MustCompile
(
"^
\\
.+$"
)
...
...
@@ -129,6 +130,7 @@ func (sl *DirectClean) build(fname string) (string, error) {
continue
}
if
sl
.
UTFEncode
{
n
=
directCleanRuleEqual
.
ReplaceAllString
(
n
,
"=u003D$1"
)
n
=
directCleanRuleAll
.
ReplaceAllStringFunc
(
n
,
encodeUTFCode
)
if
n
[
0
]
==
'~'
||
directCleanRulePeriods
.
MatchString
(
n
)
{
n
=
encodeUTFCode
(
string
(
n
[
0
]))
+
n
[
1
:
]
...
...
pkg/extension/gocfldirectclean_test.go
View file @
05ae1e2d
...
...
@@ -2,22 +2,24 @@ package extension
import
(
"fmt"
"go.ub.unibas.ch/gocfl/v2/pkg/
extension/storageroot
"
"go.ub.unibas.ch/gocfl/v2/pkg/
ocfl
"
"testing"
)
func
TestFlatCleanDirectoryWithoutUTFEncode
(
t
*
testing
.
T
)
{
l
,
err
:=
NewStorageLayoutDirectClean
(
&
DirectCleanConfig
{
Config
:
&
storageroot
.
Config
{
ExtensionName
:
DirectCleanName
},
MaxPathnameLen
:
32000
,
MaxFilenameLen
:
127
,
WhitespaceReplacementString
:
" "
,
ReplacementString
:
"_"
,
UTFEncode
:
false
,
})
l
:=
DirectClean
{
&
DirectCleanConfig
{
ExtensionConfig
:
&
ocfl
.
ExtensionConfig
{
ExtensionName
:
DirectCleanName
},
MaxPathnameLen
:
32000
,
MaxFilenameLen
:
127
,
WhitespaceReplacementString
:
" "
,
ReplacementString
:
"_"
,
UTFEncode
:
false
,
},
}
objectID
:=
"object-01"
testResult
:=
"object-01"
rootPath
,
err
:=
l
.
ExecuteID
(
objectID
)
rootPath
,
err
:=
l
.
BuildStorageRootPath
(
nil
,
objectID
)
if
err
!=
nil
{
t
.
Errorf
(
"cannot convert %s"
,
objectID
)
}
...
...
@@ -28,7 +30,7 @@ func TestFlatCleanDirectoryWithoutUTFEncode(t *testing.T) {
objectID
=
"..hor_rib:lé-$id"
testResult
=
"..hor_rib_lé-$id"
rootPath
,
err
=
l
.
ExecuteID
(
objectID
)
rootPath
,
err
=
l
.
BuildStorageRootPath
(
nil
,
objectID
)
if
err
!=
nil
{
t
.
Errorf
(
"cannot convert %s - %v"
,
objectID
,
err
)
}
else
{
...
...
@@ -43,7 +45,7 @@ func TestFlatCleanDirectoryWithoutUTFEncode(t *testing.T) {
// Example 2
objectID
=
"info:fedora/object-01"
testResult
=
"info_fedora/object-01"
rootPath
,
err
=
l
.
ExecuteID
(
objectID
)
rootPath
,
err
=
l
.
BuildStorageRootPath
(
nil
,
objectID
)
if
err
!=
nil
{
t
.
Errorf
(
"cannot convert %s - %v"
,
objectID
,
err
)
}
else
{
...
...
@@ -56,7 +58,7 @@ func TestFlatCleanDirectoryWithoutUTFEncode(t *testing.T) {
objectID
=
"~ info:fedora/-obj#ec@t-
\"
01 "
testResult
=
"info_fedora/obj_ec_t-_01"
rootPath
,
err
=
l
.
ExecuteID
(
objectID
)
rootPath
,
err
=
l
.
BuildStorageRootPath
(
nil
,
objectID
)
if
err
!=
nil
{
t
.
Errorf
(
"cannot convert %s - %v"
,
objectID
,
err
)
}
else
{
...
...
@@ -69,7 +71,7 @@ func TestFlatCleanDirectoryWithoutUTFEncode(t *testing.T) {
objectID
=
"/test/ ~/.../blah"
testResult
=
"test/_../blah"
rootPath
,
err
=
l
.
ExecuteID
(
objectID
)
rootPath
,
err
=
l
.
BuildStorageRootPath
(
nil
,
objectID
)
if
err
!=
nil
{
t
.
Errorf
(
"cannot convert %s - %v"
,
objectID
,
err
)
}
else
{
...
...
@@ -82,7 +84,7 @@ func TestFlatCleanDirectoryWithoutUTFEncode(t *testing.T) {
objectID
=
"https://hdl.handle.net/XXXXX/test/bl ah"
testResult
=
"https_/hdl.handle.net/XXXXX/test/bl ah"
rootPath
,
err
=
l
.
ExecuteID
(
objectID
)
rootPath
,
err
=
l
.
BuildStorageRootPath
(
nil
,
objectID
)
if
err
!=
nil
{
t
.
Errorf
(
"cannot convert %s - %v"
,
objectID
,
err
)
}
else
{
...
...
@@ -95,7 +97,7 @@ func TestFlatCleanDirectoryWithoutUTFEncode(t *testing.T) {
objectID
=
"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"
testResult
=
"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"
rootPath
,
err
=
l
.
ExecuteID
(
objectID
)
rootPath
,
err
=
l
.
BuildStorageRootPath
(
nil
,
objectID
)
if
err
!=
nil
{
fmt
.
Printf
(
"DirectClean(%s) -> %v
\n
"
,
objectID
,
err
)
}
else
{
...
...
@@ -105,17 +107,30 @@ func TestFlatCleanDirectoryWithoutUTFEncode(t *testing.T) {
}
func
TestFlatCleanDirectoryWithUTFEncode
(
t
*
testing
.
T
)
{
l
,
err
:=
NewStorageLayoutDirectClean
(
&
DirectCleanConfig
{
Config
:
&
storageroot
.
Config
{
ExtensionName
:
DirectCleanName
},
MaxPathnameLen
:
32000
,
MaxFilenameLen
:
127
,
WhitespaceReplacementString
:
" "
,
ReplacementString
:
"_"
,
UTFEncode
:
true
,
})
l
:=
DirectClean
{
&
DirectCleanConfig
{
ExtensionConfig
:
&
ocfl
.
ExtensionConfig
{
ExtensionName
:
DirectCleanName
},
MaxPathnameLen
:
32000
,
MaxFilenameLen
:
127
,
WhitespaceReplacementString
:
" "
,
ReplacementString
:
"_"
,
UTFEncode
:
true
,
},
}
objectID
:=
"object-01"
testResult
:=
"object-01"
rootPath
,
err
:=
l
.
ExecuteID
(
objectID
)
rootPath
,
err
:=
l
.
BuildStorageRootPath
(
nil
,
objectID
)
if
err
!=
nil
{
t
.
Errorf
(
"cannot convert %s"
,
objectID
)
}
if
rootPath
!=
testResult
{
t
.
Errorf
(
"%s -> %s != %s"
,
objectID
,
rootPath
,
testResult
)
}
fmt
.
Printf
(
"DirectClean(%s) -> %s
\n
"
,
objectID
,
rootPath
)
objectID
=
"object=u123a-01"
testResult
=
"object=u003Du123a-01"
rootPath
,
err
=
l
.
BuildStorageRootPath
(
nil
,
objectID
)
if
err
!=
nil
{
t
.
Errorf
(
"cannot convert %s"
,
objectID
)
}
...
...
@@ -126,7 +141,7 @@ func TestFlatCleanDirectoryWithUTFEncode(t *testing.T) {