Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
memoriav
M
Memobase 2020
services
URL Checker
Commits
9df4ddb2
Commit
9df4ddb2
authored
Mar 31, 2020
by
Jürgen Enge
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
banner added
parent
e0b2dd42
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
70 additions
and
98 deletions
+70
-98
main/main.go
main/main.go
+7
-4
memocrawler.toml
memocrawler.toml
+5
-5
memocrawler/crawler.go
memocrawler/crawler.go
+58
-18
memocrawler/fileMapper.go
memocrawler/fileMapper.go
+0
-39
memocrawler/helper.go
memocrawler/helper.go
+0
-32
No files found.
main/main.go
View file @
9df4ddb2
...
...
@@ -8,6 +8,7 @@ import (
_
"github.com/go-sql-driver/mysql"
"github.com/op/go-logging"
"github.com/robfig/cron"
"gitlab.switch.ch/memoriav/memobase-2020/streaming-server/memostream"
"gitlab.switch.ch/memoriav/memobase-2020/url-checker/memocrawler"
"log"
"os"
...
...
@@ -54,13 +55,13 @@ func main() {
var
exPath
=
""
// if configfile not found try path of executable as prefix
if
!
memo
crawler
.
FileExists
(
*
configFile
)
{
if
!
memo
stream
.
FileExists
(
*
configFile
)
{
ex
,
err
:=
os
.
Executable
()
if
err
!=
nil
{
panic
(
err
)
}
exPath
=
filepath
.
Dir
(
ex
)
if
memo
crawler
.
FileExists
(
filepath
.
Join
(
exPath
,
*
configFile
))
{
if
memo
stream
.
FileExists
(
filepath
.
Join
(
exPath
,
*
configFile
))
{
*
configFile
=
filepath
.
Join
(
exPath
,
*
configFile
)
}
else
{
log
.
Fatalf
(
"cannot find configuration file: %v"
,
*
configFile
)
...
...
@@ -72,7 +73,7 @@ func main() {
config
=
LoadConfig
(
*
configFile
)
// create logger instance
log
,
lf
:=
memo
crawler
.
CreateLogger
(
"memostream"
,
config
.
Logfile
,
config
.
Loglevel
)
log
,
lf
:=
memo
stream
.
CreateLogger
(
"memostream"
,
config
.
Logfile
,
config
.
Loglevel
)
defer
lf
.
Close
()
db
,
err
:=
sql
.
Open
(
"mysql"
,
config
.
DB
.
Dsn
)
...
...
@@ -99,7 +100,7 @@ func main() {
for
_
,
val
:=
range
config
.
FileMap
{
mapping
[
strings
.
ToLower
(
val
.
Alias
)]
=
val
.
Folder
}
fm
:=
memo
crawler
.
NewFileMapper
(
mapping
)
fm
:=
memo
stream
.
NewFileMapper
(
mapping
)
cr
:=
memocrawler
.
NewCrawler
(
db
,
...
...
@@ -116,6 +117,8 @@ func main() {
fm
,
log
)
cr
.
CrawlAll
()
return
c
:=
cron
.
New
(
cron
.
WithLogger
(
cronLogger
{
log
:
log
}))
c
.
AddFunc
(
config
.
Cron
,
func
()
{
...
...
memocrawler.toml
View file @
9df4ddb2
...
...
@@ -12,15 +12,15 @@ siegfried = "http://localhost:5138/identify/[[PATH]]?format=json"
crawlok
=
"600h"
# check files every 600 hours
crawlerror
=
"168h"
# if there's an error, check all 168 hours minimum
crawlerrornew
=
"22h"
# new errors should be checked the next day
ffmpeg
=
"/usr/local/bin/ffmpeg"
ffprobe
=
"/usr/local/bin/ffprobe"
ffmpeg
=
"/usr/local/bin/ffmpeg
2
"
ffprobe
=
"/usr/local/bin/ffprobe
2
"
bannerfolder
=
"c:/temp/banner"
cron
=
"
10
* * * *"
# cron format (https://pkg.go.dev/github.com/robfig/cron?tab=doc)
cron
=
"
42
* * * *"
# cron format (https://pkg.go.dev/github.com/robfig/cron?tab=doc)
[[filemap]]
alias
=
"
main
"
folder
=
"c:/
temp
"
alias
=
"
c
"
folder
=
"c:/"
[[filemap]]
alias
=
"blah"
...
...
memocrawler/crawler.go
View file @
9df4ddb2
...
...
@@ -9,6 +9,8 @@ import (
"github.com/goph/emperror"
"github.com/op/go-logging"
"gitlab.switch.ch/memoriav/memobase-2020/streaming-server/memostream"
"os/exec"
"io"
"io/ioutil"
"mime"
...
...
@@ -33,7 +35,7 @@ type Crawler struct {
ffmpeg
string
ffprobe
string
bannerfolder
string
mapping
*
FileMapper
mapping
*
memostream
.
FileMapper
}
func
NewCrawler
(
...
...
@@ -43,7 +45,7 @@ func NewCrawler(
crawlOK
,
crawlError
,
crawlErrorNew
time
.
Duration
,
ffmpeg
,
ffprobe
string
,
bannerfolder
string
,
mapping
*
FileMapper
,
mapping
*
memostream
.
FileMapper
,
log
*
logging
.
Logger
)
*
Crawler
{
cr
:=
&
Crawler
{
db
:
db
,
...
...
@@ -141,7 +143,7 @@ func (cr *Crawler) getContentHeader(entry *memostream.MediaEntry) (buf []byte, m
break
}
}
else
if
entry
.
Protocol
==
memostream
.
Media_File
{
path
,
err
:=
cr
.
mapping
.
g
et
(
entry
.
URI
)
path
,
err
:=
cr
.
mapping
.
G
et
(
entry
.
URI
)
if
err
!=
nil
{
return
nil
,
""
,
emperror
.
Wrapf
(
err
,
"cannot map uri %s of signature %s"
,
entry
.
URI
.
String
(),
entry
.
Signature
)
}
...
...
@@ -162,33 +164,55 @@ func (cr *Crawler) getContentHeader(entry *memostream.MediaEntry) (buf []byte, m
func
(
cr
*
Crawler
)
getBanner
(
entry
*
memostream
.
MediaEntry
)
(
string
,
error
)
{
var
prg
string
var
params
[]
string
var
outputfilename
string
var
inputfilename
string
var
err
error
if
entry
.
Protocol
==
memostream
.
Media_File
{
inputfilename
,
err
=
cr
.
mapping
.
Get
(
entry
.
URI
)
if
runtime
.
GOOS
==
"windows"
{
inputfilename
=
strings
.
Replace
(
filepath
.
ToSlash
(
inputfilename
),
"c:"
,
"/mnt/c"
,
-
1
)
}
}
else
{
inputfilename
=
entry
.
URI
.
String
()
}
outputfilename
=
filepath
.
Join
(
cr
.
bannerfolder
,
fmt
.
Sprintf
(
"%s.png"
,
entry
.
Signature
))
// todo: this code is unusable crap
if
err
!=
nil
{
return
""
,
emperror
.
Wrapf
(
err
,
"cannot get path for signature %s"
,
entry
.
Signature
)
}
outputfilename
:=
filepath
.
Join
(
cr
.
bannerfolder
,
fmt
.
Sprintf
(
"%s.png"
,
entry
.
Signature
))
// todo: bad hack for windows wsl...
if
runtime
.
GOOS
==
"windows"
{
prg
=
"wsl.exe"
params
=
append
(
params
,
cr
.
ffmpeg
)
inputfilename
=
filepath
.
Join
(
prg
)
prg
=
inputfilename
outputfilename
=
strings
.
Replace
(
filepath
.
ToSlash
(
outputfilename
),
"c:"
,
"/mnt/c"
,
-
1
)
}
else
{
prg
=
cr
.
ffmpeg
}
params
=
append
(
params
,
"-ss"
,
"00:00:12"
,
"-i"
,
entry
.
URI
.
String
()
,
"-i"
,
inputfilename
,
"-vframes"
,
"1"
,
"-q:v"
,
"2"
,
outputfilename
)
return
""
,
nil
cmd
:=
exec
.
Command
(
prg
,
params
...
)
stderr
,
err
:=
cmd
.
StderrPipe
()
if
err
!=
nil
{
return
""
,
emperror
.
Wrapf
(
err
,
"cannot get stderr output pipe"
)
}
if
err
:=
cmd
.
Run
();
err
!=
nil
{
slurp
,
_
:=
ioutil
.
ReadAll
(
stderr
)
return
""
,
emperror
.
Wrapf
(
err
,
"cannot execute %s %s: %s"
,
prg
,
strings
.
Join
(
params
,
" "
),
slurp
)
}
return
fmt
.
Sprintf
(
"%s.png"
,
entry
.
Signature
),
nil
}
/*
load 500 bytes from a URL and send them to siegfried
*/
func
(
cr
*
Crawler
)
check
(
entry
*
memostream
.
MediaEntry
,
siegfried
bool
)
([]
SFMatches
,
string
,
string
,
error
)
{
func
(
cr
*
Crawler
)
check
(
entry
*
memostream
.
MediaEntry
,
siegfried
bool
,
banner
bool
)
([]
SFMatches
,
string
,
string
,
string
,
error
)
{
cr
.
log
.
Infof
(
"checking %s"
,
entry
.
Signature
)
// ************************************
...
...
@@ -196,7 +220,7 @@ func (cr *Crawler) check(entry *memostream.MediaEntry, siegfried bool) ([]SFMatc
// ************************************
buf
,
mimetype
,
err
:=
cr
.
getContentHeader
(
entry
)
if
err
!=
nil
{
return
nil
,
""
,
emperror
.
Wrapf
(
err
,
"cannot read content header"
)
.
Error
(),
nil
return
nil
,
""
,
""
,
emperror
.
Wrapf
(
err
,
"cannot read content header"
)
.
Error
(),
nil
}
// if there's no mimetype in response header try to detect
...
...
@@ -210,15 +234,15 @@ func (cr *Crawler) check(entry *memostream.MediaEntry, siegfried bool) ([]SFMatc
// write buf to temp file
tmpfile
,
err
:=
ioutil
.
TempFile
(
cr
.
tempDir
,
"siegfried"
)
if
err
!=
nil
{
return
nil
,
""
,
""
,
emperror
.
Wrapf
(
err
,
"cannot create tempfile"
)
return
nil
,
""
,
""
,
""
,
emperror
.
Wrapf
(
err
,
"cannot create tempfile"
)
}
defer
os
.
Remove
(
tmpfile
.
Name
())
// clean up
if
_
,
err
:=
tmpfile
.
Write
(
buf
);
err
!=
nil
{
return
nil
,
""
,
""
,
emperror
.
Wrapf
(
err
,
"cannot write to tempfile %s"
,
tmpfile
.
Name
())
return
nil
,
""
,
""
,
""
,
emperror
.
Wrapf
(
err
,
"cannot write to tempfile %s"
,
tmpfile
.
Name
())
}
if
err
:=
tmpfile
.
Close
();
err
!=
nil
{
return
nil
,
""
,
""
,
emperror
.
Wrapf
(
err
,
"cannot close tempfile %s"
,
tmpfile
.
Name
())
return
nil
,
""
,
""
,
""
,
emperror
.
Wrapf
(
err
,
"cannot close tempfile %s"
,
tmpfile
.
Name
())
}
// ************************************
...
...
@@ -228,7 +252,7 @@ func (cr *Crawler) check(entry *memostream.MediaEntry, siegfried bool) ([]SFMatc
if
siegfried
{
sfMatches
,
err
=
cr
.
siegfried
.
Get
(
tmpfile
.
Name
())
if
err
!=
nil
{
return
nil
,
""
,
""
,
emperror
.
Wrapf
(
err
,
"cannot call siegfried for file %s"
,
tmpfile
.
Name
())
return
nil
,
""
,
""
,
""
,
emperror
.
Wrapf
(
err
,
"cannot call siegfried for file %s"
,
tmpfile
.
Name
())
}
mrel
:=
MimeRelevance
(
mimetype
)
// set the mimetype if it's a better one...
...
...
@@ -239,12 +263,21 @@ func (cr *Crawler) check(entry *memostream.MediaEntry, siegfried bool) ([]SFMatc
}
}
}
return
sfMatches
,
mimetype
,
""
,
nil
var
bannerfile
string
if
banner
{
bannerfile
,
err
=
cr
.
getBanner
(
entry
)
if
err
!=
nil
{
bannerfile
=
""
cr
.
log
.
Errorf
(
"cannot get banner: %v"
,
err
.
Error
())
}
}
return
sfMatches
,
mimetype
,
bannerfile
,
""
,
nil
}
func
(
cr
*
Crawler
)
checkList
(
entries
[]
*
memostream
.
MediaEntry
)
error
{
for
_
,
entry
:=
range
entries
{
sfMatches
,
mimetype
,
errMsg
,
err
:=
cr
.
check
(
entry
,
true
)
sfMatches
,
mimetype
,
bannerfile
,
errMsg
,
err
:=
cr
.
check
(
entry
,
true
,
true
)
if
err
!=
nil
{
return
emperror
.
Wrapf
(
err
,
"error checking entry %s"
,
entry
.
Signature
)
}
...
...
@@ -275,7 +308,14 @@ func (cr *Crawler) checkList(entries []*memostream.MediaEntry) error {
if
entry
.
Status
!=
memostream
.
Media_OK
{
sqlstr
+=
", lastchange=NOW()"
}
if
bannerfile
!=
""
{
if
mimetype
!=
""
{
sqlstr
+=
", banner=?"
params
=
append
(
params
,
bannerfile
)
}
}
}
sqlstr
+=
", status=?, errormessage=? WHERE sig=?"
params
=
append
(
params
,
statusStr
,
errMsg
,
entry
.
Signature
)
if
_
,
err
:=
cr
.
db
.
Exec
(
sqlstr
,
params
...
);
err
!=
nil
{
...
...
memocrawler/fileMapper.go
deleted
100644 → 0
View file @
e0b2dd42
package
memocrawler
import
(
"errors"
"fmt"
"net/url"
"os"
"path/filepath"
"runtime"
"strings"
)
// FileMapper resolves file:// URIs to local filesystem paths. The host part
// of a URI is treated as an alias that selects a base folder; the URI path is
// appended to that folder.
type FileMapper struct {
	mapping map[string]string // lower-cased alias -> base folder
}

// NewFileMapper creates a FileMapper from an alias -> folder table.
//
// The table is copied with lower-cased keys, because get looks aliases up by
// the lower-cased URI host; a mixed-case key could otherwise never match.
// If an alias is present both in lower case and in another spelling, the
// lower-case entry wins.
func NewFileMapper(mapping map[string]string) *FileMapper {
	normalized := make(map[string]string, len(mapping))
	for alias, folder := range mapping {
		key := strings.ToLower(alias)
		if key != alias {
			// An exact lower-case key takes precedence over other spellings.
			if _, exists := mapping[key]; exists {
				continue
			}
		}
		normalized[key] = folder
	}
	return &FileMapper{mapping: normalized}
}

// get translates a file:// URI into a local path.
//
// A non-empty host is looked up (case-insensitively) in the alias table and
// replaced by the configured folder; an empty host leaves the URI path as the
// whole path. The result is cleaned with filepath.Clean. On Windows a leading
// path separator is stripped from the result.
//
// An error is returned for a nil URI, for any scheme other than "file", and
// for a host that has no mapping.
func (fm *FileMapper) get(uri *url.URL) (string, error) {
	if uri == nil {
		return "", errors.New("cannot map nil uri")
	}
	if uri.Scheme != "file" {
		return "", fmt.Errorf("cannot handle scheme %s: need file scheme", uri.Scheme)
	}

	var filename string
	if uri.Host != "" {
		folder, ok := fm.mapping[strings.ToLower(uri.Host)]
		if !ok {
			return "", fmt.Errorf("no mapping for %s", uri.Host)
		}
		filename = folder
	}

	filename = filepath.Clean(filepath.Join(filename, uri.Path))
	if runtime.GOOS == "windows" {
		// Drop the leading separator left over from the URI path.
		filename = strings.TrimPrefix(filename, string(os.PathSeparator))
	}
	return filename, nil
}
memocrawler/helper.go
View file @
9df4ddb2
...
...
@@ -2,7 +2,6 @@ package memocrawler
import
(
"github.com/op/go-logging"
"os"
"strings"
)
...
...
@@ -10,37 +9,6 @@ var _logformat = logging.MustStringFormatter(
`%{time:2006-01-02T15:04:05.000} %{module}::%{shortfunc} [%{shortfile}] > %{level:.5s} - %{message}`
,
)
// FileExists reports whether filename can be stat'ed and is not a directory.
//
// Any os.Stat failure yields false, not only "does not exist": for other
// errors (for example a path component that is not a directory, or an
// invalid name) os.Stat returns a nil FileInfo, and calling IsDir on it would
// panic.
func FileExists(filename string) bool {
	info, err := os.Stat(filename)
	if err != nil {
		return false
	}
	return !info.IsDir()
}
func
CreateLogger
(
module
string
,
logfile
string
,
loglevel
string
)
(
log
*
logging
.
Logger
,
lf
*
os
.
File
)
{
log
=
logging
.
MustGetLogger
(
module
)
var
err
error
if
logfile
!=
""
{
lf
,
err
=
os
.
OpenFile
(
logfile
,
os
.
O_APPEND
|
os
.
O_CREATE
|
os
.
O_WRONLY
,
0644
)
if
err
!=
nil
{
log
.
Errorf
(
"Cannot open logfile %v: %v"
,
logfile
,
err
)
}
//defer lf.CloseInternal()
}
else
{
lf
=
os
.
Stderr
}
backend
:=
logging
.
NewLogBackend
(
lf
,
""
,
0
)
backendLeveled
:=
logging
.
AddModuleLevel
(
backend
)
backendLeveled
.
SetLevel
(
logging
.
GetLevel
(
loglevel
),
""
)
logging
.
SetFormatter
(
_logformat
)
logging
.
SetBackend
(
backendLeveled
)
return
}
/*
heuristic function that assigns a relevance to some mimetypes
*/
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment