Commit 9df4ddb2 authored by Jürgen Enge's avatar Jürgen Enge

banner added

parent e0b2dd42
......@@ -8,6 +8,7 @@ import (
_ "github.com/go-sql-driver/mysql"
"github.com/op/go-logging"
"github.com/robfig/cron"
"gitlab.switch.ch/memoriav/memobase-2020/streaming-server/memostream"
"gitlab.switch.ch/memoriav/memobase-2020/url-checker/memocrawler"
"log"
"os"
......@@ -54,13 +55,13 @@ func main() {
var exPath = ""
// if configfile not found try path of executable as prefix
if !memocrawler.FileExists(*configFile) {
if !memostream.FileExists(*configFile) {
ex, err := os.Executable()
if err != nil {
panic(err)
}
exPath = filepath.Dir(ex)
if memocrawler.FileExists(filepath.Join(exPath, *configFile)) {
if memostream.FileExists(filepath.Join(exPath, *configFile)) {
*configFile = filepath.Join(exPath, *configFile)
} else {
log.Fatalf("cannot find configuration file: %v", *configFile)
......@@ -72,7 +73,7 @@ func main() {
config = LoadConfig(*configFile)
// create logger instance
log, lf := memocrawler.CreateLogger("memostream", config.Logfile, config.Loglevel)
log, lf := memostream.CreateLogger("memostream", config.Logfile, config.Loglevel)
defer lf.Close()
db, err := sql.Open("mysql", config.DB.Dsn)
......@@ -99,7 +100,7 @@ func main() {
for _, val := range config.FileMap {
mapping[strings.ToLower(val.Alias)] = val.Folder
}
fm := memocrawler.NewFileMapper(mapping)
fm := memostream.NewFileMapper(mapping)
cr := memocrawler.NewCrawler(
db,
......@@ -116,6 +117,8 @@ func main() {
fm,
log)
cr.CrawlAll()
return
c := cron.New(cron.WithLogger(cronLogger{log:log}))
c.AddFunc(config.Cron, func() {
......
......@@ -12,15 +12,15 @@ siegfried = "http://localhost:5138/identify/[[PATH]]?format=json"
crawlok = "600h" # check files every 600 hours
crawlerror = "168h" # if there's an error, check all 168 hours minimum
crawlerrornew = "22h" # new errors should be checked the next day
ffmpeg = "/usr/local/bin/ffmpeg"
ffprobe = "/usr/local/bin/ffprobe"
ffmpeg = "/usr/local/bin/ffmpeg2"
ffprobe = "/usr/local/bin/ffprobe2"
bannerfolder = "c:/temp/banner"
cron = "10 * * * *" # cron format (https://pkg.go.dev/github.com/robfig/cron?tab=doc)
cron = "42 * * * *" # cron format (https://pkg.go.dev/github.com/robfig/cron?tab=doc)
[[filemap]]
alias = "main"
folder = "c:/temp"
alias = "c"
folder = "c:/"
[[filemap]]
alias = "blah"
......
......@@ -9,6 +9,8 @@ import (
"github.com/goph/emperror"
"github.com/op/go-logging"
"gitlab.switch.ch/memoriav/memobase-2020/streaming-server/memostream"
"os/exec"
"io"
"io/ioutil"
"mime"
......@@ -33,7 +35,7 @@ type Crawler struct {
ffmpeg string
ffprobe string
bannerfolder string
mapping *FileMapper
mapping *memostream.FileMapper
}
func NewCrawler(
......@@ -43,7 +45,7 @@ func NewCrawler(
crawlOK, crawlError, crawlErrorNew time.Duration,
ffmpeg, ffprobe string,
bannerfolder string,
mapping *FileMapper,
mapping *memostream.FileMapper,
log *logging.Logger) *Crawler {
cr := &Crawler{
db: db,
......@@ -141,7 +143,7 @@ func (cr *Crawler) getContentHeader(entry *memostream.MediaEntry) (buf []byte, m
break
}
} else if entry.Protocol == memostream.Media_File {
path, err := cr.mapping.get(entry.URI)
path, err := cr.mapping.Get(entry.URI)
if err != nil {
return nil, "", emperror.Wrapf(err, "cannot map uri %s of signature %s", entry.URI.String(), entry.Signature)
}
......@@ -162,33 +164,55 @@ func (cr *Crawler) getContentHeader(entry *memostream.MediaEntry) (buf []byte, m
func (cr *Crawler) getBanner(entry *memostream.MediaEntry) (string, error) {
var prg string
var params []string
var outputfilename string
var inputfilename string
var err error
if entry.Protocol == memostream.Media_File {
inputfilename, err = cr.mapping.Get(entry.URI)
if runtime.GOOS == "windows" {
inputfilename = strings.Replace(filepath.ToSlash(inputfilename), "c:", "/mnt/c", -1)
}
} else {
inputfilename = entry.URI.String()
}
outputfilename = filepath.Join(cr.bannerfolder, fmt.Sprintf("%s.png", entry.Signature))
// todo: this code is unusable crap
if err != nil {
return "", emperror.Wrapf(err, "cannot get path for signature %s", entry.Signature)
}
outputfilename := filepath.Join(cr.bannerfolder, fmt.Sprintf("%s.png", entry.Signature))
// todo: bad hack for windows wsl...
if runtime.GOOS == "windows" {
prg = "wsl.exe"
params = append(params, cr.ffmpeg)
inputfilename = filepath.Join(prg)
prg = inputfilename
outputfilename = strings.Replace(filepath.ToSlash(outputfilename), "c:", "/mnt/c", -1)
} else {
prg = cr.ffmpeg
}
params = append(params,
"-ss", "00:00:12",
"-i", entry.URI.String(),
"-i", inputfilename,
"-vframes", "1",
"-q:v", "2",
outputfilename)
return "", nil
cmd := exec.Command(prg, params...)
stderr, err := cmd.StderrPipe()
if err != nil {
return "", emperror.Wrapf(err, "cannot get stderr output pipe")
}
if err := cmd.Run(); err != nil {
slurp, _ := ioutil.ReadAll(stderr)
return "", emperror.Wrapf(err, "cannot execute %s %s: %s", prg, strings.Join(params, " "), slurp)
}
return fmt.Sprintf("%s.png", entry.Signature), nil
}
/*
load 500 bytes from a URL and send them to siegfried
*/
func (cr *Crawler) check(entry *memostream.MediaEntry, siegfried bool) ([]SFMatches, string, string, error) {
func (cr *Crawler) check(entry *memostream.MediaEntry, siegfried bool, banner bool) ([]SFMatches, string, string, string, error) {
cr.log.Infof("checking %s", entry.Signature)
// ************************************
......@@ -196,7 +220,7 @@ func (cr *Crawler) check(entry *memostream.MediaEntry, siegfried bool) ([]SFMatc
// ************************************
buf, mimetype, err := cr.getContentHeader(entry)
if err != nil {
return nil, "", emperror.Wrapf(err, "cannot read content header").Error(), nil
return nil, "", "", emperror.Wrapf(err, "cannot read content header").Error(), nil
}
// if there's no mimetype in response header try to detect
......@@ -210,15 +234,15 @@ func (cr *Crawler) check(entry *memostream.MediaEntry, siegfried bool) ([]SFMatc
// write buf to temp file
tmpfile, err := ioutil.TempFile(cr.tempDir, "siegfried")
if err != nil {
return nil, "", "", emperror.Wrapf(err, "cannot create tempfile")
return nil, "", "", "", emperror.Wrapf(err, "cannot create tempfile")
}
defer os.Remove(tmpfile.Name()) // clean up
if _, err := tmpfile.Write(buf); err != nil {
return nil, "", "", emperror.Wrapf(err, "cannot write to tempfile %s", tmpfile.Name())
return nil, "", "", "", emperror.Wrapf(err, "cannot write to tempfile %s", tmpfile.Name())
}
if err := tmpfile.Close(); err != nil {
return nil, "", "", emperror.Wrapf(err, "cannot close tempfile %s", tmpfile.Name())
return nil, "", "", "", emperror.Wrapf(err, "cannot close tempfile %s", tmpfile.Name())
}
// ************************************
......@@ -228,7 +252,7 @@ func (cr *Crawler) check(entry *memostream.MediaEntry, siegfried bool) ([]SFMatc
if siegfried {
sfMatches, err = cr.siegfried.Get(tmpfile.Name())
if err != nil {
return nil, "", "", emperror.Wrapf(err, "cannot call siegfried for file %s", tmpfile.Name())
return nil, "", "", "", emperror.Wrapf(err, "cannot call siegfried for file %s", tmpfile.Name())
}
mrel := MimeRelevance(mimetype)
// set the mimetype if it's a better one...
......@@ -239,12 +263,21 @@ func (cr *Crawler) check(entry *memostream.MediaEntry, siegfried bool) ([]SFMatc
}
}
}
return sfMatches, mimetype, "", nil
var bannerfile string
if banner {
bannerfile, err = cr.getBanner(entry)
if err != nil {
bannerfile = ""
cr.log.Errorf("cannot get banner: %v", err.Error())
}
}
return sfMatches, mimetype, bannerfile, "", nil
}
func (cr *Crawler) checkList(entries []*memostream.MediaEntry) error {
for _, entry := range entries {
sfMatches, mimetype, errMsg, err := cr.check(entry, true)
sfMatches, mimetype, bannerfile, errMsg, err := cr.check(entry, true, true)
if err != nil {
return emperror.Wrapf(err, "error checking entry %s", entry.Signature)
}
......@@ -275,7 +308,14 @@ func (cr *Crawler) checkList(entries []*memostream.MediaEntry) error {
if entry.Status != memostream.Media_OK {
sqlstr += ", lastchange=NOW()"
}
if bannerfile != "" {
if mimetype != "" {
sqlstr += ", banner=?"
params = append(params, bannerfile)
}
}
}
sqlstr += ", status=?, errormessage=? WHERE sig=?"
params = append(params, statusStr, errMsg, entry.Signature)
if _, err := cr.db.Exec(sqlstr, params...); err != nil {
......
package memocrawler
import (
"errors"
"fmt"
"net/url"
"os"
"path/filepath"
"runtime"
"strings"
)
// FileMapper translates file:// URIs into local filesystem paths. The host
// part of the URI is treated as an alias that is looked up (lower-cased) in
// the mapping table to obtain a base folder.
type FileMapper struct {
	// mapping holds alias -> folder; lookups lower-case the URI host first.
	mapping map[string]string
}

// NewFileMapper returns a FileMapper that resolves aliases through the given
// table. The map is stored as-is (not copied).
func NewFileMapper(mapping map[string]string) *FileMapper {
	mapper := new(FileMapper)
	mapper.mapping = mapping
	return mapper
}

// get resolves uri to a cleaned local path.
//
// Only the "file" scheme is accepted. When the URI carries a host, the host
// must be a known alias and its folder becomes the base directory; without a
// host the URI path is used on its own. On Windows a leading path separator
// is stripped from the result.
func (fm *FileMapper) get(uri *url.URL) (string, error) {
	if scheme := uri.Scheme; scheme != "file" {
		return "", errors.New(fmt.Sprintf("cannot handle scheme %s: need file scheme", scheme))
	}

	// Base folder stays empty when the URI has no host component.
	base := ""
	if host := uri.Host; host != "" {
		folder, known := fm.mapping[strings.ToLower(host)]
		if !known {
			return "", errors.New(fmt.Sprintf("no mapping for %s", host))
		}
		base = folder
	}

	resolved := filepath.Clean(filepath.Join(base, uri.Path))
	if runtime.GOOS == "windows" {
		resolved = strings.TrimPrefix(resolved, string(os.PathSeparator))
	}
	return resolved, nil
}
......@@ -2,7 +2,6 @@ package memocrawler
import (
"github.com/op/go-logging"
"os"
"strings"
)
......@@ -10,37 +9,6 @@ var _logformat = logging.MustStringFormatter(
`%{time:2006-01-02T15:04:05.000} %{module}::%{shortfunc} [%{shortfile}] > %{level:.5s} - %{message}`,
)
// FileExists reports whether filename refers to an existing entry that is not
// a directory.
//
// Any error from os.Stat (not only "does not exist", but also e.g. permission
// denied, a path component that is not a directory, or an invalid name) is
// treated as "file does not exist". Checking only os.IsNotExist would leave
// info nil for those other errors and panic on info.IsDir().
func FileExists(filename string) bool {
	info, err := os.Stat(filename)
	if err != nil {
		return false
	}
	return !info.IsDir()
}
// CreateLogger returns a logger for module and installs a global go-logging
// backend using _logformat.
//
// logfile: when non-empty, the file is opened for appending (created with
// mode 0644 if missing) and used as log target; when empty, os.Stderr is used.
// If the file cannot be opened the problem is logged and os.Stderr is used
// instead, so the backend is never built on a nil file and log output is not
// silently lost.
//
// loglevel: textual level parsed with logging.LogLevel. An unparsable value
// is logged and DEBUG is used.
//
// The returned lf is the file the backend writes to (possibly os.Stderr).
func CreateLogger(module string, logfile string, loglevel string) (log *logging.Logger, lf *os.File) {
	log = logging.MustGetLogger(module)

	// Default target; replaced only when the logfile opens successfully.
	lf = os.Stderr
	var openErr error
	if logfile != "" {
		f, err := os.OpenFile(logfile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
		if err != nil {
			openErr = err
		} else {
			lf = f
		}
	}

	// logging.GetLevel looks up the level of a *module*; parsing a level name
	// is done by logging.LogLevel.
	level, levelErr := logging.LogLevel(loglevel)
	if levelErr != nil {
		level = logging.DEBUG
	}

	backend := logging.NewLogBackend(lf, "", 0)
	backendLeveled := logging.AddModuleLevel(backend)
	backendLeveled.SetLevel(level, "")
	logging.SetFormatter(_logformat)
	logging.SetBackend(backendLeveled)

	// Report setup problems only now, so they go through the configured backend.
	if openErr != nil {
		log.Errorf("Cannot open logfile %v: %v", logfile, openErr)
	}
	if levelErr != nil {
		log.Warningf("invalid loglevel %q, using DEBUG: %v", loglevel, levelErr)
	}
	return log, lf
}
/*
heuristic function that assigns a relevance to some mimetypes
*/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment