Commit 8da5ec53 authored by Jürgen Enge

use of indexer

parent d7c7e457
......@@ -67,6 +67,7 @@ type Config struct {
Siegfried string
Ffmpeg string
Ffprobe string
Indexer string
Crawler Crawler
Metadata Meta
Banner Banner
......
......@@ -20,7 +20,7 @@ import (
)
/*
clear database: update `test2` set status="new", metadata=null,errormessage=null,mimetype=null,lastcheck=null,lastchange=null
clear database: update test.`entities` set status="new", errormessage=null,mimetype=null,lastcheck=null,lastchange=null WHERE sig <> "xxx"
*/
type cronLogger struct {
......@@ -122,6 +122,7 @@ func main() {
config.Banner.Folder,
config.Ffmpeg,
config.Ffprobe,
config.Indexer,
fm,
log)
......
......@@ -8,6 +8,7 @@ jwtkey = "swordfish"
jwtalg = ["HS256", "HS384", "HS512"] # "hs256" "hs384" "hs512" "es256" "es384" "es512" "ps256" "ps384" "ps512"
tempdir = "C:/temp/"
siegfried = "http://localhost:5138/identify/[[PATH]]?format=json"
indexer = "http://localhost:81"
ffmpeg = "/usr/local/bin/ffmpeg2"
ffprobe = "/usr/local/bin/ffprobe2"
......
......@@ -13,7 +13,6 @@ package memocrawler
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"github.com/goph/emperror"
"github.com/op/go-logging"
......@@ -42,6 +41,7 @@ type Crawler struct {
bannerFolder string
ffmpeg string
ffprobe string
indexer string
bannerfolder string
bannertimeout time.Duration
mapping *memostream.FileMapper
......@@ -70,7 +70,7 @@ func NewCrawler(
bannerPageSize int,
bannerTimestamp string,
bannerFolder string,
ffmpeg, ffprobe string,
ffmpeg, ffprobe, indexer string,
mapping *memostream.FileMapper,
log *logging.Logger) *Crawler {
cr := &Crawler{
......@@ -80,6 +80,7 @@ func NewCrawler(
schema: schema,
tempDir: tempDir,
siegfried: Siegfried{surl: siegfried},
indexer: indexer,
crawlOK: crawlOK,
crawlError: crawlError,
crawlErrorNew: crawlErrorNew,
......@@ -143,51 +144,6 @@ func (cr *Crawler) getEntries(sqlstr string, args ...interface{}) ([]*memostream
return entries, nil
}
// checkList runs the link check for every entry in entries and writes the
// outcome back to the entities table of the crawler's schema.
//
// For each entry the row identified by its signature gets lastcheck=NOW(), the
// new status and the error message reported by the link check. On success the
// JSON-encoded Metadata (and the mimetype, when one was detected) is stored as
// well. lastchange is only touched when the stored status differs from the new
// one.
//
// Processing stops at the first failing link check call, marshal or SQL
// statement and that error is returned wrapped; nil is returned once every
// entry has been handled.
func (cr *Crawler) checkList(entries []*memostream.MediaEntry) error {
	for _, item := range entries {
		matches, mime, failure, err := cr.cl.linkCheck(item, true)
		if err != nil {
			return emperror.Wrapf(err, "error checking entry %s", item.Signature)
		}

		var (
			args   []interface{}
			status string
			query  = fmt.Sprintf("UPDATE %s.entities SET lastcheck=NOW()", cr.schema)
		)

		switch {
		case failure != "":
			// The check ran but reported a problem: only status bookkeeping.
			status = memostream.MediaStatusNum[memostream.Media_Error]
			if item.Status != memostream.Media_Error {
				query += ", lastchange=NOW()"
			}
		default:
			status = memostream.MediaStatusNum[memostream.Media_OK]
			meta := &Metadata{SFMatches: matches}
			encoded, err := json.Marshal(meta)
			if err != nil {
				return emperror.Wrapf(err, "cannot marshal %v", meta)
			}
			query += ", metadata=?"
			args = append(args, string(encoded))
			if mime != "" {
				query += ", mimetype=?"
				args = append(args, mime)
			}
			if item.Status != memostream.Media_OK {
				query += ", lastchange=NOW()"
			}
		}

		// Placeholders for status, message and key always come last.
		query += ", status=?, errormessage=? WHERE sig=?"
		args = append(args, status, failure, item.Signature)

		_, execErr := cr.db.Exec(query, args...)
		if execErr != nil {
			return emperror.Wrapf(execErr, "error executing sql %s [%v]", query, args)
		}
	}
	return nil
}
func (cr *Crawler) MetaNew() error {
cr.log.Infof("start crawling metadata for new entities")
......
......@@ -11,7 +11,9 @@
package memocrawler
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"github.com/goph/emperror"
......@@ -21,6 +23,7 @@ import (
"mime"
"net/http"
"os"
"path/filepath"
"strings"
"time"
)
......@@ -104,9 +107,51 @@ func (cl *CrawlerLinkcheck) getContentHeader(entry *memostream.MediaEntry) (buf
/*
load 500 byte from an url and send it to siegfried
*/
func (cl *CrawlerLinkcheck) linkCheck(entry *memostream.MediaEntry, siegfried bool) ([]SFMatches, string, string, error) {
func (cl *CrawlerLinkcheck) linkCheck(entry *memostream.MediaEntry, siegfried bool) (
result map[string]interface{},
mimetype string,
width int64,
height int64,
duration int64,
err error) {
cl.crawler.log.Infof("checking %s", entry.Signature)
url := entry.URI.String()
if entry.URI.Scheme == "file" {
filename, err := cl.crawler.mapping.Get(entry.URI)
if err != nil {
return nil, "", 0, 0, 0, emperror.Wrapf(err, "cannot map uri to filename - %v", entry.URI.String())
}
url = fmt.Sprintf("file:///%s", filepath.ToSlash(filename))
}
jsonstr, err := json.Marshal(map[string]string{"url": url})
if err != nil {
return nil, "", 0, 0, 0, emperror.Wrapf(err, "cannot marshal json")
}
resp, err := http.Post(cl.crawler.indexer, "application/json", bytes.NewBuffer(jsonstr))
if err != nil {
return nil, "", 0, 0,0 , emperror.Wrapf(err, "error calling call indexer")
}
defer resp.Body.Close()
bodyBytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
return nil, "", 0, 0,0 , emperror.Wrapf(err, "error reading indexer result")
}
if err := json.Unmarshal(bodyBytes, &result); err != nil {
return nil, "", 0, 0, 0, emperror.Wrapf(err, "cannot unmarshal result")
}
mimetype, _ = result["mimetype"].(string)
_width, _ := result["width"].(float64)
width = int64(_width)
_height, _ := result["height"].(float64)
height = int64(_height)
_duration, _ := result["duration"].(float64)
duration = int64(_duration)
return
/*
// ************************************
// * get the first bytes of data
// ************************************
......@@ -157,4 +202,6 @@ func (cl *CrawlerLinkcheck) linkCheck(entry *memostream.MediaEntry, siegfried bo
}
return sfMatches, mimetype, "", nil
*/
}
......@@ -64,10 +64,51 @@ func (w Worker) Do(job *Job) error {
}
// linkcheck runs the crawler's link check for entry, stores the reported
// metadata in the metadata table and records the outcome on the entity row.
//
// On success the entity identified by entry.Signature is set to status "ok"
// with an empty error message. If the link check, the JSON encoding or the
// metadata write fails, the entity is set to status "error" with the error
// text and that error is returned. If recording the status itself fails, the
// resulting SQL error is returned instead.
func (w Worker) linkcheck(entry *memostream.MediaEntry) error {
	metadata, mimetype, width, height, duration, err := w.cr.cl.linkCheck(entry, true)
	if err != nil {
		return w.failEntity(entry, emperror.Wrapf(err, "error checking entry %s", entry.Signature))
	}

	metajson, err := json.Marshal(metadata)
	if err != nil {
		return w.failEntity(entry, emperror.Wrapf(err, "cannot marshal metadata %v", metadata))
	}

	sqlstr := fmt.Sprintf("REPLACE INTO %s.metadata(sig, mimetype, width, height, duration, metadata, modificationtime) "+
		" VALUES( ?, ?, ?, ?, ?, ?, NOW())",
		w.cr.schema)
	// The JSON is passed as a string so that it stays readable when params is
	// formatted into an error message.
	params := []interface{}{entry.Signature, mimetype, width, height, duration, string(metajson)}
	if _, err := w.cr.db.Exec(sqlstr, params...); err != nil {
		return w.failEntity(entry, emperror.Wrapf(err, "error executing sql %s [%v]", sqlstr, params))
	}

	return w.setEntityStatus(entry, "ok", "")
}

// failEntity records cause as the error state of entry and returns cause, or
// the SQL error if the state could not be written.
func (w Worker) failEntity(entry *memostream.MediaEntry, cause error) error {
	if err := w.setEntityStatus(entry, "error", cause.Error()); err != nil {
		return err
	}
	return cause
}

// setEntityStatus updates lastcheck, status and errormessage of the single
// entity row whose sig equals entry.Signature.
func (w Worker) setEntityStatus(entry *memostream.MediaEntry, status, message string) error {
	// The WHERE clause is essential: without it the statement would overwrite
	// the status of every entity in the table.
	sqlstr := fmt.Sprintf("UPDATE %s.entities SET lastcheck=NOW(), status=?, errormessage=? WHERE sig=?", w.cr.schema)
	params := []interface{}{status, message, entry.Signature}
	if _, err := w.cr.db.Exec(sqlstr, params...); err != nil {
		return emperror.Wrapf(err, "error executing sql %s [%v]", sqlstr, params)
	}
	return nil
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment