Commit a58d3e4e authored by Günter Hipler's avatar Günter Hipler
Browse files

initial commit to implement base memobase OAI functionality

parents
target
.classpath
.project
.settings/org.scala-ide.sdt.core.prefs
.settings/org.eclipse.core.resources.prefs
logs
/data
/GND*
.cache*
bin
*.jsonl
*log*.gz
*.tmp
gh/testdaten
.idea
node_modules
conf/development.conf
\ No newline at end of file
This diff is collapsed.
# memobase OAI Server implementation
import javax.inject.{Inject, Provider}
import play.api._
import play.api.http.DefaultHttpErrorHandler
import play.api.http.Status._
import play.api.libs.json.Json
import play.api.mvc.Results._
import play.api.mvc._
import play.api.routing.Router
import play.core.SourceMapper
import scala.concurrent._
/**
 * Error handler that never renders HTML error pages: client errors are answered
 * with plain results and logged at debug level, server errors with a bare (prod)
 * or JSON (dev) 500 response.
 *
 * https://www.playframework.com/documentation/latest/ScalaErrorHandling
 */
class ErrorHandler(environment: Environment,
                   configuration: Configuration,
                   sourceMapper: Option[SourceMapper] = None,
                   optionRouter: => Option[Router] = None)
    extends DefaultHttpErrorHandler(environment,
                                    configuration,
                                    sourceMapper,
                                    optionRouter) {

  private val logger =
    org.slf4j.LoggerFactory.getLogger("application.ErrorHandler")

  /**
   * Constructor used by Guice. It unwraps the injected source mapper and router
   * provider and delegates to the primary constructor.
   */
  @Inject
  def this(environment: Environment,
           configuration: Configuration,
           sourceMapper: OptionalSourceMapper,
           router: Provider[Router]) = {
    this(environment,
         configuration,
         sourceMapper.sourceMapper,
         Some(router.get))
  }

  /**
   * Maps a client error status code to a result.
   *
   * 400, 403 and 404 carry `message` as body; every other 4xx code is answered
   * with an empty result of that status.
   *
   * @throws IllegalArgumentException if `statusCode` is outside the 4xx range;
   *                                  the exception is thrown synchronously, not
   *                                  wrapped in the returned future
   */
  override def onClientError(request: RequestHeader,
                             statusCode: Int,
                             message: String): Future[Result] = {
    logger.debug(
      s"onClientError: statusCode = $statusCode, uri = ${request.uri}, message = $message")

    val response: Result =
      if (statusCode == BAD_REQUEST) {
        Results.BadRequest(message)
      } else if (statusCode == FORBIDDEN) {
        Results.Forbidden(message)
      } else if (statusCode == NOT_FOUND) {
        Results.NotFound(message)
      } else if (statusCode >= 400 && statusCode < 500) {
        Results.Status(statusCode)
      } else {
        throw new IllegalArgumentException(
          s"onClientError invoked with non client error status code $statusCode: $message")
      }

    Future.successful(response)
  }

  /** Development mode: expose the exception's string form in a JSON body. */
  override protected def onDevServerError(
      request: RequestHeader,
      exception: UsefulException): Future[Result] = {
    val body = Json.obj("exception" -> exception.toString)
    Future.successful(InternalServerError(body))
  }

  /** Production mode: answer with an empty 500 and reveal no details. */
  override protected def onProdServerError(
      request: RequestHeader,
      exception: UsefulException): Future[Result] =
    Future.successful(InternalServerError)
}
import javax.inject.Inject
import play.api.OptionalDevContext
import play.api.http._
import play.api.mvc._
import play.api.mvc.request.RequestTarget
import play.api.routing.Router
import play.core.WebCommands
/**
 * Request handler that normalises selected request paths before delegating to
 * Play's default handler.
 *
 * https://www.playframework.com/documentation/latest/ScalaHttpRequestHandlers#extending-the-default-request-handler
 */
class RequestHandler @Inject()(webCommands: WebCommands,
                               optDevContext: OptionalDevContext,
                               router: Router,
                               errorHandler: HttpErrorHandler,
                               configuration: HttpConfiguration,
                               filters: HttpFilters)
    extends DefaultHttpRequestHandler(webCommands,
                                      optDevContext,
                                      router,
                                      errorHandler,
                                      configuration,
                                      filters) {

  /**
   * Resolves the handler for `request`. Requests recognised by `isREST` get a
   * trailing "/" appended to their path first, so the REST API does not need one.
   */
  override def handlerForRequest(
      request: RequestHeader): (RequestHeader, Handler) = {
    val effectiveRequest =
      if (isREST(request)) addTrailingSlash(request) else request
    super.handlerForRequest(effectiveRequest)
  }

  /**
   * True when the request URI contains the substring "post".
   *
   * NOTE(review): the "post" marker looks like a leftover from a template
   * project - confirm it is still the intended criterion for this service.
   */
  private def isREST(request: RequestHeader): Boolean =
    Option(request.uri).exists(_.contains("post"))

  /**
   * Returns `origReq` unchanged if its path already ends with "/", otherwise a
   * copy whose target path has "/" appended.
   *
   * NOTE(review): when a query string is present the target's `uriString` stays
   * the original URI (without the added slash) - confirm this is intended.
   */
  private def addTrailingSlash(origReq: RequestHeader): RequestHeader =
    if (origReq.path.endsWith("/")) {
      origReq
    } else {
      val slashedPath = origReq.path + "/"
      val target =
        if (origReq.rawQueryString.isEmpty) {
          RequestTarget(path = slashedPath, uriString = slashedPath, queryString = Map())
        } else {
          RequestTarget(path = slashedPath,
                        uriString = origReq.uri,
                        queryString = origReq.queryString)
        }
      origReq.withTarget(target)
    }
}
package controllers
import controllers.BindController.AgeRange
import javax.inject.Inject
import play.api.Configuration
import play.api.mvc.{AbstractController, ControllerComponents, QueryStringBindable, Rendering}
/**
 * Experimental controller demonstrating a custom query-string binder.
 *
 * @param cc     Play controller components
 * @param config application configuration (currently unused here)
 */
class BindController @Inject()(cc: ControllerComponents,
                               config: Configuration
                               //kafka:KafkaComponent
                              ) extends AbstractController(cc) with Rendering {

  /**
   * Responds with the lower bound of the bound range.
   *
   * @param age range bound from the request's query string
   */
  def age(age: AgeRange) = Action {
    // `from` is already a String, so no cast is needed.
    Ok(age.from)
  }
}
object BindController {

  /** Range described by the `from` and `to` query-string parameters. */
  case class AgeRange(from: String, to: String)

  /**
   * Query-string binder for [[AgeRange]], built on top of the String binder.
   *
   * The `key` argument is ignored: the values are always read from and written
   * to the fixed parameter names "from" and "to".
   */
  implicit def queryStringBindable(implicit intBinder: QueryStringBindable[String]): QueryStringBindable[AgeRange] =
    new QueryStringBindable[AgeRange] {

      /**
       * Yields `None` when either parameter is absent, `Left` when either one
       * failed to bind, and `Right(AgeRange)` when both are present and valid.
       */
      override def bind(key: String, params: Map[String, Seq[String]]): Option[Either[String, AgeRange]] =
        intBinder.bind("from", params).flatMap { fromResult =>
          intBinder.bind("to", params).map { toResult =>
            (fromResult, toResult) match {
              case (Right(lower), Right(upper)) => Right(AgeRange(lower, upper))
              case _ => Left("Unable to bind an AgeRange")
            }
          }
        }

      /** Serialises the range as `from=...&to=...` using the String binder. */
      override def unbind(key: String, ageRange: AgeRange): String = {
        val fromPart = intBinder.unbind("from", ageRange.from)
        val toPart = intBinder.unbind("to", ageRange.to)
        s"$fromPart&$toPart"
      }
    }
}
\ No newline at end of file
package controllers
import javax.inject.Inject
import modules.OaiRepository
import org.swissbib.memobase.oai.request.OaiRequest
import org.swissbib.memobase.oai.common.verb.OaiVerb
import play.api.Configuration
import play.api.mvc.{AbstractController, Action, AnyContent, ControllerComponents, Rendering}
/**
 * HTTP entry point of the OAI server.
 *
 * @param cc         Play controller components
 * @param config     application configuration, handed to the request execution
 * @param repository repository handed to the request execution
 */
class OaiController @Inject()(cc: ControllerComponents,
                              config: Configuration,
                              repository: OaiRepository
                             ) extends AbstractController(cc) with Rendering {

  //todo: a better way might be to use a so-called BindController - see the type of that name
  // in this package, which experiments with it
  //https://www.playframework.com/documentation/2.8.x/RequestBinders
  /**
   * Executes an OAI request and renders its response as `text/xml`.
   *
   * An unknown `verb` is answered with 400 Bad Request instead of letting
   * `OaiVerb.withName` fail with an exception.
   *
   * NOTE(review): OAI-PMH expects a `badVerb` error element inside a regular
   * XML response; replace the plain-text 400 once the response classes support it.
   *
   * @param verb            name of the OAI verb, e.g. "Identify"
   * @param metadataPrefix  optional `metadataPrefix` argument
   * @param set             optional `set` argument
   * @param from            optional `from` argument
   * @param until           optional `until` argument
   * @param identifier      optional `identifier` argument
   * @param resumptionToken optional `resumptionToken` argument
   */
  def handleRequest(verb: String,
                    metadataPrefix: Option[String],
                    set: Option[String],
                    from: Option[String],
                    until: Option[String],
                    identifier: Option[String],
                    resumptionToken: Option[String]): Action[AnyContent] =
    Action { implicit request =>
      // withName throws NoSuchElementException for names that are not enumeration values.
      val parsedVerb =
        try Some(OaiVerb.withName(verb))
        catch { case _: NoSuchElementException => None }

      parsedVerb match {
        case Some(enumVerb) =>
          val oaiRequest = OaiRequest(enumVerb,
                                      metadataPrefix,
                                      set,
                                      from,
                                      until,
                                      identifier,
                                      resumptionToken)
          val response = oaiRequest.execute(config, repository).createResponse
          // todo: define a Writeable[Seq[scala.xml.Node]] so the XML can be passed to Ok
          // directly; until then the response is rendered through toString.
          Ok(response.toString()).as("text/xml")

        case None =>
          BadRequest(s"badVerb: '$verb' is not a legal OAI-PMH verb")
      }
    }

  /** Catch-all action that answers with a fixed greeting. */
  def fallback: Action[AnyContent] =
    Action { implicit request =>
      Ok(s"hello from fallback")
    }
}
\ No newline at end of file
package modules
import java.io.IOException
import java.util
import com.typesafe.config.{Config, ConfigObject}
import javax.inject.{Inject, Singleton}
import org.apache.http.{Header, HttpHost}
import org.apache.http.message.BasicHeader
import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptRequest
import org.elasticsearch.client.{RequestOptions, RestClient, RestHighLevelClient}
import org.elasticsearch.common.bytes.BytesReference
import org.elasticsearch.common.xcontent.{XContentFactory, XContentType}
import org.swissbib.memobase.oai.common.util.{OaiCommonConfig, OaiConfig, OaiConfigMetadataPrefixes, OaiConfigSets, OaiIdentifyConfig}
import play.Environment
import play.api.inject.ApplicationLifecycle
import utilities.FileUtil
import scala.jdk.CollectionConverters._
import scala.collection.mutable.ArrayBuffer
import scala.concurrent.Future
/**
 * Singleton that provides the OAI configuration and, optionally, an Elasticsearch
 * client.
 *
 * The Elasticsearch connection is currently disabled: `client` is `None` and
 * neither `connect()` nor `uploadTemplates()` is called during construction.
 *
 * @param lifecycle used to register the shutdown hook that closes the client
 * @param config    application configuration; `oaiconfigs.*` and `index.*` keys are read
 * @param env       environment handed to `FileUtil.readFile` when loading templates
 */
@Singleton
class ElasticsearchClient @Inject()(
    lifecycle: ApplicationLifecycle,
    private val config: Config,
    private val env: Environment
) extends ElasticsearchComponent {

  // Close the client on shutdown if one exists. A failure while closing is
  // reported through the returned future instead of being thrown from the hook.
  lifecycle.addStopHook(() =>
    Future.fromTry(scala.util.Try(client.foreach(_.close()))))

  // Switch to `connect()` to enable the Elasticsearch connection.
  override val client: Option[RestHighLevelClient] = None

  override val oaiConfig: OaiConfig = loadOaiConfig()

  // Call uploadTemplates() here once the client is enabled.

  /** Builds the [[OaiConfig]] from the `oaiconfigs.*` configuration section. */
  private def loadOaiConfig(): OaiConfig = {
    val sI = config.getObject("oaiconfigs.identify").toConfig
    //todo: make this an implicit conversion
    val identifyConfig = OaiIdentifyConfig(
      earliestDatestamp = sI.getString("earliestDatestamp"),
      deletedRecord = sI.getString("deletedRecord"),
      adminEmail = sI.getString("adminEmail"),
      repositoryName = sI.getString("repositoryName"),
      baseUrl = sI.getString("baseURL"),
      granularity = sI.getString("granularity"),
      protocolVersion = sI.getString("protocolVersion"))

    val sC = config.getObject("oaiconfigs.common").toConfig
    val commonConfig = OaiCommonConfig(
      xsi_schemaLocation = sC.getString("xsi_schemaLocation")
    )

    //collection conversions
    //https://stackoverflow.com/questions/8301947/what-is-the-difference-between-javaconverters-and-javaconversions-in-scala
    val oaiConfigSet = config.getStringList("oaiconfigs.sets").asScala.toSeq
    val oaiConfigPrefixes = config.getStringList("oaiconfigs.metadataPrefix").asScala.toSeq

    OaiConfig(
      commonConfig,
      identifyConfig,
      OaiConfigSets(oaiConfigSet),
      OaiConfigMetadataPrefixes(oaiConfigPrefixes))
  }

  /**
   * Creates a client for the `host:port` entries listed under `index.hosts`,
   * sending the configured `index.cluster` as a default `cluster.name` header.
   */
  private def connect(): Option[RestHighLevelClient] = {
    val hosts = config.getStringList("index.hosts").asScala.map { value =>
      val hostPort = value.split(":")
      new HttpHost(hostPort(0), hostPort(1).toInt)
    }
    val headers =
      Array[Header](new BasicHeader("cluster.name", config.getString("index.cluster")))
    Option(new RestHighLevelClient(RestClient.builder(hosts.toArray: _*).setDefaultHeaders(headers)))
  }

  /**
   * Stores every mustache template listed under `index.templatequeries` as a
   * stored script named `<index.template_prefix>_<file name without extension>`.
   *
   * @throws IllegalStateException if no Elasticsearch client is available
   */
  private def uploadTemplates(): Unit = {
    config.getStringList("index.templatequeries").forEach((templateName: String) => {
      val template = FileUtil.readFile(templateName, env)
      // Literal replacement: with replaceAll the "." would act as a regex wildcard
      // and also strip e.g. "_mustache" from the middle of a name.
      val nameWithoutPath =
        templateName.substring(templateName.lastIndexOf("/") + 1).replace(".mustache", "")
      val templatePrefix = config.getString("index.template_prefix")
      try {
        val request = new PutStoredScriptRequest
        request.id(templatePrefix + "_" + nameWithoutPath)
        val builder = XContentFactory.jsonBuilder
        builder.startObject
        builder.startObject("script")
        builder.field("lang", "mustache")
        // load mustache source as string to allow invalid JSON template features!
        builder.field("source", template.replace("\n", ""))
        builder.endObject
        builder.endObject
        request.content(BytesReference.bytes(builder), XContentType.JSON)
        val esClient = client.getOrElse(
          throw new IllegalStateException(
            "Cannot upload search templates: no Elasticsearch client available"))
        esClient.putScript(request, RequestOptions.DEFAULT)
      } catch {
        case io: IOException =>
          //TODO: if the templates cannot be read the application should shut down.
          io.printStackTrace()
      }
    })
  }
}
package modules
import org.elasticsearch.client.RestHighLevelClient
/**
 * An [[OaiRepository]] that additionally exposes an optional Elasticsearch client.
 */
trait ElasticsearchComponent extends OaiRepository {
// Optional Elasticsearch high-level REST client supplied by the implementation.
val client: Option[RestHighLevelClient]
}
package modules
import com.google.inject.AbstractModule
/**
 * Guice module that binds [[OaiRepository]] to [[ElasticsearchClient]] as an
 * eager singleton.
 */
class ElasticsearchModule extends AbstractModule {

  override def configure(): Unit = {
    // Earlier variant, kept for reference:
    //   bind(classOf[ElasticsearchComponent])
    //     .to(classOf[ElasticsearchClient])
    //     .asEagerSingleton()
    val repositoryType = classOf[OaiRepository]
    val implementationType = classOf[ElasticsearchClient]
    bind(repositoryType).to(implementationType).asEagerSingleton()
  }
}
package modules
import com.typesafe.config.Config
import javax.inject.Inject
import play.api.Environment
import play.api.inject.ApplicationLifecycle
/**
 * Marker trait for the Kafka integration; it declares no members yet.
 */
trait KafkaComponent {
}
/**
 * Injectable implementation of [[KafkaComponent]]. The body is empty, so the
 * injected lifecycle, configuration and environment are not used yet.
 */
class KafkaComponentImpl @Inject()(lifecycle: ApplicationLifecycle,
config: Config,
environment: Environment) extends KafkaComponent {
}
package modules
import com.google.inject.AbstractModule
import javax.inject._
import net.codingwell.scalaguice.ScalaModule
import play.api.{Configuration, Environment}
/**
* Guice module that binds [[KafkaComponent]] to [[KafkaComponentImpl]] in
* singleton scope.
*
* The `environment` and `configuration` constructor parameters are not used
* inside this module.
*
* https://www.playframework.com/documentation/latest/ScalaDependencyInjection
*/
class KafkaModule(environment: Environment, configuration: Configuration)
extends AbstractModule
with ScalaModule {
//todo: compare how the Elasticsearch module is instantiated with the Kafka way used here
override def configure(): Unit = {
// Commented-out bindings kept for reference:
//bind[PostRepository].to[PostRepositoryImpl].in[Singleton]
//bind[SolrComponent].to[SolrComponentImpl].in[Singleton]
bind[KafkaComponent].to[KafkaComponentImpl].in[Singleton]
}
}
package modules
import org.swissbib.memobase.oai.common.util.OaiConfig
/**
 * Abstraction of the backing repository of the OAI server. At present it only
 * exposes the OAI configuration.
 */
trait OaiRepository {
// Static OAI configuration supplied by the implementation.
val oaiConfig: OaiConfig
}
package org.swissbib.memobase.oai.common.util
/**
 * Aggregated OAI server configuration.
 *
 * @param common   settings shared across responses
 * @param identify values of the Identify configuration
 * @param sets     configured sets
 * @param prefixes configured metadata prefixes
 */
case class OaiConfig(common: OaiCommonConfig,
identify: OaiIdentifyConfig,
sets: OaiConfigSets,
prefixes: OaiConfigMetadataPrefixes)
/**
 * Settings shared across OAI responses.
 *
 * @param xsi_schemaLocation schema-location string; presumably the value of the
 *                           `xsi:schemaLocation` attribute of responses - confirm
 *                           against the response builders
 */
case class OaiCommonConfig(
xsi_schemaLocation: String
)
/**
 * Configuration values for the Identify verb. All values are kept as plain
 * strings.
 *
 * @param earliestDatestamp configured earliest datestamp
 * @param deletedRecord     configured deleted-record policy
 * @param adminEmail        configured administrator e-mail address
 * @param repositoryName    configured repository name
 * @param baseUrl           configured base URL of the repository
 * @param granularity       configured harvesting granularity
 * @param protocolVersion   configured OAI-PMH protocol version
 */
case class OaiIdentifyConfig(earliestDatestamp: String,
deletedRecord: String,
adminEmail: String,
repositoryName: String,
baseUrl: String,
granularity: String,
protocolVersion: String
)
/** Wrapper around the configured list of sets. */
case class OaiConfigSets(sets: Seq[String])
/** Wrapper around the configured list of metadata prefixes. */
case class OaiConfigMetadataPrefixes(prefixes: Seq[String])
package org.swissbib.memobase.oai.common.util
/**
 * Placeholder without members.
 * NOTE(review): presumably meant to model the header of an OAI record - confirm.
 */
class RecordHeader {
}
package org.swissbib.memobase.oai.common.util
/**
 * Placeholder without members.
 * NOTE(review): presumably meant to model an OAI resumption token - confirm.
 */
class ResumptionToken {
}
package org.swissbib.memobase.oai.common.verb
/**
 * Enumeration of the OAI verbs handled by the server. Every value carries the
 * verb name as a string.
 */
object OaiVerb extends Enumeration {
// Alias so that client code can refer to the value type as `OaiVerb`.
type OaiVerb = Value
// Enumeration value that additionally stores the verb string.
protected case class OaiVal(verb: String) extends super.Val {
//https://www.scala-lang.org/api/current/scala/Enumeration.html
}
import scala.language.implicitConversions
// Implicit view from an enumeration value to its verb string. The cast assumes
// that every value of this enumeration is an OaiVal, which holds for the values below.
implicit def getVerb(x: Value): String = x.asInstanceOf[OaiVal].verb
// Do not reorder: Enumeration assigns ids in declaration order.
val ListRecords: OaiVal = OaiVal("ListRecords")
val ListSets: OaiVal = OaiVal("ListSets")
val ListIdentifiers: OaiVal = OaiVal("ListIdentifiers")
val GetRecord: OaiVal = OaiVal("GetRecord")
val ListMetadataFormats: OaiVal = OaiVal("ListMetadataFormats")
val Identify: OaiVal = OaiVal("Identify")
}
<
package org.swissbib.memobase.oai.request
import modules.OaiRepository
import org.swissbib.memobase.oai.response.OaiResponse
import org.swissbib.memobase.oai.common.verb.OaiVerb
import org.swissbib.memobase.oai.common.verb.OaiVerb.OaiVerb
import org.swissbib.memobase.oai.runner.{GetRecordRunner, IdentifyRunner,
ListIdentifiersRunner, ListMetadataFormatsRunner, ListRecordsRunner, ListSetsRunner}
import play.api.Configuration
case class OaiRequest(verb:OaiVerb,
metadataPrefix: Option[String],
set: Option[String],
from:Option[String],
until:Option[String],
identifier: Option[String],
resumptionToken: Option[String]) {
def hasNecessaryArguments:Boolean = {
def checkArgumentsGetRecord: Boolean = {
/*
https://www.openarchives.org/OAI/openarchivesprotocol.html#GetRecord
**identifier** a required argument that specifies the unique identifier of the item in the repository from which the record must be disseminated.
**metadataPrefix** a required argument that specifies the metadataPrefix of the format that should be included in the metadata part of the returned record . A record should only be returned if the format specified by the metadataPrefix can be disseminated from the item identified by the value of the identifier argument. The metadata formats supported by a repository and for a particular record can be retrieved using the ListMetadataFormats request.
Error and Exception Conditions
badArgument - The request includes illegal arguments or is missing required arguments.
cannotDisseminateFormat - The value of the metadataPrefix argument is not supported by the item identified by the value of the identifier argument.
idDoesNotExist - The value of the identifier argument is unknown or illegal in this repository.
*/
this match {
case OaiRequest(_,Some(metadataPrefix),None, None,None,Some(ident),None) => true
case _ => false
}
}
def checkArgumentsIdentify: Boolean = {
/*
Error and Exception Conditions
badArgument - The request includes illegal arguments.
Response Format
The response must include one instance of the following elements:
repositoryName : a human readable name for the repository;
baseURL : the base URL of the repository;
protocolVersion : the version of the OAI-PMH supported by the repository;
earliestDatestamp : a UTCdatetime that is the guaranteed lower limit of all datestamps recording changes, modifications, or deletions in the repository. A repository must not use datestamps lower than the one specified by the content of the earliestDatestamp element. earliestDatestamp must be expressed at the finest granularity supported by the repository.
deletedRecord : the manner in which the repository supports the notion of deleted records. Legitimate values are no ; transient ; persistent with meanings defined in the section on deletion.
granularity: the finest harvesting granularity supported by the repository. The legitimate values are YYYY-MM-DD and YYYY-MM-DDThh:mm:ssZ with meanings as defined in ISO8601.
*/
//no additional arguments allowed
this match {
case OaiRequest(_,None,None, None,None,None,None) => true
case _ => false
}
}
def checkArgumentsListIdentifiers: Boolean = {
/*