├── Makefile ├── .gitignore ├── conf.sample.json ├── cnchook ├── types.go ├── tools.go ├── profiles │ ├── cncResource.go │ └── components │ │ ├── bibliographicInfo.go │ │ └── dataInfo.go ├── conversion.go └── cnchook.go ├── general └── info.go ├── cncdb ├── cnf.go ├── scripts │ └── schema.sql ├── cncdb_test.go └── cncdb.go ├── oaipmh ├── formats │ ├── common.go │ ├── dc.go │ └── cmdi.go ├── common.go ├── error.go ├── structs.go ├── reqresp.go ├── args.go └── handler.go ├── go.mod ├── scripts ├── triggers.sql └── triggers_cnc.sql ├── cnf └── conf.go ├── vlo.go ├── go.sum └── LICENSE /Makefile: -------------------------------------------------------------------------------- 1 | VERSION=`git describe --tags --always` 2 | BUILD=`date +%FT%T%z` 3 | HASH=`git rev-parse --short HEAD` 4 | 5 | 6 | LDFLAGS=-ldflags "-w -s -X main.version=${VERSION} -X main.buildDate=${BUILD} -X main.gitCommit=${HASH}" 7 | 8 | all: test build 9 | 10 | build: 11 | go build ${LDFLAGS} 12 | 13 | clean: 14 | rm ./cnc-vlo 15 | 16 | test: 17 | go test ./... 18 | 19 | .PHONY: clean install test build -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | -------------------------------------------------------------------------------- /conf.sample.json: -------------------------------------------------------------------------------- 1 | { 2 | "listenAddress" : "127.0.0.1", 3 | "listenPort": 8080, 4 | "serverReadTimeoutSecs": 120, 5 | "serverWriteTimeoutSecs": 60, 6 | "logging": { 7 | "level": "debug" 8 | }, 9 | "timeZone": "UTC", 10 | "cncDb": { 11 | "host": "localhost:3306", 12 | "user": "kontext", 13 | "passwd": "kontext-secret", 14 | "db": "kontext", 15 | "overrides": { 16 | "corporaTableName": "corpora", 17 | "userTableName": "user", 18 | "userTableFirstNameCol": "firstName", 19 | "userTableLastNameCol": "surname" 20 | } 21 | }, 22 | "repositoryInfo": { 23 | "name": "CNC metadata repository", 24 | "baseUrl": "http://localhost:8080", 25 | "adminEmail": ["admin@cnc.cz"] 26 | }, 27 | "metadataValues": { 28 | "publisher": "UCNK" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /cnchook/types.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package cnchook 18 | 19 | type MetadataType string 20 | 21 | const ( 22 | CorpusMetadataType MetadataType = "corpus" 23 | ServiceMetadataType MetadataType = "service" 24 | ) 25 | -------------------------------------------------------------------------------- /general/info.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package general 18 | 19 | // VersionInfo provides a detailed information about the actual build 20 | type VersionInfo struct { 21 | Version string `json:"version"` 22 | BuildDate string `json:"buildDate"` 23 | GitCommit string `json:"gitCommit"` 24 | } 25 | -------------------------------------------------------------------------------- /cncdb/cnf.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package cncdb 18 | 19 | type DatabaseSetup struct { 20 | Host string `json:"host"` 21 | User string `json:"user"` 22 | Passwd string `json:"passwd"` 23 | Name string `json:"db"` 24 | Overrides DBOverrides `json:"overrides"` 25 | PublicCorplistID int `json:"publicCorplistId"` 26 | } 27 | -------------------------------------------------------------------------------- /oaipmh/formats/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package formats 18 | 19 | // note - omitempties are optional 20 | 21 | type MultilangElement struct { 22 | Lang string `xml:"xml:lang,attr,omitempty"` 23 | Value string `xml:",chardata"` 24 | } 25 | 26 | type MultilangArray []MultilangElement 27 | 28 | func (d *MultilangArray) Add(value string, lang string) { 29 | *d = append(*d, MultilangElement{Value: value, Lang: lang}) 30 | } 31 | 32 | type TypedElement struct { 33 | Type string `xml:"type,attr,omitempty"` 34 | Value string `xml:",chardata"` 35 | } 36 | -------------------------------------------------------------------------------- /oaipmh/common.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package oaipmh 18 | 19 | import ( 20 | "encoding/xml" 21 | "net/http" 22 | "net/url" 23 | 24 | "github.com/rs/zerolog/log" 25 | ) 26 | 27 | func getTypedArg[T ~string](args url.Values, name string) T { 28 | return T(args.Get(name)) 29 | } 30 | 31 | func writeXMLResponse(w http.ResponseWriter, code int, value any) { 32 | xmlAns, err := xml.Marshal(value) 33 | if err != nil { 34 | log.Err(err).Msg("failed to encode a result to XML") 35 | http.Error(w, err.Error(), http.StatusInternalServerError) 36 | return 37 | } 38 | w.WriteHeader(code) 39 | _, err = w.Write([]byte(xml.Header + string(xmlAns))) 40 | if err != nil { 41 | log.Err(err).Msg("failed to write XML to response") 42 | http.Error(w, err.Error(), http.StatusInternalServerError) 43 | } 44 | w.Header().Set("Content-Type", "text/xml") 45 | } 46 | -------------------------------------------------------------------------------- /oaipmh/error.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package oaipmh 18 | 19 | type OAIPMHErrorCode string 20 | 21 | const ( 22 | // http://www.openarchives.org/OAI/openarchivesprotocol.html#ErrorConditions 23 | ErrorCodeBadArgument OAIPMHErrorCode = "badArgument" 24 | ErrorCodeBadResumptionToken OAIPMHErrorCode = "badResumptionToken" 25 | ErrorCodeBadVerb OAIPMHErrorCode = "badVerb" 26 | ErrorCodeCannotDisseminateFormat OAIPMHErrorCode = "cannotDisseminateFormat" 27 | ErrorCodeIDDoesNotExist OAIPMHErrorCode = "idDoesNotExist" 28 | ErrorCodeNoRecordsMatch OAIPMHErrorCode = "noRecordsMatch" 29 | ErrorCodeNoMetadataFormats OAIPMHErrorCode = "noMetadataFormats" 30 | ErrorCodeNoSetHierarchy OAIPMHErrorCode = "noSetHierarchy" 31 | ) 32 | 33 | type OAIPMHError struct { 34 | Code OAIPMHErrorCode `xml:"code,attr"` 35 | Message string `xml:",chardata"` 36 | } 37 | -------------------------------------------------------------------------------- /cnchook/tools.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package cnchook 18 | 19 | import ( 20 | "fmt" 21 | "strings" 22 | 23 | "github.com/czcorpus/cnc-vlo/cncdb" 24 | "github.com/czcorpus/cnc-vlo/cnchook/profiles/components" 25 | ) 26 | 27 | func getAuthorList(data *cncdb.DBData) []components.AuthorComponent { 28 | authors := []components.AuthorComponent{} 29 | for _, author := range strings.Split(strings.ReplaceAll(data.Authors, "\r\n", "\n"), "\n") { 30 | sAuthor := strings.Split(strings.Trim(author, " "), " ") 31 | if len(sAuthor) == 1 { 32 | authors = append(authors, components.AuthorComponent{LastName: sAuthor[0]}) 33 | } else if len(sAuthor) > 1 { 34 | authors = append(authors, components.AuthorComponent{FirstName: sAuthor[0], LastName: sAuthor[1]}) 35 | } 36 | } 37 | return authors 38 | } 39 | 40 | func getKontextPath(corpusID string) string { 41 | return fmt.Sprintf("https://www.korpus.cz/kontext/query?corpname=%s", corpusID) 42 | } 43 | -------------------------------------------------------------------------------- /cncdb/scripts/schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE vlo_metadata_corpus ( 2 | id int(11) PRIMARY KEY NOT NULL AUTO_INCREMENT, 3 | corpus_name varchar(63) NOT NULL, 4 | CONSTRAINT vlo_metadata_corpus_corpus_name_fk FOREIGN KEY (corpus_name) REFERENCES kontext_corpus(name) ON DELETE CASCADE ON UPDATE CASCADE, 5 | UNIQUE (corpus_name) 6 | ) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci; 7 | 8 | CREATE TABLE vlo_metadata_service ( 9 | id int(11) PRIMARY KEY NOT NULL AUTO_INCREMENT, 10 | name varchar(255) NOT NULL, 11 | link varchar(255) NOT NULL, 12 | UNIQUE (name) 13 | ) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci; 14 | 15 | CREATE TABLE vlo_metadata_common ( 16 | id int(11) PRIMARY KEY NOT NULL AUTO_INCREMENT, 17 | created TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 18 | updated TIMESTAMP NOT NULL ON UPDATE CURRENT_TIMESTAMP, 19 | deleted TINYINT(1) DEFAULT 0, 20 | hosted TINYINT(1) DEFAULT 0, 21 | type ENUM('corpus', 'service') NOT NULL, 22 | desc_en TEXT, 23 | desc_cs TEXT, 24 | date_issued VARCHAR(255) NOT NULL, 25 | license_info VARCHAR(255) NOT NULL, 26 | contact_user_id INT(11) NOT NULL, 27 | authors TEXT NOT NULL, 28 | corpus_metadata_id INT, 29 | service_metadata_id INT, 30 | CONSTRAINT vlo_metadata_common_contact_user_id_fk FOREIGN KEY (contact_user_id) REFERENCES kontext_user(id) ON DELETE RESTRICT ON UPDATE RESTRICT, 31 | CONSTRAINT vlo_metadata_common_corpus_metadata_id_fk FOREIGN KEY (corpus_metadata_id) REFERENCES vlo_metadata_corpus(id) ON DELETE CASCADE ON UPDATE RESTRICT, 32 | CONSTRAINT vlo_metadata_common_service_metadata_id_fk FOREIGN KEY (service_metadata_id) REFERENCES vlo_metadata_service(id) ON DELETE CASCADE ON UPDATE RESTRICT 33 | ) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci; -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/czcorpus/cnc-vlo 2 | 3 | go 1.21 4 | 5 | toolchain go1.23.0 6 | 7 | require ( 8 | github.com/czcorpus/cnc-gokit v0.11.0 9 | github.com/gin-gonic/gin v1.9.1 10 | github.com/go-sql-driver/mysql v1.8.0 11 | github.com/rs/zerolog v1.31.0 12 | github.com/stretchr/testify v1.9.0 13 | golang.org/x/text v0.14.0 14 | ) 15 | 16 | require ( 17 | filippo.io/edwards25519 v1.1.0 // indirect 18 | github.com/BurntSushi/toml v1.4.0 // indirect 19 | github.com/bytedance/sonic v1.9.1 // indirect 20 | github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect 21 | github.com/davecgh/go-spew v1.1.1 // indirect 22 | github.com/gabriel-vasile/mimetype v1.4.2 // indirect 23 | github.com/gin-contrib/sse v0.1.0 // indirect 24 | github.com/go-playground/locales v0.14.1 // indirect 25 | github.com/go-playground/universal-translator v0.18.1 // indirect 26 | github.com/go-playground/validator/v10 v10.14.1 // indirect 27 | github.com/goccy/go-json v0.10.2 // indirect 28 | github.com/json-iterator/go v1.1.12 // indirect 29 | github.com/klauspost/cpuid/v2 v2.2.5 // indirect 30 | github.com/leodido/go-urn v1.2.4 // indirect 31 | github.com/mattn/go-colorable v0.1.13 // indirect 32 | github.com/mattn/go-isatty v0.0.19 // indirect 33 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 34 | github.com/modern-go/reflect2 v1.0.2 // indirect 35 | github.com/natefinch/lumberjack v2.0.0+incompatible // indirect 36 | github.com/pelletier/go-toml/v2 v2.0.8 // indirect 37 | github.com/pmezard/go-difflib v1.0.0 // indirect 38 | github.com/twitchyliquid64/golang-asm v0.15.1 // indirect 39 | github.com/ugorji/go/codec v1.2.11 // indirect 40 | golang.org/x/arch v0.3.0 // indirect 41 | golang.org/x/crypto v0.21.0 // indirect 42 | golang.org/x/net v0.23.0 // indirect 43 | golang.org/x/sys v0.18.0 // indirect 44 | google.golang.org/protobuf v1.33.0 // indirect 45 | gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect 46 | gopkg.in/yaml.v2 v2.4.0 // indirect 47 | gopkg.in/yaml.v3 v3.0.1 // indirect 48 | ) 49 | -------------------------------------------------------------------------------- /cnchook/profiles/cncResource.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package profiles 18 | 19 | import ( 20 | "fmt" 21 | 22 | "github.com/czcorpus/cnc-vlo/cnchook/profiles/components" 23 | "github.com/czcorpus/cnc-vlo/oaipmh/formats" 24 | ) 25 | 26 | // note - omitempties are optional 27 | // profile is derived from LINDAT_CLARIN profile 28 | 29 | const CNCResourceProfileID = "clarin.eu:cr1:p_1712653174418" 30 | 31 | type CNCResourceProfile struct { 32 | BibliographicInfo components.BibliographicInfoComponent `xml:"cmdp:CNC_Resource>cmdp:bibliographicInfo"` 33 | DataInfo components.DataInfoComponent `xml:"cmdp:CNC_Resource>cmdp:dataInfo"` 34 | LicenseInfo []LicenseElement `xml:"cmdp:CNC_Resource>cmdp:licenseInfo>cmdp:license"` 35 | RelationsInfo *[]formats.TypedElement `xml:"cmdp:CNC_Resource>cmdp:relationsInfo>cmdp:relation,omitempty"` 36 | } 37 | 38 | func (c *CNCResourceProfile) GetSchemaURL() string { 39 | return fmt.Sprintf("http://www.clarin.eu/cmd/1/profiles/%s", CNCResourceProfileID) 40 | } 41 | 42 | func (c *CNCResourceProfile) GetSchemaLocation() []string { 43 | return []string{ 44 | c.GetSchemaURL(), 45 | fmt.Sprintf("https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.x/profiles/%s/xsd", CNCResourceProfileID), 46 | } 47 | } 48 | 49 | type LicenseElement struct { 50 | Name string `xml:"cmdp:name,omitempty"` 51 | URI string `xml:"cmdp:uri"` 52 | } 53 | -------------------------------------------------------------------------------- /cncdb/cncdb_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Tomas Machalek 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package cncdb 18 | 19 | import ( 20 | "testing" 21 | 22 | "github.com/stretchr/testify/assert" 23 | "golang.org/x/text/language" 24 | ) 25 | 26 | func TestParseLocaleOK(t *testing.T) { 27 | var h CNCMySQLHandler 28 | tag, err := h.parseLocale("en_US") 29 | assert.NoError(t, err) 30 | b, conf := tag.Base() 31 | assert.Equal(t, language.Exact, conf) 32 | assert.Equal(t, "en", b.String()) 33 | reg, conf := tag.Region() 34 | assert.Equal(t, language.Exact, conf) 35 | assert.Equal(t, "US", reg.String()) 36 | } 37 | 38 | func TestParseLocaleOKWithEncoding(t *testing.T) { 39 | var h CNCMySQLHandler 40 | tag, err := h.parseLocale("en_US.UTF-8") 41 | assert.NoError(t, err) 42 | b, conf := tag.Base() 43 | assert.Equal(t, language.Exact, conf) 44 | assert.Equal(t, "en", b.String()) 45 | reg, conf := tag.Region() 46 | assert.Equal(t, language.Exact, conf) 47 | assert.Equal(t, "US", reg.String()) 48 | } 49 | 50 | func TestParseLocaleOKBase(t *testing.T) { 51 | var h CNCMySQLHandler 52 | tag, err := h.parseLocale("cs") 53 | assert.NoError(t, err) 54 | b, conf := tag.Base() 55 | assert.Equal(t, language.Exact, conf) 56 | assert.Equal(t, "cs", b.String()) 57 | reg, conf := tag.Region() 58 | assert.Equal(t, language.Low, conf) 59 | assert.Equal(t, "CZ", reg.String()) 60 | } 61 | 62 | func TestParseLocaleBroken(t *testing.T) { 63 | var h CNCMySQLHandler 64 | tag, err := h.parseLocale("en_EN") 65 | assert.NoError(t, err) 66 | b, conf := tag.Base() 67 | assert.Equal(t, language.Exact, conf) 68 | assert.Equal(t, "en", b.String()) 69 | reg, conf := tag.Region() 70 | assert.Equal(t, language.Low, conf) 71 | assert.Equal(t, "US", reg.String()) 72 | } 73 | -------------------------------------------------------------------------------- /cnchook/profiles/components/bibliographicInfo.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package components 18 | 19 | import "github.com/czcorpus/cnc-vlo/oaipmh/formats" 20 | 21 | // note - omitempties are optional 22 | 23 | type BibliographicInfoComponent struct { 24 | ProjectUrl string `xml:"cmdp:projectUrl,omitempty"` 25 | Version string `xml:"cmdp:version,omitempty"` 26 | Titles formats.MultilangArray `xml:"cmdp:titles>cmdp:title"` 27 | Authors []AuthorComponent `xml:"cmdp:authors>cmdp:author"` 28 | Dates *DatesComponent `xml:"cmdp:dates,omitempty"` 29 | Identifiers []formats.TypedElement `xml:"cmdp:identifiers>cmdp:identifier"` 30 | Funds *[]FundingComponent `xml:"cmdp:funding>cmdp:funds,omitempty"` 31 | ContactPerson ContactPersonComponent `xml:"cmdp:contactPerson"` 32 | Publishers []string `xml:"cmdp:publishers>cmdp:publisher"` 33 | } 34 | 35 | type AuthorComponent struct { 36 | LastName string `xml:"cmdp:lastName"` 37 | FirstName string `xml:"cmdp:firstName,omitempty"` 38 | } 39 | 40 | type DatesComponent struct { 41 | Dates []formats.TypedElement `xml:"cmdp:date,omitempty"` // type is value scheme 42 | DateIssued string `xml:"cmdp:dateIssued,omitempty"` 43 | } 44 | 45 | type FundingComponent struct { 46 | Organization string `xml:"cmdp:organization"` 47 | Code string `xml:"cmdp:code"` // grant or project id 48 | ProjectName string `xml:"cmdp:projectName"` 49 | FundsType string `xml:"cmdp:fundsType"` 50 | } 51 | 52 | type ContactPersonComponent struct { 53 | LastName string `xml:"cmdp:lastName"` 54 | FirstName string `xml:"cmdp:firstName"` 55 | Email string `xml:"cmdp:email"` 56 | Affiliation string `xml:"cmdp:affiliation"` 57 | } 58 | -------------------------------------------------------------------------------- /scripts/triggers.sql: -------------------------------------------------------------------------------- 1 | -- 2 | -- This is a general version of triggers for synchronizing 3 | -- descriptions between the `corpora` and `vlo_metadata_common` tables. 4 | -- It should work with any non-cnc instance of KonText in case 5 | -- the installation is based on mysql_* plugins. 6 | -- 7 | 8 | 9 | DELIMITER // 10 | 11 | DROP TRIGGER IF EXISTS sync_descriptions_from_corpora_trig // 12 | DROP TRIGGER IF EXISTS sync_descriptions_from_metadata_trig // 13 | DROP TRIGGER IF EXISTS insert_metadata_on_corpora_insert_trig // 14 | 15 | CREATE TRIGGER sync_descriptions_from_corpora_trig 16 | AFTER UPDATE ON kontext_corpus 17 | FOR EACH ROW 18 | BEGIN 19 | SET @contact_user_id = 1; 20 | IF NOT (NEW.description_cs <=> OLD.description_cs) OR NOT (NEW.description_en <=> OLD.description_en) THEN 21 | SELECT name INTO @corpus_name FROM kontext_corpus WHERE id = NEW.id; 22 | SELECT id INTO @corpus_metadata_id FROM vlo_metadata_corpus WHERE corpus_name = @corpus_name; 23 | IF @corpus_metadata_id IS NULL THEN 24 | INSERT INTO vlo_metadata_corpus (corpus_name) VALUES (@corpus_name); 25 | INSERT INTO vlo_metadata_common (type, desc_cs, desc_en, corpus_metadata_id, contact_user_id, deleted, license_info, authors, date_issued) 26 | VALUES ('corpus', NEW.description_cs, NEW.description_en, LAST_INSERT_ID(), @contact_user_id, 1, '', '', ''); 27 | ELSEIF @skip_vlo_update IS NULL THEN 28 | SET @skip_corpora_update = 1; 29 | UPDATE vlo_metadata_common SET desc_cs = NEW.description_cs, desc_en = NEW.description_en WHERE corpus_metadata_id = @corpus_metadata_id; 30 | SET @skip_corpora_update = NULL; 31 | END IF; 32 | END IF; 33 | END; 34 | // 35 | 36 | CREATE TRIGGER sync_descriptions_from_metadata_trig 37 | AFTER UPDATE ON vlo_metadata_common 38 | FOR EACH ROW 39 | BEGIN 40 | IF NEW.type = 'corpus' AND (NOT (NEW.desc_cs <=> OLD.desc_cs) OR NOT (NEW.desc_en <=> OLD.desc_en)) THEN 41 | IF @skip_corpora_update IS NULL THEN 42 | SET @skip_vlo_update = 1; 43 | UPDATE kontext_corpus SET description_cs = NEW.desc_cs, description_en = NEW.desc_en WHERE name = (SELECT corpus_name FROM vlo_metadata_corpus WHERE id = NEW.corpus_metadata_id); 44 | SET @skip_vlo_update = NULL; 45 | END IF; 46 | END IF; 47 | END; 48 | // 49 | 50 | CREATE TRIGGER insert_metadata_on_corpora_insert_trig 51 | AFTER INSERT ON kontext_corpus 52 | FOR EACH ROW 53 | BEGIN 54 | SET @contact_user_id = 1; 55 | INSERT INTO vlo_metadata_corpus (corpus_name) VALUES (NEW.name); 56 | INSERT INTO vlo_metadata_common (type, desc_cs, desc_en, corpus_metadata_id, contact_user_id, deleted, license_info, authors, date_issued) 57 | VALUES ('corpus', NEW.description_cs, NEW.description_en, LAST_INSERT_ID(), @contact_user_id, 1, '', '', ''); 58 | END; 59 | // -------------------------------------------------------------------------------- /oaipmh/formats/dc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package formats 18 | 19 | import ( 20 | "encoding/xml" 21 | "strings" 22 | 23 | "github.com/czcorpus/cnc-vlo/oaipmh" 24 | ) 25 | 26 | const DublinCoreMetadataPrefix = "oai_dc" 27 | 28 | // note - omitempties are optional 29 | 30 | type DublinCore struct { 31 | XMLName xml.Name `xml:"oai_dc:dc"` 32 | XMLNSOAIDC string `xml:"xmlns:oai_dc,attr"` 33 | XMLNSDC string `xml:"xmlns:dc,attr"` 34 | XMLNSXSI string `xml:"xmlns:xsi,attr"` 35 | XSISchemaLocation string `xml:"xsi:schemaLocation,attr"` 36 | 37 | Title MultilangArray `xml:"dc:title"` 38 | Creator MultilangArray `xml:"dc:creator"` 39 | Subject MultilangArray `xml:"dc:subject"` 40 | Description MultilangArray `xml:"dc:description"` 41 | Publisher MultilangArray `xml:"dc:publisher"` 42 | Contributor MultilangArray `xml:"dc:contributor"` 43 | Date MultilangArray `xml:"dc:date"` // ISO 8601 44 | Type MultilangArray `xml:"dc:type"` 45 | Format MultilangArray `xml:"dc:format"` 46 | Identifier MultilangArray `xml:"dc:identifier"` 47 | Source MultilangArray `xml:"dc:source"` 48 | Language MultilangArray `xml:"dc:language"` // ISO 639 + optionally ISO 3166 49 | Relation MultilangArray `xml:"dc:relation"` 50 | Coverage MultilangArray `xml:"dc:coverage"` 51 | Rights MultilangArray `xml:"dc:rights"` 52 | } 53 | 54 | func NewDublinCore() DublinCore { 55 | return DublinCore{ 56 | XMLNSOAIDC: "http://www.openarchives.org/OAI/2.0/oai_dc/", 57 | XMLNSDC: "http://purl.org/dc/elements/1.1/", 58 | XMLNSXSI: "http://www.w3.org/2001/XMLSchema-instance", 59 | XSISchemaLocation: strings.Join([]string{ 60 | "http://www.openarchives.org/OAI/2.0/oai_dc/", 61 | "http://www.openarchives.org/OAI/2.0/oai_dc.xsd", 62 | }, " "), 63 | } 64 | } 65 | 66 | func GetDublinCoreFormat() oaipmh.OAIPMHMetadataFormat { 67 | return oaipmh.OAIPMHMetadataFormat{ 68 | MetadataPrefix: DublinCoreMetadataPrefix, 69 | Schema: "http://www.openarchives.org/OAI/2.0/oai_dc.xsd", 70 | MetadataNamespace: "http://www.openarchives.org/OAI/2.0/oai_dc/", 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /scripts/triggers_cnc.sql: -------------------------------------------------------------------------------- 1 | -- 2 | -- This is a CNC-specific version of triggers for synchronizing 3 | -- descriptions between the `corpora` and `vlo_metadata_common` tables. 4 | -- 5 | 6 | DELIMITER // 7 | 8 | DROP TRIGGER IF EXISTS sync_descriptions_from_corpora_trig // 9 | DROP TRIGGER IF EXISTS sync_descriptions_from_metadata_trig // 10 | DROP TRIGGER IF EXISTS insert_metadata_on_corpora_insert_trig // 11 | 12 | CREATE TRIGGER sync_descriptions_from_corpora_trig 13 | AFTER UPDATE ON corpora 14 | FOR EACH ROW 15 | BEGIN 16 | SELECT id INTO @contact_user_id FROM user WHERE corplist = 17 ORDER BY id LIMIT 1; 17 | IF @contact_user_id IS NULL THEN 18 | SET @contact_user_id = 1; 19 | END IF; 20 | IF NOT (NEW.description_cs <=> OLD.description_cs) OR NOT (NEW.description_en <=> OLD.description_en) THEN 21 | SELECT name INTO @corpus_name FROM corpora WHERE id = NEW.id; 22 | SELECT id INTO @corpus_metadata_id FROM vlo_metadata_corpus WHERE corpus_name = @corpus_name; 23 | IF @corpus_metadata_id IS NULL THEN 24 | INSERT INTO vlo_metadata_corpus (corpus_name) VALUES (@corpus_name); 25 | INSERT INTO vlo_metadata_common (type, desc_cs, desc_en, corpus_metadata_id, contact_user_id, deleted, license_info, authors, date_issued) 26 | VALUES ('corpus', NEW.description_cs, NEW.description_en, LAST_INSERT_ID(), @contact_user_id, 1, 'RES', '', ''); 27 | ELSEIF @skip_vlo_update IS NULL THEN 28 | SET @skip_corpora_update = 1; 29 | UPDATE vlo_metadata_common SET desc_cs = NEW.description_cs, desc_en = NEW.description_en WHERE corpus_metadata_id = @corpus_metadata_id; 30 | SET @skip_corpora_update = NULL; 31 | END IF; 32 | END IF; 33 | END; 34 | // 35 | 36 | CREATE TRIGGER sync_descriptions_from_metadata_trig 37 | AFTER UPDATE ON vlo_metadata_common 38 | FOR EACH ROW 39 | BEGIN 40 | IF NEW.type = 'corpus' AND (NOT (NEW.desc_cs <=> OLD.desc_cs) OR NOT (NEW.desc_en <=> OLD.desc_en)) THEN 41 | IF @skip_corpora_update IS NULL THEN 42 | SET @skip_vlo_update = 1; 43 | UPDATE corpora SET description_cs = NEW.desc_cs, description_en = NEW.desc_en WHERE name = (SELECT corpus_name FROM vlo_metadata_corpus WHERE id = NEW.corpus_metadata_id); 44 | SET @skip_vlo_update = NULL; 45 | END IF; 46 | END IF; 47 | END; 48 | // 49 | 50 | CREATE TRIGGER insert_metadata_on_corpora_insert_trig 51 | AFTER INSERT ON corpora 52 | FOR EACH ROW 53 | BEGIN 54 | SELECT id INTO @contact_user_id FROM user WHERE corplist = 17 ORDER BY id LIMIT 1; 55 | IF @contact_user_id IS NULL THEN 56 | SET @contact_user_id = 1; 57 | END IF; 58 | INSERT INTO vlo_metadata_corpus (corpus_name) VALUES (NEW.name); 59 | INSERT INTO vlo_metadata_common (type, desc_cs, desc_en, corpus_metadata_id, contact_user_id, deleted, license_info, authors, date_issued) 60 | VALUES ('corpus', NEW.description_cs, NEW.description_en, LAST_INSERT_ID(), @contact_user_id, 1, 'RES', '', ''); 61 | END; 62 | // -------------------------------------------------------------------------------- /cnchook/profiles/components/dataInfo.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package components 18 | 19 | import "github.com/czcorpus/cnc-vlo/oaipmh/formats" 20 | 21 | // note - omitempties are optional 22 | 23 | type DataInfoComponent struct { 24 | Type string `xml:"cmdp:type"` // e.g. corpus, tool 25 | DetailedType string `xml:"cmdp:detailedType,omitempty"` // Further specification of the type 26 | Description formats.MultilangArray `xml:"cmdp:description"` 27 | Languages *[]LanguageComponent `xml:"cmdp:languages>cmdp:language,omitempty"` 28 | Keywords *[]string `xml:"cmdp:keywords>cmdp:keyword,omitempty"` 29 | Links *[]formats.TypedElement `xml:"cmdp:links>cmdp:link,omitempty"` // demo url, documentation url 30 | SizeInfo *[]SizeComponent `xml:"cmdp:sizeInfo>cmdp:size,omitempty"` 31 | Formats *[]FormatComponent `xml:"cmdp:formats>cmdp:format,omitempty"` 32 | Requirements *[]string `xml:"cmdp:requirements>cmdp:requirement,omitempty"` // e.g. OS, prerequisities 33 | CollectionInfo *CollectionInfoComponent `xml:"cmdp:collectionInfo,omitempty"` 34 | AnnotationInfo *[]string `xml:"cmdp:annotationInfo>cmdp:annotationType,omitempty"` // tags, lemmas, phrase alignment, coreference, ... 35 | } 36 | 37 | type LanguageComponent struct { 38 | Name string `xml:"cmdp:name"` 39 | Code string `xml:"cmdp:code"` 40 | } 41 | 42 | type SizeComponent struct { 43 | Size string `xml:"cmdp:size"` 44 | Unit string `xml:"cmdp:unit"` 45 | } 46 | 47 | type FormatComponent struct { 48 | Type string `xml:"cmdp:type,attr,omitempty"` 49 | Name string `xml:"cmdp:name,omitempty"` 50 | Medium string `xml:"cmdp:medium,omitempty"` // text, audio, ... 51 | Documentation string `xml:"cmdp:documentation,omitempty"` 52 | Description string `xml:"cmdp:description,omitempty"` // e.g. vertical format, where each line is "form/lemma/tag" 53 | } 54 | 55 | type CollectionInfoComponent struct { 56 | TimePeriods []string `xml:"cmdp:timePeriod,omitempty"` // When the data were gathered, which era do they come from 57 | Places []string `xml:"cmdp:place,omitempty"` // The origin of the data. e.g. The data were gathered in Bohemia 58 | Forms []string `xml:"cmdp:forms>cmdp:form,omitempty"` // spoken, written,... 59 | Genres []string `xml:"cmdp:genres>cmdp:genre,omitempty"` // fiction, news, blog 60 | } 61 | -------------------------------------------------------------------------------- /oaipmh/structs.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package oaipmh 18 | 19 | import "time" 20 | 21 | // wrapper to be able to embed custom element with name defined by XMLName 22 | type ElementWrapper struct { 23 | Value any 24 | } 25 | 26 | // note - omitempties are optional 27 | 28 | type OAIPMHRecordHeader struct { 29 | Status string `xml:"status,attr,omitempty"` // only `deleted` status 30 | Identifier string `xml:"identifier"` // URL 31 | Datestamp time.Time `xml:"datestamp"` // creation, modification or deletion of the record for the purpose of selective harvesting 32 | SetSpec []string `xml:"setSpec,omitempty"` 33 | } 34 | 35 | // ----------------------- Identify --------------------------- 36 | 37 | type OAIPMHIdentify struct { 38 | RepositoryName string `xml:"repositoryName"` 39 | BaseURL string `xml:"baseURL"` // filled automatically by handler 40 | ProtocolVersion string `xml:"protocolVersion"` // filled automatically by handler 41 | AdminEmail []string `xml:"adminEmail"` 42 | EarliestDatestamp time.Time `xml:"earliestDatestamp"` 43 | DeletedRecord string `xml:"deletedRecord"` // are we tracking deleted records no/transient/persistent? 44 | Granularity string `xml:"granularity"` // all repositories must support YYYY-MM-DD, extra YYYY-MM-DDThh:mm:ssZ 45 | Compression string `xml:"compression,omitempty"` 46 | Description []ElementWrapper `xml:"description,omitempty"` 47 | } 48 | 49 | // --------------------- ListMetadataFormats ------------------ 50 | 51 | type OAIPMHMetadataFormat struct { 52 | MetadataPrefix string `xml:"metadataPrefix"` 53 | Schema string `xml:"schema"` 54 | MetadataNamespace string `xml:"metadataNamespace"` 55 | } 56 | 57 | // ----------------------- GetRecord/ListRecords -------------- 58 | 59 | type OAIPMHRecord struct { 60 | Header *OAIPMHRecordHeader `xml:"header"` 61 | Metadata *ElementWrapper `xml:"metadata,omitempty"` 62 | } 63 | 64 | func NewOAIPMHRecord(metadata any) OAIPMHRecord { 65 | return OAIPMHRecord{ 66 | Header: &OAIPMHRecordHeader{}, 67 | Metadata: &ElementWrapper{Value: metadata}, 68 | } 69 | } 70 | 71 | // ----------------------- ListSets --------------------- 72 | 73 | type OAIPMHSet struct { 74 | SetSpec string `xml:"setSpec"` 75 | SetName string `xml:"setName"` 76 | SetDescription *ElementWrapper `xml:"setDescription,omitempty"` 77 | } 78 | -------------------------------------------------------------------------------- /oaipmh/reqresp.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package oaipmh 18 | 19 | import ( 20 | "encoding/xml" 21 | "time" 22 | 23 | "github.com/rs/zerolog/log" 24 | ) 25 | 26 | // note - omitempties are optional 27 | 28 | type OAIPMHRequest struct { 29 | URL string `xml:",chardata"` 30 | 31 | Verb Verb `xml:"verb,attr,omitempty"` 32 | Identifier string `xml:"identifier,attr,omitempty"` 33 | MetadataPrefix string `xml:"metadataPrefix,attr,omitempty"` 34 | From *time.Time `xml:"from,attr,omitempty"` 35 | Until *time.Time `xml:"until,attr,omitempty"` 36 | Set string `xml:"set,attr,omitempty"` 37 | ResumptionToken string `xml:"resumptionToken,attr,omitempty"` 38 | } 39 | 40 | type OAIPMHResponse struct { 41 | XMLName xml.Name `xml:"OAI-PMH"` 42 | XMLNS string `xml:"xmlns,attr"` 43 | XMLNSXSI string `xml:"xmlns:xsi,attr"` 44 | XSISchemaLocation string `xml:"xsi:schemaLocation,attr"` 45 | 46 | ResponseDate time.Time `xml:"responseDate"` 47 | Request *OAIPMHRequest `xml:"request"` 48 | Errors OAIPMHErrors `xml:"error,omitempty"` 49 | 50 | Identify *OAIPMHIdentify `xml:"Identify,omitempty"` 51 | GetRecord *OAIPMHRecord `xml:"GetRecord>record,omitempty"` 52 | ListMetadataFormats *[]OAIPMHMetadataFormat `xml:"ListMetadataFormats>metadataFormat,omitempty"` 53 | ListIdentifiers *[]OAIPMHRecordHeader `xml:"ListIdentifiers>header,omitempty"` 54 | ListRecords *[]OAIPMHRecord `xml:"ListRecords>record,omitempty"` 55 | ListSets *[]OAIPMHSet `xml:"ListSets>set,omitempty"` 56 | 57 | ProtocolVersion string `xml:"-"` 58 | } 59 | 60 | type OAIPMHErrors []OAIPMHError 61 | 62 | func (r *OAIPMHErrors) Add(code OAIPMHErrorCode, message string) { 63 | *r = append(*r, OAIPMHError{Code: code, Message: message}) 64 | } 65 | 66 | func (r *OAIPMHErrors) HasErrors() bool { 67 | if len(*r) > 0 { 68 | log.Debug().Any("errors", r).Send() 69 | return true 70 | } 71 | return false 72 | } 73 | 74 | func NewOAIPMHResponse(request *OAIPMHRequest) *OAIPMHResponse { 75 | return &OAIPMHResponse{ 76 | XMLNS: "http://www.openarchives.org/OAI/2.0/", 77 | XMLNSXSI: "http://www.w3.org/2001/XMLSchema-instance", 78 | XSISchemaLocation: "http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd", 79 | ResponseDate: time.Now().Round(time.Second).In(time.UTC), 80 | Request: request, 81 | ProtocolVersion: "2.0", 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /oaipmh/args.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package oaipmh 18 | 19 | import ( 20 | "fmt" 21 | "net/url" 22 | ) 23 | 24 | const ( 25 | ArgVerb string = "verb" // always required 26 | ArgIdentifier string = "identifier" // req GetRecord, op ListMetadataFormats 27 | ArgMetadataPrefix string = "metadataPrefix" // req GetRecord, req ListIdentifiers, req ListRecords 28 | ArgFrom string = "from" // op ListIdentifiers, op ListRecords 29 | ArgUntil string = "until" // op ListIdentifiers, op ListRecords 30 | ArgSet string = "set" // op ListIdentifiers, op ListRecords 31 | ArgResumptionToken string = "resumptionToken" // ListIdentifiers, ListRecords, ListSets 32 | 33 | VerbIdentify Verb = "Identify" 34 | VerbGetRecord Verb = "GetRecord" 35 | VerbListIdentifiers Verb = "ListIdentifiers" 36 | VerbListMetadataFormats Verb = "ListMetadataFormats" 37 | VerbListRecords Verb = "ListRecords" 38 | VerbListSets Verb = "ListSets" 39 | ) 40 | 41 | // ---- 42 | 43 | type Verb string 44 | 45 | func (v Verb) Validate() error { 46 | if v == VerbGetRecord || v == VerbIdentify || 47 | v == VerbListIdentifiers || v == VerbListMetadataFormats || 48 | v == VerbListRecords || v == VerbListSets { 49 | return nil 50 | } 51 | return fmt.Errorf("unknown verb: %s", v) 52 | } 53 | 54 | func (v Verb) ValidateArg(arg string) bool { 55 | switch v { 56 | case VerbGetRecord: 57 | return arg == ArgVerb || arg == ArgIdentifier || arg == ArgMetadataPrefix 58 | case VerbListIdentifiers: 59 | return arg == ArgVerb || arg == ArgMetadataPrefix || arg == ArgFrom || arg == ArgUntil || arg == ArgSet || arg == ArgResumptionToken 60 | case VerbListMetadataFormats: 61 | return arg == ArgVerb || arg == ArgIdentifier 62 | case VerbListRecords: 63 | return arg == ArgVerb || arg == ArgMetadataPrefix || arg == ArgFrom || arg == ArgUntil || arg == ArgSet || arg == ArgResumptionToken 64 | case VerbListSets: 65 | return arg == ArgVerb || arg == ArgResumptionToken 66 | default: // VerbIdentify 67 | return arg == ArgVerb 68 | } 69 | } 70 | 71 | func (v Verb) ValidateRequiredArgs(args url.Values) string { 72 | reqArgs := []string{ArgVerb} 73 | switch v { 74 | case VerbGetRecord: 75 | reqArgs = append(reqArgs, ArgIdentifier, ArgMetadataPrefix) 76 | case VerbListIdentifiers: 77 | reqArgs = append(reqArgs, ArgMetadataPrefix) 78 | case VerbListRecords: 79 | reqArgs = append(reqArgs, ArgMetadataPrefix) 80 | } 81 | for _, arg := range reqArgs { 82 | if !args.Has(arg) { 83 | return arg 84 | } 85 | } 86 | return "" 87 | } 88 | -------------------------------------------------------------------------------- /cnf/conf.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package cnf 18 | 19 | import ( 20 | "encoding/json" 21 | "os" 22 | "path/filepath" 23 | "time" 24 | 25 | "github.com/czcorpus/cnc-gokit/logging" 26 | "github.com/czcorpus/cnc-vlo/cncdb" 27 | "github.com/rs/zerolog/log" 28 | ) 29 | 30 | const ( 31 | dfltServerWriteTimeoutSecs = 30 32 | dfltLanguage = "en" 33 | dfltTimeZone = "Europe/Prague" 34 | ) 35 | 36 | // Conf is a global configuration of the app 37 | type Conf struct { 38 | ListenAddress string `json:"listenAddress"` 39 | ListenPort int `json:"listenPort"` 40 | ServerReadTimeoutSecs int `json:"serverReadTimeoutSecs"` 41 | ServerWriteTimeoutSecs int `json:"serverWriteTimeoutSecs"` 42 | Logging logging.LoggingConf `json:"logging"` 43 | TimeZone string `json:"timeZone"` 44 | CNCDB cncdb.DatabaseSetup `json:"cncDb"` 45 | RepositoryInfo RepositoryInfo `json:"repositoryInfo"` 46 | 47 | // values common to all metadata records 48 | MetadataValues MetadataValues `json:"metadataValues"` 49 | 50 | srcPath string 51 | } 52 | 53 | type RepositoryInfo struct { 54 | Name string `json:"name"` 55 | BaseURL string `json:"baseUrl"` 56 | AdminEmail []string `json:"adminEmail"` 57 | } 58 | 59 | type MetadataValues struct { 60 | Publisher string `json:"publisher"` 61 | } 62 | 63 | func (conf *Conf) TimezoneLocation() *time.Location { 64 | // we can ignore the error here as we always call c.Validate() 65 | // first (which also tries to load the location and report possible 66 | // error) 67 | loc, _ := time.LoadLocation(conf.TimeZone) 68 | return loc 69 | } 70 | 71 | // GetSourcePath returns an absolute path of a file 72 | // the config was loaded from. 73 | func (conf *Conf) GetSourcePath() string { 74 | if filepath.IsAbs(conf.srcPath) { 75 | return conf.srcPath 76 | } 77 | var cwd string 78 | cwd, err := os.Getwd() 79 | if err != nil { 80 | cwd = "[failed to get working dir]" 81 | } 82 | return filepath.Join(cwd, conf.srcPath) 83 | } 84 | 85 | func LoadConfig(path string) *Conf { 86 | if path == "" { 87 | log.Fatal().Msg("Cannot load config - path not specified") 88 | } 89 | rawData, err := os.ReadFile(path) 90 | if err != nil { 91 | log.Fatal().Err(err).Msg("Cannot load config") 92 | } 93 | var conf Conf 94 | conf.srcPath = path 95 | err = json.Unmarshal(rawData, &conf) 96 | if err != nil { 97 | log.Fatal().Err(err).Msg("Cannot load config") 98 | } 99 | return &conf 100 | } 101 | 102 | func ValidateAndDefaults(conf *Conf) { 103 | if conf.ServerWriteTimeoutSecs == 0 { 104 | conf.ServerWriteTimeoutSecs = dfltServerWriteTimeoutSecs 105 | log.Warn().Msgf( 106 | "serverWriteTimeoutSecs not specified, using default: %d", 107 | dfltServerWriteTimeoutSecs, 108 | ) 109 | } 110 | 111 | if conf.TimeZone == "" { 112 | log.Warn(). 113 | Str("timeZone", dfltTimeZone). 114 | Msg("time zone not specified, using default") 115 | } 116 | if _, err := time.LoadLocation(conf.TimeZone); err != nil { 117 | log.Fatal().Err(err).Msg("invalid time zone") 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /oaipmh/formats/cmdi.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package formats 18 | 19 | import ( 20 | "encoding/xml" 21 | "strings" 22 | "time" 23 | 24 | "github.com/czcorpus/cnc-vlo/oaipmh" 25 | ) 26 | 27 | const ( 28 | CMDIMetadataPrefix = "cmdi" 29 | CMDINamespace = "http://www.clarin.eu/cmd/1" 30 | CMDIEnvelopeSchema = "http://www.clarin.eu/cmd/1/xsd/cmd-envelop.xsd" 31 | ) 32 | 33 | // note - omitempties are optional 34 | 35 | type CMDIFormat struct { 36 | XMLName xml.Name `xml:"cmd:CMD"` 37 | XMLNSXSI string `xml:"xmlns:xsi,attr"` 38 | XMLNSCMD string `xml:"xmlns:cmd,attr"` 39 | XMLNSCMDP string `xml:"xmlns:cmdp,attr"` 40 | XSISchemaLocation string `xml:"xsi:schemaLocation,attr"` 41 | Version string `xml:"CMDVersion,attr"` 42 | 43 | Header CMDIHeader `xml:"cmd:Header"` 44 | Resources CMDIResources `xml:"cmd:Resources"` 45 | IsPartOf *[]string `xml:"cmd:IsPartOfList>IsPartOf,omitempty"` 46 | Components any `xml:"cmd:Components"` 47 | } 48 | 49 | // --------------------- Header --------------------- 50 | type CMDIHeader struct { 51 | MdCreator []string `xml:"cmd:MdCreator,omitempty"` 52 | MdCreationDate *time.Time `xml:"cmd:MdCreationDate,omitempty"` 53 | MdSelfLink string `xml:"cmd:MdSelfLink,omitempty"` 54 | MdProfile string `xml:"cmd:MdProfile"` 55 | MdCollectionDisplayName string `xml:"cmd:MdCollectionDisplayName,omitempty"` 56 | } 57 | 58 | // --------------------- Resources ------------------ 59 | 60 | type CMDIResources struct { 61 | // !!!IMPORTANT!!! Clarin requires at least one resource proxy for record to be harvested 62 | ResourceProxyList []CMDIResourceProxy `xml:"cmd:ResourceProxyList>cmd:ResourceProxy,omitempty"` 63 | JournalFileProxyList []string `xml:"cmd:JournalFileProxyList>cmd:JournaFileProxy>cmd:ResourceRef,omitempty"` 64 | ResourceRelationList []CMDIResourceRelation `xml:"cmd:ResourceRelationList>cmd:ResourceRelation,omitempty"` 65 | } 66 | 67 | type CMDIResourceProxy struct { 68 | ID string `xml:"id,attr"` 69 | ResourceType CMDIResourceType `xml:"cmd:ResourceType"` 70 | ResourceRef string `xml:"cmd:ResourceRef"` 71 | } 72 | 73 | type ResourceType string 74 | 75 | const ( 76 | // A resource that is described in the present CMD instance, e.g., a text document, media file or tool. 77 | RTResource ResourceType = "Resource" 78 | 79 | // A metadata resource, i.e., another CMD instance, that is subordinate to the present CMD instance. 80 | // The media type of this metadata resource SHOULD be application/x-cmdi+xml. 81 | RTMetadata ResourceType = "Metadata" 82 | 83 | // A resources that is a web page that provides the original context of the described resource, e.g., a “deep link” into a repository system. 84 | RTLandingPage ResourceType = "LandingPage" 85 | 86 | // A resource that is a web service that allows the described resource to be queried by means of dedicated software. 87 | RTSearchService ResourceType = "SearchService" 88 | 89 | // Resource that is a web page that allows the described resource to be queried by an end-user. 90 | RTSearchPage ResourceType = "SearchPage" 91 | ) 92 | 93 | type CMDIResourceType struct { 94 | MimeType string `xml:"mimetype,attr,omitempty"` 95 | Value ResourceType `xml:",chardata"` 96 | } 97 | 98 | type CMDIResourceRelation struct { 99 | RelationType CMDIRelationType `xml:"cmd:RelationType"` 100 | Resources [2]CMDIResource `xml:"cmd:Resource"` 101 | } 102 | 103 | type CMDIRelationType struct { 104 | ConceptLink string `xml:"cmd:ConceptLink,attr,omitempty"` 105 | Value string `xml:",chardata"` 106 | } 107 | 108 | type CMDIResource struct { 109 | Ref string `xml:"ref,attr"` 110 | Role *CMDIRelationType `xml:"cmd:Role,omitempty"` 111 | } 112 | 113 | // ------------------------------------------------------- 114 | 115 | type CMDIProfile interface { 116 | GetSchemaURL() string 117 | GetSchemaLocation() []string 118 | } 119 | 120 | func NewCMDI(profile CMDIProfile) CMDIFormat { 121 | return CMDIFormat{ 122 | XMLNSXSI: "http://www.w3.org/2001/XMLSchema-instance", 123 | XMLNSCMD: CMDINamespace, 124 | XMLNSCMDP: profile.GetSchemaURL(), 125 | XSISchemaLocation: strings.Join( 126 | append( 127 | []string{CMDINamespace, CMDIEnvelopeSchema}, 128 | profile.GetSchemaLocation()..., 129 | ), 130 | " ", 131 | ), 132 | Version: "1.2", 133 | Header: CMDIHeader{MdProfile: profile.GetSchemaURL()}, 134 | Components: profile, 135 | } 136 | } 137 | 138 | func GetCMDIFormat() oaipmh.OAIPMHMetadataFormat { 139 | return oaipmh.OAIPMHMetadataFormat{ 140 | MetadataPrefix: CMDIMetadataPrefix, 141 | Schema: CMDIEnvelopeSchema, 142 | MetadataNamespace: CMDINamespace, 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /vlo.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package main 18 | 19 | import ( 20 | "context" 21 | "flag" 22 | "fmt" 23 | "net/http" 24 | "os" 25 | "os/signal" 26 | "path/filepath" 27 | "strings" 28 | "syscall" 29 | "time" 30 | 31 | "github.com/czcorpus/cnc-gokit/logging" 32 | "github.com/czcorpus/cnc-gokit/uniresp" 33 | "github.com/gin-gonic/gin" 34 | "github.com/rs/zerolog/log" 35 | 36 | "github.com/czcorpus/cnc-vlo/cncdb" 37 | "github.com/czcorpus/cnc-vlo/cnchook" 38 | "github.com/czcorpus/cnc-vlo/cnf" 39 | "github.com/czcorpus/cnc-vlo/general" 40 | "github.com/czcorpus/cnc-vlo/oaipmh" 41 | ) 42 | 43 | var ( 44 | version string 45 | buildDate string 46 | gitCommit string 47 | ) 48 | 49 | func runApiServer( 50 | conf *cnf.Conf, 51 | syscallChan chan os.Signal, 52 | exitEvent chan os.Signal, 53 | db *cncdb.CNCMySQLHandler, 54 | ) { 55 | if !conf.Logging.Level.IsDebugMode() { 56 | gin.SetMode(gin.ReleaseMode) 57 | } 58 | 59 | engine := gin.New() 60 | engine.Use(gin.Recovery()) 61 | engine.Use(logging.GinMiddleware()) 62 | engine.NoMethod(uniresp.NoMethodHandler) 63 | engine.NoRoute(uniresp.NotFoundHandler) 64 | 65 | hook := cnchook.NewCNCHook(conf, db) 66 | handler := oaipmh.NewVLOHandler(conf.RepositoryInfo.BaseURL, hook) 67 | engine.GET("/oai", handler.HandleOAIGet) 68 | engine.POST("/oai", handler.HandleOAIPost) 69 | engine.GET("/record/:recordId", handler.HandleSelfLink) 70 | 71 | log.Info().Msgf("starting to listen at %s:%d", conf.ListenAddress, conf.ListenPort) 72 | srv := &http.Server{ 73 | Handler: engine, 74 | Addr: fmt.Sprintf("%s:%d", conf.ListenAddress, conf.ListenPort), 75 | WriteTimeout: time.Duration(conf.ServerWriteTimeoutSecs) * time.Second, 76 | ReadTimeout: time.Duration(conf.ServerReadTimeoutSecs) * time.Second, 77 | } 78 | go func() { 79 | err := srv.ListenAndServe() 80 | if err != nil { 81 | log.Error().Err(err).Msg("") 82 | } 83 | syscallChan <- syscall.SIGTERM 84 | }() 85 | 86 | select { 87 | case <-exitEvent: 88 | ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 89 | defer cancel() 90 | err := srv.Shutdown(ctx) 91 | if err != nil { 92 | log.Info().Err(err).Msg("Shutdown request error") 93 | } 94 | } 95 | } 96 | 97 | func cleanVersionInfo(v string) string { 98 | return strings.TrimLeft(strings.Trim(v, "'"), "v") 99 | } 100 | 101 | func main() { 102 | version := general.VersionInfo{ 103 | Version: cleanVersionInfo(version), 104 | BuildDate: cleanVersionInfo(buildDate), 105 | GitCommit: cleanVersionInfo(gitCommit), 106 | } 107 | 108 | flag.Usage = func() { 109 | fmt.Fprintf(os.Stderr, "VLO repository\n\n") 110 | fmt.Fprintf(os.Stderr, "Usage:\n\t%s [options] start [config.json]\n\t", filepath.Base(os.Args[0])) 111 | fmt.Fprintf(os.Stderr, "%s [options] version\n", filepath.Base(os.Args[0])) 112 | flag.PrintDefaults() 113 | } 114 | flag.Parse() 115 | action := flag.Arg(0) 116 | if action == "version" { 117 | fmt.Printf("cnc-vlo %s\nbuild date: %s\nlast commit: %s\n", version.Version, version.BuildDate, version.GitCommit) 118 | return 119 | } 120 | conf := cnf.LoadConfig(flag.Arg(1)) 121 | logging.SetupLogging(conf.Logging) 122 | log.Info().Msg("Starting CNC-VLO node") 123 | cnf.ValidateAndDefaults(conf) 124 | syscallChan := make(chan os.Signal, 1) 125 | signal.Notify(syscallChan, os.Interrupt) 126 | signal.Notify(syscallChan, syscall.SIGTERM) 127 | exitEvent := make(chan os.Signal) 128 | go func() { 129 | evt := <-syscallChan 130 | exitEvent <- evt 131 | close(exitEvent) 132 | }() 133 | 134 | switch action { 135 | case "start": 136 | if conf.CNCDB.Overrides.CorporaTableName != "" { 137 | log.Warn().Msgf( 138 | "Overriding default corpora table name to '%s'", conf.CNCDB.Overrides.CorporaTableName) 139 | 140 | } else { 141 | conf.CNCDB.Overrides.CorporaTableName = "kontext_corpus" 142 | } 143 | if conf.CNCDB.Overrides.UserTableName != "" { 144 | log.Warn().Msgf( 145 | "Overriding default user table name to '%s'", conf.CNCDB.Overrides.UserTableName) 146 | 147 | } else { 148 | conf.CNCDB.Overrides.UserTableName = "kontext_user" 149 | } 150 | if conf.CNCDB.Overrides.UserTableFirstNameCol != "" { 151 | log.Warn().Msgf( 152 | "Overriding default user table column for the `first name` to '%s'", 153 | conf.CNCDB.Overrides.UserTableFirstNameCol, 154 | ) 155 | 156 | } else { 157 | conf.CNCDB.Overrides.UserTableFirstNameCol = "firstname" 158 | } 159 | 160 | if conf.CNCDB.Overrides.UserTableLastNameCol != "" { 161 | log.Warn().Msgf( 162 | "Overriding default user table column for the `first name` to '%s'", 163 | conf.CNCDB.Overrides.UserTableLastNameCol, 164 | ) 165 | 166 | } else { 167 | conf.CNCDB.Overrides.UserTableLastNameCol = "lastname" 168 | } 169 | db, err := cncdb.NewCNCMySQLHandler(conf.CNCDB) 170 | if err != nil { 171 | log.Fatal().Err(err).Msg("Failed to create DB connection") 172 | } 173 | runApiServer(conf, syscallChan, exitEvent, db) 174 | default: 175 | log.Fatal().Msgf("Unknown action %s", action) 176 | } 177 | 178 | } 179 | -------------------------------------------------------------------------------- /cnchook/conversion.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package cnchook 18 | 19 | import ( 20 | "fmt" 21 | "strings" 22 | "time" 23 | 24 | "github.com/czcorpus/cnc-vlo/cncdb" 25 | "github.com/czcorpus/cnc-vlo/cnchook/profiles" 26 | "github.com/czcorpus/cnc-vlo/cnchook/profiles/components" 27 | "github.com/czcorpus/cnc-vlo/oaipmh" 28 | "github.com/czcorpus/cnc-vlo/oaipmh/formats" 29 | "golang.org/x/text/language/display" 30 | ) 31 | 32 | func (c *CNCHook) dcRecordFromData(data *cncdb.DBData) oaipmh.OAIPMHRecord { 33 | recordID := fmt.Sprint(data.ID) 34 | metadata := formats.NewDublinCore() 35 | metadata.Title.Add(data.TitleEN, "en") 36 | metadata.Title.Add(data.TitleCS, "cs") 37 | if data.DescCS.Valid { 38 | metadata.Description.Add(data.DescCS.String, "cs") 39 | } 40 | if data.DescEN.Valid { 41 | metadata.Description.Add(data.DescEN.String, "en") 42 | } 43 | metadata.Date.Add(data.Date.In(time.UTC).Format(time.RFC3339), "") 44 | for _, author := range getAuthorList(data) { 45 | if author.FirstName == "" { 46 | metadata.Creator.Add(author.LastName, "") 47 | } else { 48 | metadata.Creator.Add(author.FirstName+" "+author.LastName, "") 49 | } 50 | } 51 | metadata.Identifier.Add(data.Name, "") 52 | metadata.Type.Add(data.Type, "") 53 | metadata.Rights.Add(data.License, "") 54 | 55 | switch MetadataType(data.Type) { 56 | case CorpusMetadataType: 57 | if data.CorpusData.Locale != nil { 58 | base, _ := data.CorpusData.Locale.Base() 59 | metadata.Language.Add(base.String(), "") 60 | } 61 | case ServiceMetadataType: 62 | default: 63 | } 64 | 65 | record := oaipmh.NewOAIPMHRecord(metadata) 66 | record.Header.Datestamp = data.Date.In(time.UTC) 67 | record.Header.Identifier = recordID 68 | return record 69 | } 70 | 71 | func (c *CNCHook) cmdiLindatClarinRecordFromData(data *cncdb.DBData) oaipmh.OAIPMHRecord { 72 | recordID := fmt.Sprint(data.ID) 73 | profile := &profiles.CNCResourceProfile{ 74 | BibliographicInfo: components.BibliographicInfoComponent{ 75 | Titles: formats.MultilangArray{ 76 | {Lang: "en", Value: data.TitleEN}, 77 | {Lang: "cs", Value: data.TitleCS}, 78 | }, 79 | Identifiers: []formats.TypedElement{ 80 | {Value: data.Name}, 81 | }, 82 | Authors: getAuthorList(data), 83 | ContactPerson: components.ContactPersonComponent{ 84 | LastName: data.ContactPerson.Lastname, 85 | FirstName: data.ContactPerson.Firstname, 86 | Email: data.ContactPerson.Email, 87 | Affiliation: data.ContactPerson.Affiliation.String, 88 | }, 89 | Publishers: []string{ 90 | c.conf.MetadataValues.Publisher, 91 | }, 92 | }, 93 | DataInfo: components.DataInfoComponent{ 94 | Type: data.Type, 95 | Description: formats.MultilangArray{ 96 | {Lang: "en", Value: data.DescEN.String}, 97 | {Lang: "cs", Value: data.DescCS.String}, 98 | }, 99 | }, 100 | LicenseInfo: []profiles.LicenseElement{ 101 | {URI: data.License}, 102 | }, 103 | } 104 | if data.DateIssued == "" { 105 | profile.BibliographicInfo.Dates = &components.DatesComponent{DateIssued: data.DateIssued} 106 | } 107 | metadata := formats.NewCMDI(profile) 108 | metadata.Header.MdSelfLink = fmt.Sprintf("%s/record/%s?format=cmdi", c.conf.RepositoryInfo.BaseURL, recordID) 109 | 110 | switch MetadataType(data.Type) { 111 | case CorpusMetadataType: 112 | profile.DataInfo.SizeInfo = &[]components.SizeComponent{ 113 | {Size: fmt.Sprint(data.CorpusData.Size.Int64), Unit: "words"}, 114 | } 115 | if data.CorpusData.Locale != nil { 116 | base, _ := data.CorpusData.Locale.Base() 117 | profile.DataInfo.Languages = &[]components.LanguageComponent{ 118 | {Name: display.English.Languages().Name(base), Code: base.String()}, 119 | } 120 | } 121 | if data.CorpusData.Keywords.String != "" { 122 | keywords := strings.Split(data.CorpusData.Keywords.String, ",") 123 | profile.DataInfo.Keywords = &keywords 124 | } 125 | metadata.Resources.ResourceProxyList = append( 126 | metadata.Resources.ResourceProxyList, 127 | formats.CMDIResourceProxy{ 128 | ID: fmt.Sprintf("sp_%s", recordID), 129 | ResourceType: formats.CMDIResourceType{MimeType: "text/html", Value: formats.RTSearchPage}, 130 | ResourceRef: getKontextPath(data.Name), 131 | }, 132 | ) 133 | 134 | case ServiceMetadataType: 135 | default: 136 | } 137 | 138 | // insert link if available 139 | if data.Link.String != "" { 140 | link := data.Link.String 141 | // generate path to english version wiki 142 | if strings.Contains(link, "wiki.korpus.cz") { 143 | link = strings.ReplaceAll(link, "/cnk:", "/en:cnk:") 144 | } 145 | metadata.Resources.ResourceProxyList = append( 146 | metadata.Resources.ResourceProxyList, 147 | formats.CMDIResourceProxy{ 148 | ID: fmt.Sprintf("uri_%s", recordID), 149 | ResourceType: formats.CMDIResourceType{MimeType: "text/html", Value: formats.RTResource}, 150 | ResourceRef: link, 151 | }, 152 | ) 153 | } 154 | 155 | record := oaipmh.NewOAIPMHRecord(metadata) 156 | record.Header.Datestamp = data.Date.In(time.UTC) 157 | record.Header.Identifier = recordID 158 | return record 159 | } 160 | -------------------------------------------------------------------------------- /cnchook/cnchook.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package cnchook 18 | 19 | import ( 20 | "fmt" 21 | "net/http" 22 | "time" 23 | 24 | "github.com/czcorpus/cnc-vlo/cncdb" 25 | "github.com/czcorpus/cnc-vlo/cnf" 26 | "github.com/czcorpus/cnc-vlo/oaipmh" 27 | "github.com/czcorpus/cnc-vlo/oaipmh/formats" 28 | "github.com/rs/zerolog/log" 29 | ) 30 | 31 | type CNCHook struct { 32 | conf *cnf.Conf 33 | db *cncdb.CNCMySQLHandler 34 | } 35 | 36 | func (c *CNCHook) Identify() oaipmh.ResultWrapper[oaipmh.OAIPMHIdentify] { 37 | earliestDatestamp, err := c.db.GetFirstDate() 38 | result := oaipmh.NewResultWrapper( 39 | oaipmh.OAIPMHIdentify{ 40 | RepositoryName: c.conf.RepositoryInfo.Name, 41 | BaseURL: c.conf.RepositoryInfo.BaseURL, 42 | AdminEmail: c.conf.RepositoryInfo.AdminEmail, 43 | EarliestDatestamp: earliestDatestamp.In(time.UTC), 44 | DeletedRecord: "no", 45 | Granularity: "YYYY-MM-DDThh:mm:ssZ", 46 | }, 47 | ) 48 | if err != nil { 49 | log.Error().Err(err).Msg("Failed to call Identify") 50 | result.HTTPCode = http.StatusInternalServerError 51 | } 52 | return result 53 | } 54 | 55 | func (c *CNCHook) ListMetadataFormats(req oaipmh.OAIPMHRequest) oaipmh.ResultWrapper[[]oaipmh.OAIPMHMetadataFormat] { 56 | ans := oaipmh.NewResultWrapper( 57 | []oaipmh.OAIPMHMetadataFormat{ 58 | formats.GetDublinCoreFormat(), 59 | formats.GetCMDIFormat(), 60 | }, 61 | ) 62 | if req.Identifier != "" { 63 | exists, err := c.db.IdentifierExists(req.Identifier) 64 | if err != nil { 65 | log.Error().Err(err).Msg("Failed to call ListMetadataFormats") 66 | ans.HTTPCode = http.StatusInternalServerError 67 | return ans 68 | 69 | } else if !exists { 70 | ans.Errors.Add(oaipmh.ErrorCodeIDDoesNotExist, fmt.Sprintf("Result for ID = %s not found", req.Identifier)) 71 | ans.HTTPCode = http.StatusNotFound 72 | return ans 73 | } 74 | } 75 | return ans 76 | } 77 | 78 | func (c *CNCHook) GetRecord(req oaipmh.OAIPMHRequest) oaipmh.ResultWrapper[oaipmh.OAIPMHRecord] { 79 | ans := oaipmh.NewResultWrapper(oaipmh.OAIPMHRecord{}) 80 | data, err := c.db.GetRecordInfo(req.Identifier) 81 | if err != nil { 82 | log.Error().Err(err).Msg("Failed to call GetRecord") 83 | ans.HTTPCode = http.StatusInternalServerError 84 | return ans 85 | 86 | } else if data == nil { 87 | ans.Errors.Add(oaipmh.ErrorCodeIDDoesNotExist, fmt.Sprintf("Result for ID = %s not found", req.Identifier)) 88 | ans.HTTPCode = http.StatusNotFound 89 | return ans 90 | } 91 | 92 | switch req.MetadataPrefix { 93 | case formats.DublinCoreMetadataPrefix: 94 | ans.Data = c.dcRecordFromData(data) 95 | case formats.CMDIMetadataPrefix: 96 | ans.Data = c.cmdiLindatClarinRecordFromData(data) 97 | default: 98 | ans.Errors.Add(oaipmh.ErrorCodeCannotDisseminateFormat, "Unknown metadata format") 99 | ans.HTTPCode = http.StatusBadRequest 100 | } 101 | return ans 102 | } 103 | 104 | // same as ListRecords but returns only RecordHeaders 105 | func (c *CNCHook) ListIdentifiers(req oaipmh.OAIPMHRequest) oaipmh.ResultWrapper[[]oaipmh.OAIPMHRecordHeader] { 106 | ans := oaipmh.NewResultWrapper([]oaipmh.OAIPMHRecordHeader{}) 107 | data, err := c.db.ListRecordInfo(req.From, req.Until) 108 | if err != nil { 109 | log.Error().Err(err).Msg("Failed to call ListIdentifiers") 110 | ans.HTTPCode = http.StatusInternalServerError 111 | return ans 112 | } 113 | if len(data) == 0 { 114 | ans.Errors.Add(oaipmh.ErrorCodeNoRecordsMatch, "No records") 115 | return ans 116 | } 117 | switch req.MetadataPrefix { 118 | case formats.DublinCoreMetadataPrefix: 119 | for _, d := range data { 120 | ans.Data = append(ans.Data, *c.dcRecordFromData(&d).Header) 121 | } 122 | case formats.CMDIMetadataPrefix: 123 | for _, d := range data { 124 | ans.Data = append(ans.Data, *c.cmdiLindatClarinRecordFromData(&d).Header) 125 | } 126 | default: 127 | ans.Errors.Add(oaipmh.ErrorCodeCannotDisseminateFormat, "Unknown metadata format") 128 | ans.HTTPCode = http.StatusBadRequest 129 | } 130 | return ans 131 | } 132 | 133 | func (c *CNCHook) ListRecords(req oaipmh.OAIPMHRequest) oaipmh.ResultWrapper[[]oaipmh.OAIPMHRecord] { 134 | ans := oaipmh.NewResultWrapper([]oaipmh.OAIPMHRecord{}) 135 | data, err := c.db.ListRecordInfo(req.From, req.Until) 136 | if err != nil { 137 | log.Error().Err(err).Msg("Failed to call ListRecords") 138 | ans.HTTPCode = http.StatusInternalServerError 139 | return ans 140 | } 141 | if len(data) == 0 { 142 | ans.Errors.Add(oaipmh.ErrorCodeNoRecordsMatch, "No records") 143 | return ans 144 | } 145 | switch req.MetadataPrefix { 146 | case formats.DublinCoreMetadataPrefix: 147 | for _, d := range data { 148 | ans.Data = append(ans.Data, c.dcRecordFromData(&d)) 149 | } 150 | case formats.CMDIMetadataPrefix: 151 | for _, d := range data { 152 | ans.Data = append(ans.Data, c.cmdiLindatClarinRecordFromData(&d)) 153 | } 154 | default: 155 | ans.Errors.Add(oaipmh.ErrorCodeCannotDisseminateFormat, "Unknown metadata format") 156 | ans.HTTPCode = http.StatusBadRequest 157 | } 158 | return ans 159 | } 160 | 161 | func (c *CNCHook) ListSets(req oaipmh.OAIPMHRequest) oaipmh.ResultWrapper[[]oaipmh.OAIPMHSet] { 162 | return oaipmh.NewResultWrapper([]oaipmh.OAIPMHSet{}) 163 | } 164 | 165 | func (c *CNCHook) SupportsSets() bool { 166 | return false 167 | } 168 | 169 | func (c *CNCHook) SupportedMetadataPrefixes() []string { 170 | return []string{ 171 | formats.DublinCoreMetadataPrefix, 172 | formats.CMDIMetadataPrefix, 173 | } 174 | } 175 | 176 | func NewCNCHook(conf *cnf.Conf, db *cncdb.CNCMySQLHandler) *CNCHook { 177 | return &CNCHook{ 178 | conf: conf, 179 | db: db, 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /oaipmh/handler.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Institute of the Czech National Corpus, 3 | // Faculty of Arts, Charles University 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | package oaipmh 18 | 19 | import ( 20 | "fmt" 21 | "net/http" 22 | "net/url" 23 | "strings" 24 | "time" 25 | 26 | "github.com/czcorpus/cnc-gokit/collections" 27 | "github.com/czcorpus/cnc-gokit/logging" 28 | "github.com/gin-gonic/gin" 29 | "github.com/rs/zerolog/log" 30 | ) 31 | 32 | type ResultWrapper[T any] struct { 33 | Data T 34 | Errors OAIPMHErrors 35 | HTTPCode int 36 | } 37 | 38 | func (w *ResultWrapper[any]) NoError() bool { 39 | return !w.Errors.HasErrors() && w.HTTPCode < 400 40 | } 41 | 42 | func NewResultWrapper[T any](data T) ResultWrapper[T] { 43 | return ResultWrapper[T]{ 44 | Data: data, 45 | HTTPCode: http.StatusOK, 46 | } 47 | } 48 | 49 | type VLOHook interface { 50 | Identify() ResultWrapper[OAIPMHIdentify] 51 | GetRecord(req OAIPMHRequest) ResultWrapper[OAIPMHRecord] 52 | ListIdentifiers(req OAIPMHRequest) ResultWrapper[[]OAIPMHRecordHeader] 53 | ListMetadataFormats(req OAIPMHRequest) ResultWrapper[[]OAIPMHMetadataFormat] 54 | ListRecords(req OAIPMHRequest) ResultWrapper[[]OAIPMHRecord] 55 | ListSets(req OAIPMHRequest) ResultWrapper[[]OAIPMHSet] 56 | 57 | SupportsSets() bool 58 | SupportedMetadataPrefixes() []string 59 | } 60 | 61 | type VLOHandler struct { 62 | basePath string 63 | hook VLOHook 64 | } 65 | 66 | func (a *VLOHandler) getReqResp(argSource url.Values) (*OAIPMHRequest, *OAIPMHResponse, error) { 67 | OAIURL, err := url.JoinPath(a.basePath, "oai") 68 | if err != nil { 69 | return nil, nil, fmt.Errorf("failed to prepare OAIPMH request and response: %w", err) 70 | } 71 | req := &OAIPMHRequest{URL: OAIURL} 72 | resp := NewOAIPMHResponse(req) 73 | 74 | // get verb operation 75 | if !argSource.Has(ArgVerb) { 76 | resp.Errors.Add(ErrorCodeBadArgument, fmt.Sprintf("Missing required argument `%s`", ArgVerb)) 77 | return req, resp, nil 78 | } 79 | req.Verb = getTypedArg[Verb](argSource, ArgVerb) 80 | if err := req.Verb.Validate(); err != nil { 81 | resp.Errors.Add(ErrorCodeBadVerb, fmt.Sprintf("Invalid verb `%s`", req.Verb)) 82 | return req, resp, nil 83 | } 84 | 85 | // check required arguments 86 | if arg := req.Verb.ValidateRequiredArgs(argSource); arg != "" { 87 | resp.Errors.Add(ErrorCodeBadArgument, fmt.Sprintf("Missing required argument `%s` for verb `%s`", arg, req.Verb)) 88 | return req, resp, nil 89 | } 90 | // check allowed arguments 91 | for k := range argSource { 92 | if !req.Verb.ValidateArg(k) { 93 | resp.Errors.Add(ErrorCodeBadArgument, fmt.Sprintf("Invalid argument `%s` for verb `%s`", k, req.Verb)) 94 | return req, resp, nil 95 | } 96 | } 97 | 98 | req.Identifier = getTypedArg[string](argSource, ArgIdentifier) 99 | req.MetadataPrefix = getTypedArg[string](argSource, ArgMetadataPrefix) 100 | if from := getTypedArg[string](argSource, ArgFrom); from != "" { 101 | var parsed time.Time 102 | if strings.Contains(from, "T") { 103 | parsed, err = time.Parse(time.RFC3339, from) 104 | } else { 105 | parsed, err = time.Parse(time.DateOnly, from) 106 | } 107 | if err != nil { 108 | return nil, nil, fmt.Errorf("failed to parse `from`: %w", err) 109 | } 110 | parsed = parsed.In(time.UTC) 111 | req.From = &parsed 112 | } 113 | if until := getTypedArg[string](argSource, ArgUntil); until != "" { 114 | var parsed time.Time 115 | if strings.Contains(until, "T") { 116 | parsed, err = time.Parse(time.RFC3339, until) 117 | } else { 118 | parsed, err = time.Parse(time.DateOnly, until) 119 | parsed = parsed.Add(24 * time.Hour) 120 | } 121 | if err != nil { 122 | return nil, nil, fmt.Errorf("failed to until `from`: %w", err) 123 | } 124 | parsed = parsed.In(time.UTC) 125 | req.Until = &parsed 126 | } 127 | req.Set = getTypedArg[string](argSource, ArgSet) 128 | req.ResumptionToken = getTypedArg[string](argSource, ArgResumptionToken) 129 | return req, resp, nil 130 | } 131 | 132 | func (a *VLOHandler) handleRequest(ctx *gin.Context, req *OAIPMHRequest, resp *OAIPMHResponse) { 133 | var errors OAIPMHErrors 134 | httpCode := http.StatusOK 135 | switch req.Verb { 136 | case VerbIdentify: 137 | ans := a.hook.Identify() 138 | errors, httpCode = ans.Errors, ans.HTTPCode 139 | if ans.NoError() { 140 | resp.Identify = &ans.Data 141 | resp.Identify.BaseURL = req.URL 142 | resp.Identify.ProtocolVersion = resp.ProtocolVersion 143 | } 144 | 145 | case VerbGetRecord: 146 | if !collections.SliceContains(a.hook.SupportedMetadataPrefixes(), req.MetadataPrefix) { 147 | resp.Errors.Add(ErrorCodeCannotDisseminateFormat, "Unknown metadata format") 148 | writeXMLResponse(ctx.Writer, http.StatusBadRequest, resp) 149 | return 150 | } 151 | ans := a.hook.GetRecord(*req) 152 | errors, httpCode = ans.Errors, ans.HTTPCode 153 | if ans.NoError() { 154 | resp.GetRecord = &ans.Data 155 | } 156 | 157 | case VerbListIdentifiers: 158 | if !collections.SliceContains(a.hook.SupportedMetadataPrefixes(), req.MetadataPrefix) { 159 | resp.Errors.Add(ErrorCodeCannotDisseminateFormat, "Unknown metadata format") 160 | writeXMLResponse(ctx.Writer, http.StatusBadRequest, resp) 161 | return 162 | } 163 | if req.Set != "" && !a.hook.SupportsSets() { 164 | resp.Errors.Add(ErrorCodeNoSetHierarchy, "Sets functionality not implemented") 165 | writeXMLResponse(ctx.Writer, http.StatusNotImplemented, resp) 166 | return 167 | } 168 | ans := a.hook.ListIdentifiers(*req) 169 | errors, httpCode = ans.Errors, ans.HTTPCode 170 | if ans.NoError() { 171 | resp.ListIdentifiers = &ans.Data 172 | } 173 | 174 | case VerbListMetadataFormats: 175 | ans := a.hook.ListMetadataFormats(*req) 176 | errors, httpCode = ans.Errors, ans.HTTPCode 177 | if ans.NoError() { 178 | resp.ListMetadataFormats = &ans.Data 179 | } 180 | 181 | case VerbListRecords: 182 | if !collections.SliceContains(a.hook.SupportedMetadataPrefixes(), req.MetadataPrefix) { 183 | resp.Errors.Add(ErrorCodeCannotDisseminateFormat, "Unknown metadata format") 184 | writeXMLResponse(ctx.Writer, http.StatusBadRequest, resp) 185 | return 186 | } 187 | if req.Set != "" && !a.hook.SupportsSets() { 188 | resp.Errors.Add(ErrorCodeNoSetHierarchy, "Sets functionality not implemented") 189 | writeXMLResponse(ctx.Writer, http.StatusNotImplemented, resp) 190 | return 191 | } 192 | ans := a.hook.ListRecords(*req) 193 | errors, httpCode = ans.Errors, ans.HTTPCode 194 | if ans.NoError() { 195 | resp.ListRecords = &ans.Data 196 | } 197 | 198 | case VerbListSets: 199 | if !a.hook.SupportsSets() { 200 | resp.Errors.Add(ErrorCodeNoSetHierarchy, "Sets functionality not implemented") 201 | writeXMLResponse(ctx.Writer, http.StatusNotImplemented, resp) 202 | return 203 | } 204 | ans := a.hook.ListSets(*req) 205 | errors, httpCode = ans.Errors, ans.HTTPCode 206 | if ans.NoError() { 207 | resp.ListSets = &ans.Data 208 | } 209 | 210 | default: 211 | resp.Errors.Add(ErrorCodeBadArgument, fmt.Sprintf("Verb not implemented `%s`", req.Verb)) 212 | httpCode = http.StatusNotImplemented 213 | } 214 | 215 | resp.Errors = append(resp.Errors, errors...) 216 | if httpCode >= 400 && !resp.Errors.HasErrors() { 217 | ctx.AbortWithStatus(httpCode) 218 | return 219 | } 220 | writeXMLResponse(ctx.Writer, httpCode, resp) 221 | } 222 | 223 | func (a *VLOHandler) HandleOAIGet(ctx *gin.Context) { 224 | req, resp, err := a.getReqResp(ctx.Request.URL.Query()) 225 | if err != nil { 226 | log.Error().Err(err).Msg("Failed to handle OAIPMH Get request") 227 | ctx.AbortWithStatus(http.StatusInternalServerError) 228 | return 229 | } 230 | logging.AddLogEvent(ctx, "operation", req.Verb) 231 | if resp.Errors.HasErrors() { 232 | writeXMLResponse(ctx.Writer, http.StatusBadRequest, resp) 233 | return 234 | } 235 | a.handleRequest(ctx, req, resp) 236 | } 237 | 238 | func (a *VLOHandler) HandleOAIPost(ctx *gin.Context) { 239 | if err := ctx.Request.ParseForm(); err != nil { 240 | log.Error().Err(err).Msg("Failed to handle OAIPMH Post request") 241 | ctx.AbortWithStatus(http.StatusInternalServerError) 242 | return 243 | } 244 | req, resp, err := a.getReqResp(ctx.Request.PostForm) 245 | if err != nil { 246 | log.Error().Err(err).Msg("Failed to handle OAIPMH Post request") 247 | ctx.AbortWithStatus(http.StatusInternalServerError) 248 | return 249 | } 250 | logging.AddLogEvent(ctx, "operation", req.Verb) 251 | if resp.Errors.HasErrors() { 252 | writeXMLResponse(ctx.Writer, http.StatusBadRequest, resp) 253 | return 254 | } 255 | a.handleRequest(ctx, req, resp) 256 | } 257 | 258 | func (a *VLOHandler) HandleSelfLink(ctx *gin.Context) { 259 | req := OAIPMHRequest{ 260 | URL: ctx.Request.Host + ctx.Request.URL.Path, 261 | Identifier: ctx.Param("recordId"), 262 | MetadataPrefix: ctx.DefaultQuery("format", "oai_dc"), 263 | } 264 | 265 | ans := a.hook.GetRecord(req) 266 | if ans.HTTPCode >= 400 { 267 | ctx.AbortWithStatus(ans.HTTPCode) 268 | } else { 269 | writeXMLResponse(ctx.Writer, ans.HTTPCode, ans.Data.Metadata.Value) 270 | } 271 | } 272 | 273 | func NewVLOHandler(basePath string, hook VLOHook) *VLOHandler { 274 | return &VLOHandler{ 275 | basePath: basePath, 276 | hook: hook, 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= 2 | filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= 3 | github.com/BurntSushi/toml v1.4.0 h1:kuoIxZQy2WRRk1pttg9asf+WVv6tWQuBNVmK8+nqPr0= 4 | github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= 5 | github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= 6 | github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s= 7 | github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= 8 | github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= 9 | github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams= 10 | github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= 11 | github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 12 | github.com/czcorpus/cnc-gokit v0.11.0 h1:0DSWVAMu6TyBLxeBfTRB/yezoFKQPy1zW8yqUJmcBzg= 13 | github.com/czcorpus/cnc-gokit v0.11.0/go.mod h1:BZSRrYUFIHXVIiuqnSoZbfXfL2X/gHWG3w35aIVW36U= 14 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 15 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 16 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 17 | github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU= 18 | github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= 19 | github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= 20 | github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= 21 | github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg= 22 | github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= 23 | github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= 24 | github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= 25 | github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= 26 | github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= 27 | github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= 28 | github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= 29 | github.com/go-playground/validator/v10 v10.14.1 h1:9c50NUPC30zyuKprjL3vNZ0m5oG+jU0zvx4AqHGnv4k= 30 | github.com/go-playground/validator/v10 v10.14.1/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= 31 | github.com/go-sql-driver/mysql v1.8.0 h1:UtktXaU2Nb64z/pLiGIxY4431SJ4/dR5cjMmlVHgnT4= 32 | github.com/go-sql-driver/mysql v1.8.0/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg= 33 | github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= 34 | github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= 35 | github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 36 | github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= 37 | github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 38 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 39 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 40 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 41 | github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= 42 | github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= 43 | github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= 44 | github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q= 45 | github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= 46 | github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= 47 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 48 | github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= 49 | github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= 50 | github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 51 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 52 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 53 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 54 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 55 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 56 | github.com/natefinch/lumberjack v2.0.0+incompatible h1:4QJd3OLAMgj7ph+yZTuX13Ld4UpgHp07nNdFX7mqFfM= 57 | github.com/natefinch/lumberjack v2.0.0+incompatible/go.mod h1:Wi9p2TTF5DG5oU+6YfsmYQpsTIOm0B1VNzQg9Mw6nPk= 58 | github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= 59 | github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= 60 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 61 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 62 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 63 | github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= 64 | github.com/rs/zerolog v1.31.0 h1:FcTR3NnLWW+NnTwwhFWiJSZr4ECLpqCm6QsEnyvbV4A= 65 | github.com/rs/zerolog v1.31.0/go.mod h1:/7mN4D5sKwJLZQ2b/znpjC3/GQWY/xaDXUM0kKWRHss= 66 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 67 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 68 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 69 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 70 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 71 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 72 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 73 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 74 | github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 75 | github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 76 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 77 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 78 | github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= 79 | github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= 80 | github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= 81 | github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= 82 | golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= 83 | golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k= 84 | golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= 85 | golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= 86 | golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= 87 | golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= 88 | golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= 89 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 90 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 91 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 92 | golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 93 | golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= 94 | golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 95 | golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= 96 | golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= 97 | google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= 98 | google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= 99 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 100 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 101 | gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= 102 | gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= 103 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 104 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 105 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 106 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 107 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 108 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 109 | -------------------------------------------------------------------------------- /cncdb/cncdb.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Martin Zimandl 2 | // Copyright 2024 Tomas Machalek 3 | // Copyright 2024 Institute of the Czech National Corpus, 4 | // Faculty of Arts, Charles University 5 | // 6 | // Licensed under the Apache License, Version 2.0 (the "License"); 7 | // you may not use this file except in compliance with the License. 8 | // You may obtain a copy of the License at 9 | // 10 | // http://www.apache.org/licenses/LICENSE-2.0 11 | // 12 | // Unless required by applicable law or agreed to in writing, software 13 | // distributed under the License is distributed on an "AS IS" BASIS, 14 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | // See the License for the specific language governing permissions and 16 | // limitations under the License. 17 | 18 | package cncdb 19 | 20 | import ( 21 | "database/sql" 22 | "fmt" 23 | "strings" 24 | "time" 25 | 26 | "github.com/go-sql-driver/mysql" 27 | "github.com/rs/zerolog/log" 28 | "golang.org/x/text/language" 29 | ) 30 | 31 | // DBOverrides handles differences between KonText default 32 | // database schema and the CNC-one which is slightly different 33 | type DBOverrides struct { 34 | CorporaTableName string `json:"corporaTableName"` 35 | UserTableName string `json:"userTableName"` 36 | UserTableFirstNameCol string `json:"userTableFirstNameCol"` 37 | UserTableLastNameCol string `json:"userTableLastNameCol"` 38 | } 39 | 40 | type CNCMySQLHandler struct { 41 | conn *sql.DB 42 | overrides DBOverrides 43 | publicCorplistID int 44 | } 45 | 46 | type DBData struct { 47 | ID int 48 | Date time.Time 49 | Hosted bool 50 | Type string 51 | Name string 52 | DescEN sql.NullString 53 | DescCS sql.NullString 54 | DateIssued string 55 | TitleEN string 56 | TitleCS string 57 | Link sql.NullString 58 | License string 59 | Authors string 60 | ContactPerson ContactPersonData 61 | CorpusData CorpusData 62 | } 63 | 64 | type ContactPersonData struct { 65 | Firstname string 66 | Lastname string 67 | Email string 68 | Affiliation sql.NullString 69 | } 70 | 71 | type CorpusData struct { 72 | Size sql.NullInt64 73 | Locale *language.Tag 74 | Keywords sql.NullString 75 | } 76 | 77 | func (c *CNCMySQLHandler) GetFirstDate() (time.Time, error) { 78 | var date time.Time 79 | row := c.conn.QueryRow("SELECT MIN(created) FROM vlo_metadata_common") 80 | err := row.Scan(&date) 81 | return date, err 82 | } 83 | 84 | func (c *CNCMySQLHandler) IdentifierExists(identifier string) (bool, error) { 85 | var id int 86 | row := c.conn.QueryRow( 87 | fmt.Sprintf( 88 | "SELECT m.id FROM vlo_metadata_common AS m "+ 89 | "LEFT JOIN vlo_metadata_corpus AS mc ON m.corpus_metadata_id = mc.id "+ 90 | "LEFT JOIN %s AS c ON m.corpus_name = c.name "+ 91 | "LEFT JOIN corplist_corpus AS cc ON c.id = cc.corpus_id "+ 92 | "WHERE m.id = ? AND m.deleted = FALSE "+ 93 | "AND ((m.type = 'corpus' AND cc.corplist_id = ?) OR m.type != 'corpus')", 94 | c.overrides.CorporaTableName, 95 | ), 96 | identifier, c.publicCorplistID, 97 | ) 98 | err := row.Scan(&id) 99 | if err != nil { 100 | if err == sql.ErrNoRows { 101 | return false, nil 102 | } 103 | return false, fmt.Errorf("failed to check identifier existence record info: %w", err) 104 | } 105 | return true, nil 106 | } 107 | 108 | func (c *CNCMySQLHandler) parseLocale(loc string) (ans language.Tag, err error) { 109 | tmp := strings.Split(loc, ".") 110 | base := tmp[0] 111 | ans, err = language.Parse(base) 112 | if err != nil { 113 | log.Error(). 114 | Err(err). 115 | Str("value", loc). 116 | Msg("Failed to parse database language record. Trying partial parsing.") 117 | tmp := strings.Split(loc, "_") 118 | if len(tmp) == 0 { 119 | tmp = strings.Split(loc, "-") 120 | } 121 | if len(tmp) != 2 { 122 | err = fmt.Errorf("unable to parse locale %s", loc) 123 | return 124 | } 125 | ans, err = language.Parse(tmp[0]) 126 | return 127 | } 128 | return 129 | } 130 | 131 | func (c *CNCMySQLHandler) GetRecordInfo(identifier string) (*DBData, error) { 132 | var data DBData 133 | var locale sql.NullString 134 | 135 | row := c.conn.QueryRow( 136 | fmt.Sprintf( 137 | "SELECT "+ 138 | "m.id, "+ 139 | "GREATEST(m.created, m.updated), "+ 140 | "m.hosted, "+ 141 | "m.type, "+ 142 | "m.desc_en, "+ 143 | "m.desc_cs, "+ 144 | "m.date_issued, "+ 145 | "m.license_info, "+ 146 | "m.authors, "+ 147 | "u.%s, "+ 148 | "u.%s, "+ 149 | "u.email, "+ 150 | "u.affiliation, "+ 151 | "COALESCE(c.name, ms.name), "+ 152 | "COALESCE(rc.name, c.name, ms.name), "+ 153 | "COALESCE(rc.name, c.name, ms.name), "+ 154 | "COALESCE(c.web, ms.link), "+ 155 | "c.size, c.locale, GROUP_CONCAT(k.label_en ORDER BY k.display_order SEPARATOR ',') "+ 156 | "FROM vlo_metadata_common AS m "+ 157 | "LEFT JOIN vlo_metadata_corpus AS mc ON m.corpus_metadata_id = mc.id "+ 158 | "LEFT JOIN vlo_metadata_service AS ms ON m.service_metadata_id = ms.id "+ 159 | "LEFT JOIN %s AS c ON mc.corpus_name = c.name "+ 160 | "LEFT JOIN kontext_keyword_corpus AS kc ON kc.corpus_name = c.name "+ 161 | "LEFT JOIN kontext_keyword AS k ON kc.keyword_id = k.id "+ 162 | "LEFT JOIN corplist_corpus AS cc ON c.id = cc.corpus_id "+ 163 | "LEFT JOIN corplist_parallel_corpus AS cpc ON cpc.parallel_corpus_id = c.parallel_corpus_id "+ 164 | "LEFT JOIN registry_conf AS rc ON mc.corpus_name = rc.corpus_name "+ 165 | "JOIN %s AS u ON m.contact_user_id = u.id "+ 166 | "WHERE m.id = ? AND m.deleted = FALSE "+ 167 | "AND ((m.type = 'corpus' AND cc.corplist_id = ?) OR (cpc.corplist_id = ?) OR m.type != 'corpus') "+ 168 | "GROUP BY kc.corpus_name ", 169 | c.overrides.UserTableFirstNameCol, c.overrides.UserTableLastNameCol, 170 | c.overrides.CorporaTableName, c.overrides.UserTableName, 171 | ), identifier, c.publicCorplistID, c.publicCorplistID, 172 | ) 173 | err := row.Scan( 174 | &data.ID, &data.Date, &data.Hosted, &data.Type, &data.DescEN, &data.DescCS, &data.DateIssued, &data.License, &data.Authors, 175 | &data.ContactPerson.Firstname, &data.ContactPerson.Lastname, &data.ContactPerson.Email, 176 | &data.ContactPerson.Affiliation, &data.Name, &data.TitleEN, &data.TitleCS, &data.Link, 177 | &data.CorpusData.Size, &locale, &data.CorpusData.Keywords, 178 | ) 179 | if err != nil { 180 | if err == sql.ErrNoRows { 181 | return nil, nil 182 | } 183 | return nil, fmt.Errorf("failed to get record info: %w", err) 184 | } 185 | if locale.Valid { 186 | tag, err := c.parseLocale(locale.String) 187 | if err != nil { 188 | return nil, fmt.Errorf("failed to get record info: %w", err) 189 | } 190 | data.CorpusData.Locale = &tag 191 | } 192 | return &data, nil 193 | } 194 | 195 | func (c *CNCMySQLHandler) ListRecordInfo(from *time.Time, until *time.Time) ([]DBData, error) { 196 | whereClause := []string{ 197 | "m.deleted = ?", 198 | "((m.type = 'corpus' AND cc.corplist_id = ?) OR cpc.corplist_id = ? OR m.type != 'corpus')", 199 | } 200 | whereValues := []any{ 201 | "FALSE", 202 | c.publicCorplistID, 203 | c.publicCorplistID, 204 | } 205 | if from != nil { 206 | whereClause = append(whereClause, "GREATEST(m.created, m.updated) >= ?") 207 | whereValues = append(whereValues, from) 208 | } 209 | if until != nil { 210 | whereClause = append(whereClause, "GREATEST(m.created, m.updated) <= ?") 211 | whereValues = append(whereValues, until) 212 | } 213 | query := fmt.Sprintf( 214 | "SELECT "+ 215 | "m.id, "+ 216 | " GREATEST(m.created, m.updated), "+ 217 | "m.hosted, "+ 218 | "m.type, "+ 219 | "m.desc_en, "+ 220 | "m.desc_cs, "+ 221 | "m.date_issued, "+ 222 | "m.license_info, "+ 223 | "m.authors, "+ 224 | "u.%s, "+ 225 | "u.%s, "+ 226 | "u.email, "+ 227 | "u.affiliation, "+ 228 | "COALESCE(c.name, ms.name), "+ 229 | "COALESCE(rc.name, c.name, ms.name), "+ 230 | "COALESCE(rc.name, c.name, ms.name), "+ 231 | "COALESCE(c.web, ms.link), "+ 232 | "c.size, "+ 233 | "c.locale, "+ 234 | "GROUP_CONCAT(k.label_en ORDER BY k.display_order SEPARATOR ',') "+ 235 | "FROM vlo_metadata_common AS m "+ 236 | "LEFT JOIN vlo_metadata_corpus AS mc ON m.corpus_metadata_id = mc.id "+ 237 | "LEFT JOIN vlo_metadata_service AS ms ON m.service_metadata_id = ms.id "+ 238 | "LEFT JOIN %s AS c ON mc.corpus_name = c.name "+ 239 | "LEFT JOIN kontext_keyword_corpus AS kc ON kc.corpus_name = c.name "+ 240 | "LEFT JOIN kontext_keyword AS k ON kc.keyword_id = k.id "+ 241 | "LEFT JOIN corplist_corpus AS cc ON c.id = cc.corpus_id "+ 242 | "LEFT JOIN corplist_parallel_corpus AS cpc ON cpc.parallel_corpus_id = c.parallel_corpus_id "+ 243 | "LEFT JOIN registry_conf AS rc ON mc.corpus_name = rc.corpus_name "+ 244 | "JOIN %s AS u ON m.contact_user_id = u.id ", 245 | c.overrides.UserTableFirstNameCol, c.overrides.UserTableLastNameCol, 246 | c.overrides.CorporaTableName, c.overrides.UserTableName, 247 | ) 248 | if len(whereClause) > 0 { 249 | query += " WHERE " + strings.Join(whereClause, " AND ") 250 | } 251 | query += " GROUP BY c.name " 252 | rows, err := c.conn.Query(query, whereValues...) 253 | if err != nil { 254 | return nil, fmt.Errorf("failed to list record info: %w", err) 255 | } 256 | results := make([]DBData, 0, 10) 257 | for rows.Next() { 258 | var row DBData 259 | var locale sql.NullString 260 | err := rows.Scan( 261 | &row.ID, &row.Date, &row.Hosted, &row.Type, &row.DescEN, &row.DescCS, &row.DateIssued, &row.License, &row.Authors, 262 | &row.ContactPerson.Firstname, &row.ContactPerson.Lastname, &row.ContactPerson.Email, 263 | &row.ContactPerson.Affiliation, &row.Name, &row.TitleEN, &row.TitleCS, &row.Link, 264 | &row.CorpusData.Size, &locale, &row.CorpusData.Keywords, 265 | ) 266 | if err != nil { 267 | return nil, fmt.Errorf("failed to list record info: %w", err) 268 | } 269 | if locale.String != "" { 270 | tag, err := c.parseLocale(locale.String) 271 | if err != nil { 272 | return nil, fmt.Errorf("failed to list record info: %w", err) 273 | } 274 | row.CorpusData.Locale = &tag 275 | } 276 | results = append(results, row) 277 | } 278 | return results, nil 279 | } 280 | 281 | func NewCNCMySQLHandler(cnf DatabaseSetup) (*CNCMySQLHandler, error) { 282 | conf := mysql.NewConfig() 283 | conf.Net = "tcp" 284 | conf.Addr = cnf.Host 285 | conf.User = cnf.User 286 | conf.Passwd = cnf.Passwd 287 | conf.DBName = cnf.Name 288 | conf.ParseTime = true 289 | conf.Loc = time.Local 290 | db, err := sql.Open("mysql", conf.FormatDSN()) 291 | if err != nil { 292 | return nil, fmt.Errorf("failed to open CNC DB: %w", err) 293 | } 294 | return &CNCMySQLHandler{ 295 | conn: db, 296 | overrides: cnf.Overrides, 297 | publicCorplistID: cnf.PublicCorplistID, 298 | }, nil 299 | } 300 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------