├── .adr-dir ├── .dockerignore ├── .github ├── dependabot.yml └── workflows │ └── test.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── cmd └── mario │ └── main.go ├── config └── es_record_mappings.json ├── docs ├── architecture-decisions │ ├── 0001-record-architecture-decisions.md │ ├── 0002-use-elasticsearch.md │ ├── 0003-follow-twelve-factor-methodology.md │ ├── 0004-use-aws-s3.md │ ├── 0005-use-aws-lambda.md │ ├── 0006-use-terraform-to-configure-infrastructure.md │ ├── 0007-use-go-for-core-language.md │ ├── 0008-use-one-s3-bucket-per-source.md │ ├── 0009-elasticsearch-indexing-strategy.md │ ├── 0010-use-openapi-specification.md │ ├── 0011-indexing-commands-and-flows.md │ ├── 0012-use-lambda-and-fargate-for-task-execution.md │ ├── 0013-use-sqs-and-airflow-for-task-execution.md │ ├── 0014-structure-of-cli.md │ ├── 0015-use-a-single-bucket.md │ └── 0016-replace-elasticsearch-with-opensearch.md ├── charts │ ├── dip_architecture.dot │ ├── dip_architecture.png │ ├── dip_overview.dot │ └── dip_overview.png └── pull_request_template.md ├── fixtures └── timdex_record_samples.json ├── go.mod ├── go.sum ├── mario.go └── pkg ├── client ├── elastic.go └── s3.go ├── consumer ├── consumers.go └── consumers_test.go ├── generator ├── jsonrecord.go └── jsonrecord_test.go ├── ingester └── ingester.go ├── pipeline ├── pipeline.go └── pipeline_test.go ├── record └── record.go └── transformer ├── transformers.go └── transformers_test.go /.adr-dir: -------------------------------------------------------------------------------- 1 | docs/architecture-decisions 2 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | docs 3 | fixtures 4 | vendor 5 | tmp 6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: gomod 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "09:00" 8 | timezone: America/New_York 9 | open-pull-requests-limit: 10 10 | ignore: 11 | - dependency-name: github.com/aws/aws-sdk-go 12 | versions: 13 | - 1.37.0 14 | - 1.37.1 15 | - 1.37.10 16 | - 1.37.11 17 | - 1.37.12 18 | - 1.37.13 19 | - 1.37.14 20 | - 1.37.15 21 | - 1.37.16 22 | - 1.37.17 23 | - 1.37.18 24 | - 1.37.19 25 | - 1.37.2 26 | - 1.37.20 27 | - 1.37.21 28 | - 1.37.22 29 | - 1.37.23 30 | - 1.37.24 31 | - 1.37.25 32 | - 1.37.26 33 | - 1.37.27 34 | - 1.37.28 35 | - 1.37.29 36 | - 1.37.3 37 | - 1.37.30 38 | - 1.37.31 39 | - 1.37.32 40 | - 1.37.33 41 | - 1.37.5 42 | - 1.37.6 43 | - 1.37.7 44 | - 1.37.8 45 | - 1.37.9 46 | - 1.38.0 47 | - 1.38.1 48 | - 1.38.11 49 | - 1.38.12 50 | - 1.38.13 51 | - 1.38.14 52 | - 1.38.15 53 | - 1.38.16 54 | - 1.38.17 55 | - 1.38.18 56 | - 1.38.19 57 | - 1.38.2 58 | - 1.38.20 59 | - 1.38.21 60 | - 1.38.22 61 | - 1.38.23 62 | - 1.38.24 63 | - 1.38.25 64 | - 1.38.26 65 | - 1.38.27 66 | - 1.38.3 67 | - 1.38.4 68 | - 1.38.6 69 | - 1.38.7 70 | - 1.38.8 71 | - 1.38.9 72 | - dependency-name: github.com/antchfx/xmlquery 73 | versions: 74 | - 1.3.4 75 | - 1.3.5 76 | - dependency-name: gopkg.in/yaml.v2 77 | versions: 78 | - 2.4.0 79 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | 
branches-ignore: 5 | - main 6 | - 2.x 7 | jobs: 8 | test: 9 | name: Tests 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Install Go 14 | uses: actions/setup-go@v2 15 | with: 16 | go-version: 1.16.x 17 | - name: Run tests 18 | run: make test 19 | - name: Test docker build 20 | run: make dist-dev 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Don't include vendored libraries 15 | vendor/ 16 | debug 17 | .debug_config 18 | 19 | # System files 20 | .DS_Store 21 | tmp/ 22 | 23 | mario 24 | pkged.go 25 | 26 | # Local env settings 27 | .env 28 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.16-alpine AS build 2 | RUN apk add --no-cache curl git ca-certificates 3 | RUN go get github.com/markbates/pkger/cmd/pkger 4 | WORKDIR /go/src/mario 5 | COPY go.mod . 6 | COPY go.sum . 7 | RUN go mod download 8 | COPY mario.go . 9 | COPY pkg pkg 10 | COPY cmd cmd 11 | COPY config config 12 | RUN \ 13 | pkger && \ 14 | go build -o mario cmd/mario/main.go 15 | # Note: the two `RUN true` commands appear to be necessary because of 16 | # https://github.com/moby/moby/issues/37965 17 | 18 | FROM golang:1.16-alpine 19 | WORKDIR /go/src/mario 20 | COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ 21 | RUN true 22 | COPY --from=build /go/src/mario/mario . 23 | RUN true 24 | COPY --from=build /go/src/mario/config ./config 25 | run true 26 | COPY --from=0 /go/src/mario/go.mod ./go.mod 27 | RUN true 28 | ENTRYPOINT ["./mario"] 29 | CMD ["--help"] 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help install test tests update dist-dev publish-dev 2 | SHELL=/bin/bash 3 | DATETIME:=$(shell date -u +%Y%m%dT%H%M%SZ) 4 | 5 | help: ## Print this message 6 | @awk 'BEGIN { FS = ":.*##"; print "Usage: make \n\nTargets:" } \ 7 | /^[-_[:alpha:]]+:.?*##/ { printf " %-15s%s\n", $$1, $$2 }' $(MAKEFILE_LIST) 8 | 9 | install: ## Install mario binary 10 | go install ./... 11 | 12 | test: ## Run tests 13 | go test -v ./... 14 | 15 | tests: test 16 | 17 | update: ## Update dependencies 18 | go get -u ./... 19 | 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mario 2 | 3 | ## What is this? 4 | 5 | Mario is a metadata processing pipeline that will process data from various 6 | sources and write to Opensearch. 7 | 8 | ## Installing 9 | 10 | The `mario` command can be installed with: 11 | 12 | ``` 13 | $ make install 14 | ``` 15 | 16 | ## How to Use This 17 | 18 | An OpenSearch index can be started for development purposes by running: 19 | 20 | ``` 21 | $ docker run -p 9200:9200 -p 9600:9600 -e "discovery.type=single-node" \ 22 | -e "plugins.security.disabled=true" \ 23 | opensearchproject/opensearch:1.2.4 24 | ``` 25 | 26 | Alternatively, if you intend to test this with a local instance of TIMDEX 27 | as well, use docker-compose to run both docker and TIMDEX locally using the 28 | instructions in the [TIMDEX README](https://github.com/MITLibraries/timdex/blob/master/README.md#docker-compose-orchestrated-local-environment). 29 | 30 | Here are a few sample Mario commands that may be useful for local development: 31 | - `mario ingest -c json -s aspace fixtures/aspace_samples.xml` 32 | runs the ingest process with ASpace sample files and prints out each record 33 | as JSON 34 | - `mario ingest -s dspace fixtures/dspace_samples.xml` ingests the 35 | DSpace sample files into a local OpenSearch instance. 36 | - `mario ingest -s alma --auto fixtures/alma_samples.mrc` ingests the 37 | Alma sample files into a local OpenSearch instance and promotes the 38 | index to the timdex-prod alias on completion. 39 | - `mario indexes` list all indexes 40 | - `mario promote -i [index name]` promotes the named index to the 41 | timdex-prod alias. 42 | 43 | ## Developing 44 | 45 | This project uses modules for dependencies. To upgrade all dependencies to the latest minor/patch version use: 46 | 47 | ``` 48 | $ make update 49 | ``` 50 | 51 | Tests can be run with: 52 | 53 | ``` 54 | $ make test 55 | ``` 56 | 57 | ### Adding a new source parser 58 | To add a new source parser: 59 | - (Probably) create a source record struct in `pkg/generator`. 60 | - Add a source parser module in `pkg/generator`. 61 | - Add a tests file that tests ALL fields mapped from the source. 62 | - Update `pkg/ingester/ingester.go` to add a Config.source that uses the new 63 | generator. 64 | - Update documentation to include the new generator param option (as "type") to 65 | command options. 66 | - (Probably) don’t need to update the CLI. 67 | - After all of that is completed, tested, and merged, create tasks to harvest 68 | the source metadata files and ingest them using our [airflow implementation](https://github.com/MITLibraries/workflow). 
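
The concrete generator interface lives in `pkg/generator` and `pkg/record`, which are not reproduced in this excerpt, so the sketch below only illustrates the general shape a new source parser might take. The `record.Record` field names and the channel-based `Generate` method are assumptions for illustration, not the repository's actual API; model a real parser on the existing `jsonrecord.go`.

```go
package generator

import (
	"encoding/xml"
	"io"

	"github.com/mitlibraries/mario/pkg/record"
)

// NewSourceGenerator is a hypothetical parser for a new metadata source.
// The record.Record fields and the Generate signature are assumptions for
// illustration only; mirror the existing parsers in pkg/generator when
// adding a real source.
type NewSourceGenerator struct {
	File io.Reader
}

// Generate walks the XML input and emits one record.Record per <record> element.
func (g *NewSourceGenerator) Generate() <-chan record.Record {
	out := make(chan record.Record)
	go func() {
		defer close(out)
		decoder := xml.NewDecoder(g.File)
		for {
			token, err := decoder.Token()
			if err != nil {
				return // io.EOF or a parse error ends the stream
			}
			start, ok := token.(xml.StartElement)
			if !ok || start.Name.Local != "record" {
				continue
			}
			// Hypothetical intermediate struct for the source's record element.
			var raw struct {
				ID    string `xml:"identifier"`
				Title string `xml:"title"`
			}
			if err := decoder.DecodeElement(&raw, &start); err != nil {
				continue
			}
			out <- record.Record{
				TimdexRecordID: "newsource:" + raw.ID,
				Title:          raw.Title,
				Source:         "New Source",
			}
		}
	}()
	return out
}
```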
69 | 70 | ### Updating the data model 71 | Updating the data model is somewhat complicated because many files need to be 72 | edited across multiple repositories and deployment steps should happen in a 73 | particular order so as not to break production services. Start by updating the data model here in Mario as follows: 74 | - Update `config/es_record_mappings.json` to reflect added/updated/deleted 75 | fields. 76 | - Update `pkg/record/record.go` to reflect added/updated/deleted fields. 77 | - Update ALL relevant source record definitions and source parser files in 78 | `pkg/generator`. If a field is edited or deleted, be sure to check every 79 | source file for usage. If a field is new, add to all relevant sources 80 | (confirm mapping with metadata folks first). 81 | - Update relevant tests in `pkg/generator`. 82 | - Once the above steps are done, update the data model in TIMDEX following the 83 | instructions in the [TIMDEX README](https://github.com/MITLibraries/timdex/blob/master/README.md) and test locally with the docker-compose 84 | orchestrated environment to ensure all changes are properly indexed and 85 | consumable via the API. 86 | 87 | ## Config Files 88 | We have several config files that are essential for mapping various metadata 89 | field codes to their human-readable translations, and some of them may need to 90 | be updated from time to time. Most of these config files are pulled from 91 | authoritative sources, with the exception of `marc_rules.json` which we created 92 | and manage ourselves. Sources of the other config files are as follows: 93 | 94 | - `dspace_set_list.json` this is harvested from our DSpace repository using our 95 | OAI-PMH harvester app. The app includes a flag to convert the standard XML 96 | response to JSON, which just makes it easier to parse. 97 | 98 | ## System Overview 99 | ![alt text](docs/charts/dip_overview.png "Mario system overview chart") 100 | 101 | ## Architecture Overview 102 | ![alt text](docs/charts/dip_architecture.png "Mario system overview chart") 103 | 104 | ## Architecture Decision Records 105 | 106 | This repository contains Architecture Decision Records in the 107 | [docs/architecture-decisions directory](docs/architecture-decisions). 108 | 109 | [adr-tools](https://github.com/npryce/adr-tools) should allow easy creation of 110 | additional records with a standardized template. 
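
As a point of reference, creating a new record from the repository root with adr-tools might look like the following; the titles are illustrative, and because `.adr-dir` points at `docs/architecture-decisions`, new records land there automatically:

```
$ adr new "Use some new service"
$ adr new -s 12 "Use SQS and Airflow for task execution"
```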
111 | -------------------------------------------------------------------------------- /cmd/mario/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/mitlibraries/mario/pkg/client" 6 | "github.com/mitlibraries/mario/pkg/ingester" 7 | "github.com/urfave/cli/v2" 8 | "log" 9 | "os" 10 | ) 11 | 12 | func main() { 13 | var url string 14 | var v4 bool 15 | 16 | app := cli.NewApp() 17 | 18 | // Global options 19 | app.Flags = []cli.Flag{ 20 | &cli.StringFlag{ 21 | Name: "url", 22 | Aliases: []string{"u"}, 23 | Value: "http://127.0.0.1:9200", 24 | Usage: "URL for the OpenSearch cluster", 25 | Destination: &url, 26 | }, 27 | &cli.BoolFlag{ 28 | Name: "v4", 29 | Usage: "Use AWS v4 signing", 30 | Destination: &v4, 31 | }, 32 | } 33 | 34 | app.Commands = []*cli.Command{ 35 | // OpenSearch commands 36 | { 37 | Name: "aliases", 38 | Usage: "List OpenSearch aliases and their associated indexes", 39 | Category: "OpenSearch actions", 40 | Action: func(c *cli.Context) error { 41 | es, err := client.NewESClient(url, v4) 42 | if err != nil { 43 | return err 44 | } 45 | aliases, err := es.Aliases() 46 | if err != nil { 47 | return err 48 | } 49 | for _, a := range aliases { 50 | fmt.Printf("Alias: %s\n\tIndex: %s\n\n", a.Alias, a.Index) 51 | } 52 | return nil 53 | }, 54 | }, 55 | { 56 | Name: "indexes", 57 | Usage: "List all OpenSearch indexes", 58 | Category: "OpenSearch actions", 59 | Action: func(c *cli.Context) error { 60 | es, err := client.NewESClient(url, v4) 61 | if err != nil { 62 | return err 63 | } 64 | indexes, err := es.Indexes() 65 | if err != nil { 66 | return err 67 | } 68 | for _, i := range indexes { 69 | fmt.Printf("Name: %s\n\tDocuments: %d\n\tHealth: %s\n\tStatus: %s\n\tUUID: %s\n\tSize: %s\n\n", i.Index, i.DocsCount, i.Health, i.Status, i.UUID, i.StoreSize) 70 | } 71 | return nil 72 | }, 73 | }, 74 | { 75 | Name: "ping", 76 | Usage: "Ping OpenSearch", 77 | Category: "OpenSearch actions", 78 | Action: func(c *cli.Context) error { 79 | es, err := client.NewESClient(url, v4) 80 | if err != nil { 81 | return err 82 | } 83 | res, err := es.Ping(url) 84 | if err != nil { 85 | return err 86 | } 87 | fmt.Printf("Name: %s\nCluster: %s\nVersion: %s\nLucene version: %s", res.Name, res.ClusterName, res.Version.Number, res.Version.LuceneVersion) 88 | return nil 89 | }, 90 | }, 91 | // Index-specific commands 92 | { 93 | Name: "ingest", 94 | Usage: "Parse and ingest the input file. By default, ingests into the current production index for the provided source.", 95 | ArgsUsage: "[filepath, use format 's3://bucketname/objectname' for s3]", 96 | Category: "Index actions", 97 | Flags: []cli.Flag{ 98 | &cli.StringFlag{ 99 | Name: "source", 100 | Aliases: []string{"s"}, 101 | Usage: "Source system of metadata file to process. Must be one of [alma, aspace, dspace, mario]", 102 | Required: true, 103 | }, 104 | &cli.StringFlag{ 105 | Name: "consumer", 106 | Aliases: []string{"c"}, 107 | Value: "es", 108 | Usage: "Consumer to use. 
Must be one of [es, json, title, silent]", 109 | }, 110 | &cli.BoolFlag{ 111 | Name: "new", 112 | Usage: "Create a new index instead of ingesting into the current production index for the source", 113 | }, 114 | &cli.BoolFlag{ 115 | Name: "auto", 116 | Usage: "Automatically promote / demote on completion", 117 | }, 118 | }, 119 | Action: func(c *cli.Context) error { 120 | var es *client.ESClient 121 | config := ingester.Config{ 122 | Filename: c.Args().Get(0), 123 | Consumer: c.String("consumer"), 124 | Source: c.String("source"), 125 | NewIndex: c.Bool("new"), 126 | Promote: c.Bool("auto"), 127 | } 128 | log.Printf("Ingesting records from file: %s\n", config.Filename) 129 | stream, err := ingester.NewStream(config.Filename) 130 | if err != nil { 131 | return err 132 | } 133 | defer stream.Close() 134 | if config.Consumer == "es" { 135 | es, err = client.NewESClient(url, v4) 136 | if err != nil { 137 | return err 138 | } 139 | } 140 | ingest := ingester.Ingester{Stream: stream, Client: es} 141 | err = ingest.Configure(config) 142 | if err != nil { 143 | return err 144 | } 145 | count, err := ingest.Ingest() 146 | log.Printf("Total records ingested: %d\n", count) 147 | return err 148 | }, 149 | }, 150 | { 151 | Name: "promote", 152 | Usage: "Promote an index to production", 153 | UsageText: "Demotes the existing production index for the provided prefix, if there is one", 154 | Category: "Index actions", 155 | Flags: []cli.Flag{ 156 | &cli.StringFlag{ 157 | Name: "index", 158 | Aliases: []string{"i"}, 159 | Usage: "Name of the OpenSearch index to promote", 160 | Required: true, 161 | }, 162 | }, 163 | Action: func(c *cli.Context) error { 164 | es, err := client.NewESClient(url, v4) 165 | if err != nil { 166 | return err 167 | } 168 | err = es.Promote(c.String("index")) 169 | return err 170 | }, 171 | }, 172 | { 173 | Name: "reindex", 174 | Usage: "Reindex one index to another index", 175 | UsageText: "Use the OpenSearch reindex API to copy one index to another. 
The doc source must be present in the original index.", 176 | Category: "Index actions", 177 | Flags: []cli.Flag{ 178 | &cli.StringFlag{ 179 | Name: "index", 180 | Aliases: []string{"i"}, 181 | Usage: "Name of the OpenSearch index to copy", 182 | Required: true, 183 | }, 184 | &cli.StringFlag{ 185 | Name: "destination", 186 | Aliases: []string{"d"}, 187 | Usage: "Name of new index", 188 | Required: true, 189 | }, 190 | }, 191 | Action: func(c *cli.Context) error { 192 | es, err := client.NewESClient(url, v4) 193 | if err != nil { 194 | return err 195 | } 196 | count, err := es.Reindex(c.String("index"), c.String("destination")) 197 | fmt.Printf("%d documents reindexed\n", count) 198 | return err 199 | }, 200 | }, 201 | { 202 | Name: "delete", 203 | Usage: "Delete an index", 204 | Category: "Index actions", 205 | Flags: []cli.Flag{ 206 | &cli.StringFlag{ 207 | Name: "index", 208 | Aliases: []string{"i"}, 209 | Usage: "Name of the OpenSearch index to delete", 210 | Required: true, 211 | }, 212 | }, 213 | Action: func(c *cli.Context) error { 214 | es, err := client.NewESClient(url, v4) 215 | if err != nil { 216 | return err 217 | } 218 | err = es.Delete(c.String("index")) 219 | return err 220 | }, 221 | }, 222 | } 223 | 224 | err := app.Run(os.Args) 225 | if err != nil { 226 | log.Fatal(err) 227 | } 228 | } 229 | -------------------------------------------------------------------------------- /config/es_record_mappings.json: -------------------------------------------------------------------------------- 1 | { 2 | "settings": { 3 | "analysis": { 4 | "analyzer": { 5 | "keyword_no_trailing_punctuation": { 6 | "tokenizer": "keyword", 7 | "char_filter": [ 8 | "no_trailing_punctuation" 9 | ], 10 | "filter": [ 11 | "lowercase", 12 | "trim" 13 | ] 14 | } 15 | }, 16 | "char_filter": { 17 | "no_trailing_punctuation": { 18 | "type": "pattern_replace", 19 | "pattern": "[./;=,?]$", 20 | "replacement": "" 21 | } 22 | }, 23 | "normalizer": { 24 | "lowercase": { 25 | "type": "custom", 26 | "filter": [ 27 | "lowercase" 28 | ] 29 | } 30 | } 31 | } 32 | }, 33 | "mappings": { 34 | "properties": { 35 | "alternate_titles": { 36 | "type": "nested", 37 | "include_in_parent": "true", 38 | "properties": { 39 | "kind": { 40 | "type": "keyword" 41 | }, 42 | "value": { 43 | "type": "text", 44 | "fields": { 45 | "exact_value": { 46 | "type": "text", 47 | "analyzer": "keyword_no_trailing_punctuation" 48 | } 49 | } 50 | } 51 | } 52 | }, 53 | "call_numbers": { 54 | "type": "text", 55 | "fields": { 56 | "keyword": { 57 | "type": "keyword", 58 | "normalizer": "lowercase" 59 | }, 60 | "completion": { 61 | "type": "completion", 62 | "preserve_separators": false 63 | } 64 | } 65 | }, 66 | "citation": { 67 | "type": "text" 68 | }, 69 | "content_type": { 70 | "type": "keyword", 71 | "normalizer": "lowercase" 72 | }, 73 | "contents": { 74 | "type": "text" 75 | }, 76 | "contributors": { 77 | "type": "nested", 78 | "include_in_parent": "true", 79 | "properties": { 80 | "affiliation": { 81 | "type": "text" 82 | }, 83 | "kind": { 84 | "type": "keyword", 85 | "normalizer": "lowercase" 86 | }, 87 | "identifier": { 88 | "type": "text" 89 | }, 90 | "mit_affiliated": { 91 | "type": "boolean" 92 | }, 93 | "value": { 94 | "type": "text", 95 | "fields": { 96 | "keyword": { 97 | "type": "keyword", 98 | "normalizer": "lowercase" 99 | } 100 | } 101 | } 102 | } 103 | }, 104 | "dates": { 105 | "type": "nested", 106 | "include_in_parent": "true", 107 | "properties": { 108 | "kind": { 109 | "type": "keyword", 110 | "normalizer": "lowercase" 111 | }, 
112 | "note": { 113 | "type": "text" 114 | }, 115 | "range": { 116 | "type": "date_range", 117 | "format": "strict_year||strict_year_month||date_optional_time||date||basic_date" 118 | }, 119 | "value": { 120 | "type": "text", 121 | "fields": { 122 | "as_date": { 123 | "type": "date", 124 | "format": "strict_year||strict_year_month||date_optional_time||date||basic_date" 125 | } 126 | } 127 | } 128 | } 129 | }, 130 | "edition": { 131 | "type": "text" 132 | }, 133 | "file_formats": { 134 | "type": "keyword", 135 | "normalizer": "lowercase" 136 | }, 137 | "format": { 138 | "type": "keyword", 139 | "normalizer": "lowercase" 140 | }, 141 | "funding_information": { 142 | "type": "nested", 143 | "include_in_parent": "true", 144 | "properties": { 145 | "award_number": { 146 | "type": "text" 147 | }, 148 | "award_uri": { 149 | "type": "text", 150 | "fields": { 151 | "keyword": { 152 | "type": "keyword", 153 | "normalizer": "lowercase" 154 | } 155 | } 156 | }, 157 | "funder_identifier": { 158 | "type": "text", 159 | "fields": { 160 | "keyword": { 161 | "type": "keyword", 162 | "normalizer": "lowercase" 163 | } 164 | } 165 | }, 166 | "funder_identifier_type": { 167 | "type": "keyword", 168 | "normalizer": "lowercase" 169 | }, 170 | "funder_name": { 171 | "type": "text", 172 | "fields": { 173 | "keyword": { 174 | "type": "keyword", 175 | "normalizer": "lowercase" 176 | } 177 | } 178 | } 179 | } 180 | }, 181 | "holdings": { 182 | "type": "nested", 183 | "include_in_parent": "true", 184 | "properties": { 185 | "call_number": { 186 | "type": "keyword", 187 | "normalizer": "lowercase" 188 | }, 189 | "collection": { 190 | "type": "keyword", 191 | "normalizer": "lowercase" 192 | }, 193 | "format": { 194 | "type": "keyword", 195 | "normalizer": "lowercase" 196 | }, 197 | "location": { 198 | "type": "keyword", 199 | "normalizer": "lowercase" 200 | }, 201 | "note": { 202 | "type": "text" 203 | } 204 | } 205 | }, 206 | "identifiers": { 207 | "type": "nested", 208 | "include_in_parent": "true", 209 | "properties": { 210 | "kind": { 211 | "type": "keyword", 212 | "normalizer": "lowercase" 213 | }, 214 | "value": { 215 | "type": "text" 216 | } 217 | } 218 | }, 219 | "languages": { 220 | "type": "text", 221 | "fields": { 222 | "keyword": { 223 | "type": "keyword", 224 | "normalizer": "lowercase" 225 | } 226 | } 227 | }, 228 | "links": { 229 | "type": "nested", 230 | "properties": { 231 | "kind": { 232 | "type": "keyword", 233 | "normalizer": "lowercase" 234 | }, 235 | "restrictions": { 236 | "type": "text", 237 | "fields": { 238 | "keyword": { 239 | "type": "keyword", 240 | "normalizer": "lowercase" 241 | } 242 | } 243 | }, 244 | "text": { 245 | "type": "text" 246 | }, 247 | "url": { 248 | "type": "text" 249 | } 250 | } 251 | }, 252 | "literary_form": { 253 | "type": "keyword", 254 | "normalizer": "lowercase" 255 | }, 256 | "locations": { 257 | "type": "nested", 258 | "include_in_parent": "true", 259 | "properties": { 260 | "geopoint": { 261 | "type": "geo_point" 262 | }, 263 | "kind": { 264 | "type": "keyword", 265 | "normalizer": "lowercase" 266 | }, 267 | "value": { 268 | "type": "text", 269 | "fields": { 270 | "keyword": { 271 | "type": "keyword", 272 | "normalizer": "lowercase" 273 | } 274 | } 275 | } 276 | } 277 | }, 278 | "notes": { 279 | "type": "nested", 280 | "include_in_parent": "true", 281 | "properties": { 282 | "kind": { 283 | "type": "text", 284 | "fields": { 285 | "keyword": { 286 | "type": "keyword", 287 | "normalizer": "lowercase" 288 | } 289 | } 290 | }, 291 | "value": { 292 | "type": "text" 293 | 
} 294 | } 295 | }, 296 | "numbering": { 297 | "type": "text" 298 | }, 299 | "physical_description": { 300 | "type": "text", 301 | "index": "false" 302 | }, 303 | "publication_frequency": { 304 | "type": "text", 305 | "fields": { 306 | "keyword": { 307 | "type": "keyword", 308 | "normalizer": "lowercase" 309 | } 310 | } 311 | }, 312 | "publication_information": { 313 | "type": "text" 314 | }, 315 | "related_items": { 316 | "type": "nested", 317 | "include_in_parent": "true", 318 | "properties": { 319 | "description": { 320 | "type": "text" 321 | }, 322 | "item_type": { 323 | "type": "keyword", 324 | "normalizer": "lowercase" 325 | }, 326 | "relationship": { 327 | "type": "keyword", 328 | "normalizer": "lowercase" 329 | }, 330 | "uri": { 331 | "type": "text" 332 | } 333 | } 334 | }, 335 | "rights": { 336 | "type": "nested", 337 | "properties": { 338 | "description": { 339 | "type": "text" 340 | }, 341 | "kind": { 342 | "type": "keyword", 343 | "normalizer": "lowercase" 344 | }, 345 | "uri": { 346 | "type": "text", 347 | "fields": { 348 | "keyword": { 349 | "type": "keyword", 350 | "normalizer": "lowercase" 351 | } 352 | } 353 | } 354 | } 355 | }, 356 | "source": { 357 | "type": "keyword", 358 | "normalizer": "lowercase" 359 | }, 360 | "source_link": { 361 | "type": "text", 362 | "index": "false" 363 | }, 364 | "subjects": { 365 | "type": "nested", 366 | "include_in_parent": "true", 367 | "properties": { 368 | "kind": { 369 | "type": "keyword", 370 | "normalizer": "lowercase" 371 | }, 372 | "value": { 373 | "type": "text", 374 | "fields": { 375 | "keyword": { 376 | "type": "keyword", 377 | "normalizer": "lowercase" 378 | } 379 | } 380 | } 381 | } 382 | }, 383 | "summary": { 384 | "type": "text" 385 | }, 386 | "timdex_record_id": { 387 | "type": "text", 388 | "index": "false" 389 | }, 390 | "title": { 391 | "type": "text", 392 | "fields": { 393 | "exact_value": { 394 | "type": "text", 395 | "analyzer": "keyword_no_trailing_punctuation" 396 | } 397 | } 398 | } 399 | } 400 | } 401 | } 402 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0001-record-architecture-decisions.md: -------------------------------------------------------------------------------- 1 | # 1. Record architecture decisions 2 | 3 | Date: 2018-07-03 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | We need to record the architectural decisions made on this project. 12 | 13 | ## Decision 14 | 15 | We will use Architecture Decision Records, as described by Michael Nygard in this article: http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions 16 | 17 | ## Consequences 18 | 19 | See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's _adr-tools_ at https://github.com/npryce/adr-tools. 20 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0002-use-elasticsearch.md: -------------------------------------------------------------------------------- 1 | # 2. Use Elasticsearch 2 | 3 | Date: 2018-07-03 4 | 5 | ## Status 6 | 7 | Superceded by [16. Replace Elasticsearch with OpenSearch](0016-replace-elasticsearch-with-opensearch.md) 8 | 9 | ## Context 10 | 11 | We need to choose between using Solr and Elasticsearch for indexing. 12 | 13 | ## Decision 14 | 15 | We will use Elasticsearch. See https://docs.google.com/document/d/1LX3svZ59f2Ni5TNCPG6jIYb8CnSYOjR0ae0ujPOUN-k/edit for a more detailed description of how this decision was arrived at. 
16 | 17 | ## Consequences 18 | 19 | Because Solr and Elasticsearch are so different we won't be able to easily switch from one to the other at a later point. Choosing Elasticsearch means we can't make use of any of the Blacklight family of applications since it is designed to work with Solr. 20 | 21 | We expect to have more hosted options available to us with Elasticsearch. 22 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0003-follow-twelve-factor-methodology.md: -------------------------------------------------------------------------------- 1 | # 3. Follow Twelve Factor methodology 2 | 3 | Date: 2018-07-03 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | Designing modern scalable cloud based applications requires intentionally 12 | designing the architecture to take advantage of the cloud. 13 | 14 | One leading way to do that is 15 | [The Twelve Factor](https://12factor.net) methodology. 16 | 17 | ## Decision 18 | 19 | We will follow Twelve Factor methodology. 20 | 21 | ## Consequences 22 | 23 | Our application will be deployable in the cloud in a scalable efficient manner. 24 | 25 | We will leverage services for some aspects of applications that 26 | previously would have relied on a Virtual Machine, such as storage for files 27 | and logs. 28 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0004-use-aws-s3.md: -------------------------------------------------------------------------------- 1 | # 4. Use AWS S3 2 | 3 | Date: 2018-07-03 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | One of the tenants of Twelve Factor application design is that applications 12 | should be stateless, which includes not relying on local file storage to be 13 | persistent. As such, this project needs a cloud based object store. 14 | See [3. Follow Twelve Factor methodology](0003-follow-twelve-factor-methodology.md) 15 | 16 | Amazon Simple Storage Service (S3) is a secure, durable, and scalable object 17 | storage solution to use in the cloud with which we have established an existing 18 | payment relationship. 19 | 20 | Amazon provides official SDKs for various programming languages to interact 21 | with S3. 22 | 23 | ## Decision 24 | 25 | We will use Amazon S3 for our object store. 26 | 27 | ## Consequences 28 | 29 | We will have secure, durable, and scalable object storage to use in the cloud 30 | as needed for this project. 31 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0005-use-aws-lambda.md: -------------------------------------------------------------------------------- 1 | # 5. Use AWS Lambda 2 | 3 | Date: 2018-07-03 4 | 5 | ## Status 6 | 7 | Superceded by [12. Use Lambda and Fargate for Task Execution](0012-use-lambda-and-fargate-for-task-execution.md) 8 | 9 | ## Context 10 | 11 | The bulk of this application will consist of a data processing pipeline that takes metadata from incoming systems and indexes it in Elasticsearch. The processing will only need to be run for relatively short periods of time, usually, when new data arrives. We expect integrations with external systems to be minimal, likely limited only to S3 and Elasticsearch. Given the periodic nature of the application, it seems wasteful and needlessly complex to provision and maintain a VM for providing compute resources. 
12 | 13 | ## Decision 14 | 15 | We will use AWS Lambdas as the compute model for the processing pipeline. 16 | 17 | ## Consequences 18 | 19 | Since we are using S3 for storage, the Lambda can be easily configured to run when a new file is placed in an S3 bucket. Lambda supports several different languages. While triggering Lambdas through S3 events will be convenient for most cases, it does potentially add some complexity in situations that require running Lambdas outside of the normal event structure. 20 | 21 | Integration with central logging will likely be more complex as Lambda logs currently go directly to CloudWatch. 22 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0006-use-terraform-to-configure-infrastructure.md: -------------------------------------------------------------------------------- 1 | # 6. Use Terraform to Configure Infrastructure 2 | 3 | Date: 2018-07-03 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | Having a repeatable, predictable way to create and change infrastructure is essential to stability and reliability of applications. One leading candidate to allow writing infrastructure as code is [Terraform](https://www.terraform.io). 12 | 13 | Other tools to consider might be Ansible, Puppet or Chef, but they are less suited to modern cloud infrastructure than Terraform as they were developed when running code on VMs was the norm. They are good options for Configuration Management, but less appropriate for managing the infrastructure itself. 14 | 15 | Amazon CloudFormation is a closer a possibility, but it is only usable on the Amazon stack whereas Terraform can be used to manage any Cloud. This flexibility will allow us to manage infrastructure which span clouds which this project may require (such as backend processing on AWS and frontend APIs on Heroku). 16 | 17 | Both CloudFormation and Terraform are good choices for Infrastructure Orchestration, but the AWS only restriction of CloudFormation makes it much less compelling to adopt. 18 | 19 | See [3. Follow Twelve Factor methodology](0003-follow-twelve-factor-methodology.md) 20 | 21 | ## Decision 22 | 23 | We will use Terraform to configure our Infrastructure. 24 | 25 | ## Consequences 26 | 27 | We will have a repeatable, predictable way to create and change infrastructure. 28 | 29 | Staff will need to learn Terraform. 30 | 31 | Developers and Operations will be able to propose and review changes to Infrastructure prior to changes being made. 32 | 33 | The same code review processes we use to ensure better software can be used to allow us to better understand our infrastructure as well as see exactly what changes are being proposed and why. 34 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0007-use-go-for-core-language.md: -------------------------------------------------------------------------------- 1 | # 7. Use Go for Core Language 2 | 3 | Date: 2018-07-05 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | The choice of which programming language to use is governed by a host of different factors. Most languages can be made to work for whatever task is required, though some are better at certain tasks than others. 12 | 13 | We expect the nature of the work in this project to benefit from concurrency, so choosing a language with good support for this is important. Since we will be deploying to AWS Lambda (See [5. 
Use AWS Lambda](0005-use-aws-lambda.md)), we are further limited to using one of the supported languages. Other considerations include ease of packaging and distribution, excellent data streaming abilities, and a healthy ecosystem of 3rd party libraries. 14 | 15 | ## Decision 16 | 17 | Use Go for the core application language. 18 | 19 | ## Consequences 20 | 21 | Go is a new language for us, so there is some inherent risk involved in this choice. If we decide we are not making progress quickly enough due to lack of language familiarity, we can fall back to Python. 22 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0008-use-one-s3-bucket-per-source.md: -------------------------------------------------------------------------------- 1 | # 8. Use One S3 Bucket Per Source 2 | 3 | Date: 2018-07-05 4 | 5 | ## Status 6 | 7 | Superceded by [15. Use a Single Bucket](0015-use-a-single-bucket.md) 8 | 9 | ## Context 10 | 11 | Each data source will need to upload one or more files to S3 in order to trigger processing. S3 events, which will drive Lambda execution (See [5. Use AWS Lambda](0005-use-aws-lambda.md)), are configured at the bucket level. We may or may not have much control over the environment which is sending data to S3, for example, if it came directly from a vendor. At minimum we must be able to specify a bucket, but we should not assume we will have much more control than this. 12 | 13 | Each data source will also need different processing. This implies the need to identify which source a data file came from. 14 | 15 | ## Decision 16 | 17 | Use one S3 bucket per data source. 18 | 19 | ## Consequences 20 | 21 | Each source will need a new S3 bucket. Each bucket must be configured to tie the Lambda function to the object creation event. An advantage to this approach is that it makes it easier to enable and disable sources by simply controlling whether or a not a bucket publishes its events. No changes to application code would be necessary. 22 | 23 | The bucket name can be used as the configuration key to identify the source and determine how the data should be processed. 24 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0009-elasticsearch-indexing-strategy.md: -------------------------------------------------------------------------------- 1 | # 9. Elasticsearch Indexing Strategy 2 | 3 | Date: 2018-07-06 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | There are a number of different ways we could approach indexing in Elasticsearch. We would like to choose a path that allows us some flexibility to adjust as future needs arise. We also need to think about how to maintain index uptime while modifying the contents of the index. 12 | 13 | ## Decision 14 | 15 | Use an index alias for searching that points to a separate index for each source. 16 | 17 | ## Consequences 18 | 19 | An index alias provides a constant, unchanging endpoint for searches which minimizes the integration impact of modifications to index structure. Changing which indexes the alias points to is an atomic action allowing for smooth transitions to different versions of indexes with no downtime. 20 | 21 | Using one index per source allows us to further isolate the impact of bringing new sources online and modifying how different sources are indexed. 22 | 23 | The process for indexing (and reindexing) a source would generally follow these steps: 24 | 25 | 1. 
Create a new index, using some kind of versioning in the name. 26 | 2. Add documents to the new index. 27 | 3. Modify the alias to add the new index and remove the old index from its pointers. 28 | 4. Delete the old index. 29 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0010-use-openapi-specification.md: -------------------------------------------------------------------------------- 1 | # 10. Use OpenAPI Specification 2 | 3 | Date: 2018-08-10 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | ## Context 10 | 11 | By choosing an API documentation standard we make it easier to auto generate developer documentation. There are two existing standards used to document REST APIs--RAML and OpenAPI Specification (OAS). Both seem capable of doing the job. The main difference seems to be that RAML is focused on defining data models while OpenAPI is focused on the nuts and bolts of the API. If we were supporting several APIs, RAML might be more useful for defining reusable types across systems. In this case OAS seems more suited to our task. 12 | 13 | ## Decision 14 | 15 | Use OpenAPI specification to document the API. 16 | 17 | ## Consequences 18 | 19 | Given that Mulesoft uses RAML and Swagger uses OAS, this means we'd be probably be using Swagger as a documentation platform (if we choose to do so). There are tools to convert between RAML and OAS, though, so if we decide we would rather use RAML later it should not be too difficult to switch. 20 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0011-indexing-commands-and-flows.md: -------------------------------------------------------------------------------- 1 | # 11. Indexing Commands and Flows 2 | 3 | Date: 2018-11-26 4 | 5 | ## Status 6 | 7 | Superceded by [14. Structure of CLI](0014-structure-of-cli.md) 8 | 9 | ## Context 10 | 11 | Our AWS instance of Elasticsearch will be awkward to maintain unless we build the right tools into the CLI. Some of these commands will not be used often, but are anticipatory of being able to intervene and correct issues in production. As such, these are the CLI commands we intend to make available to developers / maintainers of the Discovery Index. Future needs may dictate additional commands or adjustments to these. 12 | 13 | ## Decision 14 | 15 | #### COMMANDS: 16 | 17 | indexes List Elasticsearch indexes and aliases 18 | 19 | ingest Parse and ingest the input file 20 | [note: was parse. Ingest is more reflective of what this does as parsing is just one aspect] 21 | 22 | ping Request and display general info about the Elasticsearch server 23 | 24 | help, h Shows a list of commands or help for one command 25 | 26 | Index actions: 27 | 28 | create Currently available, removed in this proposal. 29 | 30 | delete Delete an Elasticsearch index 31 | 32 | demote Remove the given index from the production alias 33 | 34 | promote Add the given index to production alias 35 | 36 | stats Stats for provided Index (total records, maybe more?) 37 | 38 | 39 | In order to ensure we always have a production index available, we want to use a known, semi-hardcoded alias value. The fixed value will be "production". 40 | 41 | As we'll have new sources soon, it will be best to have each source maintain their own indexes for ease of use. Elasticsearch will allow multiple indexes to be associated with the alias "production" to allow us to search as many sources as we need. For aleph, the prefix will be `aleph_`. 
Future sources will declare a prefix that is appropriate as we add them following the `source_` convention. 42 | 43 | We currently default to an index name of `timdex`. We should no longer do that and instead default to different values depending on the command. Some commands will not have a default at all and are detailed below. 44 | 45 | We currently allow for an independent `create` command. That will be removed and the `ingest` command will handle creating indexes if necessary. 46 | 47 | For interactive work, we allow specific index values to be passed in as it may be useful for either intervening in a problem in production, or general local development. A default index value for `ingest` will be a combination of the source, such as `aleph_` and a partial timestamp such as `2018_11_26_1001`. 48 | 49 | Once we are confident the new index is ready, we'd then `promote` that index while `demoting` any existing indexes for the source we are working with. `promote` and `demote` are used to signify adding / removing the index to / from the alias "production". `promote` and `demote` will not default to any index value to ensure intent. When in fully automated mode, we'll need to ensure the index value set during the `ingest` process is used to `promote` and the source is used to `demote` as part of the single atomic action. However, that will not be done via the `promote` / `demote` CLI interface and will instead be kicked off as part of the `ingest` process when run in `auto` mode. The `index` argument will be required to inform these command what index to operate on. Additionally, we will ensure we do not allow demoting and index if it will leave the `production` alias with no indexes for the specific source. In other words, we will programmatically ensure that at least one `aleph_` source is always accessible via the `production` alias. Once we add additional sources, they will follow the same requirement of always having one index with the alias of `production`. 50 | 51 | We will keep as many old indexes as we deem useful and then go back and `delete` indexes we no longer need. The `delete` process will ensure that the index is not assigned to the alias `production` before proceeding. Additionally, `delete` will not default to any index and one must be supplied to continue. 52 | 53 | Those set of commands will allow us to fully manually run the pipeline in production. The following is a proposed automatic flow: 54 | 55 | ``` 56 | mario ingest aleph --promote auto --url esurl --v4 file 57 | ``` 58 | 59 | The source argument (`aleph` above) will allow us to construct the new index automatically when used in conjunction with a current timestamp. 60 | 61 | We'll also use the source to check the alias `production` for any indexes that currently have that prefix. `--promote auto` would dd the new index to production alias as well as remove the old index from the production alias. If `--promote auto` is not set, the new index will be created but the old index would remain in place for production use until further interactive steps were taken. 62 | 63 | ## Consequences 64 | 65 | This set of commands and flows will allow us to setup automatic processes while still maintaining manual ability to intervene if necessary for both the current aleph dataset as well as future datasets as we start adding them to the system. 
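
To make the interactive flow described above concrete, a hedged sketch of the manual sequence might look like the following; the index names are illustrative and the exact argument syntax reflects this proposal rather than the final CLI:

```
$ mario ingest aleph --url esurl --v4 file    # creates e.g. aleph_2018_11_26_1001
$ mario promote aleph_2018_11_26_1001         # add the new index to the "production" alias
$ mario demote aleph_2018_10_15_0930          # remove the superseded index from the alias
$ mario delete aleph_2018_10_15_0930          # drop the old index once it is no longer needed
```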
66 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0012-use-lambda-and-fargate-for-task-execution.md: -------------------------------------------------------------------------------- 1 | # 12. Use Lambda and Fargate for Task Execution 2 | 3 | Date: 2019-01-31 4 | 5 | ## Status 6 | 7 | Supercedes [5. Use AWS Lambda](0005-use-aws-lambda.md) 8 | 9 | Superceded by [13. Use SQS and Airflow for Task Execution](0013-use-sqs-and-airflow-for-task-execution.md) 10 | 11 | ## Context 12 | 13 | Given the limitations of Lambdas we decided to rely on containers to handle the bulk of the processing. Fargate provides a cheap, accessible container runtime. 14 | 15 | ## Decision 16 | 17 | We will use AWS Lambda to trigger a Fargate task for the processing pipeline. 18 | 19 | ## Consequences 20 | 21 | Unfortunately, there's no easy way to trigger a Fargate task from an S3 file upload. The S3 upload will have to trigger a Lambda that can then run the Fargate task, setting the filename as the container's runtime parameters. 22 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0013-use-sqs-and-airflow-for-task-execution.md: -------------------------------------------------------------------------------- 1 | # 13. Use SQS and Airflow for Task Execution 2 | 3 | Date: 2020-04-03 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | Supercedes [12. Use Lambda and Fargate for Task Execution](0012-use-lambda-and-fargate-for-task-execution.md) 10 | 11 | ## Context 12 | 13 | The execution model described by ADR 12 was designed before we had Airflow. It works, but we'd like to simplify things by moving it to Airflow to avoid having similar processes handled in different ways. 14 | 15 | ## Decision 16 | 17 | We will change the S3 notification from triggering a Lambda to sending a message to an SQS queue. We will configure a single workflow in Airflow that begins with an SQS sensor. 18 | 19 | ## Consequences 20 | 21 | 1. Instead of having a different indexing process for each source, there will be a single indexing workflow in Airflow that gets run on every file uploaded to the S3 bucket. Mario will need logic added to it to handle correctly indexing based only on the name of the bucket and key. 22 | 23 | 2. The Lambda process was a push process--as soon as the file was added to S3, it was processed. This new process will be a pull process. There will be a delay from when a file is added to S3 to when it is processed. This delay can be controlled by changing how often the workflow is run in Airflow. 24 | 25 | 3. SQS has a limit of 12 hours between when a message is received to when it can be deleted, meaning a single indexing process can't run for more than 12 hours. 26 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0014-structure-of-cli.md: -------------------------------------------------------------------------------- 1 | # 14. Structure of CLI 2 | 3 | Date: 2020-04-08 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | Supercedes [11. Indexing Commands and Flows](0011-indexing-commands-and-flows.md) 10 | 11 | ## Context 12 | 13 | Our current command line interface is awkward to use and contains a considerable amount of business logic. Rather than listing out every command, this ADR will provide guidelines for adding new commands. 14 | 15 | ## Decision 16 | 17 | The mario command itself will be a collection of subcommands. 
Only use global options for values that can truly be applied to every subcommand. All other options should be attached to the subcommand. Provide reasonable default values when it makes sense. 18 | 19 | A few examples: 20 | 21 | ``` 22 | $ mario ingest --v4 --index aleph-2020-01-01 s3://bucket/key.mrc 23 | $ mario reindex --url http://example.com -s aleph-01 -d aleph-02 24 | ``` 25 | 26 | Additionally, the `main.go` file should be kept small, and all business logic should reside elsewhere in the application. 27 | 28 | ## Consequences 29 | 30 | The `main.go` file will need a significant rewrite in order to pull out the business logic that currently lives there. The CLI will also likely change (`--index` is currently a global option, which makes no sense for most subcommands). Changes to the CLI will need to be propagated to the automated ingest workflow processes. 31 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0015-use-a-single-bucket.md: -------------------------------------------------------------------------------- 1 | # 15. Use a Single Bucket 2 | 3 | Date: 2020-04-08 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | Supercedes [8. Use One S3 Bucket Per Source](0008-use-one-s3-bucket-per-source.md) 10 | 11 | ## Context 12 | 13 | There are a couple of reasons to switch from multiple buckets to a single bucket. The first is that it simplifies the infrastructure provisioning that needs to be done when a new source is added. The second is that there is a hard limit on the number of buckets an AWS account can have, and our current approach to bucket creation is unsustainable. 14 | 15 | ## Decision 16 | 17 | Use a single namespaced S3 bucket for all source data. The structure of the bucket should be: 18 | 19 | ``` 20 | s3://bucket/<environment>/<source>/ 21 | ``` 22 | 23 | Where `environment` would be either `prod` or `stage`, and `source` would be the source identifier. The source identifier used here should also be used as the prefix for the index name. No specific decisions are made here about how the files are structured within a source. 24 | 25 | mario should ignore source identifiers that it does not know about. 26 | 27 | ## Consequences 28 | 29 | Changes will be needed both in mario and in the scripts that upload data to the current bucket. It's worth noting that bucket policies around lifecycle and permissions can be applied to objects based on prefix, so any existing needs specific to a source can be supported with this change. 30 | -------------------------------------------------------------------------------- /docs/architecture-decisions/0016-replace-elasticsearch-with-opensearch.md: -------------------------------------------------------------------------------- 1 | # 16. Replace Elasticsearch with OpenSearch 2 | 3 | Date: 2022-03-02 4 | 5 | ## Status 6 | 7 | Accepted 8 | 9 | Supercedes [2. Use Elasticsearch](0002-use-elasticsearch.md) 10 | 11 | ## Context 12 | 13 | Amazon has moved to support OpenSearch over Elasticsearch, and no longer provides Elasticsearch as a managed service past version 7.10. Due to our heavy use of AWS services, it makes sense to use the indexing service that will be supported going forward. See https://aws.amazon.com/blogs/aws/amazon-elasticsearch-service-is-now-amazon-opensearch-service-and-supports-opensearch-10/ for background information. 14 | 15 | ## Decision 16 | 17 | Use the latest version of OpenSearch as our index instead of Elasticsearch. 
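As the client code later in this repository (`pkg/client/elastic.go`) suggests, the existing `olivere/elastic` v7 client can simply be pointed at an OpenSearch endpoint. The sketch below mirrors that setup; the endpoint URL is a placeholder, not a real domain.

```
// Minimal sketch: pointing the existing elastic v7 client at OpenSearch.
// The URL is a placeholder; real deployments use the managed domain endpoint.
package main

import (
	"log"

	"github.com/olivere/elastic/v7"
)

func main() {
	client, err := elastic.NewClient(
		elastic.SetURL("https://search-timdex-example.us-east-1.es.amazonaws.com"),
		// Sniffing and health checks are disabled, matching pkg/client/elastic.go,
		// which is typically required when talking to a managed AWS endpoint.
		elastic.SetSniff(false),
		elastic.SetHealthcheck(false),
	)
	if err != nil {
		log.Fatal(err)
	}
	_ = client // handed to mario's Indexer wrapper in the real code
}
```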
18 | 19 | ## Consequences 20 | 21 | Currently switching to OpenSearch has very little consequences, as the current version is still nearly identical to the version of Elasticsearch it was forked from (7.10). As OpenSearch evolves and we make adjustments to follow, it may become more difficult to move back to Elasticsearch should we ever desire to. 22 | 23 | The documentation for OpenSearch is pretty minimal, however for the same reason as above that isn't very consequential yet since we can still use Elasticsearch documentation, which is very robust. 24 | 25 | This doesn't impact other decisions made about our indexing strategy and flow, as they remain the same in OpenSearch. -------------------------------------------------------------------------------- /docs/charts/dip_architecture.dot: -------------------------------------------------------------------------------- 1 | // dot docs/charts/dip_architecture.dot -Tpng > docs/charts/dip_architecture.png 2 | // requires installation of GraphViz (brew install graphviz, or 3 | // http://www.graphviz.org/Download..php if that doesn't work) 4 | 5 | digraph G { 6 | label="Discovery Index Flow"; 7 | labelloc=t; 8 | fontname=helvetica; 9 | fontsize=18; 10 | node [style=filled, fontname=helvetica, fillcolor=white, penwidth=4, fontsize=16]; 11 | pad=0.3; 12 | 13 | alma[label="Alma Export Cron to AWS S3 Bucket", color=DarkOrange3] 14 | s3Event[label="S3 bucket event triggers Mario Powerup in AWS Lambda via CloudWatch", color=DarkOrange3] 15 | lambda[label="Mario Powerup formats command and calls Mario in AWS Fargate", color=DarkOrange3] 16 | fullLoad[label="Full Load: create new OpenSearch index", color=DeepSkyBlue4] 17 | dailyLoad[label="Daily Updates: use current OpenSearch index", color=DeepSkyBlue4] 18 | process[label="Process data from S3 to standard data model in OpenSearch", color=DeepSkyBlue4] 19 | updateAlias[label="Promote new index to production if full load", color=DeepSkyBlue4] 20 | 21 | alma -> s3Event 22 | s3Event -> lambda 23 | lambda -> fullLoad 24 | lambda -> dailyLoad 25 | dailyLoad -> process 26 | fullLoad -> process 27 | process -> updateAlias 28 | 29 | subgraph clusterLegend { 30 | label="Key"; 31 | { 32 | k2[label="Process the data", color=DeepSkyBlue4]; 33 | k1[label="Get the data into place", color=darkorange3]; 34 | k1 -> k2 [style=invis] 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /docs/charts/dip_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MITLibraries/mario/065ef1ce90c486110aa7e7508e0913ef3e9e60c1/docs/charts/dip_architecture.png -------------------------------------------------------------------------------- /docs/charts/dip_overview.dot: -------------------------------------------------------------------------------- 1 | // dot docs/charts/dip_overview.dot -Tpng > docs/charts/dip_overview.png 2 | // requires installation of GraphViz (brew install graphviz, or 3 | // http://www.graphviz.org/Download..php if that doesn't work) 4 | 5 | digraph G { 6 | label="Discovery Index"; 7 | labelloc=t; 8 | fontname=helvetica; 9 | fontsize=18; 10 | node [style=filled, fontname=helvetica, color=black, fillcolor=white, penwidth=3, fontsize=16]; 11 | pad=0.3; 12 | 13 | alma[label="Alma"] 14 | almaExporter[label="Alma Metadata Exporter", color=Yellow1, fillcolor=Yellow1] 15 | s3[label="AWS S3 Bucket", color=DarkOrange3] 16 | processor[label="Mario: pipeline for metadata indexing", 
color=Yellow1, fillcolor=Yellow1] 17 | index[label="OpenSearch", color=DarkOrange3] 18 | archives[label="Archival Metadata"] 19 | archivesExporter[label="Archival Metadata Exporter", color=SteelBlue1] 20 | futureData[label="Source X, Y, Z, etc", color=black] 21 | futureExporter[label="Source X, Y, Z, etc Metadata Exporter", color=SteelBlue1] 22 | websites[label="Library Websites / Guides", color=black] 23 | websitesCrawler[label="Crawler", color=SteelBlue1] 24 | api[label="TIMDEX: Local Discovery API", color=Yellow1, fillcolor=Yellow1] 25 | eds[label="EDS"] 26 | bento[label="Bento"] 27 | magic[label="Users Doing Cool Stuff?", color=transparent] 28 | 29 | alma -> almaExporter 30 | almaExporter -> s3 31 | 32 | s3 -> processor 33 | processor -> index 34 | 35 | archives -> archivesExporter 36 | archivesExporter -> s3 37 | 38 | futureData -> futureExporter 39 | futureExporter -> s3 40 | 41 | websites -> websitesCrawler 42 | websitesCrawler -> s3 43 | 44 | index -> api 45 | api -> bento [color=SteelBlue1, penwidth=3] 46 | api -> magic 47 | 48 | eds -> bento 49 | 50 | subgraph clusterLegend { 51 | label="Key"; 52 | { 53 | k4[label="Future Project", color=SteelBlue1]; 54 | k3[label="Cloud Service", color=darkorange3]; 55 | k2[label="Current Project", color=Yellow1, fillcolor=Yellow1]; 56 | k1[label="Existing System"] 57 | k1 -> k2 -> k3 -> k4 [style=invis] 58 | } 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /docs/charts/dip_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MITLibraries/mario/065ef1ce90c486110aa7e7508e0913ef3e9e60c1/docs/charts/dip_overview.png -------------------------------------------------------------------------------- /docs/pull_request_template.md: -------------------------------------------------------------------------------- 1 | #### What does this PR do? 2 | 3 | A few sentences describing the overall goals of the pull request's commits. 4 | Why are we making these changes? Is there more work to be done to fully 5 | achieve these goals? 6 | 7 | #### Helpful background context 8 | 9 | Describe any additional context beyond what the PR accomplishes if it is likely 10 | to be useful to a reviewer. 11 | 12 | Delete this section if it isn't applicable to the PR. 13 | 14 | #### How can a reviewer manually see the effects of these changes? 15 | 16 | Explain how to see the proposed changes in the application if possible. 17 | 18 | Delete this section if it isn't applicable to the PR. 19 | 20 | #### What are the relevant tickets? 21 | 22 | - https://mitlibraries.atlassian.net/browse/DIP- 23 | 24 | #### Screenshots (if appropriate) 25 | 26 | Delete this section if it isn't applicable to the PR. 27 | 28 | #### Requires Full Reindexing of all Sources? 29 | YES | NO 30 | 31 | #### Includes new or updated dependencies? 32 | YES | NO 33 | 34 | #### Todo: 35 | - [ ] Tests 36 | - [ ] Documentation 37 | - [ ] Stakeholder approval 38 | -------------------------------------------------------------------------------- /fixtures/timdex_record_samples.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "alternate_titles": [ 4 | { 5 | "kind": "Alternate title", 6 | "value": "Best of Paquito D'Rivera" 7 | } 8 | ], 9 | "call_numbers": [ 10 | "781.657" 11 | ], 12 | "citation": "D'Rivera, Paquito et al. 2008. 
Portraits of Cuba", 13 | "content_type": [ 14 | "Sound recording" 15 | ], 16 | "contents": [ 17 | "Chucho -- Havana cafe -- The peanut vendor -- A night in Tunisia -- Mambo a la Kenton -- Echale salsita -- Drume negrita -- Tropicana nights -- Who's smoking -- Tico tico -- Portraits of Cuba -- Excerpt from Aires tropicales -- What are you doing tomorrow night -- A mi que/El manisero." 18 | ], 19 | "contributors": [ 20 | { 21 | "kind": "author", 22 | "value": "D'Rivera, Paquito, 1948-" 23 | }, 24 | { 25 | "kind": "contributor", 26 | "value": "D'Rivera, Paquito, 1948-" 27 | }, 28 | { 29 | "kind": "contributor", 30 | "value": "Pérez, Danilo." 31 | }, 32 | { 33 | "kind": "contributor", 34 | "value": "Gilbert, Wolfe." 35 | }, 36 | { 37 | "kind": "contributor", 38 | "value": "Gillespie, Dizzy, 1917-1993." 39 | }, 40 | { 41 | "kind": "contributor", 42 | "value": "Pérez Prado, 1916-1989." 43 | }, 44 | { 45 | "kind": "contributor", 46 | "value": "Piñeiro, Ignacio, 1888-1969." 47 | }, 48 | { 49 | "kind": "contributor", 50 | "value": "Grenet, Ernesto Wood." 51 | }, 52 | { 53 | "kind": "contributor", 54 | "value": "Roditi, Claudio." 55 | }, 56 | { 57 | "kind": "contributor", 58 | "value": "Abreu, Zequinha de, 1880-1935." 59 | }, 60 | { 61 | "kind": "contributor", 62 | "value": "Godoy, Lucio." 63 | }, 64 | { 65 | "kind": "contributor", 66 | "value": "Hernández, Rafael." 67 | } 68 | ], 69 | "dates": [ 70 | { 71 | "kind": "Date of publication", 72 | "value": "2008" 73 | } 74 | ], 75 | "identifiers": [ 76 | { 77 | "kind": "oclc", 78 | "value": "811549562" 79 | } 80 | ], 81 | "languages": [ 82 | "No linguistic content" 83 | ], 84 | "links": [ 85 | { 86 | "kind": "Digital object link", 87 | "text": "Naxos Music Library", 88 | "url": "http://BLCMIT.NaxosMusicLibrary.com/catalogue/item.asp?cid=JD-342" 89 | } 90 | ], 91 | "locations": [ 92 | { 93 | "kind": "Place of publication", 94 | "value": "New York (State)" 95 | } 96 | ], 97 | "notes": [ 98 | { 99 | "value": [ 100 | "Paquito d' Rivera, saxophone ; Paquito d' Rivera, soprano saxophone.", 101 | "Description based on hard copy version record." 102 | ] 103 | } 104 | ], 105 | "physical_description": "1 online resource (1 sound file)", 106 | "publication_information": [ 107 | "[New York, N.Y.] : Chesky Records, p2008." 108 | ], 109 | "source": "MIT Alma", 110 | "source_link": "https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma990026671500206761", 111 | "subjects": [ 112 | { 113 | "value": [ 114 | "Jazz.", 115 | "Latin jazz.", 116 | "Clarinet music (Jazz)", 117 | "Saxophone music (Jazz)" 118 | ] 119 | } 120 | ], 121 | "timdex_record_id": "mit:alma:990026671500206761", 122 | "title": "Spice it up! the best of Paquito D'Rivera." 123 | }, 124 | { 125 | "call_numbers": [ 126 | "TX724.5.A1", 127 | "641.595" 128 | ], 129 | "content_type": [ 130 | "Text" 131 | ], 132 | "contents": [ 133 | "Breakfast -- Lunch & small eats -- Date night in -- Celebrations & gatherings -- On the side -- Sweet -- Drinks." 134 | ], 135 | "contributors": [ 136 | { 137 | "kind": "author", 138 | "value": "McTernan, Cynthia Chen, author." 
139 | } 140 | ], 141 | "dates": [ 142 | { 143 | "kind": "Date of publication", 144 | "value": "2018" 145 | } 146 | ], 147 | "edition": "First edition.", 148 | "format": "Print volume", 149 | "holdings": [ 150 | { 151 | "call_number": "TX724.5.A1 M38 2018", 152 | "collection": "Stacks", 153 | "format": "Print volume", 154 | "location": "Hayden Library", 155 | "note": "This is an examples holdings note" 156 | } 157 | ], 158 | "identifiers": [ 159 | { 160 | "kind": "isbn", 161 | "value": "163565002X (hardback)" 162 | }, 163 | { 164 | "kind": "isbn", 165 | "value": "9781635650020 (hardback)" 166 | }, 167 | { 168 | "kind": "oclc", 169 | "value": "1019737335" 170 | }, 171 | { 172 | "kind": "oclc", 173 | "value": "1061147498" 174 | }, 175 | { 176 | "kind": "lccn", 177 | "value": "2018287279" 178 | } 179 | ], 180 | "languages": [ 181 | "English" 182 | ], 183 | "literary_form": "nonfiction", 184 | "locations": [ 185 | { 186 | "kind": "Place of publication", 187 | "value": "New York (State)" 188 | } 189 | ], 190 | "notes": [ 191 | { 192 | "value": [ 193 | "Cynthia Chen McTernan.", 194 | "Includes index." 195 | ] 196 | } 197 | ], 198 | "physical_description": "285 pages : color illustrations ; 27 cm", 199 | "publicacation_information": [ 200 | "New York : Rodale Books, an imprint of the Crown Publishing Group, a division of Penguin Random House LLC, [2018]", 201 | "©2018" 202 | ], 203 | "source": "MIT Alma", 204 | "source_link": "https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma990027672770206761", 205 | "subjects": [ 206 | { 207 | "value": [ 208 | "Asian American cooking." 209 | ] 210 | } 211 | ], 212 | "summary": [ 213 | "In A Common Table, Two Red Bowls blogger Cynthia Chen McTernan shares more than 80 Asian-inspired, modern recipes that marry food from her Chinese roots, Southern upbringing, and Korean mother-in-law's table. The book chronicles Cynthia's story alongside the recipes she and her family eat every day--beginning when she met her husband at law school and ate out of two battered red bowls, through the first years of her legal career in New York, to when she moved to Los Angeles to start a family. As Cynthia's life has changed, her cooking has become more diverse. She shares recipes that celebrate both the commonalities and the diversity of cultures: her mother-in-law's spicy Korean-inspired take on Hawaiian poke, a sticky sesame peanut pie that combines Chinese peanut sesame brittle with the decadence of a Southern pecan pie, and a grilled cheese topped with a crisp fried egg and fiery kimchi. And of course, she shares the basics: how to make soft, pillowy steamed buns, savory pork dumplings, and a simple fried rice that can form the base of any meal. Asian food may have a reputation for having long ingredient lists and complicated instructions, but Cynthia makes it relatable, avoiding hard-to-find ingredients or equipment, and breaking down how to bring Asian flavors home into your own kitchen. Above all, Cynthia believes that food can bring us together around the same table, no matter where we are from. The message at the heart of A Common Table is that the food we make and eat is rarely the product of one culture or moment, but is richly interwoven--and though some dishes might seem new or different, they are often more alike than they appear. -- Amazon." 
214 | ], 215 | "timdex_record_id": "mit:alma:990027672770206761", 216 | "title": "A common table : 80 recipes and stories from my shared cultures /" 217 | }, 218 | { 219 | "call_numbers": [ 220 | "SB351.P3", 221 | "633" 222 | ], 223 | "content_type": [ 224 | "Text" 225 | ], 226 | "contributors": [ 227 | { 228 | "kind": "contributor", 229 | "value": "American Peanut Research and Education Society." 230 | } 231 | ], 232 | "dates": [ 233 | { 234 | "kind": "Date of publication", 235 | "value": "2005" 236 | } 237 | ], 238 | "identifiers": [ 239 | { 240 | "kind": "issn", 241 | "value": "1943-7668" 242 | }, 243 | { 244 | "kind": "oclc", 245 | "value": "232113616" 246 | }, 247 | { 248 | "kind": "lccn", 249 | "value": "2008202156" 250 | } 251 | ], 252 | "languages": [ 253 | "English" 254 | ], 255 | "literary_form": "fiction", 256 | "locations": [ 257 | { 258 | "kind": "Place of publication", 259 | "value": "Oklahoma" 260 | } 261 | ], 262 | "notes": [ 263 | { 264 | "value": [ 265 | "Refereed/Peer-reviewed", 266 | "Electronic reproduction. [S.l.] : HathiTrust Digital Library, 2010.", 267 | "Latest issue consulted: Vol. 35, issue 1 (Jan./June 2008).", 268 | "Description based on print version record." 269 | ] 270 | } 271 | ], 272 | "numbering": "Began with v. 32, issue 1 (Jan./June 2005).", 273 | "physical_description": "1 online resource", 274 | "publication_frequency": [ 275 | "Semiannual" 276 | ], 277 | "publication_information": [ 278 | "Perkins, OK : American Peanut Research and Education Society" 279 | ], 280 | "source": "MIT Alma", 281 | "source_link": "https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma9933052979806761", 282 | "subjects": [ 283 | { 284 | "value": [ 285 | "Peanuts Periodicals.", 286 | "(OCoLC)fst01055999 Peanuts." 287 | ] 288 | } 289 | ], 290 | "timdex_record_id": "mit:alma:9933052979806761", 291 | "title": "Peanut science." 292 | }, 293 | { 294 | "citation": "Ranjram, Mike K., Intae Moon, and David J. Perreault. 'Variable-Inverter-Rectifier-Transformer: A Hybrid Electronic and Magnetic Structure Enabling Adjustable High Step-Down Conversion Ratios.' 2017 IEEE Workshop on Control and Modeling for Power Electronics (COMPEL 17), 9-12 July, 2017, Stanford, California, IEEE, 2017.", 295 | "content_type": [ 296 | "Article", 297 | "Conference paper" 298 | ], 299 | "contributors": [ 300 | { 301 | "affiliation": ["MIT"], 302 | "kind": "author", 303 | "mit_affiliated": true, 304 | "value": "Moon, Intae" 305 | }, 306 | { 307 | "affiliation": ["MIT"], 308 | "kind": "author", 309 | "mit_affiliated": true, 310 | "value": "Ranjram, Mike Kavian" 311 | }, 312 | { 313 | "affiliation": ["MIT"], 314 | "kind": "author", 315 | "identifier": ["https://orcid.org/0000-0002-0746-6191"], 316 | "mit_affiliated": true, 317 | "value": "Perreault, David J" 318 | }, 319 | { 320 | "kind": "department", 321 | "value": "Massachusetts Institute of Technology. Department of Electrical Engineering and Computer Science" 322 | }, 323 | { 324 | "kind": "approver", 325 | "value": "Perreault, David J." 
326 | } 327 | ], 328 | "dates": [ 329 | { 330 | "kind": "Date accessioned", 331 | "value": "2018-02-12T15:24:17Z" 332 | }, 333 | { 334 | "kind": "Date available", 335 | "value": "2018-02-12T15:24:17Z" 336 | }, 337 | { 338 | "kind": "Date of publication", 339 | "value": "2017-08" 340 | } 341 | ], 342 | "file_formats": [ 343 | "application/pdf", 344 | "text/plain" 345 | ], 346 | "format": "Electronic resource", 347 | "funding_information": [ 348 | { 349 | "award_number": "1609240", 350 | "funder_name": "National Science Foundation (U.S.)" 351 | }, 352 | { 353 | "funder_name": "Texas Instruments Incorporated" 354 | }, 355 | { 356 | "funder_name": "Futurewei Technologies, Inc." 357 | }, 358 | { 359 | "funder_name": "Massachusetts Institute of Technology. Center for Integrated Circuits and Systems" 360 | } 361 | ], 362 | "identifiers": [ 363 | { 364 | "kind": "isbn", 365 | "value": "9781509053278" 366 | }, 367 | { 368 | "kind": "uri", 369 | "value": "http://hdl.handle.net/1721.1/113566" 370 | } 371 | ], 372 | "languages": [ 373 | "English" 374 | ], 375 | "links": [ 376 | { 377 | "kind": "Digital object link", 378 | "url": "http://hdl.handle.net/1721.1/113566" 379 | } 380 | ], 381 | "related_items": [ 382 | { 383 | "kind": "Community", 384 | "relationship": "Is part of", 385 | "uri": "https://dspace.mit.edu/handle/1721.1/49432", 386 | "value": "MIT Open Access Articles" 387 | }, 388 | { 389 | "kind": "Collection", 390 | "relationship": "Is part of", 391 | "uri": "https://dspace.mit.edu/handle/1721.1/49433", 392 | "value": "MIT Open Access Articles" 393 | }, 394 | { 395 | "relationship": "Is version of", 396 | "uri": "http://dx.doi.org/10.1109/COMPEL.2017.8013350" 397 | }, 398 | { 399 | "kind": "Journal", 400 | "relationship": "Published in", 401 | "uri": "http://dx.doi.org/10.1109/COMPEL.2017.8013350", 402 | "value": "2017 IEEE Workshop on Modeling and Control in Power Electronics (COMPEL)" 403 | } 404 | ], 405 | "rights": [ 406 | { 407 | "description": "Creative Commons Attribution-Noncommercial-Share Alike", 408 | "kind": "Terms of use", 409 | "uri": "https://creativecommons.org/licenses/by-nc-sa/3.0/" 410 | } 411 | ], 412 | "summary": [ 413 | "This paper proposes a hybrid electronic and magnetic structure that enables transformers with “fractional” and reconfigurable turns ratios (e.g. 12:0.5, 12:1, 12:2). This functionality is valuable in converters with wide operating voltage ranges and high step-up/down, as it offers a means to reduce copper loss within the transformer while also facilitating voltage doubling and quadrupling. We introduce the principle of operation of the structure and present models for its magnetic and electrical behaviour. An experimental prototype capable of accommodating a widely varying input (120-380[subscript Vdc]) and output (5, 9, 12V) validates the operating principle and modelling of the proposed structure and achieves conversion efficiencies between 93.4% and 95.7% at 25-36 W." 414 | ], 415 | "timdex_record_id": "mit:dspace:1721.1-113566", 416 | "title": "Variable-Inverter-Rectifier-Transformer: A Hybrid Electronic and Magnetic Structure Enabling Adjustable High Step-Down Conversion Ratios" 417 | }, 418 | { 419 | "citation": "Charles J. Connick Stained Glass Foundation Collection, VC-0002, box X. Massachusetts Institute of Technology, Department of Distinctive Collections, Cambridge, Massachusetts.", 420 | "content_type": [ 421 | "Archival collection" 422 | ], 423 | "contents": [ 424 | "This collection is organized into ten series", 425 | "Series 1. Charles J. 
Connick and Connick Studio documents", 426 | "Series 2. Charles J. Connick Studio and Associates job information", 427 | "Series 3. Charles J. Connick Stained Glass Foundation documents", 428 | "Series 4. Charles J. Connick and Connick Studio media", 429 | "Series 5. Charles J. Connick and Connick Studio collected text", 430 | "Series 6. Charles J. Connick Studio and Associates subcollections", 431 | "Series 7. Charles J. Connick Studio and Associates studio hardware", 432 | "Series 8. Charles J. Connick Studio and Associates supplementary art materials", 433 | "Series 9. Charles J. Connick Studio and Associates stained glass works", 434 | "Series 10. Charles J. Connick Studio and Associates works on paper" 435 | ], 436 | "contributors": [ 437 | { 438 | "affiliation": ["MIT"], 439 | "kind": "creator", 440 | "identifier": ["https://lccn.loc.gov/nr99025157"], 441 | "mit_affiliated": true, 442 | "value": "Connick, Charles J. (Charles Jay)" 443 | } 444 | ], 445 | "dates": [ 446 | { 447 | "kind": "Date of creation", 448 | "range": { 449 | "gte": "1905", 450 | "lte": "2012" 451 | }, 452 | "note": "This is an example date note" 453 | } 454 | ], 455 | "identifiers": [ 456 | { 457 | "kind": "Archival collection number", 458 | "value": "VC.0002" 459 | } 460 | ], 461 | "languages": [ 462 | "English" 463 | ], 464 | "links": [ 465 | { 466 | "kind": "Digital object link", 467 | "restrictions": "This is an example link restriction", 468 | "text": "Digitized items in the collection and a finding aid can be viewed in the MIT Libraries Digital Repository, Dome", 469 | "url": "http://dome.mit.edu/handle/1721.3/74802" 470 | } 471 | ], 472 | "notes": [ 473 | { 474 | "kind": "Biographical Note", 475 | "value": [ 476 | "Charles J. Connick (1875-1945) was an American stained glass artist whose work may be found in cities all across the United States. Connick's works in the Arts and Crafts movement and beyond uniquely combined ancient and modern techniques and also sparked a revival of medieval European stained glass craftsmanship. Connick studied symbols and the interaction between light, color and glass, as well as the crucial connection between the stained glass window and its surrounding architecture.", 477 | "Connick founded his own studio in 1912 in Boston. The Charles J. Connick Studio performed work for churches, synagogues, schools, hospitals, public buildings and private homes in cities across the United States and in several other countries. When Connick died in 1945, the worker-owned studio continued as Charles J. Connick Associates under the supervision of Orin E. Skinner in Boston's Back Bay until closing in 1987.", 478 | "The Charles J. Connick Stained Glass Foundation was created to preserve the Connick tradition of stained glass. At the same time, items from the studio were donated to the Boston Public Library's Fine Arts Department to form Charles J. Connick Studio Collection. In 2008, the Foundation donated its own collection of stained glass windows, designs, cartoons, slides, documents, periodicals, and other items to the MIT Libraries. The collection was processed over three years from March 2009 to May 2012." 479 | ] 480 | }, 481 | { 482 | "kind": "Scope and Contents", 483 | "value": [ 484 | "The Charles J. 
Connick Stained Glass Foundation Collection contains documents, photographs, slides, film, periodicals, articles, clippings, lecture transcripts, tools, sketches, designs and cartoons (full size stained glass window designs), stained glass, and ephemera.", 485 | "The primary reference material is the job information. In particular, the job files (boxes 7-9) are used most often in research. Job files list specific information for each job performed by the studio.", 486 | "For more information, including access to the digital content of the collection, please visit the collection website." 487 | ] 488 | } 489 | ], 490 | "related_items": [ 491 | { 492 | "description": "The Charles J. Connick and Associates Archives are located at the Boston Public Library's Fine Arts Department.", 493 | "uri": "http://www.bpl.org/research/finearts.htm" 494 | }, 495 | { 496 | "description": "The Charles J. Connick papers, 1901-1949 are located at the Smithsonian Archives of American Art.", 497 | "uri": "http://www.aaa.si.edu/collections/charles-j-connick-papers-7235" 498 | }, 499 | { 500 | "description": "Information on the Charles J. Connick Stained Glass Foundation may be found at their website.", 501 | "uri": "http://www.cjconnick.org/" 502 | } 503 | ], 504 | "rights": [ 505 | { 506 | "description": "Access to collections in the Department of Distinctive Collections is not authorization to publish. Please see the MIT Libraries Permissions Policy for permission information. Copyright of some items in this collection may be held by respective creators, not by the donor of the collection or MIT.", 507 | "kind": "Conditions Governing Use" 508 | }, 509 | { 510 | "description": "This collection is open.", 511 | "kind": "Conditions Governing Access" 512 | } 513 | ], 514 | "source": "MIT ArchivesSpace", 515 | "source_link": "https://archivesspace.mit.edu/repositories/2/resources/1", 516 | "subjects": [ 517 | { 518 | "kind": "LCSH", 519 | "value": [ 520 | "Glass painting and staining" 521 | ] 522 | }, 523 | { 524 | "kind": "NAF", 525 | "value": [ 526 | "Connick, Charles J. (Charles Jay)" 527 | ] 528 | } 529 | ], 530 | "timdex_record_id": "mit:archivesspace:VC.0002", 531 | "title": "Charles J. Connick Stained Glass Foundation Collection" 532 | }, 533 | { 534 | "call_numbers": [ 535 | "SB106.B56.C76 2002", 536 | "631.5/233" 537 | ], 538 | "contents": [ 539 | "1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12. 13. 14. 15. Overview of Crop Biotechnology / Defining Biotechnology: Increasingly Important and Increasingly Difficult / Genetically Modified Crop Approvals and Planted Acreages / Insect-Resistant Transgenic Crops / Transgenic Technology for Insect Resistance: Current Achievements and Future Prospects / Genetic Engineering Crops for Improved Weed Management Traits / Environmentally Friendly Approaches in Biotechnology: Engineering the Chloroplast Genome to Confer Stress Tolerance / DNA Microchip Technology in the Plant Tissue Culture Industry / Genetic Engineering for Resistance to Phytopathogens / Engineering Resveratrol Glucoside Accumulation into Alfalfa: Crop Protection and Nutraceutical Applications / Corn as a Source of Antifungal Genes for Genetic Engineering of Crops for Resistance to Aflatoxin Contamination / Reduction of Aflatoxin Contamination in Peanut: A Genetic Engineering Approach / Development of Micropropagation Technologies for St. 
John's wort (Hypericum perforaturm L.): Relevance on Application / Production of Vaccines and Therapeutics in Plants for Oral Delivery / Food Allergy: Recent Advances in Food Allergy Research / K. Rajasekaran, T. J. Jacks and J. W. Finley -- J. W. Radin and P. K. Bretting -- V. A. Forster -- J. J. Adamczyk, Jr. and D. D. Hardee -- D. R. Walker, H. R. Boerma, J. N. All and W. A. Parrott -- S. O. Duke, B. E. Scheffler, F. E. Dayan and W. E. Dyer -- H. Daniell -- K. J. Kunert, J. Vorster, C. Bester and C. A. Cullis -- K. Rajasekaran, J. W. Cary, T. J. Jacks and T. E. Cleveland -- N. L. Paiva -- Z.-Y. Chen, T. E. Cleveland, R. L. Brown, D. Bhatnagar, J. W. Cary and K. Rajasekaran -- P. Ozias-Akins, H. Yang, R. Gill, H. Fan and E. Lynch -- S. J. Murch, S. D. S. Chiwocha and P. K. Saxena -- L. M. Welter -- S. J. Maleki and B. K. Hurlburt --", 540 | "16. 17. 18. Assessment of the Allergenicity of Foods Produced through Agricultural Biotechnology / Prediction of Parental Genetic Compatibility to Enhance Flavor Attributes of Peanuts / Outlook for Consumer Acceptance of Agricultural Biotechnology / S. L. Taylor -- H. E. Pattee, T. G. Isleib, F. G. Giesbrecht and Z. Cui -- D. B. Schmidt." 541 | ], 542 | "content_type": [ 543 | "Text" 544 | ], 545 | "contributors": [ 546 | { 547 | "kind": "contributor", 548 | "value": "Rajasekaran, K., 1952-" 549 | }, 550 | { 551 | "kind": "contributor", 552 | "value": "Jacks, T. J. (Thomas J.), 1938-" 553 | }, 554 | { 555 | "kind": "contributor", 556 | "value": "Finley, John W., 1942-" 557 | }, 558 | { 559 | "kind": "contributor", 560 | "value": "American Chemical Society. Meeting San Francisco, Calif.) 2000 :" 561 | } 562 | ], 563 | "dates": [ 564 | { 565 | "kind": "Date of publication", 566 | "value": "2002" 567 | } 568 | ], 569 | "format": "Print volume", 570 | "holdings": [ 571 | { 572 | "call_number": "SB106.B56.C76 2002", 573 | "collection": "Off Campus Collection", 574 | "format": "Print volume", 575 | "location": "Library Storage Annex" 576 | } 577 | ], 578 | "identifiers": [ 579 | { 580 | "kind": "isbn", 581 | "value": "0841237662 (alk. paper)" 582 | }, 583 | { 584 | "kind": "oclc", 585 | "value": "49383680" 586 | }, 587 | { 588 | "kind": "lccn", 589 | "value": "2002018690" 590 | } 591 | ], 592 | "languages": [ 593 | "English" 594 | ], 595 | "links": [ 596 | { 597 | "kind": "Hathi Trust", 598 | "url": "http://catalog.hathitrust.org/api/volumes/oclc/49383680.html" 599 | }, 600 | { 601 | "kind": "unknown", 602 | "url": "http://dx.doi.org/10.1021/bk-2002-0829" 603 | } 604 | ], 605 | "literary_form": "nonfiction", 606 | "locations": [ 607 | { 608 | "geopoint": [ 609 | -77.025955, 610 | 38.942142 611 | ], 612 | "kind": "Place of publication", 613 | "value": "District of Columbia" 614 | } 615 | ], 616 | "notes": [ 617 | { 618 | "value": [ 619 | "K. Rajasekaran, editor, T.J. Jacks, editor, J.W. Finley, editor.", 620 | "\"Product of a 3-day symposium held during the 219th American Chemical Society (ACS) national meeting in San Francisco, California in 2000\"--P. x.", 621 | "Includes bibliographical references and indexes." 622 | ] 623 | } 624 | ], 625 | "physical_description": "xi, 259 p. : ill. ; 24 cm.", 626 | "publication_information": [ 627 | "Washington, DC : American Chemical Society : Distributed by Oxford University Press, c2002." 
628 | ], 629 | "related_items": [ 630 | { 631 | "description": "ACS symposium series ; 829.", 632 | "relationship": "In series" 633 | } 634 | ], 635 | "source": "MIT Alma", 636 | "source_link": "https://mit.primo.exlibrisgroup.com/discovery/fulldisplay?vid=01MIT_INST:MIT&docid=alma990011240870206761", 637 | "subjects": [ 638 | { 639 | "value": [ 640 | "Plant biotechnology Congresses.", 641 | "Crops Congresses. Genetic engineering" 642 | ] 643 | } 644 | ], 645 | "timdex_record_id": "mit:alma:990011240870206761", 646 | "title": "Crop biotechnology /" 647 | } 648 | ] -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mitlibraries/mario 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/aws/aws-sdk-go v1.43.32 7 | github.com/gobuffalo/here v0.6.5 // indirect 8 | github.com/markbates/pkger v0.17.1 9 | github.com/olivere/elastic/v7 v7.0.32 10 | github.com/urfave/cli/v2 v2.4.0 11 | gopkg.in/yaml.v2 v2.4.0 // indirect 12 | ) 13 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 3 | github.com/aws/aws-sdk-go v1.43.21/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= 4 | github.com/aws/aws-sdk-go v1.43.32 h1:b2NQnfWfImfo7yzXq6gzXEC+6s5v1t2RU3G9o+VirYo= 5 | github.com/aws/aws-sdk-go v1.43.32/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= 6 | github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= 7 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 8 | github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= 9 | github.com/cpuguy83/go-md2man/v2 v2.0.1 h1:r/myEWzV9lfsM1tFLgDyu0atFtJ1fXn261LKYj/3DxU= 10 | github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 11 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 12 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 13 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 14 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 15 | github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= 16 | github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= 17 | github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= 18 | github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= 19 | github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= 20 | github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= 21 | github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 22 | github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= 23 | github.com/gobuffalo/here v0.6.0/go.mod h1:wAG085dHOYqUpf+Ap+WOdrPTp5IYcDAs/x7PLa8Y5fM= 24 | github.com/gobuffalo/here v0.6.5 
h1:OjrFcVbQBXff4EN+/m2xa+i1Wy6lW+3fn9Jf+b5WDXY= 25 | github.com/gobuffalo/here v0.6.5/go.mod h1:y6q8eG7YstM/DfOKKAyHV1plrNsuYS5dcIerm8Habas= 26 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 27 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 28 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 29 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 30 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 31 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 32 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 33 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 34 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= 35 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= 36 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= 37 | github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= 38 | github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 39 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 40 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 41 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 42 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 43 | github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 44 | github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 45 | github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= 46 | github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE= 47 | github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 48 | github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= 49 | github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= 50 | github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= 51 | github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= 52 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 53 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 54 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 55 | github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk= 56 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 57 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 58 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 59 | github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= 60 | github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= 
61 | github.com/markbates/pkger v0.17.1 h1:/MKEtWqtc0mZvu9OinB9UzVN9iYCwLWuyUv4Bw+PCno= 62 | github.com/markbates/pkger v0.17.1/go.mod h1:0JoVlrol20BSywW79rN3kdFFsE5xYM+rSCQDXbLhiuI= 63 | github.com/olivere/elastic/v7 v7.0.32 h1:R7CXvbu8Eq+WlsLgxmKVKPox0oOwAE/2T9Si5BnvK6E= 64 | github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCzZ8xDOE09a9k= 65 | github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= 66 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 67 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 68 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 69 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 70 | github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 71 | github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= 72 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 73 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 74 | github.com/smartystreets/assertions v1.1.1/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= 75 | github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM= 76 | github.com/smartystreets/gunit v1.4.2/go.mod h1:ZjM1ozSIMJlAz/ay4SG8PeKF00ckUp+zMHZXV9/bvak= 77 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 78 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 79 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 80 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 81 | github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= 82 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 83 | github.com/urfave/cli/v2 v2.4.0 h1:m2pxjjDFgDxSPtO8WSdbndj17Wu2y8vOT86wE/tjr+I= 84 | github.com/urfave/cli/v2 v2.4.0/go.mod h1:NX9W0zmTvedE5oDoOMs2RTC8RvdK98NTYZE5LbaEYPg= 85 | go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= 86 | go.opentelemetry.io/otel v1.5.0/go.mod h1:Jm/m+rNp/z0eqJc74H7LPwQ3G87qkU/AnnAydAjSAHk= 87 | go.opentelemetry.io/otel/trace v1.5.0/go.mod h1:sq55kfhjXYr1zVSyexg0w1mpa03AYXR5eyTkB9NPPdE= 88 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 89 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 90 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 91 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 92 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 93 | golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 94 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 95 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 96 | golang.org/x/net 
v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 97 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 98 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 99 | golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 100 | golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd h1:O7DYs+zxREGLKzKoMQrtrEacpb0ZVXA5rIwylE2Xchk= 101 | golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= 102 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 103 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 104 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 105 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 106 | golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 107 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 108 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 109 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 110 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 111 | golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 112 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 113 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 114 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 115 | golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= 116 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 117 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 118 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 119 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 120 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 121 | golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 122 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 123 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 124 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 125 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 126 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 127 | google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= 128 | google.golang.org/grpc v1.19.0/go.mod 
h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 129 | google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= 130 | google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= 131 | google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 132 | google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= 133 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 134 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 135 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= 136 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 137 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 138 | google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 139 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 140 | google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 141 | google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= 142 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 143 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 144 | gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 145 | gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= 146 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 147 | gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 148 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 149 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 150 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 151 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 152 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= 153 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 154 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 155 | honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 156 | -------------------------------------------------------------------------------- /mario.go: -------------------------------------------------------------------------------- 1 | package mario 2 | 3 | import "github.com/markbates/pkger" 4 | 5 | // This file seems to be needed to make pkger work with the new 6 | // project layout. I think it may be related to 7 | // https://github.com/markbates/pkger/issues/86. 
8 | func init() { 9 | pkger.Include("/config") 10 | } 11 | -------------------------------------------------------------------------------- /pkg/client/elastic.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "github.com/aws/aws-sdk-go/aws/credentials" 7 | "github.com/aws/aws-sdk-go/aws/credentials/ec2rolecreds" 8 | "github.com/aws/aws-sdk-go/aws/ec2metadata" 9 | "github.com/aws/aws-sdk-go/aws/session" 10 | "github.com/markbates/pkger" 11 | "github.com/mitlibraries/mario/pkg/record" 12 | "github.com/olivere/elastic/v7" 13 | aws "github.com/olivere/elastic/v7/aws/v4" 14 | "io/ioutil" 15 | "net/http" 16 | "strings" 17 | ) 18 | 19 | // Primary alias 20 | const primary = "timdex-prod" 21 | 22 | // Indexer provides an interface for interacting with an index. 23 | type Indexer interface { 24 | Current(string) (string, error) 25 | Create(string) error 26 | Start() error 27 | Stop() error 28 | Add(record.Record, string, string) 29 | Promote(string) error 30 | Delete(string) error 31 | Reindex(string, string) (int64, error) 32 | Indexes() (elastic.CatIndicesResponse, error) 33 | } 34 | 35 | // ESClient wraps an olivere/elastic client. Create a new client with the 36 | // NewESClient function. 37 | type ESClient struct { 38 | client *elastic.Client 39 | bulker *elastic.BulkProcessor 40 | } 41 | 42 | // Current returns the name of the current index for the given source. A 43 | // current index is defined as one which is linked to the primary alias. An 44 | // error is returned if there is more than one matching index. An empty 45 | // string indicates there were no matching indexes. 46 | func (c ESClient) Current(source string) (string, error) { 47 | res, err := c.client.Aliases().Index(source + "*").Do(context.Background()) 48 | if err != nil { 49 | return "", err 50 | } 51 | aliases := res.IndicesByAlias(primary) 52 | if len(aliases) == 0 { 53 | return "", nil 54 | } else if len(aliases) > 1 { 55 | return "", errors.New("Could not determine current index") 56 | } else { 57 | return aliases[0], nil 58 | } 59 | } 60 | 61 | // Create the new index if it does not exist. 62 | func (c ESClient) Create(index string) error { 63 | exists, err := c.client.IndexExists(index).Do(context.Background()) 64 | if err != nil { 65 | return err 66 | } 67 | if exists { 68 | return nil 69 | } 70 | file, err := pkger.Open("/config/es_record_mappings.json") 71 | if err != nil { 72 | return err 73 | } 74 | mappings, err := ioutil.ReadAll(file) 75 | if err != nil { 76 | return err 77 | } 78 | _, err = c.client. 79 | CreateIndex(index). 80 | Body(string(mappings)). 81 | Do(context.Background()) 82 | return err 83 | } 84 | 85 | // Start the bulk processor. 86 | func (c *ESClient) Start() error { 87 | bulker, err := c.client. 88 | BulkProcessor(). 89 | Name("BulkProcessor"). 90 | Workers(2). 91 | Do(context.Background()) 92 | c.bulker = bulker 93 | return err 94 | } 95 | 96 | // Stop the bulk processor. 97 | func (c *ESClient) Stop() error { 98 | return c.bulker.Stop() 99 | } 100 | 101 | // Add a record using a bulk processor. 102 | func (c *ESClient) Add(record record.Record, index string, rtype string) { 103 | d := elastic.NewBulkIndexRequest(). 104 | Index(index). 105 | Id(record.TimdexRecordId). 106 | Doc(record) 107 | c.bulker.Add(d) 108 | } 109 | 110 | // Promote will add the given index to the primary alias. 
If there is an 111 | // existing index with the same prefix as the promoted index and linked to the 112 | // primary alias, it will be removed from the alias. This action is atomic. 113 | func (c ESClient) Promote(index string) error { 114 | svc := c.client.Alias().Add(index, primary) 115 | prefix := strings.Split(index, "-")[0] 116 | current, err := c.Current(prefix) 117 | if err != nil { 118 | return err 119 | } 120 | if current != "" && current != index { 121 | svc.Remove(current, primary) 122 | } 123 | _, err = svc.Do(context.Background()) 124 | return err 125 | } 126 | 127 | // Delete an index. 128 | func (c ESClient) Delete(index string) error { 129 | _, err := c.client.DeleteIndex(index).Do(context.Background()) 130 | return err 131 | } 132 | 133 | // Indexes returns a list of indexes in a cluster. 134 | func (c ESClient) Indexes() (elastic.CatIndicesResponse, error) { 135 | return c.client. 136 | CatIndices(). 137 | Columns("idx", "dc", "h", "s", "id", "ss"). 138 | Do(context.Background()) 139 | } 140 | 141 | // Aliases returns a list of aliases in a cluster. 142 | func (c ESClient) Aliases() (elastic.CatAliasesResponse, error) { 143 | return c.client.CatAliases().Do(context.Background()) 144 | } 145 | 146 | // Ping the URL for basic information about the cluster. 147 | func (c ESClient) Ping(url string) (*elastic.PingResult, error) { 148 | res, _, err := c.client.Ping(url).Do(context.Background()) 149 | return res, err 150 | } 151 | 152 | // Reindex the source index to the destination index. Returns the number 153 | // of documents reindexed. 154 | func (c ESClient) Reindex(source string, dest string) (int64, error) { 155 | resp, err := c.client. 156 | Reindex(). 157 | SourceIndex(source). 158 | DestinationIndex(dest). 159 | Do(context.Background()) 160 | if err != nil { 161 | return 0, err 162 | } 163 | return resp.Total, nil 164 | } 165 | 166 | // NewESClient creates a new OpenSearch client. 167 | func NewESClient(url string, v4 bool) (*ESClient, error) { 168 | var client *http.Client 169 | if v4 { 170 | sess := session.Must(session.NewSession()) 171 | creds := credentials.NewChainCredentials([]credentials.Provider{ 172 | &credentials.EnvProvider{}, 173 | &credentials.SharedCredentialsProvider{}, 174 | &ec2rolecreds.EC2RoleProvider{ 175 | Client: ec2metadata.New(sess), 176 | }, 177 | }) 178 | client = aws.NewV4SigningClient(creds, "us-east-1") 179 | } else { 180 | client = http.DefaultClient 181 | } 182 | es, err := elastic.NewClient( 183 | elastic.SetURL(url), 184 | elastic.SetSniff(false), 185 | elastic.SetHealthcheck(false), 186 | elastic.SetHttpClient(client), 187 | ) 188 | return &ESClient{client: es}, err 189 | } 190 | -------------------------------------------------------------------------------- /pkg/client/s3.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/aws/aws-sdk-go/aws" 7 | "github.com/aws/aws-sdk-go/aws/session" 8 | "github.com/aws/aws-sdk-go/service/s3" 9 | ) 10 | 11 | // GetS3Obj returns an io.ReadCloser for an S3 object. 
12 | func GetS3Obj(bucket string, key string) (io.ReadCloser, error) { 13 | sess, err := session.NewSession(&aws.Config{ 14 | Region: aws.String("us-east-1")}, 15 | ) 16 | 17 | if err != nil { 18 | return nil, err 19 | } 20 | 21 | svc := s3.New(sess) 22 | 23 | input := &s3.GetObjectInput{ 24 | Bucket: aws.String(bucket), 25 | Key: aws.String(key), 26 | } 27 | 28 | result, err := svc.GetObject(input) 29 | if err != nil { 30 | return nil, err 31 | } 32 | 33 | return result.Body, err 34 | 35 | } 36 | -------------------------------------------------------------------------------- /pkg/consumer/consumers.go: -------------------------------------------------------------------------------- 1 | package consumer 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io" 7 | "log" 8 | 9 | "github.com/mitlibraries/mario/pkg/client" 10 | "github.com/mitlibraries/mario/pkg/record" 11 | ) 12 | 13 | //ESConsumer adds Records to OpenSearch. 14 | type ESConsumer struct { 15 | Index string 16 | RType string 17 | Client client.Indexer 18 | } 19 | 20 | //Consume the records. 21 | func (es *ESConsumer) Consume(in <-chan record.Record) <-chan bool { 22 | out := make(chan bool) 23 | go func() { 24 | for r := range in { 25 | es.Client.Add(r, es.Index, es.RType) 26 | } 27 | close(out) 28 | }() 29 | return out 30 | } 31 | 32 | //JSONConsumer outputs Records as JSON. The Records will be written 33 | //to JSONConsumer.out. 34 | type JSONConsumer struct { 35 | Out io.Writer 36 | } 37 | 38 | //Consume the records. 39 | func (js *JSONConsumer) Consume(in <-chan record.Record) <-chan bool { 40 | out := make(chan bool) 41 | go func() { 42 | fmt.Fprintln(js.Out, "[") 43 | var i int 44 | for r := range in { 45 | b, err := json.MarshalIndent(r, "", " ") 46 | if err != nil { 47 | log.Println(err) 48 | } 49 | if i != 0 { 50 | fmt.Fprintln(js.Out, ",") 51 | } 52 | fmt.Fprintln(js.Out, string(b)) 53 | i++ 54 | } 55 | fmt.Fprintln(js.Out, "]") 56 | close(out) 57 | }() 58 | return out 59 | } 60 | 61 | //TitleConsumer just outputs the title of Records. The titles will be 62 | //written to TitleConsumer.out. 63 | type TitleConsumer struct { 64 | Out io.Writer 65 | } 66 | 67 | //Consume the records. 68 | func (t *TitleConsumer) Consume(in <-chan record.Record) <-chan bool { 69 | out := make(chan bool) 70 | go func() { 71 | for r := range in { 72 | fmt.Fprintln(t.Out, r.Title) 73 | } 74 | close(out) 75 | }() 76 | return out 77 | } 78 | 79 | //SilentConsumer is useful for debugging sometimes 80 | type SilentConsumer struct { 81 | Out io.Writer 82 | } 83 | 84 | //Consume the records and close the channel when done. No processing is done. 
85 | func (s *SilentConsumer) Consume(in <-chan record.Record) <-chan bool { 86 | out := make(chan bool) 87 | go func() { 88 | for range in { 89 | continue 90 | } 91 | close(out) 92 | }() 93 | return out 94 | } 95 | -------------------------------------------------------------------------------- /pkg/consumer/consumers_test.go: -------------------------------------------------------------------------------- 1 | package consumer 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "github.com/mitlibraries/mario/pkg/record" 7 | "strings" 8 | "testing" 9 | ) 10 | 11 | func TestTitleConsumerConsume(t *testing.T) { 12 | var b bytes.Buffer 13 | in := make(chan record.Record) 14 | c := TitleConsumer{Out: &b} 15 | out := c.Consume(in) 16 | in <- record.Record{Title: "Hatsopoulos Microfluids"} 17 | close(in) 18 | <-out 19 | s := strings.TrimSpace(b.String()) 20 | if s != "Hatsopoulos Microfluids" { 21 | t.Error("Expected match, got", s) 22 | } 23 | } 24 | 25 | func TestTitleJsonConsume(t *testing.T) { 26 | var b bytes.Buffer 27 | in := make(chan record.Record) 28 | c := JSONConsumer{Out: &b} 29 | out := c.Consume(in) 30 | in <- record.Record{Title: "Hatsopoulos Microfluids"} 31 | close(in) 32 | <-out 33 | 34 | var records []*record.Record 35 | json.NewDecoder(&b).Decode(&records) 36 | 37 | if records[0].Title != "Hatsopoulos Microfluids" { 38 | t.Error("Expected match, got", records[0].Title) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /pkg/generator/jsonrecord.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/mitlibraries/mario/pkg/record" 6 | "io" 7 | "log" 8 | ) 9 | 10 | type jsonparser struct { 11 | file io.Reader 12 | } 13 | 14 | //JSONGenerator parses JSON records. 15 | type JSONGenerator struct { 16 | File io.Reader 17 | } 18 | 19 | func (j *jsonparser) parse(out chan record.Record) { 20 | decoder := json.NewDecoder(j.file) 21 | 22 | // read open bracket 23 | _, err := decoder.Token() 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | 28 | for decoder.More() { 29 | var r record.Record 30 | err = decoder.Decode(&r) 31 | if err != nil { 32 | log.Fatal(err) 33 | } 34 | out <- r 35 | } 36 | 37 | // read closing bracket 38 | _, err = decoder.Token() 39 | if err != nil { 40 | log.Fatal(err) 41 | } 42 | 43 | close(out) 44 | } 45 | 46 | //Generate creates a channel of Records. 
47 | func (j *JSONGenerator) Generate() <-chan record.Record { 48 | out := make(chan record.Record) 49 | p := jsonparser{file: j.File} 50 | go p.parse(out) 51 | return out 52 | } 53 | -------------------------------------------------------------------------------- /pkg/generator/jsonrecord_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "github.com/mitlibraries/mario/pkg/record" 5 | "os" 6 | "testing" 7 | ) 8 | 9 | func TestJsonParser(t *testing.T) { 10 | jsonfile, err := os.Open("../../fixtures/timdex_record_samples.json") 11 | if err != nil { 12 | t.Error(err) 13 | } 14 | 15 | out := make(chan record.Record) 16 | 17 | p := jsonparser{file: jsonfile} 18 | go p.parse(out) 19 | 20 | var chanLength int 21 | for range out { 22 | chanLength++ 23 | } 24 | 25 | if chanLength != 6 { 26 | t.Error("Expected match, got", chanLength) 27 | } 28 | } 29 | 30 | func TestJsonProcess(t *testing.T) { 31 | jsonfile, err := os.Open("../../fixtures/timdex_record_samples.json") 32 | if err != nil { 33 | t.Error(err) 34 | } 35 | 36 | var i int 37 | p := JSONGenerator{File: jsonfile} 38 | for range p.Generate() { 39 | i++ 40 | } 41 | 42 | if i != 6 { 43 | t.Error("Expected match, got", i) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /pkg/ingester/ingester.go: -------------------------------------------------------------------------------- 1 | package ingester 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io" 7 | "log" 8 | "net/url" 9 | "os" 10 | "strconv" 11 | "time" 12 | 13 | "github.com/mitlibraries/mario/pkg/client" 14 | "github.com/mitlibraries/mario/pkg/consumer" 15 | "github.com/mitlibraries/mario/pkg/generator" 16 | "github.com/mitlibraries/mario/pkg/pipeline" 17 | "github.com/mitlibraries/mario/pkg/transformer" 18 | ) 19 | 20 | // Config is a structure for passing a set of configuration parameters to 21 | // an Ingester. 22 | type Config struct { 23 | Filename string 24 | Source string 25 | Consumer string 26 | Index string 27 | NewIndex bool 28 | Promote bool 29 | } 30 | 31 | // NewStream returns an io.ReadCloser from a path string. The path can be 32 | // either a local directory path or a URL for an S3 object. 33 | func NewStream(filename string) (io.ReadCloser, error) { 34 | parts, err := url.Parse(filename) 35 | if err != nil { 36 | return nil, err 37 | } 38 | if parts.Scheme == "s3" { 39 | return client.GetS3Obj(parts.Host, parts.Path) 40 | } 41 | return os.Open(filename) 42 | } 43 | 44 | // Ingester does the work of ingesting a data stream. 45 | type Ingester struct { 46 | Stream io.ReadCloser 47 | config Config 48 | generator pipeline.Generator 49 | consumer pipeline.Consumer 50 | Client client.Indexer 51 | } 52 | 53 | // Configure an Ingester. This should be called before Ingest. 54 | func (i *Ingester) Configure(config Config) error { 55 | var err error 56 | // Configure generator 57 | i.generator = &generator.JSONGenerator{File: i.Stream} 58 | 59 | // Configure consumer 60 | if config.Consumer == "es" { 61 | if config.NewIndex == true { 62 | now := time.Now().UTC() 63 | config.Index = fmt.Sprintf("%s-%s", config.Source, now.Format("2006-01-02t15-04-05z")) 64 | } else { 65 | current, err := i.Client.Current(config.Source) 66 | if err != nil || current == "" { 67 | e := fmt.Errorf("No existing production index for source '%s'. 
Either promote an existing %s index or add the 'new' flag to the ingest command to create a new index.", config.Source, config.Source) 68 | return e 69 | } 70 | log.Printf("Ingesting into current production index: %s", current) 71 | config.Index = current 72 | config.Promote = false 73 | } 74 | 75 | err = i.Client.Create(config.Index) 76 | if err != nil { 77 | return err 78 | } 79 | i.consumer = &consumer.ESConsumer{ 80 | Index: config.Index, 81 | RType: "Record", 82 | Client: i.Client, 83 | } 84 | 85 | log.Printf("Configured OpenSearch consumer using source: %s, index: %s, and promote: %s", config.Source, config.Index, strconv.FormatBool(config.Promote)) 86 | 87 | } else if config.Consumer == "json" { 88 | i.consumer = &consumer.JSONConsumer{Out: os.Stdout} 89 | } else if config.Consumer == "title" { 90 | i.consumer = &consumer.TitleConsumer{Out: os.Stdout} 91 | } else if config.Consumer == "silent" { 92 | i.consumer = &consumer.SilentConsumer{Out: os.Stdout} 93 | } else { 94 | return errors.New("Unknown consumer") 95 | } 96 | 97 | i.config = config 98 | return nil 99 | } 100 | 101 | // Ingest the configured data stream. The Ingester should have been 102 | // configured before calling this method. It will return the number of 103 | // ingested documents. 104 | func (i *Ingester) Ingest() (int, error) { 105 | var err error 106 | p := pipeline.Pipeline{ 107 | Generator: i.generator, 108 | Consumer: i.consumer, 109 | } 110 | ctr := &transformer.Counter{} 111 | p.Next(ctr) 112 | if i.config.Consumer == "es" { 113 | err = i.Client.Start() 114 | if err != nil { 115 | return 0, err 116 | } 117 | defer i.Client.Stop() 118 | } 119 | out := p.Run() 120 | <-out 121 | if i.config.Promote { 122 | log.Printf("Automatic promotion is happening") 123 | err = i.Client.Promote(i.config.Index) 124 | } 125 | return ctr.Count, err 126 | } 127 | -------------------------------------------------------------------------------- /pkg/pipeline/pipeline.go: -------------------------------------------------------------------------------- 1 | package pipeline 2 | 3 | import "github.com/mitlibraries/mario/pkg/record" 4 | 5 | //A Pipeline builds and runs a data pipeline for processing Records. A 6 | //Pipeline consists of exactly one Generator, one Consumer and zero or 7 | //more Transformers. 8 | type Pipeline struct { 9 | Generator Generator 10 | Transformers []Transformer 11 | Consumer Consumer 12 | } 13 | 14 | //The Transformer interface can be used to create an intermediate stage 15 | //in a Pipeline. 16 | type Transformer interface { 17 | Transform(<-chan record.Record) <-chan record.Record 18 | } 19 | 20 | //The Generator interface should be used to create the initial stage of 21 | //a Pipeline. 22 | type Generator interface { 23 | Generate() <-chan record.Record 24 | } 25 | 26 | //The Consumer interface should be used to create the last stage of a 27 | //Pipeline. 28 | type Consumer interface { 29 | Consume(<-chan record.Record) <-chan bool 30 | } 31 | 32 | //Next adds one or more Transformers to the Pipeline. Next can be called 33 | //multiple times. All Transformers will be run in the order added. 34 | func (p *Pipeline) Next(t ...Transformer) { 35 | p.Transformers = append(p.Transformers, t...) 36 | } 37 | 38 | //Run the Pipeline. Be sure to read from the empty channel that's returned 39 | //as that signals the Pipeline has finished running.
40 | func (p *Pipeline) Run() <-chan bool { 41 | out := p.Generator.Generate() 42 | for _, t := range p.Transformers { 43 | out = t.Transform(out) 44 | } 45 | return p.Consumer.Consume(out) 46 | } 47 | -------------------------------------------------------------------------------- /pkg/pipeline/pipeline_test.go: -------------------------------------------------------------------------------- 1 | package pipeline 2 | 3 | import ( 4 | "github.com/mitlibraries/mario/pkg/record" 5 | "testing" 6 | ) 7 | 8 | type Fooer struct{} 9 | 10 | func (f *Fooer) Transform(in <-chan record.Record) <-chan record.Record { 11 | out := make(chan record.Record) 12 | go func() { 13 | for r := range in { 14 | r.Title = r.Title + "FOO" 15 | out <- r 16 | } 17 | close(out) 18 | }() 19 | return out 20 | } 21 | 22 | type RecordGenerator struct{} 23 | 24 | func (g *RecordGenerator) Generate() <-chan record.Record { 25 | out := make(chan record.Record) 26 | go func() { 27 | out <- record.Record{Title: "Bar"} 28 | out <- record.Record{Title: "Gaz"} 29 | close(out) 30 | }() 31 | return out 32 | } 33 | 34 | type RecordConsumer struct { 35 | records []record.Record 36 | } 37 | 38 | func (c *RecordConsumer) Consume(in <-chan record.Record) <-chan bool { 39 | out := make(chan bool) 40 | go func() { 41 | for r := range in { 42 | c.records = append(c.records, r) 43 | } 44 | close(out) 45 | }() 46 | return out 47 | } 48 | 49 | func TestRun(t *testing.T) { 50 | c := &RecordConsumer{} 51 | p := Pipeline{ 52 | Generator: &RecordGenerator{}, 53 | Consumer: c, 54 | } 55 | p.Next(&Fooer{}) 56 | out := p.Run() 57 | <-out 58 | if c.records[0].Title != "BarFOO" { 59 | t.Error("Expected match, got", c.records[0].Title) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /pkg/record/record.go: -------------------------------------------------------------------------------- 1 | package record 2 | 3 | // Record struct stores our internal mappings of data and is used when 4 | // mapping various external data sources before sending to OpenSearch 5 | type Record struct { 6 | AlternateTitles []*AlternateTitle `json:"alternate_titles,omitempty"` 7 | CallNumbers []string `json:"call_numbers,omitempty"` 8 | Citation string `json:"citation,omitempty"` 9 | ContentType []string `json:"content_type,omitempty"` 10 | Contents []string `json:"contents,omitempty"` 11 | Contributors []*Contributor `json:"contributors,omitempty"` 12 | Dates []*Date `json:"dates,omitempty"` 13 | Edition string `json:"edition,omitempty"` 14 | FileFormats []string `json:"file_formats,omitempty"` 15 | Format string `json:"format,omitempty"` 16 | FundingInformation []*Funding `json:"funding_information,omitempty"` 17 | Holdings []*Holding `json:"holdings,omitempty"` 18 | Identifiers []*Identifier `json:"identifiers,omitempty"` 19 | Languages []string `json:"languages,omitempty"` 20 | Links []Link `json:"links,omitempty"` 21 | LiteraryForm string `json:"literary_form,omitempty"` 22 | Locations []*Location `json:"locations,omitempty"` 23 | Notes []*Note `json:"notes,omitempty"` 24 | Numbering string `json:"numbering,omitempty"` 25 | PhysicalDescription string `json:"physical_description,omitempty"` 26 | PublicationFrequency []string `json:"publication_frequency,omitempty"` 27 | PublicationInformation []string `json:"publication_information,omitempty"` 28 | RelatedItems []*RelatedItem `json:"related_items,omitempty"` 29 | Rights []*Right `json:"rights,omitempty"` 30 | Source string `json:"source"` 31 | SourceLink string 
`json:"source_link"` 32 | Subjects []*Subject `json:"subjects,omitempty"` 33 | Summary []string `json:"summary,omitempty"` 34 | TimdexRecordId string `json:"timdex_record_id"` 35 | Title string `json:"title"` 36 | } 37 | 38 | // AlternateTitle object 39 | type AlternateTitle struct { 40 | Kind string `json:"kind,omitempty"` 41 | Value string `json:"value"` 42 | } 43 | 44 | // Contributor object 45 | type Contributor struct { 46 | Affiliation []string `json:"affiliation,omitempty"` 47 | Kind string `json:"kind,omitempty"` 48 | Identifier []string `json:"identifier,omitempty"` 49 | MitAffiliated bool `json:"mit_affiliated,omitempty"` 50 | Value string `json:"value"` 51 | } 52 | 53 | // Date object 54 | type Date struct { 55 | Kind string `json:"kind,omitempty"` 56 | Note string `json:"note,omitempty"` 57 | Range *Range `json:"range,omitempty"` 58 | Value string `json:"value,omitempty"` 59 | } 60 | 61 | // Funding object 62 | type Funding struct { 63 | AwardNumber string `json:"award_number,omitempty"` 64 | AwardUri string `json:"award_uri,omitempty"` 65 | FunderIdentifier string `json:"funder_identifier,omitempty"` 66 | FunderIdentifierType string `json:"funder_identifier_type,omitempty"` 67 | FunderName string `json:"funder_name,omitempty"` 68 | } 69 | 70 | // Holding object 71 | type Holding struct { 72 | CallNumber string `json:"call_number,omitempty"` 73 | Collection string `json:"collection,omitempty"` 74 | Format string `json:"format,omitempty"` 75 | Location string `json:"location,omitempty"` 76 | Note string `json:"notes,omitempty"` 77 | Summary string `json:"summary,omitempty"` 78 | } 79 | 80 | // Identifier object 81 | type Identifier struct { 82 | Kind string `json:"kind,omitempty"` 83 | Value string `json:"value"` 84 | } 85 | 86 | // Link object 87 | type Link struct { 88 | Kind string `json:"kind,omitempty"` 89 | Restrictions string `json:"restrictions,omitempty"` 90 | Text string `json:"text,omitempty"` 91 | Url string `json:"url"` 92 | } 93 | 94 | // Location object 95 | type Location struct { 96 | Geopoint []float32 `json:"geopoint,omitempty"` 97 | Kind string `json:"kind,omitempty"` 98 | Value string `json:"value,omitempty"` 99 | } 100 | 101 | // Note object 102 | type Note struct { 103 | Kind string `json:"kind,omitempty"` 104 | Value []string `json:"value"` 105 | } 106 | 107 | // Range object 108 | type Range struct { 109 | Gt string `json:"gt,omitempty"` 110 | Gte string `json:"gte,omitempty"` 111 | Lt string `json:"lt,omitempty"` 112 | Lte string `json:"lte,omitempty"` 113 | } 114 | 115 | // RelatedItem object 116 | type RelatedItem struct { 117 | Description string `json:"description,omitempty"` 118 | ItemType string `json:"item_type,omitempty"` 119 | Relationship string `json:"relationship,omitempty"` 120 | Uri string `json:"uri,omitempty"` 121 | } 122 | 123 | // Right object 124 | type Right struct { 125 | Description string `json:"description,omitempty"` 126 | Kind string `json:"kind,omitempty"` 127 | Uri string `json:"uri,omitempty"` 128 | } 129 | 130 | // Subject object 131 | type Subject struct { 132 | Kind string `json:"kind,omitempty"` 133 | Value []string `json:"value"` 134 | } 135 | 136 | // Rule defines where the rules are in JSON 137 | type Rule struct { 138 | Label string `json:"label"` 139 | Array bool `json:"array"` 140 | Fields []*Field `json:"fields"` 141 | } 142 | 143 | // Field defines where the Fields within a Rule are in JSON 144 | type Field struct { 145 | Tag string `json:"tag"` 146 | Subfields string `json:"subfields"` 147 | Bytes string
`json:"bytes"` 148 | Kind string `json:"kind"` 149 | } 150 | 151 | // Parser defines an interface common to parsers 152 | type Parser interface { 153 | Parse(chan Record) 154 | } 155 | 156 | // Processor is an interface that allows converting from custom data into 157 | // our Record structure 158 | type Processor interface { 159 | Process() 160 | } 161 | 162 | var ingested int 163 | -------------------------------------------------------------------------------- /pkg/transformer/transformers.go: -------------------------------------------------------------------------------- 1 | package transformer 2 | 3 | import "github.com/mitlibraries/mario/pkg/record" 4 | 5 | //Counter transformer records the number of records handled. 6 | type Counter struct { 7 | Count int 8 | } 9 | 10 | //Transform counts the records. 11 | func (c *Counter) Transform(in <-chan record.Record) <-chan record.Record { 12 | out := make(chan record.Record) 13 | go func() { 14 | for r := range in { 15 | c.Count++ 16 | out <- r 17 | } 18 | close(out) 19 | }() 20 | return out 21 | } 22 | -------------------------------------------------------------------------------- /pkg/transformer/transformers_test.go: -------------------------------------------------------------------------------- 1 | package transformer 2 | 3 | import ( 4 | "github.com/mitlibraries/mario/pkg/record" 5 | "testing" 6 | ) 7 | 8 | func TestCounterTransform(t *testing.T) { 9 | in := make(chan record.Record, 2) 10 | in <- record.Record{Title: "Foo"} 11 | in <- record.Record{Title: "Bar"} 12 | close(in) 13 | c := Counter{} 14 | out := c.Transform(in) 15 | // Drain the output channel; it closes only after every record has been 16 | // counted, so the check below cannot race with the Transform goroutine. 17 | for range out { 18 | } 19 | if c.Count != 2 { 20 | t.Error("Expected match, got", c.Count) 21 | } 22 | } 23 | --------------------------------------------------------------------------------
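Taken together, the packages above form a small generate, transform, consume flow: a Generator emits Records on a channel, Transformers pass them along (counting them, in Counter's case), and a Consumer drains the channel and closes its done channel when finished. The sketch below is not a file in the repository; it is a minimal illustration of how those exported types can be wired together directly, and it assumes it is run from the repository root so that the fixtures path resolves.

package main

import (
	"log"
	"os"

	"github.com/mitlibraries/mario/pkg/consumer"
	"github.com/mitlibraries/mario/pkg/generator"
	"github.com/mitlibraries/mario/pkg/pipeline"
	"github.com/mitlibraries/mario/pkg/transformer"
)

func main() {
	// Open a local file of TIMDEX JSON records (path assumes the repo root).
	f, err := os.Open("fixtures/timdex_record_samples.json")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Wire the stages: JSONGenerator -> Counter -> TitleConsumer.
	ctr := &transformer.Counter{}
	p := pipeline.Pipeline{
		Generator: &generator.JSONGenerator{File: f},
		Consumer:  &consumer.TitleConsumer{Out: os.Stdout},
	}
	p.Next(ctr)

	// Run returns a channel that closes once the consumer has drained all
	// records, so a single receive blocks until the pipeline is done.
	<-p.Run()
	log.Printf("processed %d records", ctr.Count)
}

This is the same wiring that Ingester.Configure and Ingester.Ingest perform, with the consumer chosen by Config.Consumer; substituting an ESConsumer backed by an ESClient, and starting the client's bulk processor first, is what turns the same pipeline into an OpenSearch load.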