├── dockerfile
├── spotlight-compose.yml
├── nif-21.vm
├── spotlight.sh
└── README.md


/dockerfile:
--------------------------------------------------------------------------------
 1 | # Image that runs the DBpedia Spotlight REST service. The language model is
 2 | # not baked in: /bin/spotlight.sh downloads it on first start.
 3 | FROM openjdk:8-jre-alpine
 4 | 
 5 | # MAINTAINER is deprecated; LABEL is the supported way to record it.
 6 | LABEL maintainer="DBpedia Spotlight Team"
 7 | 
 8 | # Download location of the DBpedia Spotlight server jar.
 9 | ENV SPOTLIGHT=https://sourceforge.net/projects/spotlight-multilingual-docker/files/dbpedia-spotlight-1.1.jar
10 | 
11 | # Install the required packages in a single layer. --no-cache keeps the apk
12 | # index out of the image; --upgrade installs the newest available versions
13 | # (this replaces the separate "apk update" / "apk upgrade curl" steps).
14 | RUN apk add --no-cache --upgrade bash tshark curl
15 | 
16 | # Create the model and template folders and download the DBpedia Spotlight jar.
17 | RUN mkdir -p /opt/spotlight/models \
18 |              /opt/spotlight/src/main/resources/templates && \
19 |     wget -O /opt/spotlight/dbpedia-spotlight.jar "$SPOTLIGHT"
20 | 
21 | # Add the start script and the NIF 2.1 output template to the image.
22 | # COPY is preferred over ADD for plain local files.
23 | COPY spotlight.sh /bin/spotlight.sh
24 | COPY nif-21.vm /opt/spotlight/src/main/resources/templates/nif-21.vm
25 | RUN chmod +x /bin/spotlight.sh
26 | 
27 | EXPOSE 80
28 | 


--------------------------------------------------------------------------------
/spotlight-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3.5'
 2 | services:
 3 |   spotlight.en:
 4 |     image: dbpedia/dbpedia-spotlight
 5 |     container_name: dbpedia-spotlight.en
 6 |     volumes:
 7 |       - spotlight-model:/opt/spotlight/models
 8 |     restart: unless-stopped
 9 |     ports:
10 |       - "0.0.0.0:2222:80"
11 |     command: /bin/spotlight.sh en
12 | 
13 |   spotlight.de:
14 |     image: dbpedia/dbpedia-spotlight
15 |     container_name: dbpedia-spotlight.de
16 |     volumes:
17 |       - spotlight-model:/opt/spotlight/models
18 |     restart: unless-stopped
19 |     ports:
20 |       - "0.0.0.0:2223:80"
21 |     command: /bin/spotlight.sh de
22 | 
23 |   spotlight.pt:
24 |     image: dbpedia/dbpedia-spotlight
25 |     container_name: dbpedia-spotlight.pt
26 |     volumes:
27 |       - spotlight-model:/opt/spotlight/models
28 |     restart: unless-stopped
29 |     ports:
30 |       - "0.0.0.0:2224:80"
31 |     command: /bin/spotlight.sh pt
32 | 
33 | volumes:
34 | spotlight-model: 35 | external: true 36 | -------------------------------------------------------------------------------- /nif-21.vm: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "$contextJSON", 3 | "@graph" : [ 4 | #foreach( $bean in $beans) 5 | { 6 | #if($bean.nifType == "CONTEXT") 7 | "@id" : "$bean.referenceContextId", 8 | "@type" : [ "Context", "OffsetBasedString" ], 9 | "beginIndex" : $bean.context.beginIndex, 10 | "endIndex" : $bean.context.endIndex, 11 | "isString" : "$bean.mention.trim()" 12 | #end 13 | #if($bean.nifType == "ENTITY") 14 | "@id": "$bean.context.NIF21id", 15 | "@type": "Annotation", 16 | "taAnnotatorsRef" : "$bean.annotator", 17 | "taClassRef": [ #foreach ($type in $bean.types)"$type"#if( $foreach.hasNext ),#end#end], 18 | "taConfidence": $bean.score, 19 | "taIdentRef": "$bean.taIdentRef", 20 | "beginIndex": "$bean.beginIndex", 21 | "endIndex": "$bean.endIndex", 22 | "referenceContext": "$bean.referenceContextId"#if($bean.taClassRef), 23 | "taMsClassRef":[#foreach ($type in $bean.taClassRef)"$type"#if( $foreach.hasNext ),#end#end]#end 24 | #end}#if( $foreach.hasNext ) 25 | ,#end 26 | #end 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /spotlight.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | LANG=$1 4 | 5 | MODELFOLDER=/opt/spotlight 6 | cd $MODELFOLDER 7 | 8 | DIRECTORY=/opt/spotlight/models/$LANG 9 | echo "Selected language: $LANG" 10 | if [ -d "$DIRECTORY" ] 11 | then 12 | echo "/opt/spotlight/$LANG http://0.0.0.0:80/rest/" 13 | if [[ $LANG == "en" ]] 14 | then 15 | java -Dfile.encoding=UTF-8 -Xmx15G -jar /opt/spotlight/dbpedia-spotlight.jar /opt/spotlight/models/$LANG http://0.0.0.0:80/rest 16 | else 17 | java -Dfile.encoding=UTF-8 -Xmx10G -jar /opt/spotlight/dbpedia-spotlight.jar /opt/spotlight/models/$LANG http://0.0.0.0:80/rest 18 | fi 19 | 20 | else 21 | 
QUERY="PREFIX dataid: 22 | PREFIX dataid-cv: 23 | PREFIX dct: 24 | PREFIX dcat: 25 | 26 | SELECT DISTINCT ?file WHERE { 27 | ?dataset dataid:artifact . 28 | ?dataset dcat:distribution ?distribution . 29 | { 30 | ?distribution dct:hasVersion ?latestVersion 31 | { 32 | SELECT (?version as ?latestVersion) WHERE { 33 | ?dataset dataid:artifact . 34 | ?dataset dct:hasVersion ?version . 35 | } ORDER BY DESC (?version) LIMIT 1 36 | } 37 | ?distribution dataid-cv:lang '$LANG' . 38 | } 39 | ?distribution dcat:downloadURL ?file . 40 | }" 41 | 42 | RESULT=`curl --data-urlencode query="$QUERY" -H 'accept:text/tab-separated-values' https://databus.dbpedia.org/sparql | sed 's/"//g' | grep -v "^file$" | head -n 1` 43 | echo $RESULT 44 | curl -LO $RESULT 45 | tar -C /opt/spotlight/models -xvf spotlight-model_lang=$LANG.tar.gz 46 | rm spotlight-model_lang=$LANG.tar.gz 47 | echo "/opt/spotlight/models/$LANG http://0.0.0.0:80/rest/" 48 | if [[ $LANG == "en" ]] 49 | then 50 | java -Dfile.encoding=UTF-8 -Xmx15G -jar /opt/spotlight/dbpedia-spotlight.jar /opt/spotlight/models/$LANG http://0.0.0.0:80/rest 51 | else 52 | java -Dfile.encoding=UTF-8 -Xmx10G -jar /opt/spotlight/dbpedia-spotlight.jar /opt/spotlight/models/$LANG http://0.0.0.0:80/rest 53 | fi 54 | 55 | fi 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # What is DBpedia Spotlight? 2 | 3 | [DBpedia Spotlight](https://www.dbpedia-spotlight.org/) is a tool for automatically annotating mentions of DBpedia resources in text, providing a solution for linking unstructured information sources to the Linked Open Data cloud through DBpedia. 
4 | 5 | The dbpedia/dbpedia-spotlight is a docker image to run the DBpedia Spotlight service with the most recent language models, downloaded from the [DBpedia Databus repository](https://databus.dbpedia.org/dbpedia/spotlight/spotlight-model/), e.g., English (en), German (de), Italian (it), etc. 6 | 7 | # Available language models + quick-start command 8 | 9 | The following table shows the available language models and the command line to start the DBpedia Spotlight service. The `--mount source=spotlight-model...` option automatically creates the `spotlight-model` volume if it does not exist yet. 10 | 11 | To run more than one DBpedia Spotlight service, just change the port number (`-p 222X:80`); for example, `-p 2222:80` for the English language and `-p 2223:80` for the German language. 12 | 13 | | Language | Two-letter code | Approx. size of language model |Quick-start (command line) | 14 | | :------- | :------------: | :---------------------------: |:--------------------------- | 15 | | Catalan | ca | 161 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.ca --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh ca` | 16 | | Danish | da | 81 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.da --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh da` | 17 | | German | de | 1 GB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.de --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh de` | 18 | | English | en | 2 GB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.en --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh en` | 19 | | Spanish | es | 483 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.es --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh es` | 
20 | | Finnish | fi | 112 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.fi --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh fi` | 21 | | French | fr | 663 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.fr --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh fr` | 22 | | Hungarian | hu | 87 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.hu --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh hu` | 23 | | Italian | it | 577 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.it --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh it` | 24 | | Lithuanian | lt | 35 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.lt --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh lt` | 25 | | Dutch | nl | 304 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.nl --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh nl` | 26 | | Norwegian | no | 118 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.no --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh no` | 27 | | Portuguese | pt | 241 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.pt --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh pt` | 28 | | Romanian | ro | 63 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.ro --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh ro` | 29 | | Russian | ru | 138 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.ru --mount 
source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh ru` | 30 | | Swedish | sv | 197 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.sv --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh sv` | 31 | | Turkish | tr | 66 MB |`docker run -tid --restart unless-stopped --name dbpedia-spotlight.tr --mount source=spotlight-model,target=/opt/spotlight -p 2222:80 dbpedia/dbpedia-spotlight spotlight.sh tr` | 32 | 33 | ## Test DBpedia Spotlight service 34 | 35 | Consider changing the port number if necessary. 36 | 37 | curl http://localhost:2222/rest/annotate \ 38 | --data-urlencode "text=President Obama called Wednesday on Congress to extend a tax break for students included in last year's economic stimulus package, arguing that the policy provides more generous assistance." \ 39 | --data "confidence=0.35" \ 40 | -H "Accept: text/turtle" 41 | 42 | The "Accept: text/turtle" header returns NIF output; replace it with "Accept: application/json" to get JSON output instead. 43 | 44 | ## Stop the Docker container 45 | 46 | The following commands can be used to stop and then remove the DBpedia Spotlight container: 47 | 48 | docker kill dbpedia-spotlight.[LANG] 49 | docker rm dbpedia-spotlight.[LANG] 50 | 51 | The `docker kill` command will stop the running container and the `docker rm` command will remove the container. The `dbpedia-spotlight.[LANG]` corresponds to the name given with the `--name` option of the docker run command. 52 | 53 | # Additional options to run DBpedia Spotlight 54 | 55 | ## Run a single DBpedia Spotlight service (bash command) 56 | 57 | This option can be used to persist the configuration used to run a DBpedia Spotlight service. It is recommended, but not restricted, to run a single DBpedia Spotlight service whose configuration differs from the default presented in the table above. 58 | 59 | 1. 
Save the following code into a bash file (for example, `english-spotlight.sh`) 60 | 61 | ``` 62 | LANG=en 63 | # Available languages: ca, da, de, en, es, fi, fr, hu, it, lt, nl, no, pt, ro, ru, sv, tr 64 | 65 | # Create a volume to persist models 66 | docker volume create spotlight-model 67 | 68 | #Run docker image 69 | docker run -tid \ 70 | --restart unless-stopped \ 71 | --name dbpedia-spotlight.$LANG \ 72 | --mount source=spotlight-model,target=/opt/spotlight \ 73 | -p 2222:80 \ 74 | dbpedia/dbpedia-spotlight \ 75 | spotlight.sh $LANG 76 | ``` 77 | 78 | 2. Grant execute permission to the file 79 | `chmod a+x english-spotlight.sh` 80 | 3. Execute the bash file 81 | `./english-spotlight.sh` 82 | 83 | ## Run one or more DBpedia Spotlight services (docker-compose) 84 | 85 | [Docker-compose](https://docs.docker.com/compose/#:~:text=Compose%20is%20a%20tool%20for,the%20services%20from%20your%20configuration) is a tool to run multiple containers as a single service. It uses the data serialization language YAML to define the configuration for one or multiple services. 86 | 87 | Docker-compose can be used to define and run more than one DBpedia Spotlight service. The following instructions explain how to create a YAML configuration file to run a single DBpedia Spotlight service. A YAML configuration file example for multiple DBpedia Spotlight services is available [here](https://raw.githubusercontent.com/dbpedia/spotlight-docker/multilingual/spotlight-compose.yml). 88 | 89 | 1. 
Create a Docker Compose file: 90 | 91 | The docker compose file (for example, `spotlight-compose.yml`) defines one or more DBpedia Spotlight services through the following configuration block: 92 | 93 | ``` 94 | version: '3.5' 95 | services: 96 | spotlight.[LANG]: 97 | image: [DOCKER-IMAGE] 98 | container_name: dbpedia-spotlight.[LANG] 99 | volumes: 100 | - spotlight-model:/opt/spotlight 101 | restart: unless-stopped 102 | ports: 103 | - "0.0.0.0:2222:80" 104 | command: /bin/spotlight.sh [LANG] 105 | 106 | volumes: 107 | spotlight-model: 108 | external: true 109 | ``` 110 | 111 | Where 112 | 113 | - `[LANG]`: is the two-letter code that selects the language model. The available language models are: ca, da, de, en, es, fi, fr, hu, it, lt, nl, no, pt, ro, ru, sv, tr. 114 | - `[DOCKER-IMAGE]`: the name of the docker image. 115 | 116 | 2. Run Docker Compose file 117 | 118 | docker-compose -f spotlight-compose.yml up -d 119 | 120 | For example, the following docker-compose configuration runs the DBpedia Spotlight German language model: 121 | 122 | ``` 123 | version: '3.5' 124 | services: 125 | spotlight.de: 126 | image: dbpedia/dbpedia-spotlight 127 | container_name: dbpedia-spotlight.de 128 | volumes: 129 | - spotlight-model:/opt/spotlight/models 130 | restart: unless-stopped 131 | ports: 132 | - "0.0.0.0:2222:80" 133 | command: /bin/spotlight.sh de 134 | 135 | volumes: 136 | spotlight-model: 137 | external: true 138 | 139 | ``` 140 | 141 | After creating the file, the command `docker-compose -f spotlight-compose.yml up -d` will start the service. 142 | 143 | ## Stop Docker Compose 144 | 145 | The following command can be used to stop the DBpedia Spotlight service: 146 | 147 | docker-compose -f spotlight-compose.yml stop 148 | 149 | The `stop` instruction will stop the running containers. The `spotlight-compose.yml` corresponds to the file name used to run the compose file. 
150 | 151 | # Monitor DBpedia Spotlight service(s) through Docker commands 152 | 153 | The following docker commands can be used to show information about a running DBpedia Spotlight container: 154 | 155 | - `docker logs dbpedia-spotlight.[LANG]` : Displays the log information for the corresponding service 156 | - `docker stats dbpedia-spotlight.[LANG]` : Shows the statistics (e.g., the amount of memory and CPU) for the corresponding service 157 | 158 | # Troubleshooting 159 | 160 | ## Memory problems 161 | 162 | The DBpedia Spotlight service copies the language model to memory for efficient processing. The amount of memory needed depends on the selected language model; for example, the English model requires >8GB of memory to run. 163 | 164 | Before running the DBpedia Spotlight service, it may be necessary to adjust the memory options in the Docker configuration first. For example, for Mac users, by default, Docker is set up to use only [2GB of memory](https://docs.docker.com/desktop/mac/#:~:text=Memory%3A%20By%20default%2C%20Docker%20Desktop%20is%20set%20to%20use%202%20GB%20runtime%20memory%2C%20allocated%20from%20the%20total%20available%20memory%20on%20your%20Mac.%20To%20increase%20the%20RAM%2C%20set%20this%20to%20a%20higher%20number.%20To%20decrease%20it%2C%20lower%20the%20number.). By contrast, Linux users do not have this limit; Docker can use all the available memory of the host system, which can lead to other kinds of problems such as [out of memory problems](https://docs.docker.com/config/containers/resource_constraints/#understand-the-risks-of-running-out-of-memory). 165 | 166 | # Supported Docker versions 167 | This image is officially supported on Docker version **`19.03.11`**. 168 | 169 | Please see the [Docker installation documentation](https://docs.docker.com/installation/) for details on how to upgrade your Docker daemon. 
170 | 171 | # Documentation 172 | 173 | Documentation for this image is stored in [GitHub repo](http://github.com/dbpedia-spotlight/dbpedia-spotlight/wiki). 174 | --------------------------------------------------------------------------------