├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── docker-compose-local.yml ├── docker-compose.yml ├── embed ├── Dockerfile ├── README.md ├── app │ ├── __init__.py │ ├── celery_factory.py │ ├── db_wrapper.py │ ├── exceptions.py │ ├── helper.py │ ├── iiif_manifest_factory.py │ ├── ingest.py │ ├── models.py │ ├── static │ │ ├── css │ │ │ ├── fonts │ │ │ │ ├── Raleway-Bold.ttf │ │ │ │ ├── Raleway-ExtraBold.ttf │ │ │ │ ├── Raleway-Regular.ttf │ │ │ │ ├── Ubuntu-B.ttf │ │ │ │ ├── Ubuntu-L.ttf │ │ │ │ └── Ubuntu-R.ttf │ │ │ ├── viewer.css │ │ │ └── viewer.css.map │ │ ├── img │ │ │ ├── cc.png │ │ │ ├── close.png │ │ │ ├── close_dark.png │ │ │ ├── crop.png │ │ │ ├── embed.png │ │ │ ├── embed_small.png │ │ │ ├── logo.png │ │ │ ├── metadata.png │ │ │ ├── next_grouphover.png │ │ │ ├── next_hover.png │ │ │ ├── next_pressed.png │ │ │ ├── next_rest.png │ │ │ ├── pd.png │ │ │ ├── previous_grouphover.png │ │ │ ├── previous_hover.png │ │ │ ├── previous_pressed.png │ │ │ ├── previous_rest.png │ │ │ ├── share_small.png │ │ │ ├── zoom-in.png │ │ │ └── zoom-out.png │ │ └── js │ │ │ ├── openseadragon.min.js │ │ │ ├── openseadragon.min.js.map │ │ │ ├── osdregionselect.js │ │ │ └── viewer.js │ ├── task_queue.py │ ├── templates │ │ ├── iframe_openseadragon_inline.html │ │ ├── index.html │ │ └── oembed_xml.html │ └── views.py ├── db_sql_create.py ├── requirements.txt ├── run.py ├── supervisord.conf └── test.py ├── iiif ├── README.md └── s3fs_delete_cache.sh ├── ingest ├── Dockerfile ├── README.md ├── docker-compose-ingest.yml └── requirements.txt ├── nginx ├── README.md └── embed.conf └── redis ├── README.md └── redis.conf /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | venv/ 3 | *.DS_Store 4 | data/ 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 
4 | install: "pip install -r embed/requirements.txt" 5 | script: 6 | - cd embed 7 | - python test.py 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | European Union Public Licence 3 | ============================= 4 | V. 1.1 5 | 6 | EUPL © the European Community 2007 7 | 8 | 9 | This European Union Public Licence (the “EUPL”) applies to the Work or Software (as defined below) which is provided under the terms of this Licence. Any use of the Work, other than as authorised under this Licence is prohibited (to the extent such use is covered by a right of the copyright holder of the Work). 10 | 11 | The Original Work is provided under the terms of this Licence when the Licensor (as defined below) has placed the following notice immediately following the copyright notice for the Original Work: 12 | 13 | Licensed under the EUPL V.1.1 14 | 15 | or has expressed by any other mean his willingness to license under the EUPL. 16 | 17 | 18 | 1. Definitions 19 | 20 | In this Licence, the following terms have the following meaning: 21 | 22 | - The Licence: this Licence. 23 | 24 | - The Original Work or the Software: the software distributed and/or communicated by the Licensor under this Licence, available as Source Code and also as Executable Code as the case may be. 25 | 26 | - Derivative Works: the works or software that could be created by the Licensee, based upon the Original Work or modifications thereof. This Licence does not define the extent of modification or dependence on the Original Work required in order to classify a work as a Derivative Work; this extent is determined by copyright law applicable in the country mentioned in Article 15. 27 | 28 | - The Work: the Original Work and/or its Derivative Works. 29 | 30 | - The Source Code: the human-readable form of the Work which is the most convenient for people to study and modify. 
31 | 32 | - The Executable Code: any code which has generally been compiled and which is meant to be interpreted by a computer as a program. 33 | 34 | - The Licensor: the natural or legal person that distributes and/or communicates the Work under the Licence. 35 | 36 | - Contributor(s): any natural or legal person who modifies the Work under the Licence, or otherwise contributes to the creation of a Derivative Work. 37 | 38 | - The Licensee or “You”: any natural or legal person who makes any usage of the Software under the terms of the Licence. 39 | 40 | - Distribution and/or Communication: any act of selling, giving, lending, renting, distributing, communicating, transmitting, or otherwise making available, on-line or off-line, copies of the Work or providing access to its essential functionalities at the disposal of any other natural or legal person. 41 | 42 | 43 | 44 | 2. Scope of the rights granted by the Licence 45 | 46 | The Licensor hereby grants You a world-wide, royalty-free, non-exclusive, sub-licensable licence to do the following, for the duration of copyright vested in the Original Work: 47 | 48 | - use the Work in any circumstance and for all usage, 49 | - reproduce the Work, 50 | - modify the Original Work, and make Derivative Works based upon the Work, 51 | - communicate to the public, including the right to make available or display the Work or copies thereof to the public and perform publicly, as the case may be, the Work, 52 | - distribute the Work or copies thereof, 53 | - lend and rent the Work or copies thereof, 54 | - sub-license rights in the Work or copies thereof. 55 | 56 | Those rights can be exercised on any media, supports and formats, whether now known or later invented, as far as the applicable law permits so. 57 | 58 | In the countries where moral rights apply, the Licensor waives his right to exercise his moral right to the extent allowed by law in order to make effective the licence of the economic rights here above listed. 
59 | 60 | The Licensor grants to the Licensee royalty-free, non exclusive usage rights to any patents held by the Licensor, to the extent necessary to make use of the rights granted on the Work under this Licence. 61 | 62 | 63 | 64 | 3. Communication of the Source Code 65 | 66 | The Licensor may provide the Work either in its Source Code form, or as Executable Code. If the Work is provided as Executable Code, the Licensor provides in addition a machine-readable copy of the Source Code of the Work along with each copy of the Work that the Licensor distributes or indicates, in a notice following the copyright notice attached to the Work, a repository where the Source Code is easily and freely accessible for as long as the Licensor continues to distribute and/or communicate the Work. 67 | 68 | 69 | 70 | 4. Limitations on copyright 71 | 72 | Nothing in this Licence is intended to deprive the Licensee of the benefits from any exception or limitation to the exclusive rights of the rights owners in the Original Work or Software, of the exhaustion of those rights or of other applicable limitations thereto. 73 | 74 | 75 | 76 | 5. Obligations of the Licensee 77 | 78 | The grant of the rights mentioned above is subject to some restrictions and obligations imposed on the Licensee. Those obligations are the following: 79 | 80 | Attribution right: the Licensee shall keep intact all copyright, patent or trademarks notices and all notices that refer to the Licence and to the disclaimer of warranties. The Licensee must include a copy of such notices and a copy of the Licence with every copy of the Work he/she distributes and/or communicates. The Licensee must cause any Derivative Work to carry prominent notices stating that the Work has been modified and the date of modification. 
81 | 82 | Copyleft clause: If the Licensee distributes and/or communicates copies of the Original Works or Derivative Works based upon the Original Work, this Distribution and/or Communication will be done under the terms of this Licence or of a later version of this Licence unless the Original Work is expressly distributed only under this version of the Licence. The Licensee (becoming Licensor) cannot offer or impose any additional terms or conditions on the Work or Derivative Work that alter or restrict the terms of the Licence. 83 | 84 | Compatibility clause: If the Licensee Distributes and/or Communicates Derivative Works or copies thereof based upon both the Original Work and another work licensed under a Compatible Licence, this Distribution and/or Communication can be done under the terms of this Compatible Licence. For the sake of this clause, “Compatible Licence” refers to the licences listed in the appendix attached to this Licence. Should the Licensee’s obligations under the Compatible Licence conflict with his/her obligations under this Licence, the obligations of the Compatible Licence shall prevail. 85 | 86 | Provision of Source Code: When distributing and/or communicating copies of the Work, the Licensee will provide a machine-readable copy of the Source Code or indicate a repository where this Source will be easily and freely available for as long as the Licensee continues to distribute and/or communicate the Work. 87 | 88 | Legal Protection: This Licence does not grant permission to use the trade names, trademarks, service marks, or names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the copyright notice. 89 | 90 | 91 | 92 | 6. Chain of Authorship 93 | 94 | The original Licensor warrants that the copyright in the Original Work granted hereunder is owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence. 
95 | 96 | Each Contributor warrants that the copyright in the modifications he/she brings to the Work are owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence. 97 | 98 | Each time You accept the Licence, the original Licensor and subsequent Contributors grant You a licence to their contributions to the Work, under the terms of this Licence. 99 | 100 | 101 | 102 | 7. Disclaimer of Warranty 103 | 104 | The Work is a work in progress, which is continuously improved by numerous contributors. It is not a finished work and may therefore contain defects or “bugs” inherent to this type of software development. 105 | 106 | For the above reason, the Work is provided under the Licence on an “as is” basis and without warranties of any kind concerning the Work, including without limitation merchantability, fitness for a particular purpose, absence of defects or errors, accuracy, non-infringement of intellectual property rights other than copyright as stated in Article 6 of this Licence. 107 | 108 | This disclaimer of warranty is an essential part of the Licence and a condition for the grant of any rights to the Work. 109 | 110 | 111 | 112 | 8. Disclaimer of Liability 113 | 114 | Except in the cases of wilful misconduct or damages directly caused to natural persons, the Licensor will in no event be liable for any direct or indirect, material or moral, damages of any kind, arising out of the Licence or of the use of the Work, including without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, loss of data or any commercial damage, even if the Licensor has been advised of the possibility of such damage. However, the Licensor will be liable under statutory product liability laws as far such laws apply to the Work. 115 | 116 | 117 | 118 | 9. 
Additional agreements 119 | 120 | While distributing the Original Work or Derivative Works, You may choose to conclude an additional agreement to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or services consistent with this Licence. However, in accepting such obligations, You may act only on your own behalf and on your sole responsibility, not on behalf of the original Licensor or any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against such Contributor by the fact You have accepted any such warranty or additional liability. 121 | 122 | 123 | 124 | 10. Acceptance of the Licence 125 | 126 | The provisions of this Licence can be accepted by clicking on an icon “I agree” placed under the bottom of a window displaying the text of this Licence or by affirming consent in any other similar way, in accordance with the rules of applicable law. Clicking on that icon indicates your clear and irrevocable acceptance of this Licence and all of its terms and conditions. 127 | 128 | Similarly, you irrevocably accept this Licence and all of its terms and conditions by exercising any rights granted to You by Article 2 of this Licence, such as the use of the Work, the creation by You of a Derivative Work or the Distribution and/or Communication by You of the Work or copies thereof. 129 | 130 | 131 | 132 | 11. Information to the public 133 | 134 | In case of any Distribution and/or Communication of the Work by means of electronic communication by You (for example, by offering to download the Work from a remote location) the distribution channel or media (for example, a website) must at least provide to the public the information requested by the applicable law regarding the Licensor, the Licence and the way it may be accessible, concluded, stored and reproduced by the Licensee. 135 | 136 | 137 | 138 | 12. 
Termination of the Licence 139 | 140 | The Licence and the rights granted hereunder will terminate automatically upon any breach by the Licensee of the terms of the Licence. 141 | 142 | Such a termination will not terminate the licences of any person who has received the Work from the Licensee under the Licence, provided such persons remain in full compliance with the Licence. 143 | 144 | 145 | 146 | 13. Miscellaneous 147 | 148 | Without prejudice of Article 9 above, the Licence represents the complete agreement between the Parties as to the Work licensed hereunder. 149 | 150 | If any provision of the Licence is invalid or unenforceable under applicable law, this will not affect the validity or enforceability of the Licence as a whole. Such provision will be construed and/or reformed so as necessary to make it valid and enforceable. 151 | 152 | The European Commission may publish other linguistic versions and/or new versions of this Licence, so far this is required and reasonable, without reducing the scope of the rights granted by the Licence. New versions of the Licence will be published with a unique version number. 153 | 154 | All linguistic versions of this Licence, approved by the European Commission, have identical value. Parties can take advantage of the linguistic version of their choice. 155 | 156 | 157 | 158 | 14. Jurisdiction 159 | 160 | Any litigation resulting from the interpretation of this License, arising between the European Commission, as a Licensor, and any Licensee, will be subject to the jurisdiction of the Court of Justice of the European Communities, as laid down in article 238 of the Treaty establishing the European Community. 161 | 162 | Any litigation arising between Parties, other than the European Commission, and resulting from the interpretation of this License, will be subject to the exclusive jurisdiction of the competent court where the Licensor resides or conducts its primary business. 163 | 164 | 165 | 166 | 15. 
Applicable Law 167 | 168 | This Licence shall be governed by the law of the European Union country where the Licensor resides or has his registered office. 169 | 170 | This licence shall be governed by the Belgian law if: 171 | 172 | - a litigation arises between the European Commission, as a Licensor, and any Licensee; 173 | - the Licensor, other than the European Commission, has no residence or registered office inside a European Union country. 174 | 175 | 176 | === 177 | 178 | 179 | Appendix 180 | 181 | “Compatible Licences” according to article 5 EUPL are: 182 | 183 | - GNU General Public License (GNU GPL) v. 2 184 | 185 | - Open Software License (OSL) v. 2.1, v. 3.0 186 | 187 | - Common Public License v. 1.0 188 | 189 | - Eclipse Public License v. 1.0 190 | 191 | - Cecill v. 2.0 192 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Embedr: Image Embedding Service (IES) 2 | 3 | [![Build Status](https://travis-ci.org/klokantech/embedr.svg?branch=master)](https://travis-ci.org/klokantech/embedr/branches) 4 | 5 | Online service providing images hosted in Europeana portal via IIIF protocol (http://iiif.io). Developed in cooperation with Kennisland as part of Europeana Creative. Running on Amazon cloud infrastructure - publicly available at: http://embedr.eu/ 6 | 7 | Planned architecture is described in the [wiki](https://github.com/klokantech/hawk/wiki) 8 | 9 | ![hawk-aws-diagram-embedr](https://cloud.githubusercontent.com/assets/59284/11525883/d056901a-98d6-11e5-8317-9eebcdbe13da.jpeg) 10 | 11 | Embedding application can be run by docker-compose. 
This application consists of five docker containers: 12 | 13 | * redis - which serves as the local database 14 | * nginx - which serves as a proxy for the embedding application itself 15 | * embed - a Flask application whose functionality is specified here: https://github.com/klokantech/hawk/wiki/B.Embed 16 | * ingest - which runs a Celery instance for downloading, compressing and uploading images to S3 17 | * iiifserver - serving the JPEG2000 images (see http://www.iiifserver.com/) 18 | 19 | Everything can be configured from one place - from the file docker-compose.yml 20 | 21 | After the configuration (which is discussed in this file and in the README for every docker container) the whole embedding app can be run from this folder via the `docker-compose up` command 22 | 23 | ## Start the application on Amazon EC2 24 | 25 | 1. Clone this git repository 26 | 2. Configure `docker-compose.yml`, fill in your AWS credentials and set the S3 bucket and Cloud Search domain with the correct information 27 | 3. Run the `docker-compose up` command 28 | 29 | ## Start the application for local development 30 | 31 | This will run the embed application without Nginx, Cloud Search and S3 (data remains locally only). 32 | 33 | 1. Clone this git repository 34 | 2. Run the `docker-compose -f docker-compose-local.yml up` command 35 | 36 | The embed application will be available on `http://127.0.0.1:5000/` - you should see a welcome page with the EuropeanaCreative logo there. Cool! 37 | 38 | Now you can try to push in a file with the [Ingest API](https://github.com/klokantech/embedr/wiki/C.Ingest). 
For example: 39 | ``` 40 | curl -H "Content-Type: application/json" -X POST -d '[{"id":"SK-A-4118","url":["https://upload.wikimedia.org/wikipedia/commons/e/e9/Aelbert_Cuyp_-_Rivierlandschap_met_ruiters_-_Google_Art_Project.jpg"],"title":"River Landscape with Riders", "institution":"Rijksmuseum Amsterdam","license":"http://creativecommons.org/publicdomain/zero/1.0/deed.en"}]' http://127.0.0.1:5000/ingest 41 | ``` 42 | This should return the batch id - and you can check the status with a link like: `http://127.0.0.1:5000/ingest?batch_id=1` 43 | 44 | If the import is successful you should be able to access the viewer at a path like /id/: `http://127.0.0.1:5000/SK-A-4118` 45 | usable with OEmbed or in Mirador via the IIIF manifest link at /id/manifest.json. 46 | 47 | Note: In case you use Kitematic or docker installed in a virtual machine, you must change the variables SERVER_NAME and IIIF_SERVER in docker-compose-local.yml and replace 127.0.0.1 with the relevant IP address. The same IP address should be used in the URLs of the examples above. 
48 | 49 | 50 | -------------------------------------------------------------------------------- /docker-compose-local.yml: -------------------------------------------------------------------------------- 1 | redisdata: 2 | image: "tianon/true" 3 | volumes: 4 | - /data 5 | 6 | redis: 7 | image: redis 8 | ports: 9 | - "6379:6379" 10 | volumes_from: 11 | - redisdata 12 | volumes: 13 | - ./redis/redis.conf:/usr/local/etc/redis/redis.conf 14 | command: redis-server /usr/local/etc/redis/redis.conf --appendonly yes --no-appendfsync-on-rewrite yes 15 | 16 | iiifserver: 17 | image: klokantech/iiifserver-iipimage-jpeg2000 18 | ports: 19 | - 5001:80 20 | volumes: 21 | - ./data/jp2:/data/ 22 | 23 | embed: 24 | build: embed 25 | command: bash -c "/usr/bin/python /usr/local/src/hawk/db_sql_create.py && /usr/bin/python /usr/local/src/hawk/run.py" 26 | ports: 27 | - "5000:5000" 28 | links: 29 | - redis 30 | volumes: 31 | - ./embed:/usr/local/src/hawk/ 32 | - ./data/batch:/data/batch 33 | - ./data/sql:/data/sql 34 | environment: 35 | - SERVER_NAME=127.0.0.1:5000 36 | - IIIF_SERVER=127.0.0.1:5001 37 | - REDIS_SERVER=redis 38 | - REDIS_PORT_NUMBER=6379 39 | - DEBUG=True 40 | - HOST=0.0.0.0 41 | - PORT=5000 42 | - SQL_DB_URL=/data/sql/db.db 43 | 44 | ingest: 45 | build: ./ingest 46 | links: 47 | - redis 48 | volumes: 49 | - ./embed:/usr/local/src/hawk/ 50 | - ./data/tmp:/tmp 51 | - ./data/jp2:/data/jp2 52 | environment: 53 | - C_FORCE_ROOT=true 54 | - REDIS_SERVER=redis 55 | - REDIS_PORT_NUMBER=6379 56 | - MAX_TASK_REPEAT=2 57 | - URL_OPEN_TIMEOUT=5 58 | command: bash -c "celery --app=app.task_queue.task_queue worker -E -l info --workdir=/usr/local/src/hawk/ --autoscale=10,3 --hostname worker1.%h && celery --app=app.task_queue.task_queue worker -E -l info --workdir=/usr/local/src/hawk/ --autoscale=10,3 --hostname worker2.%h && celery --app=app.task_queue.task_queue worker -E -l info --workdir=/usr/local/src/hawk/ --autoscale=10,3 --hostname worker3.%h" 59 | 
-------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | redis: 2 | image: redis 3 | ports: 4 | - "6379:6379" 5 | volumes: 6 | - ./data/redis:/data 7 | - ./redis/redis.conf:/usr/local/etc/redis/redis.conf 8 | command: redis-server /usr/local/etc/redis/redis.conf --appendonly yes --no-appendfsync-on-rewrite yes 9 | 10 | nginx: 11 | image: klokantech/nginx 12 | ports: 13 | - "80:80" 14 | links: 15 | - embed 16 | volumes: 17 | - nginx/:/etc/nginx/conf.d/ 18 | - .htpasswd:/etc/nginx/.htpasswd 19 | 20 | embed: 21 | build: ./embed 22 | command: bash -c "/usr/bin/python /usr/local/src/hawk/db_sql_create.py && /usr/local/bin/supervisord -c /etc/supervisord/supervisord.conf" 23 | expose: 24 | - "5000" 25 | links: 26 | - redis 27 | volumes: 28 | - ./embed:/usr/local/src/hawk/ 29 | - ./data/batch:/data/batch 30 | - ./data/sql:/data/sql 31 | environment: 32 | - SERVER_NAME=media.embedr.eu 33 | - IIIF_SERVER=iiif.embedr.eu 34 | - REDIS_SERVER=redis 35 | - REDIS_PORT_NUMBER=6379 36 | - SQL_DB_URL=/data/sql/db.db 37 | 38 | ingest: 39 | build: ./ingest 40 | links: 41 | - redis 42 | volumes: 43 | - ./embed:/usr/local/src/hawk/ 44 | - ./data/tmp:/tmp 45 | environment: 46 | - C_FORCE_ROOT=true 47 | - REDIS_SERVER=redis 48 | - REDIS_PORT_NUMBER=6379 49 | - AWS_ACCESS_KEY_ID= 50 | - AWS_SECRET_ACCESS_KEY= 51 | - S3_CHUNK_SIZE=52428800 52 | - S3_HOST=s3.eu-central-1.amazonaws.com 53 | - S3_DEFAULT_BUCKET=storage.hawk.bucket 54 | - MAX_TASK_REPEAT=5 55 | - URL_OPEN_TIMEOUT=10 56 | - CLOUDSEARCH_REGION=eu-central-1 57 | - CLOUDSEARCH_ITEM_DOMAIN=hawk 58 | command: bash -c "celery --app=app.task_queue.task_queue worker -E -l warning --workdir=/usr/local/src/hawk/ --autoscale=10,3 --hostname worker1.%h && celery --app=app.task_queue.task_queue worker -E -l warning --workdir=/usr/local/src/hawk/ --autoscale=10,3 --hostname worker2.%h" 59 | 
-------------------------------------------------------------------------------- /embed/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM klokantech/supervisord 2 | 3 | COPY . /usr/local/src/hawk/ 4 | 5 | RUN apt-get -qq update && apt-get -qq -y --no-install-recommends install \ 6 | python-pip \ 7 | uwsgi \ 8 | uwsgi-plugin-python \ 9 | && pip install -q -r /usr/local/src/hawk/requirements.txt 10 | 11 | EXPOSE 5000 12 | 13 | COPY supervisord.conf /etc/supervisord/ 14 | CMD ["/usr/local/bin/supervisord", "-c", "/etc/supervisord/supervisord.conf"] 15 | -------------------------------------------------------------------------------- /embed/README.md: -------------------------------------------------------------------------------- 1 | The embed docker container runs the wsgi flask embedding application. Settings for server name, some urls, etc. can be set via docker-compose. There is `supervisord.conf` which is pushed to the right place in the container by docker-compose and it sets up and runs the wsgi server itself. 2 | The `requirements.txt` file is used during the build of the container and it specifies the python packages which are used by the embed application. 3 | The `Dockerfile` is used as configuration to build the docker container. 4 | The `run.py` file is a script which is run by supervisor and it starts the wsgi server. 5 | The `test.py` file is a unittest script. 6 | The `app` folder is a python package with the embed application's sources 7 | The sources have to be mapped into the container itself (`/usr/local/src/hawk/`), and the folder for storing json files with orders for ingest has to be mapped into the `/data` folder in the container. 8 | 9 | This container has to map port 5000 to the outside, then nginx can use this container as a wsgi server. 
10 | 11 | Some configuration in the form of environment variables is required: 12 | * SERVER_NAME - base url for embed server 13 | * IIIF_SERVER - base url for IIIF server 14 | * REDIS_SERVER - base url for redis server 15 | * REDIS_PORT_NUMBER - accessible port on the redis server 16 | * AWS_ACCESS_KEY_ID - personal key to AWS 17 | * AWS_SECRET_ACCESS_KEY - personal secret key to AWS 18 | * CLOUDSEARCH_REGION - Amazon region where the Cloud Search runs 19 | * CLOUDSEARCH_BATCH_DOMAIN - Cloud Search domain where complete ingest batches are stored 20 | 21 | *Configuration from docker-compose* 22 | 23 | ``` 24 | embed: 25 | build: ./embed 26 | command: /usr/local/bin/supervisord -c /etc/supervisord/supervisord.conf 27 | expose: 28 | - "5000" 29 | links: 30 | - redis 31 | volumes: 32 | - ./embed:/usr/local/src/hawk/ 33 | - ./data/batch:/data 34 | environment: 35 | - SERVER_NAME=media.embedr.eu 36 | - IIIF_SERVER=iiif.embedr.eu 37 | - REDIS_SERVER=redis 38 | - REDIS_PORT_NUMBER=6379 39 | - AWS_ACCESS_KEY_ID= 40 | - AWS_SECRET_ACCESS_KEY= 41 | - CLOUDSEARCH_REGION=eu-central-1 42 | - CLOUDSEARCH_BATCH_DOMAIN=hawk-batch 43 | ``` 44 | -------------------------------------------------------------------------------- /embed/app/__init__.py: -------------------------------------------------------------------------------- 1 | """Module which provides flask embed aplication factory""" 2 | 3 | import os 4 | 5 | from flask import Flask 6 | import redis 7 | 8 | from app import views 9 | from models import db 10 | 11 | 12 | def app_factory(db_backend=None): 13 | """Function which provides embed application factory. It takes config from environment and returns embed app itself. 
14 | 'db_backend' - type of database backend, can be 'redis' (default) or 'fakeredis' 15 | """ 16 | 17 | app = Flask(__name__) 18 | 19 | app.config.update( 20 | SERVER_NAME=os.getenv('SERVER_NAME', '127.0.0.1:5000'), 21 | IIIF_SERVER=os.getenv('IIIF_SERVER', '127.0.0.1'), 22 | REDIS_SERVER=os.getenv('REDIS_SERVER', 'localhost'), 23 | REDIS_PORT_NUMBER=int(os.getenv('REDIS_PORT_NUMBER', 6379)), 24 | DEBUG=os.getenv('DEBUG', False), 25 | HOST=os.getenv('HOST', '127.0.0.1'), 26 | PORT=int(os.getenv('PORT', 5000)), 27 | SQL_DB_URL = os.getenv('SQL_DB_URL', None) 28 | ) 29 | 30 | ### Db initialization ### 31 | if db_backend: 32 | db.init_db(db_backend) 33 | else: 34 | db.init_db(redis.StrictRedis(host=app.config['REDIS_SERVER'], port=app.config['REDIS_PORT_NUMBER'], db=0)) 35 | 36 | if not hasattr(app, 'extensions'): 37 | app.extensions = dict() 38 | 39 | if 'redis' in app.extensions: 40 | raise ValueError('Already registered config prefix "redis"') 41 | 42 | app.extensions['redis'] = db 43 | 44 | ### Setting of relation between particular url and view function 45 | app.route('/')(views.index) 46 | app.route('/')(views.iFrame) 47 | app.route('//')(views.iFrame) 48 | app.route('//manifest.json')(views.iiifMeta) 49 | app.route('/oembed', methods=['GET'])(views.oEmbed) 50 | app.route('/ingest', methods=['GET', 'POST'])(views.ingest) 51 | 52 | return app 53 | -------------------------------------------------------------------------------- /embed/app/celery_factory.py: -------------------------------------------------------------------------------- 1 | """Module which provides celery ingest application factory""" 2 | 3 | import os 4 | 5 | from celery import Celery 6 | import redis 7 | 8 | from models import db 9 | 10 | def celery_factory(): 11 | """Function which provides celery ingest application factory. It takes config from environment and returns task queue, which is used to put ingest tasks in queue. 
12 | """ 13 | 14 | REDIS_SERVER = os.getenv('REDIS_SERVER', 'localhost') 15 | REDIS_PORT_NUMBER = 6379 16 | 17 | db.init_db(redis.StrictRedis(host=REDIS_SERVER, port=REDIS_PORT_NUMBER, db=0)) 18 | 19 | task_queue = Celery(__name__, broker='redis://%s' % REDIS_SERVER, include=['app.ingest']) 20 | 21 | return task_queue 22 | -------------------------------------------------------------------------------- /embed/app/db_wrapper.py: -------------------------------------------------------------------------------- 1 | """Module which provides wrapper for database. It can wraps redis and fakeredis for testing""" 2 | 3 | import redis 4 | import fakeredis 5 | 6 | from exceptions import UnsupportedDbBackend 7 | 8 | 9 | class DatabaseWrapper(): 10 | """Class which provides wrapper for database and can be used to instantiate database itself""" 11 | 12 | def init_db(self, backend=None): 13 | """Method for initialization of database wrapper. 14 | 'backend' - desired backend for database, it can be redis or fakeredis for testing 15 | """ 16 | 17 | if not isinstance(backend, redis.StrictRedis) and not isinstance(backend, fakeredis.FakeStrictRedis): 18 | raise UnsupportedDbBackend('%s database backend is not allowed' % backend) 19 | self.backend = backend 20 | 21 | return self 22 | 23 | def get(self, key): 24 | """Method for getting of data from database by unique key. 25 | 'key' - unique key to database 26 | """ 27 | 28 | return self.backend.get(key) 29 | 30 | def set(self, key, data): 31 | """Method for setting of data to database by unique key. 32 | 'key' - unique key to database 33 | 'data' - data which have to be pushed to database and be reachable by key 34 | """ 35 | 36 | return self.backend.set(key, data) 37 | 38 | def delete(self, key): 39 | """Method for deleting of data from database by unique key. 
40 | 'key' - unique key to database 41 | """ 42 | 43 | return self.backend.delete(key) 44 | 45 | def incr(self, key, default): 46 | """Method for atomically increasing numerical data in database. It is needed for implementation of counters. 47 | 'key' - unique key to database 48 | 'default' - numerical value which is set if the key is not available in database 49 | """ 50 | 51 | return self.backend.incr(key, default) 52 | -------------------------------------------------------------------------------- /embed/app/exceptions.py: -------------------------------------------------------------------------------- 1 | """Module which provides user defined exceptions""" 2 | 3 | class NoItemInDb(Exception): 4 | pass 5 | 6 | class ErrorItemImport(Exception): 7 | pass 8 | 9 | class UnsupportedDbBackend(Exception): 10 | pass 11 | 12 | class ErrorImageIdentify(Exception): 13 | pass 14 | 15 | class WrongCloudSearchService(Exception): 16 | pass 17 | -------------------------------------------------------------------------------- /embed/app/helper.py: -------------------------------------------------------------------------------- 1 | """Module which defines some useful helper functions""" 2 | 3 | import os 4 | import math 5 | 6 | import boto 7 | from flask import current_app as app 8 | 9 | from exceptions import WrongCloudSearchService 10 | 11 | S3_HOST = os.getenv('S3_HOST', '') 12 | S3_DEFAULT_BUCKET = os.getenv('S3_DEFAULT_BUCKET', '') 13 | CLOUDSEARCH_REGION = os.getenv('CLOUDSEARCH_REGION', '') 14 | 15 | 16 | def prepareTileSources(item, url, order): 17 | """Function which returns item with properly formated data for IIIF zooming. 
def getCloudSearch(domain, service):
    """Return a Cloud Search service object for the given domain.

    'domain'  - name of the Cloud Search domain to look up
    'service' - which service to return: 'document' or 'search'

    Raises WrongCloudSearchService for any other 'service' value; in that
    case no connection is opened.
    """

    # Reject unknown service names before touching the network.
    if service not in ('document', 'search'):
        raise WrongCloudSearchService('Wrong type of Cloud Search service "%s"' % service)

    connection = boto.connect_cloudsearch2(region=CLOUDSEARCH_REGION, sign_request=True)
    cs_domain = connection.lookup(domain)

    if service == 'document':
        return cs_domain.get_document_service()
    return cs_domain.get_search_service()
invalid, either by content or type 64 | class DataError(MetadataError): 65 | pass 66 | 67 | MAN_VIEWINGHINTS = ['individuals', 'paged', 'continuous'] 68 | CVS_VIEWINGHINTS = ['non-paged'] 69 | RNG_VIEWINGHINTS = ['top', 'individuals', 'paged', 'continuous'] 70 | VIEWINGDIRS = ['left-to-right', 'right-to-left', 'top-to-bottom', 'bottom-to-top'] 71 | 72 | BAD_HTML_TAGS = ['script', 'style', 'object', 'form', 'input'] 73 | GOOD_HTML_TAGS = ['a', 'b', 'br', 'i', 'img', 'p', 'span'] 74 | 75 | KEY_ORDER = ["@context", "@id", "@type", "@value", "@language", "label", "value", 76 | "metadata", "description", "thumbnail", "attribution", "license", "logo", 77 | "format", "height", "width", "startCanvas", 78 | "viewingDirection", "viewingHint", 79 | "profile", "seeAlso", "search", "formats", "qualities", "supports", 80 | "scale_factors", "scaleFactors", "tile_width", "tile_height", "tiles", "sizes", 81 | "within", "motivation", "stylesheet", "resource", 82 | "on", "default", "item", "style", "full", "selector", "chars", "language", 83 | "sequences", "structures", "canvases", "resources", "images", "otherContent" ] 84 | 85 | KEY_ORDER_HASH = dict([(KEY_ORDER[x],x) for x in range(len(KEY_ORDER))]) 86 | 87 | class ManifestFactory(object): 88 | metadata_base = "" 89 | metadata_dir = "" 90 | 91 | def __init__(self, version="2.0", mdbase="", imgbase="", mddir="", lang="en"): 92 | """ mdbase: (string) URI to which identities will be appended for metadata 93 | imgbase: (string) URI to which image identities will be appended for IIIF Image API 94 | mddir: (string) Directory where metadata files will be written 95 | lang: (string) Language code to use by default if multiple languages given""" 96 | 97 | if mdbase: 98 | self.set_base_metadata_uri(mdbase) 99 | if imgbase: 100 | self.set_base_image_uri(imgbase) 101 | 102 | if mddir: 103 | self.set_base_metadata_dir(mddir) 104 | 105 | self.default_lang = lang 106 | if self.default_lang != "en": 107 | self.add_lang = True 108 | else: 109 | 
self.add_lang = False 110 | 111 | self.presentation_api_version = version 112 | if version[0] == "2": 113 | self.context_uri = "http://iiif.io/api/presentation/2/context.json" 114 | elif version == "1.0" or version == "0.9": 115 | self.context_uri = "http://www.shared-canvas.org/ns/context.json" 116 | else: 117 | raise ConfigurationError("Unknown Presentation API Version: " + version ) 118 | 119 | # Default Image API info 120 | self.default_image_api_version = "0" 121 | self.default_image_api_level = -1 122 | self.default_image_api_context = "" 123 | self.default_image_api_profile = "" 124 | self.default_image_api_uri = "" 125 | self.default_image_api_dir = "" 126 | 127 | self.default_base_image_uri = "" 128 | self.default_base_image_dir = "" 129 | 130 | self.debug_level = "warn" 131 | self.log_stream = sys.stdout 132 | 133 | # Try to find ImageMagick's identify 134 | try: 135 | self.whichid = commands.getoutput('which identify') 136 | except: 137 | # No IM or not unix 138 | self.whichid = "" 139 | 140 | def set_debug_stream(self, strm): 141 | self.log_stream = strm 142 | 143 | def set_debug(self, typ): 144 | # error = squash warnings 145 | # warn = display warnings 146 | # error_on_warning = raise exception for a warning rather than continuing 147 | 148 | if typ in ['error', 'warn', 'error_on_warning']: 149 | self.debug_level = typ 150 | else: 151 | raise ConfigurationError("Only levels are 'error', 'warn' and 'error_on_warning'") 152 | 153 | def maybe_warn(self, msg): 154 | if self.debug_level == "warn": 155 | self.log_stream.write(msg + "\n") 156 | try: 157 | self.log_stream.flush() 158 | except: 159 | pass 160 | elif self.debug_level == "error_on_warning": 161 | # We don't know the type, just raise a MetadataError 162 | raise MetadataError(msg) 163 | 164 | def assert_base_metadata_uri(self): 165 | if not self.metadata_base: 166 | raise ConfigurationError("Metadata API Base URI is not set") 167 | 168 | def assert_base_image_uri(self): 169 | if not 
self.default_base_image_uri: 170 | raise ConfigurationError("IIIF Image API Base URI is not set") 171 | 172 | def set_base_metadata_dir(self, dir): 173 | if not os.path.exists(dir): 174 | raise ConfigurationError("Metadata API Base Directory does not exist") 175 | elif dir[-1] != "/": 176 | dir += "/" 177 | self.metadata_dir = dir 178 | 179 | def set_base_metadata_uri(self, uri): 180 | if not uri: 181 | raise ValueError("Must provide a URI to set the base URI to") 182 | elif uri[-1] != "/": 183 | uri += "/" 184 | self.metadata_base = uri 185 | 186 | def set_default_label_language(self, lang): 187 | self.default_lang = lang 188 | 189 | 190 | def set_base_image_dir(self, dr): 191 | if not dr: 192 | raise ValueError("Must provide a directory name to set the base directory to") 193 | self.default_base_image_dir = dr 194 | 195 | def set_base_image_uri(self, uri): 196 | # No trailing / as that's what the base URI really is 197 | # Need to add it back all over the place though :( 198 | if not uri: 199 | raise ValueError("Must provide a URI to set the base URI to") 200 | if uri[-1] == "/": 201 | uri = uri[:-1] 202 | self.default_base_image_uri = uri 203 | 204 | def set_iiif_image_info(self, version="2.0", lvl="1"): 205 | version = str(version) 206 | lvl = str(lvl) 207 | if not version in ['1.0', '1.1', '2.0']: 208 | raise ConfigurationError("Only versions 1.0, 1.1, 2.0 are known") 209 | if not lvl in ['0','1','2']: 210 | raise ConfigurationError("Level must be 0, 1 or 2") 211 | self.default_image_api_version = version 212 | self.default_image_api_level = lvl 213 | if version == "1.0": 214 | self.default_image_api_profile = "http://library.stanford.edu/iiif/image-api/compliance.html#level" + lvl 215 | self.default_image_api_context = "http://library.stanford.edu/iiif/image-api/context.json" 216 | elif version == "1.1": 217 | self.default_image_api_profile = "http://library.stanford.edu/iiif/image-api/1.1/compliance.html#level" + lvl 218 | self.default_image_api_context = 
"http://library.stanford.edu/iiif/image-api/1.1/context.json" 219 | else: 220 | self.default_image_api_profile = "http://iiif.io/api/image/2/level%s.json" % lvl 221 | self.default_image_api_context = "http://iiif.io/api/image/2/context.json" 222 | 223 | def set_iiif_image_conformance(self, version, lvl): 224 | return self.set_iiif_image_info(version, lvl) 225 | 226 | def collection(self, ident="collection", label="", mdhash={}): 227 | if not ident.startswith('http'): 228 | self.assert_base_metadata_uri() 229 | return Collection(self, ident, label, mdhash) 230 | 231 | def manifest(self, ident="manifest", label="", mdhash={}): 232 | if not ident.startswith('http'): 233 | self.assert_base_metadata_uri() 234 | return Manifest(self, ident, label, mdhash) 235 | 236 | def sequence(self,ident="", label="", mdhash={}): 237 | if ident and not ident.startswith('http'): 238 | self.assert_base_metadata_uri() 239 | return Sequence(self, ident, label, mdhash) 240 | 241 | def canvas(self,ident="", label="", mdhash={}): 242 | if not ident: 243 | raise RequirementError("Canvases must have a real identity (Canvas['@id'] cannot be empty)") 244 | elif not ident.startswith('http'): 245 | self.assert_base_metadata_uri() 246 | return Canvas(self, ident, label, mdhash) 247 | 248 | def annotation(self, ident="", label="", mdhash={}): 249 | if ident and not ident.startswith('http'): 250 | self.assert_base_metadata_uri() 251 | return Annotation(self, ident, label=label) 252 | 253 | def annotationList(self, ident="", label="", mdhash={}): 254 | if not ident: 255 | raise RequirementError("AnnotationLists must have a real identity (AnnotationList['@id'] cannot be empty)") 256 | elif not ident.startswith('http'): 257 | self.assert_base_metadata_uri() 258 | return AnnotationList(self, ident, label, mdhash) 259 | 260 | def image(self, ident, label="", iiif=False): 261 | if not ident: 262 | raise RequirementError("Images must have a real identity (Image['@id'] cannot be empty)") 263 | return 
def __init__(self, factory, ident="", label="", mdhash={}, **kw):
    """Set up the properties shared by every metadata resource.

    factory: stored as self._factory; its metadata_base is used to build
             the identity of resources given a relative ident
    ident:   identity; used as-is when it starts with 'http', otherwise
             expanded to metadata_base + the class's _uri_segment + ident,
             with '.json' appended if missing. Empty leaves the id empty.
    label:   passed to set_label() when non-empty
    mdhash:  passed to set_metadata() when non-empty; the shared {} default
             is only read here, never modified
    kw:      accepted but not used by this method
    """
    # Leading-underscore names bypass the property checks in __setattr__.
    self._factory = factory
    if ident:
        if ident.startswith('http'):
            self.id = ident
        else:
            self.id = factory.metadata_base + self.__class__._uri_segment + ident
            if not self.id.endswith('.json'):
                self.id += '.json'
    else:
        self.id = ""
    # The concrete subclass supplies _type.
    self.type = self.__class__._type
    # Create the attribute first, then let set_label() process a real value.
    self.label = ""
    if label:
        self.set_label(label)
    # metadata is a list that set_metadata() appends to.
    self.metadata = []
    if mdhash:
        self.set_metadata(mdhash)

    # Remaining descriptive and linking properties start out empty.
    self.description = ""
    self.thumbnail = ""

    self.attribution = ""
    self.license = ""
    self.logo = ""

    self.service = ""
    self.seeAlso = ""
    self.within = ""
    self.related = ""
345 | raise DataError("%s['%s'] does not accept a %s" % (self._type, which, type(value).__name__), self) 346 | elif which in self._integer_properties and type(value) != int: 347 | raise DataError("%s['%s'] does not accept a %s, only an integer" % (self._type, which, type(value).__name__), self) 348 | elif value and which in self._object_properties and not self.test_object(value): 349 | raise DataError("%s['%s'] must have a URI or resource, got %s" % (self._type, which, repr(value))) 350 | 351 | if hasattr(self, which) and hasattr(self, 'set_%s' % which): 352 | fn = getattr(self, 'set_%s' % which) 353 | return fn(value) 354 | elif value and which in self._object_properties: 355 | self._set_magic_resource(which, value) 356 | else: 357 | object.__setattr__(self, which, value) 358 | 359 | def maybe_warn(self, msg): 360 | msg = "WARNING: " + msg 361 | self._factory.maybe_warn(msg) 362 | 363 | def test_object(self, data): 364 | # "http://..." 365 | # {"@id": "http://..."} 366 | # or list of above 367 | if type(data) in [str, unicode]: 368 | return data.startswith('http') 369 | elif type(data) == dict: 370 | return '@id' in data 371 | elif isinstance(data, BaseMetadataObject): 372 | return True 373 | elif type(data) == list: 374 | for d in data: 375 | if type(d) in [str, unicode] and not data.startswith('http'): 376 | return False 377 | elif type(d) == dict and not '@id' in d: 378 | return False 379 | return True 380 | else: 381 | print "expecing a resource, got: %r" % data 382 | return True 383 | 384 | 385 | def test_html(self, data): 386 | if etree: 387 | try: 388 | dom = etree.XML(data) 389 | except Exception, e: 390 | raise DataError("Invalid XHTML in '%s': %s" % (data, e), self) 391 | for elm in dom.iter(): 392 | if elm.tag in BAD_HTML_TAGS: 393 | raise DataError("HTML vulnerability '%s' in '%s'" % (elm.tag, data), self) 394 | elif elm.tag in [etree.Comment, etree.ProcessingInstruction]: 395 | raise DataError("HTML Comment vulnerability '%s'" % elm, self) 396 | elif 
def langhash_to_jsonld(self, lh, html=True):
    """Convert a {language: text} dict into a list of JSON-LD values.

    {"fr": "something in french", "en": "something in english", "de html" : "German HTML"}
    --> [{"@value": "something in french", "@language": "fr"}, ...]

    A value is treated as HTML when its key contains 'html' or when the
    value starts with '<' and ends with '>'. HTML values are checked with
    test_html(); the 'html' marker is stripped from the key, and an HTML
    value left without a language is emitted as a bare string.

    lh:   dict mapping language code to text
    html: when False, any HTML value raises DataError

    Raises DataError for disallowed or malformed HTML. Empty strings are
    treated as plain text.
    """
    l = []
    for (k, v) in lh.items():
        # Slices instead of v[0]/v[-1]: an empty value must not raise IndexError.
        looks_like_html = v[:1] == '<' and v[-1:] == '>'
        if 'html' in k or looks_like_html:
            k = k.replace("html", '').strip()
            if not html:
                raise DataError("Cannot have HTML in '%s', only plain text" % v, self)
            # Key said HTML but the value is not wrapped in a tag
            if not looks_like_html:
                raise DataError("First and last characters of HTML value must be '<' and '>' respectively, in '%r'" % v, self)
            self.test_html(v)
            if k:
                l.append(OrderedDict([("@value", v), ("@language", k)]))
            else:
                l.append(v)
        else:
            l.append(OrderedDict([("@value", v), ("@language", k)]))
    return l
or: {'label': langhash, 'value': langhash} 435 | # Set: {"label":label, "value":value} 436 | # Really add_metadata, as won't overwrite 437 | 438 | if type(mdhash) != dict: 439 | raise ValueError("set_metadata takes a dict()") 440 | 441 | # by reference, not value, so can modify in place without 442 | # triggering __setattr__ on the resource ;) 443 | md = self.metadata 444 | 445 | mdk = mdhash.keys() 446 | mdk.sort() 447 | if mdk == ['label', 'value']: 448 | # Work around to allow multiple languages for label; 449 | # just have to set_metadata() one at a time 450 | k = mdhash['label'] 451 | v = mdhash['value'] 452 | if type(k) in [str, unicode] and self._factory.add_lang: 453 | k = self.langhash_to_jsonld({self._factory.default_lang : k}) 454 | elif type(k) == dict: 455 | k = self.langhash_to_jsonld(k) 456 | if type(v) in [str, unicode] and self._factory.add_lang: 457 | v = self.langhash_to_jsonld({self._factory.default_lang : v}) 458 | elif type(v) == dict: 459 | v = self.langhash_to_jsonld(v) 460 | md.append(OrderedDict([("label", k), ("value", v)])) 461 | 462 | else: 463 | for (k,v) in mdhash.items(): 464 | if type(v) in [str, unicode] and self._factory.add_lang: 465 | v = self.langhash_to_jsonld({self._factory.default_lang : v}) 466 | elif type(v) == dict: 467 | v = self.langhash_to_jsonld(v) 468 | md.append(OrderedDict([("label", k), ("value", v)])) 469 | 470 | def _set_magic(self, which, value, html=True): 471 | if type(value) in [str, unicode]: 472 | if self._factory.add_lang: 473 | value = self.langhash_to_jsonld({self._factory.default_lang : value}, html) 474 | elif value and value[0] == '<' and value[-1] == '>': 475 | self.test_html(value) 476 | elif type(value) == dict: 477 | # {"en:"Something",fr":"Quelque Chose"} 478 | value = self.langhash_to_jsonld(value, html) 479 | elif type(value) == list: 480 | # list of values 481 | nl = [] 482 | for i in value: 483 | if type(i) in [str, unicode]: 484 | if self._factory.add_lang: 485 | 
nl.extend(self.langhash_to_jsonld({self._factory.default_lang : i}, html)) 486 | elif value and value[0] == '<' and value[-1] == '>': 487 | self.test_html(i) 488 | nl.append(i) 489 | elif type(i) == dict: 490 | # {"en:"Something",fr":"Quelque Chose"} 491 | nl.extend(self.langhash_to_jsonld(i, html)) 492 | else: 493 | nl.append(i) 494 | value = nl 495 | object.__setattr__(self, which, value) 496 | 497 | def set_label(self, value): 498 | return self._set_magic('label', value, False) 499 | def set_description(self, value): 500 | return self._set_magic('description', value) 501 | def set_attribution(self, value): 502 | return self._set_magic('attribution', value) 503 | 504 | def _set_magic_resource(self, which, value): 505 | # allow: string/object/dict, and magically generate list thereof 506 | try: 507 | current = getattr(self, which) 508 | except: 509 | current = None 510 | if not current: 511 | object.__setattr__(self, which, value) 512 | elif type(current) == list: 513 | new = current.append(value) 514 | object.__setattr__(self, which, new) 515 | else: 516 | new = [current, value] 517 | object.__setattr__(self, which, new) 518 | 519 | def add_service(self, ident, label="", context="", profile=""): 520 | svc = self._factory.service(ident, label, context, profile) 521 | self.service = svc 522 | return svc 523 | 524 | def toJSON(self, top=False): 525 | d = self.__dict__.copy() 526 | if d.has_key('id') and d['id']: 527 | d['@id'] = d['id'] 528 | del d['id'] 529 | d['@type'] = d['type'] 530 | del d['type'] 531 | for (k, v) in d.items(): 532 | if not v or k[0] == "_": 533 | del d[k] 534 | if d.has_key('context'): 535 | d['@context'] = d['context'] 536 | del d['context'] 537 | for e in self._required: 538 | if not d.has_key(e): 539 | if self._structure_properties.has_key(e): 540 | raise StructuralError("Resource type '%s' requires '%s' to be set" % (self._type, e), self) 541 | else: 542 | raise RequirementError("Resource type '%s' requires '%s' to be set" % (self._type, 
e), self) 543 | debug = self._factory.debug_level 544 | if debug.find("warn") > -1: 545 | for e in self._warn: 546 | if not d.has_key(e): 547 | msg = "Resource type '%s' should have '%s' set" % (self._type, e) 548 | self.maybe_warn(msg) 549 | if top: 550 | d['@context'] = self._factory.context_uri 551 | 552 | # Enumerations 553 | if d.has_key('viewingHint'): 554 | if hasattr(self, '_viewing_hints'): 555 | if not d['viewingHint'] in self._viewing_hints: 556 | msg = "'%s' not a known viewing hint for type '%s': %s" % (d['viewingHint'], self._type, ' '.join(self._viewing_hints)) 557 | self.maybe_warn(msg) 558 | else: 559 | msg = "Resource type '%s' does not have any known viewingHints; '%s' given" % (self._type, d['viewingHint']) 560 | self.maybe_warn(msg) 561 | 562 | if d.has_key('viewingDirection'): 563 | if hasattr(self, '_viewing_directions'): 564 | if not d['viewingDirection'] in self._viewing_directions: 565 | msg = "'%s' not a known viewing direction for type '%s': %s" % (d['viewingDirection'], self._type, ' '.join(self._viewing_directions)) 566 | raise DataError(msg, self) 567 | else: 568 | msg = "Resource type '%s' does not have any known viewingDirections; '%s' given" % (self._type, d['viewingDirection']) 569 | self.maybe_warn(msg) 570 | 571 | # Recurse into structures, maybe minimally 572 | for (p,sinfo) in self._structure_properties.items(): 573 | if d.has_key(p): 574 | if type(d[p]) == list: 575 | newl = [] 576 | for s in d[p]: 577 | minimalOveride = self._should_be_minimal(s) 578 | done = self._single_toJSON(s, sinfo, p, minimalOveride) 579 | newl.append(done) 580 | d[p] = newl 581 | else: 582 | if sinfo.get('list', False): 583 | raise StructuralError("%s['%s] must be a list, got %r" % (self._type, p, d[p]), self) 584 | d[p] = self._single_toJSON(d[p], sinfo, p) 585 | 586 | return OrderedDict(sorted(d.items(), key=lambda x: KEY_ORDER_HASH.get(x[0], 1000))) 587 | 588 | def _should_be_minimal(self, what): 589 | return False 590 | 591 | def 
_single_toJSON(self, instance, sinfo, prop, minimalOveride=False): 592 | # duck typing. Bite me. 593 | typ = sinfo.get('subclass', None) 594 | minimal = sinfo.get('minimal', False) 595 | if minimalOveride: 596 | minimal=True 597 | if type(instance) in [str, unicode]: 598 | # Just a URI 599 | return instance 600 | elif ( isinstance(instance, BaseMetadataObject) and typ == None ) or (typ != None and isinstance(instance, typ)): 601 | if minimal: 602 | return {'@id': instance.id, '@type': instance._type, 'label': instance.label} 603 | else: 604 | return instance.toJSON(False) 605 | elif type(instance) == dict and ( (instance.has_key('@type') and instance['@type'] == typ._type) or typ == None ): 606 | if minimal: 607 | return {'@id': instance['@id'], '@type':instance['@type'], 'label': instance['label']} 608 | else: 609 | return instance 610 | elif type(instance) == dict: 611 | raise StructuralError("%s['%s'] objects must be of type %s, got %s" % (self._type, prop, typ._type, instance.get('@type', None)), self) 612 | 613 | else: 614 | raise StructuralError("Saw unknown object in %s['%s']: %r" % (self._type, prop, instance), self) 615 | 616 | 617 | def _buildString(self, js, compact=True): 618 | if type(js) == dict: 619 | if compact: 620 | out = json.dumps(js, sort_keys=True, separators=(',',':')) 621 | else: 622 | out = json.dumps(js, sort_keys=True, indent=2) 623 | else: 624 | if compact: 625 | out = json.dumps(js, separators=(',',':')) 626 | else: 627 | out = json.dumps(js, indent=2) 628 | return out 629 | 630 | def toString(self, compact=True): 631 | js = self.toJSON(top=True) 632 | return self._buildString(js, compact) 633 | 634 | def toFile(self, compact=True): 635 | mdd = self._factory.metadata_dir 636 | if not mdd: 637 | raise ConfigurationError("Metadata Directory on Factory must be set to write to file") 638 | 639 | js = self.toJSON(top=True) 640 | # Now calculate file path based on URI of top object 641 | # ... 
class ContentResource(BaseMetadataObject):
    """Base for resources that can be addressed in part (selection or fragment)."""

    def make_selection(self, selector, summarize=False):
        """Wrap this resource in a SpecificResource carrying 'selector'.

        selector:  a str becomes an oa:FragmentSelector with that value; a
                   plain dict is re-ordered by KEY_ORDER_HASH; anything else
                   is stored unchanged
        summarize: when true, the wrapped 'full' is only an @id/@type
                   (plus label, if set) summary instead of this object
        """
        target = self
        if summarize:
            target = OrderedDict([("@id",self.id), ("@type", self.type)])
            if self.label:
                target['label'] = self.label

        specific = SpecificResource(self._factory, target)

        selector_type = type(selector)
        if selector_type == str:
            selector = OrderedDict([("@type", "oa:FragmentSelector"), ("value", selector)])
        elif selector_type == dict:
            def key_rank(pair):
                # Unknown keys sort after every known one
                return KEY_ORDER_HASH.get(pair[0], 1000)
            selector = OrderedDict(sorted(selector.items(), key=key_rank))

        specific.selector = selector
        return specific

    def make_fragment(self, fragment):
        """Return this resource's id with '#' and 'fragment' appended."""
        base = self.id
        return base + "#" + fragment
class Manifest(BaseMetadataObject):
    """An sc:Manifest: holds the sequences and ranges (structures) of one object."""

    _type = "sc:Manifest"
    _uri_segment = ""
    _required = ["@id", "label", "sequences"]
    _warn = ["description"]
    _viewing_hints = MAN_VIEWINGHINTS
    _viewing_directions = VIEWINGDIRS

    sequences = []
    structures = []

    def __init__(self, *args, **kw):
        super(Manifest, self).__init__(*args, **kw)
        # Per-instance lists, so the class-level ones are never shared
        self.sequences = []
        self.structures = []

    def _should_be_minimal(self, what):
        """Only the first Sequence is serialised in full; later ones are minimal."""
        return isinstance(what, Sequence) and self.sequences.index(what) > 0

    def add_sequence(self, seq):
        """Append 'seq', refusing a non-empty id that an existing Sequence already has."""
        if seq.id and any(existing.id == seq.id for existing in self.sequences):
            raise DataError("Cannot have two Sequences with the same identity", self)
        self.sequences.append(seq)

    def add_range(self, rng):
        """Append 'rng', refusing a non-empty id that an existing Range already has."""
        if rng.id and any(existing.id == rng.id for existing in self.structures):
            raise DataError("Cannot have two Ranges with the same identity", self)
        self.structures.append(rng)

    def sequence(self, *args, **kw):
        """Create a Sequence through the factory, add it here and return it."""
        created = self._factory.sequence(*args, **kw)
        self.add_sequence(created)
        return created

    def range(self, *args, **kw):
        """Create a Range through the factory, add it here and return it."""
        created = self._factory.range(*args, **kw)
        self.add_range(created)
        return created
def set_start_canvas(self, cvs):
    """Mark one of this Sequence's canvases as the startCanvas.

    cvs: the canvas, given as its id string, a dict with an '@id' key
         (plain dict or OrderedDict), or a Canvas object

    Raises ValueError for any other type, and RequirementError when the
    canvas is not already part of this Sequence.
    """
    # (unicode, str) on Python 2; collapses to (str, str) on Python 3.
    if type(cvs) in (type(u""), str):
        cvsid = cvs
    elif isinstance(cvs, dict):
        # The error message below has always promised "dict", but only
        # OrderedDict was accepted; OrderedDict is a dict subclass, so
        # this covers both.
        cvsid = cvs['@id']
    elif isinstance(cvs, Canvas):
        cvsid = cvs.id
    else:
        raise ValueError("Expected string, dict or Canvas, got %r" % cvs)

    if any(cvsid == c.id for c in self.canvases):
        self.startCanvas = cvsid
    else:
        raise RequirementError("Cannot set the startCanvas of a Sequence to a Canvas that is not in the Sequence")
self.set_image_annotation(imgid, iiif) 843 | 844 | def set_image_annotation(self, imgid, iiif=True): 845 | # Make simple image annotation 846 | anno = self.annotation() 847 | image = anno.image(ident=imgid, iiif=iiif) 848 | if iiif: 849 | image.set_hw_from_iiif() 850 | else: 851 | if imgid.startswith('http'): 852 | # take only last segment 853 | imgid = os.path.split(imgid)[1] 854 | self.set_hw_from_file(imgid) 855 | self.set_hw(image.height, image.width) 856 | return anno 857 | 858 | def add_annotation(self, imgAnno): 859 | self.images.append(imgAnno) 860 | def add_annotationList(self, annoList): 861 | self.otherContent.append(annoList) 862 | 863 | def annotation(self, *args, **kw): 864 | anno = self._factory.annotation(*args, **kw) 865 | anno.on = self.id 866 | self.add_annotation(anno) 867 | return anno 868 | 869 | def annotationList(self, *args, **kw): 870 | annol = self._factory.annotationList(*args, **kw) 871 | annol._canvas = self 872 | self.add_annotationList(annol) 873 | return annol 874 | 875 | def toJSON(self, top=False): 876 | # first verify that images are all for Image resources 877 | for anno in self.images: 878 | res = anno.resource 879 | # if res is neither an Image, nor part of an Image, nor a Choice of those then break 880 | if not (isinstance(res, Choice) or isinstance(res, Image) or (isinstance(res, SpecificResource) and isinstance(res.full, Image))): 881 | raise StructuralError("Annotations in Canvas['images'] must have Images for their resources, got: %r" % res, self) 882 | 883 | d = super(Canvas, self).toJSON(top) 884 | return d 885 | 886 | 887 | class Annotation(BaseMetadataObject): 888 | _type = "oa:Annotation" 889 | _uri_segment = "annotation/" 890 | _required = ["motivation", "resource", "on"] 891 | _warn = ["@id"] 892 | _extra_properties = ['motivation', 'stylesheet'] 893 | 894 | def __init__(self, *args, **kw): 895 | super(Annotation, self).__init__(*args, **kw) 896 | self.motivation = "sc:painting" 897 | self.on = "" 898 | 
self.resource = {} 899 | 900 | def image(self, ident="", label="", iiif=False): 901 | img = self._factory.image(ident, label, iiif) 902 | self.resource = img 903 | return img 904 | 905 | def text(self, text="", ident="", language="", format="text/plain"): 906 | txt = self._factory.text(text, ident, language, format) 907 | self.resource = txt 908 | return txt 909 | 910 | def audio(self, ident="", label=""): 911 | aud = self._factory.audio(ident, label) 912 | self.resource = aud 913 | return aud 914 | 915 | def choice(self, default, rest): 916 | chc = self._factory.choice(default, rest) 917 | self.resource = chc 918 | return chc 919 | 920 | def stylesheet(self, css, cls): 921 | # This has to go here, as need to modify both Annotation and Resource 922 | ss = OrderedDict([("@type", ["oa:CssStyle", "cnt:ContentAsText"]), 923 | ("format", "text/css"), ("chars", css)]) 924 | self.stylesheet = ss 925 | if not self.resource: 926 | raise ConfigurationError("Cannot set a stylesheet without first creating the body") 927 | if isinstance(self.resource, SpecificResource): 928 | self.resource.style = cls 929 | else: 930 | sr = SpecificResource(self._factory, self.resource) 931 | sr.style = cls 932 | self.resource = sr 933 | 934 | class SpecificResource(BaseMetadataObject): 935 | _type = "oa:SpecificResource" 936 | _required = ['full'] 937 | _warn = [] 938 | _extra_properties = ['style', 'selector'] 939 | style = "" 940 | selector = "" 941 | full = None 942 | 943 | def __init__(self, factory, full): 944 | self._factory = factory 945 | self.type = self.__class__._type 946 | self.full=full 947 | 948 | 949 | class ExternalText(ContentResource): 950 | _type = "dctypes:Text" 951 | _required = [] 952 | _factory = None 953 | _warn = ["format"] 954 | _uri_segment = "resources" 955 | _extra_properties = ['format', 'language'] 956 | format = "" 957 | language = "" 958 | 959 | def __init__(self, factory, ident, language="", format=""): 960 | self._factory = factory 961 | self.format = format 
962 | self.language = language 963 | self.type = self.__class__._type 964 | if ident.startswith('http'): 965 | self.id = ident 966 | else: 967 | self.id = self.id = factory.metadata_base + self.__class__._uri_segment + ident 968 | 969 | 970 | class Text(ContentResource): 971 | _type = "cnt:ContentAsText" 972 | _required = ["chars"] 973 | _warn = ["format"] 974 | _extra_properties = ['format', 'chars', 'language'] 975 | chars = "" 976 | format = "" 977 | language = "" 978 | 979 | def __init__(self, factory, text, language="", format="text/plain"): 980 | self._factory = factory 981 | self.type = self.__class__._type 982 | self.chars = text 983 | self.format = format 984 | if language: 985 | self.language = language 986 | 987 | class Audio(ContentResource): 988 | _type = "dctypes:Sound" 989 | _required = ["@id"] 990 | _warn = ["format"] 991 | _uri_segment = "res" 992 | _extra_properties = ['format'] 993 | 994 | class Image(ContentResource): 995 | _type = "dctypes:Image" 996 | _required = ["@id"] 997 | _warn = ["format", "height", "width"] 998 | _extra_properties = ['format', 'height', 'width'] 999 | _integer_properties = ['height', 'width'] 1000 | 1001 | def __init__(self, factory, ident, label, iiif=False): 1002 | self._factory = factory 1003 | self.type = self.__class__._type 1004 | self.label = "" 1005 | self.format = "" 1006 | self.height = 0 1007 | self.width = 0 1008 | self._identifier = "" 1009 | if label: 1010 | self.set_label(label) 1011 | 1012 | if iiif: 1013 | # add IIIF service -- iiif is version or bool 1014 | # ident is identifier 1015 | self.service = ImageService(factory, ident) 1016 | 1017 | if factory.default_image_api_version[0] == '1': 1018 | self.id = factory.default_base_image_uri + '/' + ident + '/full/full/0/native.jpg' 1019 | else: 1020 | self.id = factory.default_base_image_uri + '/' + ident + '/full/full/0/default.jpg' 1021 | self._identifier = ident 1022 | self.format = "image/jpeg" 1023 | 1024 | else: 1025 | # Static image 1026 | # ident 
is either full URL or filename 1027 | if ident.startswith('http://') or ident.startswith('https://'): 1028 | self.id = ident 1029 | else: 1030 | factory.assert_base_image_uri() 1031 | self.id = factory.default_base_image_uri + ident 1032 | 1033 | def set_hw(self, h,w): 1034 | self.height = h 1035 | self.width = w 1036 | 1037 | def set_hw_from_iiif(self): 1038 | if not self._identifier: 1039 | raise ConfigurationError("Image is not configured with IIIF support") 1040 | 1041 | requrl = self._factory.default_base_image_uri + "/" + self._identifier + '/info.json'; 1042 | try: 1043 | fh = urllib.urlopen(requrl) 1044 | data = fh.read() 1045 | fh.close() 1046 | except: 1047 | raise ConfigurationError("Could not get IIIF Info from %s" % requrl) 1048 | 1049 | try: 1050 | js = json.loads(data) 1051 | self.height = int(js['height']) 1052 | self.width = int(js['width']) 1053 | except: 1054 | print data 1055 | raise ConfigurationError("Response from IIIF server did not have mandatory height/width") 1056 | 1057 | 1058 | def set_hw_from_file(self, fn): 1059 | 1060 | # Try to do it automagically 1061 | if not os.path.exists(fn): 1062 | # Add base image dir 1063 | fn2 = self._factory.default_base_image_dir + '/' + fn 1064 | if not os.path.exists(fn2): 1065 | raise ValueError("Could not find image file: %s" % fn) 1066 | else: 1067 | fn = fn2 1068 | 1069 | cmd = self._factory.whichid 1070 | if cmd: 1071 | # Try IM 1072 | try: 1073 | info = commands.getoutput(cmd + ' -ping -format "%h %w" ' + fn) 1074 | (h, w) = info.split(" ") 1075 | self.height = int(h) 1076 | self.width = int(w) 1077 | return 1078 | except: 1079 | pass 1080 | 1081 | if pil_image: 1082 | # Try PIL 1083 | try: 1084 | img = pil_image.open(fn) 1085 | (w,h) = img.size 1086 | self.height = h 1087 | self.width = w 1088 | try: 1089 | img.close() 1090 | except: 1091 | pass 1092 | return 1093 | except: 1094 | pass 1095 | 1096 | raise ConfigurationError("No identify from ImageMagick and no PIL, you have to set manually") 1097 
| 1098 | class Choice(BaseMetadataObject): 1099 | _type = "oa:Choice" 1100 | _uri_segment = "annotation" # not really necessary 1101 | _required = ["item"] 1102 | _warn = ["default"] 1103 | 1104 | default = {} 1105 | item = [] 1106 | 1107 | def __init__(self, factory, default, rest): 1108 | super(Choice, self).__init__(factory, indent="", label="", mdhash={}) 1109 | self.default = default 1110 | if type(rest) != list: 1111 | rest = [rest] 1112 | self.item = rest 1113 | 1114 | 1115 | class AnnotationList(BaseMetadataObject): 1116 | _type = "sc:AnnotationList" 1117 | _uri_segment = "list/" 1118 | _required = ["@id"] 1119 | _warn = [] 1120 | _canvas = None 1121 | 1122 | resources = [] 1123 | within = {} 1124 | 1125 | def __init__(self, *args, **kw): 1126 | 1127 | self.resources = [] 1128 | self.within = [] 1129 | self._canvas = None 1130 | return super(AnnotationList, self).__init__(*args, **kw) 1131 | 1132 | def add_annotation(self, imgAnno): 1133 | self.resources.append(imgAnno) 1134 | 1135 | def annotation(self, *args, **kw): 1136 | anno = self._factory.annotation(*args, **kw) 1137 | if self._canvas: 1138 | anno.on = self._canvas.id 1139 | self.add_annotation(anno) 1140 | return anno 1141 | 1142 | def layer(self, *args, **kw): 1143 | lyr = self._factory.layer(*args, **kw) 1144 | self.within = lyr 1145 | return lyr 1146 | 1147 | class Range(BaseMetadataObject): 1148 | _type = "sc:Range" 1149 | _uri_segment = "range/" 1150 | _required = ["@id", "label"] 1151 | _warn = ['canvases'] 1152 | _extra_properties = ['startCanvas'] 1153 | _viewing_hints = RNG_VIEWINGHINTS 1154 | _viewing_directions = VIEWINGDIRS 1155 | 1156 | startCanvas = "" 1157 | canvases = [] 1158 | ranges = [] 1159 | 1160 | def __init__(self, factory, ident="", label="", mdhash={}): 1161 | super(Range, self).__init__(factory, ident, label, mdhash) 1162 | self.canvases = [] 1163 | self.ranges = [] 1164 | 1165 | def add_canvas(self, cvs, frag="", start=False): 1166 | cvsid = cvs.id 1167 | if frag: 1168 | 
cvsid += frag 1169 | self.canvases.append(cvsid) 1170 | if start: 1171 | self.set_start_canvas(cvsid) 1172 | 1173 | def range(self, ident="", label="", mdhash={}): 1174 | r = self._factory.range(ident, label, mdhash) 1175 | self.add_range(r) 1176 | return r 1177 | 1178 | def add_range(self, rng): 1179 | self.ranges.append(rng.id) 1180 | 1181 | def set_start_canvas(self, cvs): 1182 | if type(cvs) in [unicode, str]: 1183 | cvsid = cvs 1184 | elif isinstance(cvs, Canvas): 1185 | cvsid = cvs.id 1186 | elif isinstance(cvs, OrderedDict): 1187 | cvsid = cvs['@id'] 1188 | else: 1189 | raise ValueError("Expected string, dict or Canvas, got %r" % cvs) 1190 | 1191 | if cvsid in self.canvases: 1192 | self.startCanvas = cvsid 1193 | else: 1194 | raise RequirementError("Cannot set the startCanvas of a Range to a Canvas that is not in the Range") 1195 | 1196 | 1197 | class Layer(BaseMetadataObject): 1198 | _type = "sc:Layer" 1199 | _uri_segment = "layer/" 1200 | _required = ["@id", "label"] 1201 | _warn = [] 1202 | 1203 | 1204 | class Service(BaseMetadataObject): 1205 | _type = "" 1206 | _uri_segment = "" 1207 | _required = ["@id"] 1208 | _warn = ["@context", "profile"] 1209 | _extra_properties = ['context', 'profile'] 1210 | context = "" 1211 | 1212 | 1213 | def __init__(self, factory, ident, label="", context="", profile=""): 1214 | if not ident.startswith('http'): 1215 | raise RequirementError("Services must have an http[s] URI") 1216 | BaseMetadataObject.__init__(self, factory, ident, label) 1217 | self.context = context 1218 | self.profile = profile 1219 | 1220 | def __setattr__(self, which, value): 1221 | if which == "context": 1222 | object.__setattr__(self, which, value) 1223 | else: 1224 | BaseMetadataObject.__setattr__(self, which, value) 1225 | 1226 | class ImageService(Service): 1227 | _type = "" 1228 | _uri_segment = "" 1229 | _required = ["@id", "@context"] 1230 | _warn = ["profile"] 1231 | context = "" 1232 | 1233 | def __init__(self, factory, ident, label="", 
context="", profile=""): 1234 | if not ident.startswith('http'): 1235 | # prepend factory.base before passing up 1236 | ident = factory.default_base_image_uri + '/' + ident 1237 | 1238 | BaseMetadataObject.__init__(self, factory, ident, label) 1239 | 1240 | if not context: 1241 | self.context = factory.default_image_api_context 1242 | else: 1243 | self.context = context 1244 | if not profile and factory.default_image_api_level != -1: 1245 | self.profile = factory.default_image_api_profile 1246 | elif profile: 1247 | self.profile = profile 1248 | 1249 | 1250 | # Need to set these at the end, after the classes have been defined 1251 | Collection._structure_properties = {'collections' : {'subclass': Collection, 'minimal': True, 'list': True}, 1252 | 'manifests': {'subclass': Manifest, 'minimal': True, 'list': True}} 1253 | Manifest._structure_properties = {'sequences': {'subclass': Sequence, 'list':True}, 1254 | 'structures': {'subclass': Range, 'list':True}} 1255 | Sequence._structure_properties = {'canvases': {'subclass':Canvas, 'list':True}} 1256 | Canvas._structure_properties = {'images': {'subclass': Annotation, 'list':True}, 1257 | 'otherContent': {'subclass': AnnotationList, 'minimal':True, 'list':True}} 1258 | AnnotationList._structure_properties = {'resources': {'subclass': Annotation, 'list':True}} 1259 | Range._structure_properties = {'canvases': {'subclass':Canvas, 'list':True, 'minimal':True}, # Could be canvas.json#xywh= ... 
1260 | 'ranges': {'subclass': Range, 'list':True, 'minimal':True}} 1261 | 1262 | # Don't type check these as they're Content subclasses 1263 | Annotation._structure_properties = {'resource': {}, 'on':{'subclass': Canvas}} 1264 | SpecificResource._structure_properties = {'full':{}} 1265 | Choice._structure_properties = {'default':{}, 'item':{}} 1266 | 1267 | # Add Service object to all classes as structure 1268 | for c in [Collection, Manifest, Sequence, Canvas, Range, Layer, Image, AnnotationList, Annotation, Service]: 1269 | c._structure_properties['service'] = {'subclass': Service} 1270 | 1271 | if __name__ == "__main__": 1272 | factory = ManifestFactory() 1273 | factory.set_base_metadata_uri("http://www.example.org/metadata/") 1274 | 1275 | factory.set_base_image_uri("http://www.example.org/iiif/") 1276 | factory.set_iiif_image_info(version="2.0", lvl="2") 1277 | 1278 | mf = factory.manifest(label="Manifest") 1279 | mf.viewingHint = "paged" 1280 | 1281 | seq = mf.sequence() 1282 | for x in range(2): 1283 | # Mostly identity will come from incrementing number (f1r, f1v,...) 
1284 | # or the image's identity 1285 | 1286 | cvs = seq.canvas(ident="c%s" % x, label="Canvas %s" % x) 1287 | cvs.set_hw(1000,1000) 1288 | anno = cvs.annotation() 1289 | # al = cvs.annotationList("foo") 1290 | img = factory.image("f1r.c", iiif=True) 1291 | img2 = factory.image("f1r", iiif=True) 1292 | chc = anno.choice(img, [img2]) 1293 | 1294 | print mf.toString(compact=False) 1295 | -------------------------------------------------------------------------------- /embed/app/ingest.py: -------------------------------------------------------------------------------- 1 | """Module which provides ingest functionality and which can be run by celery""" 2 | 3 | import os 4 | import sys 5 | import urllib2 6 | import math 7 | import subprocess 8 | import time 9 | import random 10 | import hashlib 11 | from datetime import datetime 12 | import traceback 13 | import sqlite3 14 | import shutil 15 | 16 | import simplejson as json 17 | import redis 18 | from filechunkio import FileChunkIO 19 | import requests 20 | import boto.exception 21 | 22 | from app.task_queue import task_queue 23 | from models import Item, Task 24 | from exceptions import NoItemInDb, ErrorItemImport, ErrorImageIdentify 25 | from helper import getBucket, getCloudSearch 26 | 27 | 28 | S3_CHUNK_SIZE = int(os.getenv('S3_CHUNK_SIZE', 52428800)) 29 | S3_DEFAULT_FOLDER = os.getenv('S3_DEFAULT_FOLDER', '') 30 | S3_HOST = os.getenv('S3_HOST', None) 31 | S3_DEFAULT_BUCKET = os.getenv('S3_DEFAULT_BUCKET', None) 32 | MAX_TASK_REPEAT = int(os.getenv('MAX_TASK_REPEAT', 1)) 33 | URL_OPEN_TIMEOUT = int(os.getenv('URL_OPEN_TIMEOUT', 10)) 34 | CLOUDSEARCH_ITEM_DOMAIN = os.getenv('CLOUDSEARCH_ITEM_DOMAIN', None) 35 | 36 | ERR_MESSAGE_CLOUDSEARCH = 5 37 | ERR_MESSAGE_HTTP = 4 38 | ERR_MESSAGE_IMAGE = 3 39 | ERR_MESSAGE_S3 = 2 40 | ERR_MESSAGE_OTHER = 1 41 | ERR_MESSAGE_NONE = 0 42 | 43 | @task_queue.task 44 | def ingestQueue(batch_id, item_id, task_id): 45 | try: 46 | task = Task(batch_id, item_id, task_id) 47 | except 
NoItemInDb, ErrorItemImport: 48 | return -1 49 | 50 | try: 51 | if S3_HOST is not None and S3_DEFAULT_BUCKET is not None: 52 | bucket = getBucket() 53 | else: 54 | # local storage only 55 | bucket = None 56 | 57 | if task.type == 'del': 58 | try: 59 | item = Item(item_id) 60 | 61 | if task.url_order > 0: 62 | filename = '%s/%s.jp2' % (item_id, task.url_order) 63 | 64 | else: 65 | filename = '%s.jp2' % item_id 66 | 67 | if bucket is not None: 68 | bucket.delete_key(S3_DEFAULT_FOLDER + filename) 69 | else: 70 | os.remove('/data/jp2/%s' % filename) 71 | 72 | except NoItemInDb: 73 | pass 74 | 75 | task.status = 'deleted' 76 | 77 | elif task.type == 'mod': 78 | task.status = 'ok' 79 | 80 | elif task.type == 'cloud_search': 81 | task.status = 'ok' 82 | 83 | elif task.type == 'add': 84 | if task.url_order > 0: 85 | filename = '/tmp/%s_%s' % (item_id, task.url_order) 86 | destination = '%s/%s.jp2' % (item_id, task.url_order) 87 | else: 88 | filename = '/tmp/%s' % item_id 89 | destination = '%s.jp2' % item_id 90 | 91 | if task.url_order == 1: 92 | # folder creation 93 | if bucket is not None: 94 | f = bucket.new_key('%s/' % item_id) 95 | f.set_contents_from_string('') 96 | else: 97 | if not os.path.exists('/data/jp2/%s' % item_id): 98 | os.makedirs('/data/jp2/%s/' % item_id) 99 | 100 | r = urllib2.urlopen(task.url, timeout=URL_OPEN_TIMEOUT) 101 | f = open(filename, 'wb') 102 | f.write(r.read()) 103 | f.close() 104 | 105 | if subprocess.check_output(['identify', '-quiet', '-format', '%m', filename]) != 'TIFF': 106 | subprocess.call(['convert', '-quiet', '-compress', 'none', filename, '%s.tif' % filename]) 107 | os.remove('%s' % filename) 108 | else: 109 | os.rename('%s' % filename, '%s.tif' % filename) 110 | 111 | test = subprocess.check_output(['identify', '-quiet', '-format', 'width:%w;height:%h;', '%s.tif' % filename]) 112 | 113 | if test: 114 | tmp = test.split(';') 115 | width = int(tmp[0].split(':')[1]) 116 | height = int(tmp[1].split(':')[1]) 117 | task.image_meta = 
{"width": width, "height": height} 118 | else: 119 | raise ErrorImageIdentify('Error in the image identify process') 120 | 121 | subprocess.call(['kdu_compress', '-i', '%s.tif' % filename, '-o', '%s.jp2' % filename, '-rate', '0.5', 'Clayers=1', 'Clevels=7', 'Cprecincts={256,256},{256,256},{256,256},{128,128},{128,128},{64,64},{64,64},{32,32},{16,16}', 'Corder=RPCL', 'ORGgen_plt=yes', 'ORGtparts=R', 'Cblk={64,64}', 'Cuse_sop=yes', '-quiet']) 122 | 123 | source_path = '%s.jp2' % filename 124 | 125 | if bucket is not None: 126 | source_size = os.stat(source_path).st_size 127 | chunk_count = int(math.ceil(source_size / float(S3_CHUNK_SIZE))) 128 | mp = bucket.initiate_multipart_upload(S3_DEFAULT_FOLDER + destination) 129 | 130 | for i in range(chunk_count): 131 | offset = S3_CHUNK_SIZE * i 132 | bytes = min(S3_CHUNK_SIZE, source_size - offset) 133 | 134 | with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes) as fp: 135 | mp.upload_part_from_file(fp, part_num=i + 1) 136 | 137 | mp.complete_upload() 138 | 139 | else: 140 | shutil.copy('%s.jp2' % filename, '/data/jp2/%s' % destination) 141 | 142 | os.remove('%s.jp2' % filename) 143 | os.remove('%s.tif' % filename) 144 | 145 | task.status = 'ok' 146 | 147 | task.save() 148 | 149 | except: 150 | exception_type = sys.exc_info()[0] 151 | 152 | if exception_type is urllib2.HTTPError or exception_type is urllib2.URLError: 153 | task.message = ERR_MESSAGE_HTTP 154 | elif exception_type is subprocess.CalledProcessError: 155 | task.message = ERR_MESSAGE_IMAGE 156 | elif exception_type is boto.exception.S3ResponseError: 157 | task.message = ERR_MESSAGE_S3 158 | else: 159 | task.message = ERR_MESSAGE_OTHER 160 | 161 | print '\nFailed attempt numb.: %s\nItem: %s\nUrl: %s\nError message:\n###\n%s###' % (task.attempts + 1, task.item_id, task.url, traceback.format_exc()) 162 | task.attempts += 1 163 | 164 | try: 165 | if os.path.isfile('%s' % filename): 166 | os.remove('%s' % filename) 167 | if os.path.isfile('%s.jp2' % 
filename): 168 | os.remove('%s.jp2' % filename) 169 | if os.path.isfile('%s.tif' % filename): 170 | os.remove('%s.tif' % filename) 171 | except: 172 | pass 173 | 174 | if task.attempts < MAX_TASK_REPEAT: 175 | task.status = 'pending' 176 | task.save() 177 | rand = (task.attempts * 60) + random.randint(task.attempts * 60, task.attempts * 60 * 2) 178 | 179 | return ingestQueue.apply_async(args=[batch_id, item_id, task_id], countdown=rand) 180 | else: 181 | task.status = 'error' 182 | task.save() 183 | 184 | if task.increment_finished_item_tasks() >= task.item_tasks_count: 185 | finalizeItem(batch_id, item_id, task.item_tasks_count) 186 | 187 | return 188 | 189 | 190 | def finalizeItem(batch_id, item_id, item_tasks_count): 191 | item_tasks = [] 192 | 193 | for task_order in range(0, item_tasks_count): 194 | item_tasks.append(Task(batch_id, item_id, task_order)) 195 | 196 | # the task with highest id for the specific item has all item data 197 | last_task = item_tasks[-1] 198 | item_data = last_task.item_data 199 | item_data['timestamp'] = datetime.utcnow().isoformat("T") + "Z" 200 | 201 | if item_data.has_key('status') and item_data['status'] == 'deleted': 202 | whole_item_delete = True 203 | else: 204 | whole_item_delete = False 205 | 206 | try: 207 | old_item = Item(item_id) 208 | except: 209 | old_item = None 210 | 211 | if old_item: 212 | if not whole_item_delete: 213 | # check if there is any change on item 214 | if last_task.type == 'mod': 215 | modify_test = False 216 | 217 | for attribute in ['title', 'creator', 'source', 'institution', 'institution_link', 'license', 'description']: 218 | if item_data.get(attribute, '') != getattr(old_item, attribute, ''): 219 | modify_test = True 220 | break 221 | 222 | # without modification we can finish immediately 223 | if not modify_test: 224 | print "Item '%s' finalized - without modification" % item_id 225 | return 226 | 227 | item_data['image_meta'] = old_item.image_meta 228 | else: 229 | item_data['image_meta'] = {} 
230 | 231 | error = False 232 | 233 | if not whole_item_delete: 234 | for task in item_tasks: 235 | if task.status == 'pending' or task.status == 'error': 236 | error = True 237 | # modification tasks never changes image_meta 238 | elif task.type == 'mod': 239 | pass 240 | elif task.status == 'deleted': 241 | # if the image is being really deleted not only being reshuffled 242 | if not task.url in item_data['url']: 243 | item_data['image_meta'].pop(task.url, None) 244 | elif task.status == 'ok': 245 | item_data['image_meta'][task.url] = task.image_meta 246 | 247 | if not error: 248 | if not (old_item and whole_item_delete): 249 | item = Item(item_id, item_data) 250 | ordered_image_meta = [] 251 | 252 | for url in item.url: 253 | tmp = item.image_meta[url] 254 | tmp['url'] = url 255 | ordered_image_meta.append(tmp) 256 | 257 | if CLOUDSEARCH_ITEM_DOMAIN is not None: 258 | try: 259 | cloudsearch = getCloudSearch(CLOUDSEARCH_ITEM_DOMAIN, 'document') 260 | 261 | if old_item and whole_item_delete: 262 | cloudsearch.delete(hashlib.sha512(item_id).hexdigest()[:128]) 263 | else: 264 | cloudsearch.add(hashlib.sha512(item_id).hexdigest()[:128], {'id': item.id, 'title': item.title, 'creator': item.creator, 'source': item.source, 'institution': item.institution, 'institution_link': item.institution_link, 'license': item.license, 'description': item.description, 'url': json.dumps(item.url), 'timestamp': item.timestamp, 'image_meta': json.dumps(ordered_image_meta)}) 265 | 266 | cloudsearch.commit() 267 | 268 | except: 269 | if last_task.attempts < MAX_TASK_REPEAT * 2: 270 | print '\nFailed Cloud Search attempt numb.: %s\nItem: %s\nError message:\n###\n%s###' % (last_task.attempts + 1, task.item_id, traceback.format_exc()) 271 | last_task.attempts += 1 272 | last_task.status = 'pending' 273 | last_task.type = 'cloud_search' 274 | last_task.save() 275 | rand = (last_task.attempts * 60) + random.randint(last_task.attempts * 60, last_task.attempts * 60 * 2) 276 | 277 | return 
ingestQueue.apply_async(args=[batch_id, item_id, last_task.task_id], countdown=rand) 278 | else: 279 | last_task.status = 'error' 280 | last_task.message = ERR_MESSAGE_CLOUDSEARCH 281 | last_task.save() 282 | 283 | if last_task.status == 'error': 284 | cleanErrItem(item_id, len(item_data['image_meta'])) 285 | print "Item '%s' failed" % item_id 286 | elif old_item and whole_item_delete: 287 | old_item.delete() 288 | print "Item '%s' deleted" % item_id 289 | else: 290 | item.save() 291 | print "Item '%s' finalized" % item_id 292 | 293 | else: 294 | cleanErrItem(item_id, len(item_data['image_meta'])) 295 | print "Item '%s' failed" % item_id 296 | 297 | return 298 | 299 | 300 | def cleanErrItem(item_id, count): 301 | try: 302 | bucket = getBucket() 303 | i = 0 304 | 305 | while count > i: 306 | if i == 0: 307 | filename = '%s.jp2' % item_id 308 | else: 309 | filename = '%s/%s.jp2' % (item_id, i) 310 | 311 | i += 1 312 | 313 | bucket.delete_key(S3_DEFAULT_FOLDER + filename) 314 | 315 | if count > 1: 316 | filename = '%s/' % item_id 317 | bucket.delete_key(S3_DEFAULT_FOLDER + filename) 318 | 319 | except: 320 | pass 321 | 322 | try: 323 | cloudsearch = getCloudSearch(CLOUDSEARCH_ITEM_DOMAIN, 'document') 324 | cloudsearch.delete(hashlib.sha512(item_id).hexdigest()[:128]) 325 | cloudsearch.commit() 326 | except: 327 | pass 328 | 329 | try: 330 | Item(item_id).delete() 331 | except: 332 | pass 333 | 334 | return 335 | -------------------------------------------------------------------------------- /embed/app/models.py: -------------------------------------------------------------------------------- 1 | """Module which defines data model""" 2 | 3 | import simplejson as json 4 | 5 | from exceptions import NoItemInDb, ErrorItemImport 6 | from db_wrapper import DatabaseWrapper 7 | 8 | db = DatabaseWrapper() 9 | 10 | 11 | class Item(): 12 | """Class which defines the Item model. 
13 | 'id' - item ID which is unique in whole db 14 | 'data' - dictionary with Item's metadata 15 | """ 16 | 17 | def __init__(self, id, data=None): 18 | self.id = id 19 | self.title = '' 20 | self.creator = '' 21 | self.source = '' 22 | self.institution = '' 23 | self.institution_link = '' 24 | self.license = '' 25 | self.description = '' 26 | self.url = [] 27 | self.image_meta = {} 28 | self.timestamp = '' 29 | 30 | if data: 31 | try: 32 | data = json.loads(json.JSONEncoder().encode(data)) 33 | except: 34 | raise ErrorItemImport('There is an error in the item`s model representation %s' % data) 35 | else: 36 | data = db.get('item_id@%s' % id) 37 | 38 | if not data: 39 | raise NoItemInDb('No item with specified id stored in db') 40 | else: 41 | try: 42 | data = json.loads(data) 43 | except: 44 | raise ErrorItemImport('There is an error in the item`s model representation %s' % data) 45 | 46 | if data.has_key('url'): 47 | self.url = data['url'] 48 | 49 | if type(self.url) != list: 50 | raise ErrorItemImport('There is an error in the batch`s model representation %s' % data) 51 | 52 | for i,u in enumerate(self.url): 53 | self.url[i] = str(u) 54 | else: 55 | raise ErrorItemImport('The item doesn`t have all required params') 56 | 57 | if data.has_key('title'): 58 | self.title = data['title'] 59 | if data.has_key('creator'): 60 | self.creator = data['creator'] 61 | if data.has_key('source'): 62 | self.source = data['source'] 63 | if data.has_key('institution'): 64 | self.institution = data['institution'] 65 | if data.has_key('institution_link'): 66 | self.institution_link = data['institution_link'] 67 | if data.has_key('license'): 68 | self.license = data['license'] 69 | if data.has_key('description'): 70 | self.description = data['description'] 71 | if data.has_key('image_meta'): 72 | self.image_meta = data['image_meta'] 73 | if data.has_key('timestamp'): 74 | self.timestamp = data['timestamp'] 75 | 76 | def save(self): 77 | db.set('item_id@%s' % self.id, 
json.dumps({'url': self.url, 'title': self.title, 'creator': self.creator, 'source': self.source, 'institution': self.institution, 'institution_link': self.institution_link, 'license': self.license, 'description': self.description, 'image_meta': self.image_meta, 'timestamp': self.timestamp})) 78 | 79 | def delete(self): 80 | db.delete('item_id@%s' % self.id) 81 | 82 | 83 | class Task(): 84 | """Class which defines the Task model. 85 | 'batch_id' - ID of parent Batch 86 | 'item_id' - ID of processed Item 87 | 'task_id' - ID of Task, it is order of tasks for one Item 88 | 'data' - dictionary with Task's metadata 89 | """ 90 | 91 | def __init__(self, batch_id, item_id, task_id, data=None): 92 | self.task_id = task_id 93 | self.batch_id = batch_id 94 | self.item_id = item_id 95 | self.status = 'pending' 96 | self.url = '' 97 | self.url_order = 0 98 | self.image_meta = '' 99 | self.attempts = 0 100 | self.type = 'mod' 101 | self.item_data = {} 102 | self.item_tasks_count = 0 103 | self.message = 0 104 | 105 | safe = True 106 | 107 | if data is None: 108 | data = db.get('batch@id@%s@item@id%s@task@id@%s' % (self.batch_id, self.item_id, self.task_id)) 109 | 110 | if not data: 111 | raise NoItemInDb('No task with specified id stored in db') 112 | else: 113 | try: 114 | data = json.loads(data) 115 | safe = False 116 | 117 | except: 118 | raise ErrorItemImport('There is an error in the batch`s model representation %s' % data) 119 | 120 | if data.has_key('status'): 121 | self.status = data['status'] 122 | if data.has_key('url'): 123 | self.url = data['url'] 124 | if data.has_key('url_order'): 125 | self.url_order = data['url_order'] 126 | if data.has_key('image_meta'): 127 | self.image_meta = data['image_meta'] 128 | if data.has_key('attempts'): 129 | self.attempts = data['attempts'] 130 | if data.has_key('type'): 131 | self.type = data['type'] 132 | if data.has_key('item_data'): 133 | self.item_data = data['item_data'] 134 | if data.has_key('item_tasks_count'): 135 | 
self.item_tasks_count = data['item_tasks_count'] 136 | if data.has_key('message'): 137 | self.message = data['message'] 138 | 139 | if safe: 140 | self.save() 141 | 142 | def save(self): 143 | db.set('batch@id@%s@item@id%s@task@id@%s' % (self.batch_id, self.item_id, self.task_id), json.dumps({'status': self.status, 'url': self.url, 'url_order': self.url_order, 'image_meta': self.image_meta, 'attempts': self.attempts, 'type': self.type, 'item_data': self.item_data, 'item_tasks_count': self.item_tasks_count, 'message': self.message})) 144 | 145 | def increment_finished_item_tasks(self): 146 | if self.item_id != '': 147 | return db.incr('batch@id@%s@item@id%s' % (self.batch_id, self.item_id), 1) 148 | 149 | def delete(self): 150 | db.delete('batch@id@%s@item@id%s@task@id@%s' % (self.batch_id, self.item_id, self.task_id)) 151 | -------------------------------------------------------------------------------- /embed/app/static/css/fonts/Raleway-Bold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/css/fonts/Raleway-Bold.ttf -------------------------------------------------------------------------------- /embed/app/static/css/fonts/Raleway-ExtraBold.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/css/fonts/Raleway-ExtraBold.ttf -------------------------------------------------------------------------------- /embed/app/static/css/fonts/Raleway-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/css/fonts/Raleway-Regular.ttf -------------------------------------------------------------------------------- 
/embed/app/static/css/fonts/Ubuntu-B.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/css/fonts/Ubuntu-B.ttf -------------------------------------------------------------------------------- /embed/app/static/css/fonts/Ubuntu-L.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/css/fonts/Ubuntu-L.ttf -------------------------------------------------------------------------------- /embed/app/static/css/fonts/Ubuntu-R.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/css/fonts/Ubuntu-R.ttf -------------------------------------------------------------------------------- /embed/app/static/css/viewer.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: Raleway; 3 | src: url(fonts/Raleway-Regular.ttf); 4 | font-weight: normal; } 5 | @font-face { 6 | font-family: Raleway; 7 | src: url(fonts/Raleway-Bold.ttf); 8 | font-weight: bold; } 9 | @font-face { 10 | font-family: Raleway; 11 | src: url(fonts/Raleway-ExtraBold.ttf); 12 | font-weight: 800; } 13 | @font-face { 14 | font-family: Ubuntu; 15 | src: url(fonts/Ubuntu-R.ttf); 16 | font-weight: normal; } 17 | @font-face { 18 | font-family: Ubuntu; 19 | src: url(fonts/Ubuntu-B.ttf); 20 | font-weight: bold; } 21 | @font-face { 22 | font-family: Ubuntu; 23 | src: url(fonts/Ubuntu-L.ttf); 24 | font-weight: lighter; } 25 | .button, .button__rounded { 26 | background-color: black; 27 | height: 40px; 28 | line-height: 40px; 29 | text-align: center; 30 | color: white; 31 | font-weight: bold; 32 | text-decoration: none; } 33 | .button a, 
.button__rounded a { 34 | color: white; 35 | font-weight: bold; } 36 | 37 | .button__rounded { 38 | display: inline-block; 39 | width: 40px; 40 | height: 40px; 41 | border-radius: 40px; 42 | margin-right: 10px; } 43 | 44 | .result .button__rounded { 45 | position: absolute; 46 | top: 5px; 47 | left: 10px; } 48 | 49 | .button__close { 50 | position: absolute; 51 | right: 5px; 52 | top: 5px; 53 | line-height: 0; 54 | cursor: pointer; } 55 | 56 | #detail__image { 57 | position: fixed; 58 | top: 41px; 59 | left: 0; 60 | right: 0; 61 | bottom: 0; } 62 | #detail__image iframe { 63 | width: 100%; 64 | height: 100%; } 65 | 66 | .embed__popup { 67 | position: absolute; 68 | left: 10px; 69 | top: 50px; 70 | width: 400px; 71 | padding: 14px 20px; 72 | background-color: white; 73 | z-index: 30; 74 | font-weight: normal; 75 | font-family: Ubuntu; 76 | font-size: 18px; 77 | line-height: 24px; } 78 | .embed__popup p { 79 | margin-top: 0; 80 | margin-bottom: 10px; } 81 | 82 | .viewer .embed__popup { 83 | left: 50px; 84 | top: 70px; } 85 | 86 | .embed__title { 87 | font-family: Raleway; 88 | font-weight: 800; 89 | font-size: 24px; 90 | margin-top: 10px; 91 | margin-bottom: 10px; } 92 | 93 | .embed__box { 94 | width: 100%; 95 | margin-bottom: 6px; } 96 | 97 | .embed__option { 98 | background-color: #f1f1f1; 99 | padding: 5px 10px; 100 | margin-bottom: 10px; } 101 | 102 | p.embed__resize { 103 | font-size: 14px; 104 | margin-top: 5px; 105 | margin-bottom: 5px; } 106 | p.embed__resize input { 107 | margin-left: 5px; 108 | margin-right: 5px; 109 | width: 60px; } 110 | 111 | .embed__subtitle { 112 | font-weight: bold; 113 | padding-top: 5px; } 114 | 115 | .embed__callout { 116 | line-height: 30px; } 117 | 118 | .embed__image { 119 | max-height: 250px; 120 | overflow: hidden; } 121 | 122 | .embed__icon { 123 | float: left; 124 | margin-right: 10px; } 125 | 126 | .embed__zoom-control span { 127 | font-size: 14px; 128 | margin-left: 5px; } 129 | 130 | #title { 131 | text-align: center; 132 
| width: 100%; 133 | position: absolute; 134 | background-color: black; 135 | font-family: 'Ubuntu'; 136 | font-weight: 100; 137 | font-size: 14px; 138 | line-height: 20px; 139 | color: white; 140 | z-index: 10; 141 | bottom: 0px; 142 | padding: 5px 0 7px 0; } 143 | #title a { 144 | color: white; } 145 | #title img { 146 | vertical-align: text-bottom; } 147 | 148 | #map { 149 | width: 100%; 150 | height: 100%; 151 | position: absolute; } 152 | 153 | .viewer__toolbar { 154 | display: none; } 155 | 156 | #close { 157 | background-image: url(/static/img/close_dark.png); 158 | background-size: 14px 14px; 159 | height: 14px; 160 | width: 14px; 161 | display: block; 162 | float: right; 163 | margin-top: 4px; 164 | margin-right: 10px; } 165 | 166 | .viewer__toolbar { 167 | position: absolute; 168 | left: 45px; 169 | top: 10px; } 170 | 171 | .iframe__viewer__toolbar { 172 | position: absolute; 173 | left: 8px; 174 | top: 10px; } 175 | 176 | .osd-select-rectangle { 177 | border: 2px dashed black; } 178 | 179 | body { 180 | font-family: 'Raleway'; 181 | background-color: white; } 182 | 183 | a { 184 | text-decoration: underline; 185 | color: black; } 186 | 187 | /*# sourceMappingURL=viewer.css.map */ 188 | -------------------------------------------------------------------------------- /embed/app/static/css/viewer.css.map: -------------------------------------------------------------------------------- 1 | { 2 | "version": 3, 3 | "mappings": 
"AAAA,UAIC;EAHC,WAAW,EAAE,OAAO;EACpB,GAAG,EAAE,8BAA8B;EACnC,WAAW,EAAE,MAAM;AAErB,UAIC;EAHC,WAAW,EAAE,OAAO;EACpB,GAAG,EAAE,2BAA2B;EAChC,WAAW,EAAE,IAAI;AAEnB,UAIC;EAHC,WAAW,EAAE,OAAO;EACpB,GAAG,EAAE,gCAAgC;EACrC,WAAW,EAAE,GAAG;AAElB,UAIC;EAHC,WAAW,EAAE,MAAM;EACnB,GAAG,EAAE,uBAAuB;EAC5B,WAAW,EAAE,MAAM;AAErB,UAIC;EAHC,WAAW,EAAE,MAAM;EACnB,GAAG,EAAE,uBAAuB;EAC5B,WAAW,EAAE,IAAI;AAEnB,UAIC;EAHC,WAAW,EAAE,MAAM;EACnB,GAAG,EAAE,uBAAuB;EAC5B,WAAW,EAAE,OAAO;AC5BtB,yBAAQ;EACN,gBAAgB,EAAE,KAAK;EACvB,MAAM,EAAE,IAAI;EACZ,WAAW,EAAE,IAAI;EACjB,UAAU,EAAE,MAAM;EAClB,KAAK,EAAE,KAAK;EACZ,WAAW,EAAE,IAAI;EACjB,eAAe,EAAE,IAAI;EACrB,6BAAE;IACA,KAAK,EAAE,KAAK;IACZ,WAAW,EAAE,IAAI;;AAIrB,gBAAiB;EAEf,OAAO,EAAE,YAAY;EACrB,KAAK,EAAE,IAAI;EACX,MAAM,EAAE,IAAI;EACZ,aAAa,EAAE,IAAI;EACnB,YAAY,EAAE,IAAI;;AAGpB,wBAAyB;EACvB,QAAQ,EAAE,QAAQ;EAClB,GAAG,EAAE,GAAG;EACR,IAAI,EAAE,IAAI;;AAGZ,cAAe;EACb,QAAQ,EAAE,QAAQ;EAClB,KAAK,EAAE,GAAG;EACV,GAAG,EAAE,GAAG;EACR,WAAW,EAAE,CAAC;EACd,MAAM,EAAE,OAAO;;AClCjB,cAAe;EACb,QAAQ,EAAE,KAAK;EACf,GAAG,EAAE,IAAI;EACT,IAAI,EAAE,CAAC;EACP,KAAK,EAAE,CAAC;EACR,MAAM,EAAE,CAAC;EACT,qBAAO;IACL,KAAK,EAAE,IAAI;IACX,MAAM,EAAE,IAAI;;ACRhB,aAAc;EACZ,QAAQ,EAAE,QAAQ;EAClB,IAAI,EAAE,IAAI;EACV,GAAG,EAAE,IAAI;EACT,KAAK,EAAE,KAAK;EACZ,OAAO,EAAE,SAAS;EAClB,gBAAgB,EAAE,KAAK;EACvB,OAAO,EAAE,EAAE;EACX,WAAW,EAAE,MAAM;EACnB,WAAW,EAAE,MAAM;EACnB,SAAS,EAAE,IAAI;EACf,WAAW,EAAE,IAAI;EACjB,eAAE;IACA,UAAU,EAAE,CAAC;IACb,aAAa,EAAE,IAAI;;AAIvB,qBAAsB;EACpB,IAAI,EAAE,IAAI;EACV,GAAG,EAAE,IAAI;;AAGX,aAAc;EACZ,WAAW,EAAE,OAAO;EACpB,WAAW,EAAE,GAAG;EAChB,SAAS,EAAE,IAAI;EACf,UAAU,EAAE,IAAI;EAChB,aAAa,EAAE,IAAI;;AAGrB,WAAY;EACV,KAAK,EAAE,IAAI;EACX,aAAa,EAAE,GAAG;;AAGpB,cAAe;EACb,gBAAgB,EAAE,OAAO;EACzB,OAAO,EAAE,QAAQ;EACjB,aAAa,EAAE,IAAI;;AAGrB,eAAgB;EACd,SAAS,EAAE,IAAI;EACf,UAAU,EAAE,GAAG;EACf,aAAa,EAAE,GAAG;EAClB,qBAAM;IACJ,WAAW,EAAE,GAAG;IAChB,YAAY,EAAE,GAAG;IACjB,KAAK,EAAE,IAAI;;AAIf,gBAAiB;EACf,WAAW,EAAE,IAAI;EACjB,WAAW,EAAE,GAAG;;AAGlB,eAAgB;EACd,WAAW,EAAE,IAAI;;AAGnB,aAAc;EACZ,UAAU,EAAE,KAAK;EACjB,QAAQ,EAAE,MA
AM;;AAGlB,YAAa;EACX,KAAK,EAAE,IAAI;EACX,YAAY,EAAE,IAAI;;AAGpB,yBAA0B;EACxB,SAAS,EAAE,IAAI;EACf,WAAW,EAAE,GAAG;;AC1ElB,MAAO;EACL,UAAU,EAAE,MAAM;EAClB,KAAK,EAAE,IAAI;EACX,QAAQ,EAAE,QAAQ;EAClB,gBAAgB,EAAE,KAAK;EACvB,WAAW,EAAE,QAAQ;EACrB,WAAW,EAAE,GAAG;EAChB,SAAS,EAAE,IAAI;EACf,WAAW,EAAE,IAAI;EACjB,KAAK,EAAE,KAAK;EACZ,OAAO,EAAE,EAAE;EACX,MAAM,EAAE,GAAG;EACX,OAAO,EAAE,WAAW;EACpB,QAAE;IACA,KAAK,EAAE,KAAK;EAEd,UAAI;IACF,cAAc,EAAE,WAAW;;AAI/B,IAAK;EACH,KAAK,EAAC,IAAI;EACV,MAAM,EAAC,IAAI;EACX,QAAQ,EAAC,QAAQ;;AAGnB,gBAAiB;EACf,OAAO,EAAE,IAAI;;AAGf,MAAO;EACL,gBAAgB,EAAE,+BAA+B;EACjD,eAAe,EAAE,SAAS;EAC1B,MAAM,EAAE,IAAI;EACZ,KAAK,EAAE,IAAI;EACX,OAAO,EAAE,KAAK;EACd,KAAK,EAAE,KAAK;EAEV,UAAG,EAAE,GAAG;EACR,YAAK,EAAE,IAAI;;AAIf,gBAAiB;EACf,QAAQ,EAAE,QAAQ;EAClB,IAAI,EAAE,IAAI;EACV,GAAG,EAAE,IAAI;;AAGX,wBAAyB;EACvB,QAAQ,EAAE,QAAQ;EAClB,IAAI,EAAE,GAAG;EACT,GAAG,EAAE,IAAI;;AAGX,qBAAsB;EACpB,MAAM,EAAE,gBAAgB;;ACnD1B,IAAK;EACH,WAAW,EAAE,SAAS;EACtB,gBAAgB,EAAE,KAAK;;AAGzB,CAAE;EACA,eAAe,EAAE,SAAS;EAC1B,KAAK,EAAE,KAAK", 4 | "sources": ["../../../../src/styles/modules/_fonts.scss","../../../../src/styles/modules/_button.scss","../../../../src/styles/modules/_detail.scss","../../../../src/styles/modules/_embed.scss","../../../../src/styles/modules/_viewer.scss","../../../../src/styles/viewer.scss"], 5 | "names": [], 6 | "file": "viewer.css" 7 | } 8 | -------------------------------------------------------------------------------- /embed/app/static/img/cc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/cc.png -------------------------------------------------------------------------------- /embed/app/static/img/close.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/close.png 
-------------------------------------------------------------------------------- /embed/app/static/img/close_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/close_dark.png -------------------------------------------------------------------------------- /embed/app/static/img/crop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/crop.png -------------------------------------------------------------------------------- /embed/app/static/img/embed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/embed.png -------------------------------------------------------------------------------- /embed/app/static/img/embed_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/embed_small.png -------------------------------------------------------------------------------- /embed/app/static/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/logo.png -------------------------------------------------------------------------------- /embed/app/static/img/metadata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/metadata.png 
-------------------------------------------------------------------------------- /embed/app/static/img/next_grouphover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/next_grouphover.png -------------------------------------------------------------------------------- /embed/app/static/img/next_hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/next_hover.png -------------------------------------------------------------------------------- /embed/app/static/img/next_pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/next_pressed.png -------------------------------------------------------------------------------- /embed/app/static/img/next_rest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/next_rest.png -------------------------------------------------------------------------------- /embed/app/static/img/pd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/pd.png -------------------------------------------------------------------------------- /embed/app/static/img/previous_grouphover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/previous_grouphover.png 
-------------------------------------------------------------------------------- /embed/app/static/img/previous_hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/previous_hover.png -------------------------------------------------------------------------------- /embed/app/static/img/previous_pressed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/previous_pressed.png -------------------------------------------------------------------------------- /embed/app/static/img/previous_rest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/previous_rest.png -------------------------------------------------------------------------------- /embed/app/static/img/share_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/share_small.png -------------------------------------------------------------------------------- /embed/app/static/img/zoom-in.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/zoom-in.png -------------------------------------------------------------------------------- /embed/app/static/img/zoom-out.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klokantech/embedr/89311db64ae8ea0c08b39a7423530351763c6538/embed/app/static/img/zoom-out.png 
-------------------------------------------------------------------------------- /embed/app/static/js/osdregionselect.js: -------------------------------------------------------------------------------- 1 | window.osdRegionRectTool=function(a){function b(){f(!0),k.addHandler("canvas-drag",c),k.addHandler("canvas-release",d),a.onModeEnter&&a.onModeEnter()}function c(b){if(m){var c=k.viewport.pointFromPixel(b.position);h(l,c),a.onDraw&&a.onDraw()}else m=!0,l=k.viewport.pointFromPixel(b.position),g(l),a.onDrawStart&&a.onDrawStart()}function d(b){m=!1,osdImageRect=k.viewport.viewportToImageRectangle(i),canvasRect={x:parseInt(osdImageRect.x,10),y:parseInt(osdImageRect.y,10),width:parseInt(osdImageRect.width,10),height:parseInt(osdImageRect.height,10)},a.onDrawFinish&&a.onDrawFinish(canvasRect),k.removeOverlay(j)}function e(b){f(!1),k.removeHandler("canvas-drag",c),k.removeHandler("canvas-release",d),a.onModeExit&&a.onModeExit()}function f(a){a?(k.panHorizontal=!1,k.panVertical=!1):(k.panHorizontal=!0,k.panVertical=!0)}function g(a){var b=a.x,c=a.y,d=0,e=0;i=new OpenSeadragon.Rect(b,c,d,e),j=document.createElement("div"),j.className="osd-select-rectangle",k.addOverlay({element:j,location:i})}function h(a,b){var c={x:Math.min(a.x,b.x),y:Math.min(a.y,b.y)},d={x:Math.max(a.x,b.x),y:Math.max(a.y,b.y)};i.x=c.x,i.y=c.y,i.width=d.x-c.x,i.height=d.y-c.y,k.updateOverlay(j,i)}var i,j,k=(a.osd,a.viewer),l=null,m=!1;return osdRegionRectTool={enterEditMode:b,exitEditMode:e},osdRegionRectTool}; -------------------------------------------------------------------------------- /embed/app/task_queue.py: -------------------------------------------------------------------------------- 1 | """Module which provides task queue for ingest""" 2 | 3 | from celery_factory import celery_factory 4 | 5 | 6 | task_queue = celery_factory() 7 | 8 | if __name__ == '__main__': 9 | task_queue.start() 10 | -------------------------------------------------------------------------------- 
/embed/app/templates/iframe_openseadragon_inline.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | {{ item.title }} 35 | 36 | 39 | 40 | 41 | 42 |
43 |
44 | 45 | 46 | 47 | 48 | 67 | 68 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /embed/app/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |

embedhawk.klokantech.com

8 | 9 | Klokan Technologies GmbH 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /embed/app/templates/oembed_xml.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {{ data['version'] }} 4 | {{ data['type'] }} 5 | {{ data['title'] }} 6 | {{ data['html']|safe }} 7 | {{ data['author_name'] }} 8 | {{ data['author_url'] }} 9 | {{ data['provider_name'] }} 10 | {{ data['provider_url'] }} 11 | -------------------------------------------------------------------------------- /embed/app/views.py: -------------------------------------------------------------------------------- 1 | """Module which defines views - actions for url passed requests""" 2 | 3 | import sys 4 | import os 5 | import re 6 | from urlparse import urlparse 7 | import time 8 | import gzip 9 | import sqlite3 10 | import cgitb 11 | 12 | from flask import request, render_template, abort, url_for, g 13 | import simplejson as json 14 | from flask import current_app as app 15 | import bleach 16 | 17 | from iiif_manifest_factory import ManifestFactory 18 | from ingest import ingestQueue, ERR_MESSAGE_CLOUDSEARCH, ERR_MESSAGE_HTTP, ERR_MESSAGE_IMAGE, ERR_MESSAGE_S3, ERR_MESSAGE_OTHER, ERR_MESSAGE_NONE 19 | from models import Item, Task 20 | from exceptions import NoItemInDb, ErrorItemImport 21 | from helper import prepareTileSources 22 | 23 | 24 | # Tags which can be in Item description 25 | ALLOWED_TAGS = ['b', 'blockquote', 'code', 'em', 'i', 'li', 'ol', 'strong', 'ul'] 26 | 27 | # Regex for Item ID with order (of image) validation 28 | item_url_regular = re.compile(r""" 29 | ^/ 30 | (?P([-_.:~a-zA-Z0-9]){1,255}) 31 | /? 
#@app.route('/')
def index():
    """View function for index page"""

    return render_template('index.html')


# Characters which have to be replaced by an HTML entity. The entity values
# were lost when this file was extracted (every entry mapped a character to
# itself); they are restored to the standard entities here.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
}


def html_escape(text):
    """Produce entities within text.
    'text' - string to escape; characters without an entry in
             html_escape_table are copied unchanged
    """
    return "".join(html_escape_table.get(c, c) for c in text)


# NOTE(review): the route placeholders were stripped from the two decorators
# below during extraction - presumably '/<item_id>' and '/<item_id>/<order>';
# confirm against the url registration.
#@app.route('/')
#@app.route('//')
def iFrame(item_id, order=None):
    """View function for iFrame. Response with html page for zooming on item. If item has more images, particular image can be requested by order.
    'item_id' - ID of requested Item
    'order' - order of requested image in Item; when omitted, tile sources of all images are passed to the template

    Returns the rendered template, or a (message, status) tuple on error.
    """

    if order is None:
        # Marker for 'no particular image requested'.
        order = -1
    else:
        try:
            order = int(order)
        except (TypeError, ValueError, OverflowError):
            # Only conversion problems mean a wrong order; anything else
            # must not be silently turned into a 404.
            return 'Wrong item sequence', 404

        if order < 0:
            return 'Wrong item sequence', 404

    try:
        item = Item(item_id)
    except NoItemInDb as err:
        return err.message, 404
    except ErrorItemImport as err:
        return err.message, 500

    if order >= len(item.url):
        return 'Wrong item sequence', 404

    if order == -1:
        tile_sources = [prepareTileSources(item, url, count) for count, url in enumerate(item.url)]
        # The template gets the first image as the initial one.
        order = 0
    else:
        tile_sources = [prepareTileSources(item, item.url[order], order)]

    return render_template('iframe_openseadragon_inline.html', item = item, tile_sources = tile_sources, order = order)


# NOTE(review): the placeholder was stripped during extraction - presumably
# '/<item_id>/manifest.json'; confirm against the url registration.
#@app.route('//manifest.json')
def iiifMeta(item_id):
    """View function which returns IIIF manifest for particular Item
    'item_id' - ID of requested Item

    Returns (json body, 200, headers), or a (message, status) tuple when the
    Item cannot be loaded.
    """

    try:
        item = Item(item_id)
    except NoItemInDb as err:
        return err.message, 404
    except ErrorItemImport as err:
        return err.message, 500

    server_name = app.config['SERVER_NAME']
    iiif_server = app.config['IIIF_SERVER']

    fac = ManifestFactory()
    fac.set_base_metadata_uri(server_name)
    fac.set_base_metadata_dir(os.path.abspath(os.path.dirname(__file__)))
    fac.set_base_image_uri('http://%s' % iiif_server)
    fac.set_iiif_image_info(2.0, 2)

    mf = fac.manifest(ident=url_for('iiifMeta', item_id=item_id, _external=True), label=item.title)
    mf.description = item.description
    mf.license = item.license

    mf.set_metadata({"label":"Author", "value":item.creator})
    mf.set_metadata({"label":"Source", "value":item.source})
    mf.set_metadata({"label":"Institution", "value":item.institution})
    mf.set_metadata({"label":"Institution link", "value":item.institution_link})

    seq = mf.sequence(ident='http://%s/sequence/s.json' % server_name, label='Item %s - sequence 1' % item_id)

    for count, url in enumerate(item.url):
        meta = item.image_meta[url]

        # Size falls back to 1 when it is not stored for the image.
        width = meta['width'] if 'width' in meta else 1
        height = meta['height'] if 'height' in meta else 1

        cvs = seq.canvas(ident='http://%s/canvas/c%s.json' % (server_name, count), label='Item %s - image %s' % (item_id, count))
        cvs.set_hw(height, width)

        anno = cvs.annotation()

        # The first image is addressed by the bare Item ID, the other ones
        # by the Item ID followed by their order.
        if count == 0:
            filename = item_id
        else:
            filename = '%s/%s' % (item_id, count)

        img = anno.image(ident='/%s/full/full/0/native.jpg' % filename)
        img.add_service(ident='http://%s/%s' % (iiif_server, filename), context='http://iiif.io/api/image/2/context.json', profile='http://iiif.io/api/image/2/profiles/level2.json')

        img.width = width
        img.height = height

    return json.JSONEncoder().encode(mf.toJSON(top=True)), 200, {'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*'}
Format (json or xml), maxwidth and maxheight are optional.""" 177 | 178 | ### Parameters configuration ### 179 | url = request.args.get('url', None) 180 | 181 | if url is None: 182 | return 'No url parameter provided', 404 183 | 184 | format = request.args.get('format', None) 185 | 186 | if format is None: 187 | format = 'json' 188 | 189 | if format not in ('json', 'xml'): 190 | return 'The format parameter must be "json" or "xml" (or blank)', 501 191 | 192 | p_url = urlparse(url) 193 | 194 | if p_url.scheme != 'http': 195 | return 'The http scheme must be used', 404 196 | 197 | if p_url.netloc != app.config['SERVER_NAME']: 198 | return 'Only urls on the same server are allowed', 404 199 | 200 | test = item_url_regular.search(p_url.path) 201 | 202 | if test: 203 | item_id = test.group('item_id') 204 | order = test.group('order') 205 | 206 | if order == '': 207 | order = 0 208 | else: 209 | order = int(order) 210 | else: 211 | return 'Unsupported format of ID', 404 212 | 213 | ### Loading of Item from DB with testing ### 214 | try: 215 | item = Item(item_id) 216 | except NoItemInDb as err: 217 | return err.message, 404 218 | except ErrorItemImport as err: 219 | return err.message, 500 220 | 221 | if order >= len(item.url): 222 | return 'Wrong item sequence', 404 223 | 224 | ### Size of image configuration ### 225 | maxwidth = request.args.get('maxwidth', None) 226 | maxheight = request.args.get('maxheight', None) 227 | 228 | if maxwidth is not None: 229 | maxwidth = int(maxwidth) 230 | 231 | if maxheight is not None: 232 | maxheight = int(maxheight) 233 | 234 | # make a default max width of 560 235 | if maxwidth is None and maxheight is None: 236 | maxwidth = 560 237 | 238 | # Get the items width, set to -1 if not found 239 | if item.image_meta[item.url[order]].has_key('width'): 240 | width = int(item.image_meta[item.url[order]]['width']) 241 | else: 242 | width = -1 243 | 244 | # Get the items height, set to -1 if not found 245 | if 
item.image_meta[item.url[order]].has_key('height'): 246 | height = int(item.image_meta[item.url[order]]['height']) 247 | else: 248 | height = -1 249 | 250 | # Set ratio if width and height are found, otherwise assume 1:1 251 | if width != -1 and height != -1: 252 | ratio = float(width) / float(height) 253 | else: 254 | ratio = 1 255 | 256 | if width != -1: 257 | if maxwidth is not None and maxwidth < width: 258 | outwidth = maxwidth 259 | elif maxwidth > width: 260 | outwidth = maxwidth 261 | elif width > 560: 262 | outwidth = '560' 263 | else: 264 | outwidth = width 265 | else: 266 | if maxwidth is not None: 267 | outwidth = maxwidth 268 | else: 269 | outwidth = '560' 270 | 271 | if height != -1: 272 | if maxheight is not None and maxheight < height: 273 | outheight = maxheight 274 | elif maxheight > height: 275 | outheight = maxheight 276 | elif height > 560: 277 | outheight = '560' 278 | else: 279 | outheight = height 280 | else: 281 | if maxheight is not None: 282 | outheight = maxheight 283 | else: 284 | outheight = '560' 285 | 286 | size = '!%s,%s' % (outwidth, outheight) 287 | 288 | if ratio > (float(outwidth) / float(outheight)): 289 | width = outwidth 290 | height = float(outwidth) / ratio 291 | else: 292 | width = float(outheight) * ratio 293 | height = outheight 294 | 295 | ### Output finalization ### 296 | if order == 0: 297 | filename = item_id 298 | else: 299 | filename = '%s/%s' % (item_id, order) 300 | 301 | embed_code = '