├── .github └── workflows │ └── clojure.yml ├── .gitignore ├── LICENSE ├── README.md ├── project.clj ├── src └── sight │ ├── core.clj │ └── utils.clj └── test ├── resources └── dummy.pdf └── sight ├── core_test.clj └── integration_test.clj /.github/workflows/clojure.yml: -------------------------------------------------------------------------------- 1 | name: Clojure CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Install dependencies 13 | run: lein deps 14 | - name: Run tests 15 | env: 16 | API_KEY: ${{ secrets.API_KEY }} 17 | run: lein cloverage --codecov 18 | - name: Upload Test Report 19 | run: bash <(curl -s https://codecov.io/bash) 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | .hgignore 12 | .hg/ 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Clojars Project](https://img.shields.io/clojars/v/sight.svg)](https://clojars.org/sight) ![Clojure CI](https://github.com/ashwinbhaskar/sight-clojure/workflows/Clojure%20CI/badge.svg) 2 | 3 | This repository contains the official [Sight API](https://siftrics.com/) Clojure client. The Sight API is a text recognition service. 4 | 5 | # Quickstart 6 | 7 | 1. Add this project as a dependency. 8 | 9 | ### Leiningen/Boot: 10 | 11 | ``` 12 | [sight "1.1.0"] 13 | ``` 14 | 15 | ### Clojure CLI/deps.edn: 16 | 17 | ``` 18 | sight {:mvn/version "1.1.0"} 19 | ``` 20 | 21 | ### Gradle 22 | 23 | ``` 24 | compile 'sight:sight:1.1.0 25 | ``` 26 | 27 | ### Maven 28 | 29 | ``` 30 | 31 | sight 32 | sight 33 | 1.1.0 34 | 35 | ``` 36 | 37 | 2. Require or import the package. For example, add it to `:require`: 38 | 39 | ``` 40 | (ns my-namespace 41 | ... 42 | (:require [sight.core :as sight])) 43 | ``` 44 | 45 | 3. Grab an API key from the [Sight dashboard](https://siftrics.com/). 46 | 4. Create a client, passing your API key into the constructor, and recognize text: 47 | 48 | ``` 49 | (let [client (sight/->Client "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx") 50 | files ["invoice.pdf" "receipt.png"]] 51 | (sight/recognize client files)) 52 | ``` 53 | 54 | The returned value of `(sight/recognize client files)` looks like this: 55 | 56 | ``` 57 | {:pages [{:error "", 58 | :file-index 0, 59 | :page-number 1, 60 | :number-of-pages-in-file 1, 61 | :recognized-text [{:top-left-y 193, 62 | :bottom-right-y 243, 63 | :bottom-left-x 152, 64 | :top-right-x 500, 65 | :bottom-left-y 248, 66 | :top-right-y 188, 67 | :top-left-x 151, 68 | :bottom-right-x 501, 69 | :confidence 0.10092532855610954, 70 | :text "Dummy PDF file"}]}]} 71 | ``` 72 | 73 | ## Word-Level Bounding Boxes 74 | 75 | `recognize` has an additional signature with a third parameter, `word-level-bounding-boxes`. If it's `true` then word-level bounding boxes are returned instead of sentence-level bounding boxes. E.g., 76 | 77 | ``` 78 | (sight/recognize client (list "invoice.pdf" "receipt.png") true) 79 | (sight/recognize client (list "invoice.pdf" "receipt.png") true) 80 | ``` 81 | 82 | ## Streaming Results as Pages are Processed 83 | 84 | 85 | When `(sight/recognize ...)` is called, it polls a URL and aggregates the results over time. Finally, when all results are collected, the function returns. 86 | 87 | It is possible to use results immediately as they come in, instead of waiting for all results to be collected. To do this, use the `recognize-stream` function. 88 | 89 | The `recognize-stream` uses [Adam Bard's Failjure library](https://github.com/adambard/failjure): Instead of throwing exceptions, `recognize-stream` returns a `failjure/failure`. 90 | 91 | ### Example 92 | 93 | Users must define `failure-func` and `success-func` in the following code snippet: 94 | 95 | ``` 96 | (:require [failjure.core :as f]) 97 | 98 | (defn process [pages] 99 | (->> pages 100 | (map (fn [p] 101 | (if (f/failed? p) 102 | (failure-func (f/message p)) 103 | (success-func p))))) 104 | 105 | (let [client (->Client "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")] 106 | (->> (recognize-stream client (list "receipt.pdf" "invoice.png")) 107 | (run! process))) 108 | ``` 109 | 110 | ## Official API Documentation 111 | 112 | Here is the [official documentation for the Sight API](https://siftrics.com/docs/sight.html). 113 | 114 | # Apache V2 License 115 | 116 | This code is licensed under Apache V2.0. The full text of the license can be found in the "LICENSE" file. 117 | 118 | # Lead Maintainer 119 | 120 | * [Ashwin Bhaskar](https://github.com/ashwinbhaskar) 121 | 122 | # Contributors 123 | 124 | * [Siftrics Founder](https://github.com/siftrics/) 125 | * [Ashwin Bhaskar](https://github.com/ashwinbhaskar) 126 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject sight "1.1.0" 2 | :description "Official Clojure client for the Sight API, a text recognition service" 3 | :url "https://github.com/siftrics/sight-clojure" 4 | :license {:name "Apache-2.0" 5 | :url "https://www.apache.org/licenses/LICENSE-2.0.txt"} 6 | :dependencies [[org.clojure/clojure "1.10.0"] 7 | [org.clojure/core.async "1.0.567"] 8 | [org.clojure/data.json "1.0.0"] 9 | [clj-http "3.10.0"] 10 | [commons-codec/commons-codec "1.4"] 11 | [mock-clj "0.2.1"] 12 | [camel-snake-kebab "0.4.1"] 13 | [failjure "2.0.0"]] 14 | :cloverage {:fail-threshold 85} 15 | :plugins [[lein-cloverage "1.1.2"]] 16 | :deploy-repositories [["releases" :clojars] 17 | ["snapshots" :clojars]] 18 | :profiles {:dev {:resource-paths ["test/resources"]}} 19 | :repl-options {:init-ns sight.core}) 20 | -------------------------------------------------------------------------------- /src/sight/core.clj: -------------------------------------------------------------------------------- 1 | ;; Copyright © 2020 Siftrics 2 | ;; 3 | ;; Permission is hereby granted, free of charge, to any person obtaining a copy 4 | ;; of this software and associated documentation files (the "Software"), to deal 5 | ;; in the Software without restriction, including without limitation the rights 6 | ;; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | ;; copies of the Software, and to permit persons to whom the Software is 8 | ;; furnished to do so, subject to the following conditions: 9 | ;; 10 | ;; The above copyright notice and this permission notice shall be included in 11 | ;; all copies or substantial portions of the Software. 12 | ;; 13 | ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | ;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | ;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | ;; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | ;; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | ;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | ;; THE SOFTWARE. 20 | 21 | (ns sight.core 22 | (:require [clojure.data.json :as json] 23 | [clojure.java.io] 24 | [clojure.string] 25 | [clj-http.client] 26 | [sight.utils :as u] 27 | [camel-snake-kebab.core :as csk] 28 | [failjure.core :as f])) 29 | 30 | (defrecord Client [api-key]) 31 | (defrecord Result [pages]) 32 | (defrecord RecognizedPage 33 | [error fileIndex pageNumber numberOfPagesInFile recognizedText]) 34 | (defrecord RecognizedText 35 | [text confidence 36 | topLeftX topLeftY topRightX topRightY 37 | bottomLeftX bottomLeftY bottomRightX bottomRightY]) 38 | (defrecord Payload [makeSentences files]) 39 | (defrecord FileEntry [mimeType base64File]) 40 | 41 | (defn- sight-get [{api-key :api-key} polling-url throw-exception?] 42 | (let [{:keys [status body]} (clj-http.client/get polling-url 43 | {:headers {"Authorization" (str "Basic " api-key)}})] 44 | (if (= status 200) 45 | (-> body 46 | (json/read-str :key-fn csk/->kebab-case-keyword) 47 | :pages) 48 | (if throw-exception? 49 | (throw (Exception. (str "Non-200 response: " status "\n" body))) 50 | (f/fail (str "Non-200 response: " status "\n" body)))))) 51 | 52 | (defn- sight-post [{api-key :api-key} payload throw-exception?] 53 | (let [{:keys [status body]} (clj-http.client/post 54 | "https://siftrics.com/api/sight/" 55 | {:headers {"Authorization" (str "Basic " api-key)} 56 | :body (json/write-str payload) 57 | :content-type :json 58 | :socket-timeout 10000 ;; in milliseconds 59 | :connection-timeout 10000 ;; in milliseconds 60 | :accept :json})] 61 | (if (= 200 status) 62 | (-> body 63 | (json/read-str :key-fn csk/->kebab-case-keyword)) 64 | (if throw-exception? 65 | (throw (Exception. (str "Non-200 response: " status "\n" body))) 66 | (f/fail (format "Non-200 response: status %d \n body: %s" status body)))))) 67 | 68 | (defn file-path->file-entry 69 | [file-path throw-exception?] 70 | (let [mime-type (u/file-path->mime-type file-path)] 71 | (if mime-type 72 | (->FileEntry mime-type (u/file-path->base64file file-path)) 73 | (if throw-exception? 74 | (throw (Exception. "invalid file extension; must be one of \".pdf\", \".bmp\", \".gif\", \".jpeg\", \".jpg\", or \".png\"")) 75 | (f/fail "invalid file extension; must be one of \".pdf\", \".bmp\", \".gif\", \".jpeg\", \".jpg\", or \".png\""))))) 76 | 77 | (defn file-paths->file-entries 78 | [file-paths throw-exception?] 79 | (f/attempt-all [result (map #(file-path->file-entry % throw-exception?) file-paths) 80 | _ (some #(if (f/failed? %) 81 | %) 82 | result)] 83 | result)) 84 | 85 | (defn make-payload 86 | [file-paths word-level-bounding-boxes throw-exception?] 87 | (f/if-let-ok? [file-entries (file-paths->file-entries file-paths throw-exception?)] 88 | (->Payload (not word-level-bounding-boxes) 89 | file-entries))) 90 | 91 | (defn mark-page-as-seen! 92 | [{:keys [error file-index page-number number-of-pages-in-file] :as page} file-index->seen-pages results] 93 | (assoc! results :pages (conj (:pages results) page)) 94 | (if (not (clojure.string/blank? error)) 95 | (let [new-arr (make-array Boolean/TYPE 1)] 96 | (aset new-arr 0 true) 97 | (aset file-index->seen-pages file-index new-arr)) 98 | (do 99 | (when (empty? (aget file-index->seen-pages file-index)) 100 | (aset file-index->seen-pages file-index (make-array Boolean/TYPE number-of-pages-in-file))) 101 | (aset file-index->seen-pages file-index (dec page-number) true)))) 102 | 103 | (defn mark-pages-as-seen! 104 | [pages file-index->seen-pages results] 105 | (doseq [page pages] 106 | (mark-page-as-seen! page file-index->seen-pages results))) 107 | 108 | (defn seen-all-pages? 109 | [file-index->pages] 110 | (every? #(and (seq %) 111 | (every? true? %)) 112 | file-index->pages)) 113 | 114 | (defn do-poll 115 | [client polling-url num-files stream?] 116 | (let [file-index->seen-pages (make-array Boolean/TYPE num-files 0) 117 | results (transient {:pages []}) 118 | failure-count (atom 0) 119 | fetch (fn [] 120 | (Thread/sleep 500) 121 | (sight-get client polling-url false))] 122 | (if stream? 123 | (->> (repeatedly fetch) 124 | (take-while (fn [pages] 125 | (if (f/failed? pages) 126 | (= 1 (swap! failure-count inc)) 127 | (if (seen-all-pages? file-index->seen-pages) 128 | false 129 | (do 130 | (mark-pages-as-seen! pages file-index->seen-pages results) 131 | true))))) 132 | (filter (comp not empty?))) 133 | (do 134 | (while (not (seen-all-pages? file-index->seen-pages)) 135 | (let [pages (sight-get client polling-url true)] 136 | (mark-pages-as-seen! pages file-index->seen-pages results)) 137 | (Thread/sleep 500)) 138 | (persistent! results))))) 139 | 140 | (defn recognize-payload 141 | [client {:keys [polling-url recognized-text]} num-files stream?] 142 | (if polling-url 143 | (do-poll client polling-url num-files stream?) 144 | (if stream? 145 | (lazy-seq [[{:error "" 146 | :file-index 0 147 | :page-number 1 148 | :number-of-pages-in-file 1 149 | :recognized-text recognized-text}]]) 150 | {:pages [{:error "" 151 | :file-index 0 152 | :page-number 1 153 | :number-of-pages-in-file 1 154 | :recognized-text recognized-text}]}))) 155 | 156 | (defn recognize 157 | "Recognize text in the given files" 158 | ([client file-paths] (recognize client file-paths {})) 159 | ([client file-paths {:keys [stream? word-level-bounding-boxes?] :as opts}] 160 | (let [payload (make-payload file-paths word-level-bounding-boxes? true) 161 | result (sight-post client payload true)] 162 | (recognize-payload 163 | client 164 | result 165 | (count file-paths) 166 | false)))) 167 | 168 | (defn recognize-stream 169 | "Recognize text in the given files" 170 | ([client file-paths] (recognize-stream client file-paths false)) 171 | ([client file-paths word-level-bounding-boxes?] 172 | (f/attempt-all [payload (make-payload file-paths word-level-bounding-boxes? false) 173 | result (sight-post client payload false)] 174 | (recognize-payload 175 | client 176 | result 177 | (count file-paths) 178 | true)))) 179 | -------------------------------------------------------------------------------- /src/sight/utils.clj: -------------------------------------------------------------------------------- 1 | (ns sight.utils 2 | (:require [clojure.string :as s] 3 | [clojure.string :as s]) 4 | (:import 5 | org.apache.commons.codec.binary.Base64 6 | (java.io ByteArrayOutputStream))) 7 | 8 | (def ^:private extension->mime-type 9 | {:pdf "application/pdf" 10 | :bmp "image/bmp" 11 | :gif "image/gif" 12 | :jpeg "image/jpeg" 13 | :jpg "image/jpg" 14 | :png "image/png"}) 15 | 16 | (defn slurp-bytes 17 | "Slurp the bytes from a slurpable thing. 18 | https://stackoverflow.com/questions/23018870/how-to-read-a-whole-binary-file-nippy-into-byte-array-in-clojure" 19 | [x] 20 | (with-open [out (ByteArrayOutputStream.)] 21 | (clojure.java.io/copy (clojure.java.io/input-stream x) out) 22 | (.toByteArray out))) 23 | 24 | (defn file-path->base64file 25 | "https://stackoverflow.com/questions/42523024/why-is-image-corrupted-when-converted-to-base64 26 | https://stackoverflow.com/questions/23018870/how-to-read-a-whole-binary-file-nippy-into-byte-array-in-clojure" 27 | [file-path] 28 | (-> file-path 29 | slurp-bytes 30 | Base64/encodeBase64 31 | String.)) 32 | 33 | (defn file-extension [file-name] 34 | (-> (s/split file-name #"\.") 35 | last 36 | keyword)) 37 | 38 | (defn file-path->mime-type 39 | [file-path] 40 | (-> file-path 41 | file-extension 42 | extension->mime-type)) 43 | 44 | -------------------------------------------------------------------------------- /test/resources/dummy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/siftrics/sight-clojure/df23938ad566fb5d25f2bb4d4ab7bb791bb1096b/test/resources/dummy.pdf -------------------------------------------------------------------------------- /test/sight/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns sight.core-test 2 | (:require [clojure.test :refer :all] 3 | [sight.core :as core] 4 | [mock-clj.core :as m] 5 | [clojure.data.json :as json] 6 | [sight.utils :as u] 7 | [camel-snake-kebab.core :as csk] 8 | [failjure.core :as f])) 9 | 10 | (deftest sight-api-200-response-test-one-shot 11 | (testing "Should return the result when http call gives 200 status code without polling-url" 12 | (let [client (core/->Client "12345678-1234-1234-1234-123456781234")] 13 | (m/with-mock [clj-http.client/post {:status 200 14 | :body (json/write-str {"RecognizedText" [{"Text" "Invoice" 15 | "Confidence" 0.22863210084975458 16 | "TopLeftX" 395 17 | "TopLeftY" 35 18 | "TopRightX" 449 19 | "TopRightY" 35 20 | "BottomLeftX" 395 21 | "BottomLeftY" 47 22 | "BottomRightX" 449 23 | "BottomRightY" 47 24 | }]})} 25 | u/file-path->base64file "YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA=="] 26 | (let [result (core/recognize client 27 | (list "/Users/johndoe/Downloads/baz.jpg"))] 28 | (is (= {:pages [{:error "" 29 | :file-index 0 30 | :page-number 1 31 | :number-of-pages-in-file 1 32 | :recognized-text [{:top-left-y 35, 33 | :bottom-right-y 47, 34 | :bottom-left-x 395, 35 | :top-right-x 449, 36 | :bottom-left-y 47, 37 | :top-right-y 35, 38 | :top-left-x 395, 39 | :bottom-right-x 449, 40 | :confidence 0.22863210084975458, 41 | :text "Invoice"}]}]} 42 | result)) 43 | (is (= 1 44 | (m/call-count #'u/file-path->base64file))) 45 | (is (= ["/Users/johndoe/Downloads/baz.jpg"] 46 | (m/last-call #'u/file-path->base64file))) 47 | (is (= 1 48 | (m/call-count #'clj-http.client/post))) 49 | (is (= ["https://siftrics.com/api/sight/" {:headers {"Authorization" "Basic 12345678-1234-1234-1234-123456781234"} 50 | :body "{\"makeSentences\":true,\"files\":[{\"mimeType\":\"image\\/jpg\",\"base64File\":\"YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA==\"}]}" 51 | :content-type :json 52 | :socket-timeout 10000 53 | :connection-timeout 10000 54 | :accept :json}] 55 | (m/last-call #'clj-http.client/post))))))) 56 | (testing "Should return the result when response is 200 and polling url is not null" 57 | (let [client (core/->Client "12345678-1234-1234-1234-123456781234")] 58 | (m/with-mock [clj-http.client/post {:status 200 59 | :body (json/write-str {"PollingURL" "https://siftrics.com/api/sight/12345678-1234-1234-1234-123456781234"})} 60 | clj-http.client/get {:status 200 61 | :body (json/write-str {"Pages" [{"Error" "", 62 | "FileIndex" 0, 63 | "PageNumber" 1, 64 | "NumberOfPagesInFile" 1, 65 | "RecognizedText" [{"Text" "Invoice" 66 | "Confidence" 0.22863210084975458 67 | "TopLeftX" 395 68 | "TopLeftY" 35 69 | "TopRightX" 449 70 | "TopRightY" 35 71 | "BottomLeftX" 395 72 | "BottomLeftY" 47 73 | "BottomRightX" 449 74 | "BottomRightY" 47 75 | }]}]})} 76 | u/file-path->base64file "YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA=="] 77 | (let [result (core/recognize client 78 | (list "/Users/johndoe/Downloads/baz.jpg"))] 79 | (is (= {:pages [{:error "", 80 | :file-index 0, 81 | :page-number 1, 82 | :number-of-pages-in-file 1, 83 | :recognized-text [{:top-left-y 35, 84 | :bottom-right-y 47, 85 | :bottom-left-x 395, 86 | :top-right-x 449, 87 | :bottom-left-y 47, 88 | :top-right-y 35, 89 | :top-left-x 395, 90 | :bottom-right-x 449, 91 | :confidence 0.22863210084975458, 92 | :text "Invoice"}]}]} 93 | result)) 94 | (is (= 1 95 | (m/call-count #'u/file-path->base64file))) 96 | (is (= ["/Users/johndoe/Downloads/baz.jpg"] 97 | (m/last-call #'u/file-path->base64file))) 98 | (is (= 1 99 | (m/call-count #'clj-http.client/post))) 100 | (is (= ["https://siftrics.com/api/sight/" {:headers {"Authorization" "Basic 12345678-1234-1234-1234-123456781234"} 101 | :body "{\"makeSentences\":true,\"files\":[{\"mimeType\":\"image\\/jpg\",\"base64File\":\"YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA==\"}]}" 102 | :content-type :json 103 | :socket-timeout 10000 104 | :connection-timeout 10000 105 | :accept :json}] 106 | (m/last-call #'clj-http.client/post))) 107 | (is (= 1 108 | (m/call-count #'clj-http.client/get))) 109 | (is (= ["https://siftrics.com/api/sight/12345678-1234-1234-1234-123456781234" 110 | {:headers {"Authorization" "Basic 12345678-1234-1234-1234-123456781234"}}] 111 | (m/last-call #'clj-http.client/get)))))))) 112 | 113 | (deftest sight-api-200-response-test-stream 114 | (testing "Should return the result when http call gives 200 status code without polling-url when streaming is true" 115 | (let [client (core/->Client "12345678-1234-1234-1234-123456781234")] 116 | (m/with-mock [clj-http.client/post {:status 200 117 | :body (json/write-str {"RecognizedText" [{"Text" "Invoice" 118 | "Confidence" 0.22863210084975458 119 | "TopLeftX" 395 120 | "TopLeftY" 35 121 | "TopRightX" 449 122 | "TopRightY" 35 123 | "BottomLeftX" 395 124 | "BottomLeftY" 47 125 | "BottomRightX" 449 126 | "BottomRightY" 47 127 | }]})} 128 | u/file-path->base64file "YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA=="] 129 | (let [result (core/recognize-stream client 130 | (list "/Users/johndoe/Downloads/baz.jpg"))] 131 | (is (= [[{:error "" 132 | :file-index 0 133 | :page-number 1 134 | :number-of-pages-in-file 1 135 | :recognized-text [{:top-left-y 35, 136 | :bottom-right-y 47, 137 | :bottom-left-x 395, 138 | :top-right-x 449, 139 | :bottom-left-y 47, 140 | :top-right-y 35, 141 | :top-left-x 395, 142 | :bottom-right-x 449, 143 | :confidence 0.22863210084975458, 144 | :text "Invoice"}]}]] 145 | result)) 146 | (is (= 1 147 | (m/call-count #'u/file-path->base64file))) 148 | (is (= ["/Users/johndoe/Downloads/baz.jpg"] 149 | (m/last-call #'u/file-path->base64file))) 150 | (is (= 1 151 | (m/call-count #'clj-http.client/post))) 152 | (is (= ["https://siftrics.com/api/sight/" {:headers {"Authorization" "Basic 12345678-1234-1234-1234-123456781234"} 153 | :body "{\"makeSentences\":true,\"files\":[{\"mimeType\":\"image\\/jpg\",\"base64File\":\"YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA==\"}]}" 154 | :content-type :json 155 | :socket-timeout 10000 156 | :connection-timeout 10000 157 | :accept :json}] 158 | (m/last-call #'clj-http.client/post))))))) 159 | (testing "Should return the result when response is 200 and polling url is not null and streaming is true" 160 | (let [client (core/->Client "12345678-1234-1234-1234-123456781234") 161 | polling-url-call-counter (atom 0)] 162 | (m/with-mock [clj-http.client/post {:status 200 163 | :body (json/write-str {"PollingURL" "https://siftrics.com/api/sight/12345678-1234-1234-1234-123456781234"})} 164 | clj-http.client/get (fn [& _] 165 | (if (= 1 166 | (swap! polling-url-call-counter inc)) 167 | {:status 200 168 | :body (json/write-str {"Pages" [{"Error" "", 169 | "FileIndex" 0, 170 | "PageNumber" 1, 171 | "NumberOfPagesInFile" 2, 172 | "RecognizedText" [{"Text" "Invoice" 173 | "Confidence" 0.22863210084975458 174 | "TopLeftX" 395 175 | "TopLeftY" 35 176 | "TopRightX" 449 177 | "TopRightY" 35 178 | "BottomLeftX" 395 179 | "BottomLeftY" 47 180 | "BottomRightX" 449 181 | "BottomRightY" 47 182 | }]}]})} 183 | {:status 200 184 | :body (json/write-str {"Pages" [{"Error" "", 185 | "FileIndex" 0, 186 | "PageNumber" 2, 187 | "NumberOfPagesInFile" 2, 188 | "RecognizedText" [{"Text" "Hi, Hello" 189 | "Confidence" 0.22863210084975458 190 | "TopLeftX" 395 191 | "TopLeftY" 35 192 | "TopRightX" 449 193 | "TopRightY" 35 194 | "BottomLeftX" 395 195 | "BottomLeftY" 47 196 | "BottomRightX" 449 197 | "BottomRightY" 47 198 | }]}]})})) 199 | u/file-path->base64file "YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA=="] 200 | (let [result (->> (core/recognize-stream client 201 | (list "/Users/johndoe/Downloads/baz.jpg")) 202 | (mapcat identity))] 203 | (is (= [{:error "", 204 | :file-index 0, 205 | :page-number 1, 206 | :number-of-pages-in-file 2, 207 | :recognized-text [{:top-left-y 35, 208 | :bottom-right-y 47, 209 | :bottom-left-x 395, 210 | :top-right-x 449, 211 | :bottom-left-y 47, 212 | :top-right-y 35, 213 | :top-left-x 395, 214 | :bottom-right-x 449, 215 | :confidence 0.22863210084975458, 216 | :text "Invoice"}]} 217 | {:error "", 218 | :file-index 0, 219 | :page-number 2, 220 | :number-of-pages-in-file 2, 221 | :recognized-text [{:top-left-y 35, 222 | :bottom-right-y 47, 223 | :bottom-left-x 395, 224 | :top-right-x 449, 225 | :bottom-left-y 47, 226 | :top-right-y 35, 227 | :top-left-x 395, 228 | :bottom-right-x 449, 229 | :confidence 0.22863210084975458, 230 | :text "Hi, Hello"}]}] 231 | result)) 232 | (is (= 1 233 | (m/call-count #'u/file-path->base64file))) 234 | (is (= ["/Users/johndoe/Downloads/baz.jpg"] 235 | (m/last-call #'u/file-path->base64file))) 236 | (is (= 1 237 | (m/call-count #'clj-http.client/post))) 238 | (is (= ["https://siftrics.com/api/sight/" {:headers {"Authorization" "Basic 12345678-1234-1234-1234-123456781234"} 239 | :body "{\"makeSentences\":true,\"files\":[{\"mimeType\":\"image\\/jpg\",\"base64File\":\"YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA==\"}]}" 240 | :content-type :json 241 | :socket-timeout 10000 242 | :connection-timeout 10000 243 | :accept :json}] 244 | (m/last-call #'clj-http.client/post))) 245 | (is (= 3 246 | (m/call-count #'clj-http.client/get))) 247 | (is (= ["https://siftrics.com/api/sight/12345678-1234-1234-1234-123456781234" 248 | {:headers {"Authorization" "Basic 12345678-1234-1234-1234-123456781234"}}] 249 | (m/last-call #'clj-http.client/get)))))))) 250 | 251 | 252 | 253 | 254 | (deftest payload-test 255 | (testing "Payload when word bounding boxes is false" 256 | (let [client (core/->Client "12345678-1234-1234-1234-123456781234")] 257 | (m/with-mock [u/file-path->base64file "YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA==" 258 | clj-http.client/post {:status 200 259 | :body (-> {:recognized-text [{:top-left-y 35, 260 | :bottom-right-y 47, 261 | :bottom-left-x 395, 262 | :top-right-x 449, 263 | :bottom-left-y 47, 264 | :top-right-y 35, 265 | :top-left-x 395, 266 | :bottom-right-x 449, 267 | :confidence 0.22863210084975458, 268 | :text "Invoice"}]} 269 | (json/write-str :key-fn csk/->PascalCaseString))}] 270 | (is (= {:pages [{:error "", 271 | :file-index 0, 272 | :page-number 1, 273 | :number-of-pages-in-file 1, 274 | :recognized-text [{:top-left-y 35, 275 | :bottom-right-y 47, 276 | :bottom-left-x 395, 277 | :top-right-x 449, 278 | :bottom-left-y 47, 279 | :top-right-y 35, 280 | :top-left-x 395, 281 | :bottom-right-x 449, 282 | :confidence 0.22863210084975458, 283 | :text "Invoice"}]}]} 284 | (core/recognize client (list "/Users/johndoe/Downloads/baz.jpg")))) 285 | (is (= 1 286 | (m/call-count #'clj-http.client/post))) 287 | (is (= ["https://siftrics.com/api/sight/" {:headers {"Authorization" "Basic 12345678-1234-1234-1234-123456781234"} 288 | :body (json/write-str (core/make-payload (list "/Users/johndoe/Downloads/baz.jpg") 289 | false 290 | true)) 291 | :content-type :json 292 | :socket-timeout 10000 ;; in milliseconds 293 | :connection-timeout 10000 ;; in milliseconds 294 | :accept :json}] 295 | (m/last-call #'clj-http.client/post)))))) 296 | (testing "Payload when word bounding boxes is true" 297 | (let [client (core/->Client "12345678-1234-1234-1234-123456781234")] 298 | (m/with-mock [u/file-path->base64file "YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA==" 299 | clj-http.client/post {:status 200 300 | :body (-> {:recognized-text [{:top-left-y 35, 301 | :bottom-right-y 47, 302 | :bottom-left-x 395, 303 | :top-right-x 449, 304 | :bottom-left-y 47, 305 | :top-right-y 35, 306 | :top-left-x 395, 307 | :bottom-right-x 449, 308 | :confidence 0.22863210084975458, 309 | :text "Invoice"}]} 310 | (json/write-str :key-fn csk/->PascalCaseString))}] 311 | (is (= {:pages [{:error "", 312 | :file-index 0, 313 | :page-number 1, 314 | :number-of-pages-in-file 1, 315 | :recognized-text [{:top-left-y 35, 316 | :bottom-right-y 47, 317 | :bottom-left-x 395, 318 | :top-right-x 449, 319 | :bottom-left-y 47, 320 | :top-right-y 35, 321 | :top-left-x 395, 322 | :bottom-right-x 449, 323 | :confidence 0.22863210084975458, 324 | :text "Invoice"}]}]} 325 | (core/recognize client (list "/Users/johndoe/Downloads/baz.jpg") {:word-level-bounding-boxes? true}))) 326 | (is (= 1 327 | (m/call-count #'clj-http.client/post))) 328 | (is (= ["https://siftrics.com/api/sight/" {:headers {"Authorization" "Basic 12345678-1234-1234-1234-123456781234"} 329 | :body (json/write-str (core/make-payload (list "/Users/johndoe/Downloads/baz.jpg") 330 | true 331 | true)) 332 | :content-type :json 333 | :socket-timeout 10000 ;; in milliseconds 334 | :connection-timeout 10000 ;; in milliseconds 335 | :accept :json}] 336 | (m/last-call #'clj-http.client/post))))))) 337 | 338 | (deftest unsupported-file-extension-test 339 | (testing "Should throw an exception when the given file-paths have an unsupported extension with one-shot" 340 | (let [client (core/->Client "12345678-1234-1234-1234-123456781234")] 341 | (m/with-mock [clj-http.client/post {:status 200 342 | :body {:message "success"}}] 343 | (try 344 | (core/recognize client 345 | (list "/Users/johndoe/Downloads/baz.qux" "/Users/johndoe/Downloads/bax.jpg")) 346 | (is false "should not reach this line") 347 | (catch Exception e 348 | (is (= "invalid file extension; must be one of \".pdf\", \".bmp\", \".gif\", \".jpeg\", \".jpg\", or \".png\"" 349 | (.getMessage e))) 350 | (is (not (m/called? #'clj-http.client/post)))))) 351 | (m/with-mock [clj-http.client/post {:status 200 352 | :body {:message "success"}}] 353 | (try 354 | (core/recognize client 355 | (list "/Users/johndoe/Downloads/baz.mp4" "/Users/johndoe/Downloads/bax.mp3")) 356 | (is false "should not reach this line") 357 | (catch Exception e 358 | (is (= "invalid file extension; must be one of \".pdf\", \".bmp\", \".gif\", \".jpeg\", \".jpg\", or \".png\"" 359 | (.getMessage e))) 360 | (is (not (m/called? #'clj-http.client/post)))))))) 361 | (testing "Should throw an exception when the given file-paths have an unsupported extension with streaming" 362 | (let [client (core/->Client "12345678-1234-1234-1234-123456781234")] 363 | (m/with-mock [clj-http.client/post {:status 200 364 | :body {:message "success"}}] 365 | (is (= "invalid file extension; must be one of \".pdf\", \".bmp\", \".gif\", \".jpeg\", \".jpg\", or \".png\"" 366 | (-> (core/recognize-stream client 367 | (list "/Users/johndoe/Downloads/baz.qux" "/Users/johndoe/Downloads/bax.jpg")) 368 | f/message))) 369 | (is (not (m/called? #'clj-http.client/post)))) 370 | (m/with-mock [clj-http.client/post {:status 200 371 | :body {:message "success"}}] 372 | (is (= "invalid file extension; must be one of \".pdf\", \".bmp\", \".gif\", \".jpeg\", \".jpg\", or \".png\"" 373 | (-> (core/recognize-stream client 374 | (list "/Users/johndoe/Downloads/baz.mp4" "/Users/johndoe/Downloads/bax.mp3")) 375 | f/message))) 376 | (is (not (m/called? #'clj-http.client/post))))))) 377 | 378 | (deftest sight-api-non-200-response-streaming-test 379 | (testing "Should get a failure when GET PollingUrl gives a non 200 response with streaming true" 380 | (let [client (core/->Client "12345678-1234-1234-1234-123456781234") 381 | polling-url-call-counter (atom 0)] 382 | (m/with-mock [clj-http.client/post {:status 200 383 | :body (json/write-str {"PollingURL" "https://siftrics.com/api/sight/12345678-1234-1234-1234-123456781234"})} 384 | clj-http.client/get (fn [& _] 385 | (if (= 1 386 | (swap! polling-url-call-counter inc)) 387 | {:status 200 388 | :body (json/write-str {"Pages" [{"Error" "", 389 | "FileIndex" 0, 390 | "PageNumber" 1, 391 | "NumberOfPagesInFile" 2, 392 | "RecognizedText" [{"Text" "Invoice" 393 | "Confidence" 0.22863210084975458 394 | "TopLeftX" 395 395 | "TopLeftY" 35 396 | "TopRightX" 449 397 | "TopRightY" 35 398 | "BottomLeftX" 395 399 | "BottomLeftY" 47 400 | "BottomRightX" 449 401 | "BottomRightY" 47 402 | }]}]})} 403 | {:status 500 404 | :body "Internal Server Error"})) 405 | u/file-path->base64file "YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA=="] 406 | (let [result (core/recognize-stream client 407 | (list "/Users/johndoe/Downloads/baz.jpg"))] 408 | (is (= [{:error "", 409 | :file-index 0, 410 | :page-number 1, 411 | :number-of-pages-in-file 2, 412 | :recognized-text [{:top-left-y 35, 413 | :bottom-right-y 47, 414 | :bottom-left-x 395, 415 | :top-right-x 449, 416 | :bottom-left-y 47, 417 | :top-right-y 35, 418 | :top-left-x 395, 419 | :bottom-right-x 449, 420 | :confidence 0.22863210084975458, 421 | :text "Invoice"}]}] 422 | (first result))) 423 | (is (= "Non-200 response: 500\nInternal Server Error" 424 | (-> result 425 | second 426 | f/message)))))))) 427 | 428 | (deftest sight-api-non-200-response-one-shot-test 429 | (testing "Should throw an exception when the response status code is non 200" 430 | (let [client (core/->Client "12345678-1234-1234-1234-123456781234")] 431 | (m/with-mock [clj-http.client/post {:status 401 432 | :body {:message "you are not authorized to use this api"}} 433 | u/file-path->base64file "YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA=="] 434 | (try 435 | (core/recognize client 436 | (list "/Users/johndoe/Downloads/baz.jpg" "/Users/johndoe/Downloads/bax.jpg") 437 | false) 438 | (is false "should not reach this line") 439 | (catch Exception e 440 | (is (= "Non-200 response: 401\n{:message \"you are not authorized to use this api\"}" 441 | (.getMessage e))) 442 | (is (= 1 443 | (m/call-count #'clj-http.client/post))) 444 | (is (= ["https://siftrics.com/api/sight/" {:accept :json 445 | :body "{\"makeSentences\":true,\"files\":[{\"mimeType\":\"image\\/jpg\",\"base64File\":\"YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA==\"},{\"mimeType\":\"image\\/jpg\",\"base64File\":\"YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA==\"}]}" 446 | :connection-timeout 10000 447 | :content-type :json 448 | :headers {"Authorization" "Basic 12345678-1234-1234-1234-123456781234"} 449 | :socket-timeout 10000}] 450 | (m/last-call #'clj-http.client/post)))))) 451 | (m/with-mock [clj-http.client/post {:status 500 452 | :body {:message "Internal server error"}} 453 | u/file-path->base64file "YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA=="] 454 | (try 455 | (core/recognize client 456 | (list "/Users/johndoe/Downloads/baz.jpg" "/Users/johndoe/Downloads/bax.jpg")) 457 | (is false "should not reach this line") 458 | (catch Exception e 459 | (is (= "Non-200 response: 500\n{:message \"Internal server error\"}" 460 | (.getMessage e))) 461 | (is (= 1 462 | (m/call-count #'clj-http.client/post))) 463 | (is (= ["https://siftrics.com/api/sight/" {:accept :json 464 | :body "{\"makeSentences\":true,\"files\":[{\"mimeType\":\"image\\/jpg\",\"base64File\":\"YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA==\"},{\"mimeType\":\"image\\/jpg\",\"base64File\":\"YWxzZGtmanNhbGtkZmogYWxzZGtmamFzbGtkZmphc2xka2ZqYXNkbGtmaiBhbHNrZGZqbHNhZA==\"}]}" 465 | :connection-timeout 10000 466 | :content-type :json 467 | :headers {"Authorization" "Basic 12345678-1234-1234-1234-123456781234"} 468 | :socket-timeout 10000}] 469 | (m/last-call #'clj-http.client/post))))))))) 470 | -------------------------------------------------------------------------------- /test/sight/integration_test.clj: -------------------------------------------------------------------------------- 1 | (ns sight.integration-test 2 | (:require [clojure.test :refer :all] 3 | [sight.core :as core] 4 | [clojure.java.io :as io])) 5 | 6 | (deftest test-sanity 7 | (testing "Should recognize all the words with one shot and streaming" 8 | (let [client (core/->Client (System/getenv "API_KEY")) 9 | resource (io/resource "dummy.pdf") 10 | files (list (-> resource 11 | .getPath))] 12 | (is (= "Dummy PDF file" 13 | (-> (core/recognize client files) 14 | :pages 15 | first 16 | :recognized-text 17 | first 18 | :text))) 19 | (is (= "Dummy PDF file" 20 | (->> (core/recognize-stream client files) 21 | (mapcat identity) 22 | first 23 | :recognized-text 24 | first 25 | :text)))))) 26 | --------------------------------------------------------------------------------