├── .dockerignore ├── .formatter.exs ├── .github ├── dependabot.yml └── workflows │ ├── ci.yml │ └── fly.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── _comparison ├── .gitignore ├── README.md ├── annotations │ └── captions_val2014.json ├── coco_dataset │ ├── 10707.jpg │ ├── 117014.jpg │ ├── 154830.jpg │ ├── 168797.jpg │ ├── 169250.jpg │ ├── 179187.jpg │ ├── 185546.jpg │ ├── 192554.jpg │ ├── 193021.jpg │ ├── 200058.jpg │ ├── 222343.jpg │ ├── 235642.jpg │ ├── 249295.jpg │ ├── 256470.jpg │ ├── 26518.jpg │ ├── 274947.jpg │ ├── 306437.jpg │ ├── 322762.jpg │ ├── 323507.jpg │ ├── 335099.jpg │ ├── 33650.jpg │ ├── 338847.jpg │ ├── 342958.jpg │ ├── 345125.jpg │ ├── 34657.jpg │ ├── 354572.jpg │ ├── 356828.jpg │ ├── 35807.jpg │ ├── 368242.jpg │ ├── 391255.jpg │ ├── 404642.jpg │ ├── 4069.jpg │ ├── 406932.jpg │ ├── 407646.jpg │ ├── 424293.jpg │ ├── 425361.jpg │ ├── 432146.jpg │ ├── 488573.jpg │ ├── 499989.jpg │ ├── 515904.jpg │ ├── 519094.jpg │ ├── 527040.jpg │ ├── 540775.jpg │ ├── 547617.jpg │ ├── 552775.jpg │ ├── 64425.jpg │ ├── 75076.jpg │ ├── 87956.jpg │ ├── 90366.jpg │ ├── 93437.jpg │ ├── blip-image-captioning-base_results.csv │ ├── blip-image-captioning-large_results.csv │ ├── captions.csv │ └── resnet-50_results.csv ├── coco_download.ipynb ├── manage_models.exs ├── metrics.ipynb ├── models │ └── README.md ├── requirements.txt └── run.exs ├── assets ├── css │ └── app.css ├── js │ ├── app.js │ └── micro.js ├── package-lock.json ├── package.json ├── pnpm-lock.yaml ├── tailwind.config.js └── vendor │ └── topbar.js ├── config ├── config.exs ├── dev.exs ├── prod.exs ├── runtime.exs └── test.exs ├── coveralls.json ├── deployment.md ├── fly.toml ├── hnswlib ├── README.md └── hnwslib.exs ├── lib ├── app.ex ├── app │ ├── application.ex │ ├── hnswlib_index.ex │ ├── image.ex │ ├── knn_index.ex │ ├── models.ex │ ├── release.ex │ └── repo.ex ├── app_web.ex └── app_web │ ├── components │ ├── core_components.ex │ ├── layouts.ex │ ├── layouts │ │ ├── app.html.heex │ │ └── root.html.heex │ └── spinner.ex │ ├── endpoint.ex │ ├── live │ ├── page_live.ex │ └── page_live.html.heex │ ├── router.ex │ └── telemetry.ex ├── mix.exs ├── mix.lock ├── priv ├── repo │ ├── migrations │ │ ├── .formatter.exs │ │ ├── 20231204092441_create_images.exs │ │ ├── 20240122151818_add_idx.exs │ │ └── 20240123153049_create_table_hnswlib_index.exs │ └── seeds.exs └── static │ ├── audio │ └── itwillallbeok.mp3 │ ├── favicon.ico │ ├── images │ ├── 8mb_image.jpeg │ ├── corrupted.jpg │ ├── empty.jpg │ ├── phoenix.jpg │ ├── phoenix.png │ ├── phoenix.xyz │ ├── spinner.svg │ ├── test.png │ ├── test2.png │ ├── test_404.webp │ └── test_integrity.png │ ├── robots.txt │ └── uploads │ ├── indexes_empty.bin │ ├── indexes_gen_test_1.bin │ ├── indexes_gen_test_2.bin │ └── indexes_gen_test_3.bin ├── rel ├── env.bat.eex ├── env.sh.eex ├── overlays │ └── bin │ │ ├── migrate │ │ ├── migrate.bat │ │ ├── server │ │ └── server.bat ├── remote.vm.args.eex └── vm.args.eex └── test ├── app └── image_test.exs ├── app_web └── live │ └── page_live_test.exs ├── support ├── conn_case.ex ├── data_case.ex ├── supervisor_support.ex └── upload_support.ex └── test_helper.exs /.dockerignore: -------------------------------------------------------------------------------- 1 | # This file excludes paths from the Docker build context. 2 | # 3 | # By default, Docker's build context includes all files (and folders) in the 4 | # current directory. Even if a file isn't copied into the container it is still sent to 5 | # the Docker daemon. 
6 | # 7 | # There are multiple reasons to exclude files from the build context: 8 | # 9 | # 1. Prevent nested folders from being copied into the container (ex: exclude 10 | # /assets/node_modules when copying /assets) 11 | # 2. Reduce the size of the build context and improve build time (ex. /build, /deps, /doc) 12 | # 3. Avoid sending files containing sensitive information 13 | # 14 | # More information on using .dockerignore is available here: 15 | # https://docs.docker.com/engine/reference/builder/#dockerignore-file 16 | 17 | .dockerignore 18 | 19 | # Ignore git, but keep git HEAD and refs to access current commit hash if needed: 20 | # 21 | # $ cat .git/HEAD | awk '{print ".git/"$2}' | xargs cat 22 | # d0b8727759e1e0e7aa3d41707d12376e373d5ecc 23 | .git 24 | !.git/HEAD 25 | !.git/refs 26 | 27 | # Common development/test artifacts 28 | /cover/ 29 | /doc/ 30 | /test/ 31 | /tmp/ 32 | .elixir_ls 33 | 34 | # Mix artifacts 35 | /_build/ 36 | /deps/ 37 | *.ez 38 | 39 | # Generated on crash by the VM 40 | erl_crash.dump 41 | 42 | # Static artifacts - These should be fetched and built inside the Docker image 43 | /assets/node_modules/ 44 | /priv/static/assets/ 45 | /priv/static/cache_manifest.json 46 | -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | [ 2 | import_deps: [:phoenix], 3 | plugins: [Phoenix.LiveView.HTMLFormatter], 4 | inputs: ["*.{heex,ex,exs}", "{config,lib,test,_comparison}/**/*.{heex,ex,exs}"] 5 | ] 6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: mix 4 | directory: "/" 5 | schedule: 6 | interval: monthly 7 | time: "07:00" 8 | timezone: Europe/London 9 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Elixir CI 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | name: Build and test 12 | runs-on: ubuntu-latest 13 | services: 14 | postgres: 15 | image: postgres:12 16 | ports: ['5432:5432'] 17 | env: 18 | POSTGRES_PASSWORD: postgres 19 | options: >- 20 | --health-cmd pg_isready 21 | --health-interval 10s 22 | --health-timeout 5s 23 | --health-retries 5 24 | 25 | steps: 26 | - uses: actions/checkout@v2 27 | - name: Set up Elixir 28 | uses: erlef/setup-beam@v1 29 | with: 30 | elixir-version: '1.17.3' # Define the elixir version [required] 31 | otp-version: '27.1.2' # Define the OTP version [required] 32 | 33 | - name: Installing ffmpeg 34 | uses: FedericoCarboni/setup-ffmpeg@v3 35 | 36 | # Needed because of `ubuntu-latest` migration removed `libmagic-dev`. 
37 | # https://www.yellowduck.be/posts/github-actions-will-update-ubuntu-latest-to-ubuntu-24-04?t 38 | - name: Install libmagic-dev 39 | run: sudo apt-get update && sudo apt-get install -y libmagic-dev 40 | 41 | - name: Restore dependencies cache 42 | uses: actions/cache@v4 43 | with: 44 | path: deps 45 | key: ${{ runner.os }}-mix-${{ hashFiles('**/mix.lock') }} 46 | restore-keys: ${{ runner.os }}-mix- 47 | 48 | - name: Install dependencies 49 | run: mix deps.get 50 | 51 | - name: Run Tests 52 | run: mix coveralls.json 53 | env: 54 | MIX_ENV: test 55 | 56 | - name: Upload coverage to Codecov 57 | uses: codecov/codecov-action@v1 58 | with: 59 | token: ${{ secrets.CODECOV_TOKEN }} 60 | -------------------------------------------------------------------------------- /.github/workflows/fly.yml: -------------------------------------------------------------------------------- 1 | # Continuous Deployment to Fly.io 2 | # https://fly.io/docs/app-guides/continuous-deployment-with-github-actions/ 3 | #name: Fly Deploy 4 | #on: 5 | # push: 6 | # branches: 7 | # - main 8 | 9 | #jobs: 10 | # deploy: 11 | # name: Deploy app 12 | # runs-on: ubuntu-latest 13 | # steps: 14 | # - uses: actions/checkout@v3 15 | # - uses: superfly/flyctl-actions/setup-flyctl@master 16 | # 17 | # # Runs the flyctl deploy command 18 | # - run: flyctl deploy --remote-only 19 | # env: 20 | # FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where 3rd-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore DB dumps 23 | *.db 24 | 25 | # Temporary files, for example, from tests. 26 | /tmp/ 27 | 28 | # Ignore package tarball (built via "mix hex.build"). 29 | app-*.tar 30 | 31 | # Ignore assets that are produced by build tools. 32 | /priv/static/assets/ 33 | 34 | # Ignore digested assets cache. 35 | /priv/static/cache_manifest.json 36 | 37 | # In case you use Node.js/npm, you want to ignore these. 38 | npm-debug.log 39 | /assets/node_modules/ 40 | 41 | # VSCode 42 | .vscode 43 | 44 | # Bumblebee model directory 45 | .bumblebee/* 46 | .elixir_ls 47 | 48 | # KNN index direcotry 49 | priv/static/uploads/indexes.bin 50 | 51 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Find eligible builder and runner images on Docker Hub. We use Ubuntu/Debian 2 | # instead of Alpine to avoid DNS resolution issues in production. 
3 | # 4 | # https://hub.docker.com/r/hexpm/elixir/tags?page=1&name=ubuntu 5 | # https://hub.docker.com/_/ubuntu?tab=tags 6 | # 7 | # This file is based on these images: 8 | # 9 | # - https://hub.docker.com/r/hexpm/elixir/tags - for the build image 10 | # - https://hub.docker.com/_/debian?tab=tags&page=1&name=bullseye-20231009-slim - for the release image 11 | # - https://pkgs.org/ - resource for finding needed packages 12 | # - Ex: hexpm/elixir:1.15.7-erlang-26.0.2-debian-bullseye-20231009-slim 13 | # 14 | ARG ELIXIR_VERSION=1.16.2 15 | ARG OTP_VERSION=25.3.2.10 16 | ARG DEBIAN_VERSION=bullseye-20240130 17 | 18 | ARG BUILDER_IMAGE="hexpm/elixir:${ELIXIR_VERSION}-erlang-${OTP_VERSION}-debian-${DEBIAN_VERSION}" 19 | ARG RUNNER_IMAGE="debian:${DEBIAN_VERSION}" 20 | 21 | FROM ${BUILDER_IMAGE} as builder 22 | 23 | # install build dependencies (and curl for EXLA) 24 | RUN apt-get update -y && apt-get install -y build-essential git curl -y libmagic-dev && \ 25 | curl -sL https://deb.nodesource.com/setup_18.x | bash - && \ 26 | apt-get install -y nodejs && \ 27 | apt-get clean && rm -f /var/lib/apt/lists/*_* && \ 28 | node --version && \ 29 | npm --version 30 | 31 | RUN npm install -g pnpm 32 | 33 | # prepare build dir 34 | WORKDIR /app 35 | 36 | # install hex + rebar 37 | RUN mix local.hex --force && \ 38 | mix local.rebar --force 39 | 40 | # set build ENV 41 | ENV MIX_ENV="prod" 42 | 43 | # install mix dependencies 44 | COPY mix.exs mix.lock ./ 45 | RUN mix deps.get --only $MIX_ENV 46 | RUN mkdir config 47 | 48 | # copy compile-time config files before we compile dependencies 49 | # to ensure any relevant config change will trigger the dependencies 50 | # to be re-compiled. 51 | COPY config/config.exs config/${MIX_ENV}.exs config/ 52 | RUN mix deps.compile 53 | 54 | COPY priv priv 55 | 56 | COPY lib lib 57 | 58 | COPY assets assets 59 | 60 | # Install dependencies for assets folder 61 | # RUN cd assets && pnpm install 62 | RUN pnpm install --prefix assets 63 | 64 | # compile assets 65 | RUN mix assets.deploy 66 | 67 | # Compile the release 68 | RUN mix compile 69 | 70 | # Changes to config/runtime.exs don't require recompiling the code 71 | COPY config/runtime.exs config/ 72 | 73 | COPY rel rel 74 | RUN mix release 75 | 76 | # start a new build stage so that the final image will only contain 77 | # the compiled release and other runtime necessities 78 | FROM ${RUNNER_IMAGE} 79 | 80 | RUN apt-get update -y && \ 81 | apt-get install -y libstdc++6 openssl libncurses5 locales ca-certificates libmagic-dev\ 82 | && apt-get clean && rm -f /var/lib/apt/lists/*_* 83 | 84 | # Set the locale 85 | RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen 86 | 87 | ENV LANG en_US.UTF-8 88 | ENV LANGUAGE en_US:en 89 | ENV LC_ALL en_US.UTF-8 90 | 91 | WORKDIR "/app" 92 | RUN chown nobody /app 93 | 94 | # set runner ENV 95 | ENV MIX_ENV="prod" 96 | 97 | # Only copy the final release from the build stage 98 | COPY --from=builder --chown=nobody:root /app/_build/${MIX_ENV}/rel/app /app 99 | 100 | USER nobody 101 | 102 | # If using an environment that doesn't automatically reap zombie processes, it is 103 | # advised to add an init process such as tini via `apt-get install` 104 | # above and adding an entrypoint. 
See https://github.com/krallin/tini for details 105 | # ENTRYPOINT ["/tini", "--"] 106 | 107 | # Set the runtime ENV 108 | ENV ECTO_IPV6="true" 109 | ENV ERL_AFLAGS="-proto_dist inet6_tcp" 110 | 111 | CMD ["/app/bin/server"] 112 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 
58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 
176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 
287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /_comparison/.gitignore: -------------------------------------------------------------------------------- 1 | /models/*/* -------------------------------------------------------------------------------- /_comparison/README.md: -------------------------------------------------------------------------------- 1 | # Benchmark comparison between `Bumblebee` models 2 | 3 | The following table compares `Bumblebee's` image captioning models 4 | that are supported. 5 | 6 | Feel free to run `Bumblebee`-supported models 7 | and add them to this table! 8 | At the time of writing, 9 | `Bumblebee` doesn't support a lot of 10 | image captioning models. 11 | So any contribution is welcome! 
🙏 12 | 13 | > [!NOTE] 14 | > 15 | > While `ResNet-5` leans more into image classification 16 | > (it returns a list of prediction keywords/tokens), 17 | > we're adding it to the table for simplicity sake. 18 | 19 | 20 | 21 | | Model | ROUGE-1 | ROUGE-2 | ROUGE-L | BLEU | METEOR | Word Error Rate | Time (s) | Size (in Mb) | 22 | |:----------------------------|----------:|----------:|----------:|--------:|---------:|------------------:|-----------:|---------------:| 23 | | blip-image-captioning-base | 0.6 | 0.36364 | 0.57983 | 20.0762 | 0.45953 | 0.58333 | 4.16365 | 990.6 | 24 | | blip-image-captioning-large | 0.59167 | 0.33333 | 0.55844 | 19.0449 | 0.53777 | 0.72381 | 11.878 | 1879.9 | 25 | | resnet-50 | 0 | 0 | 0 | 0 | 0.03953 | 1 | 0.32517 | 102.6 | 26 | 27 | 28 | > [!NOTE] 29 | > 30 | > All the values in the table above are the median values of 31 | > [**50 COCO Dataset images**](https://cocodataset.org/#home) 32 | > (for statistical relevance), 33 | > with the models being run 34 | > on an Apple M1 Pro with 16GB of memory. 35 | 36 |
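If you want to see how a cell of this table comes about, the aggregation is simply a median over the per-image values in a model's `*_results.csv` file (these files are generated in section 2 below). Here is a minimal sketch, assuming `pandas` is installed in the Python environment set up later in this guide, that recomputes the `Time (s)` cell for the base BLIP model; the metric columns are likewise medians of per-image scores.

```python
# Minimal sketch (run from the `_comparison` folder): recompute the `Time (s)`
# cell for one model from its per-image results.
import pandas as pd

results = pd.read_csv("coco_dataset/blip-image-captioning-base_results.csv")

# The table reports the median over the 50 COCO images, converted from
# microseconds to seconds.
median_time_s = (results["time_in_microseconds"] / 1_000_000).median()
print(f"Median inference time: {median_time_s:.5f} s")
```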
37 |
38 | 39 | ---- 40 | 41 |
42 |
43 | 44 | In this guide, 45 | we will walk you through on benchmarking 46 | some image captioning models 47 | that `Bumblebee` offers. 48 | 49 | If you've followed the repo's main [`README.md`](../README.md), 50 | you'll probably have an idea that some models work better than others. 51 | In this guide, we'll provide a more *heuristic* representation of this, 52 | and help you create performance metrics on some of these models. 53 | 54 | For this, we'll be using 55 | the [**COCO Dataset**](https://cocodataset.org/#home), 56 | one of the largest open-source object detection, segmentation 57 | and captioning dataset, 58 | widely used for training, evaluating and testing models. 59 | We'll be using these captioned images 60 | to compare the yielded results of the `Bumblebee` models 61 | in `Elixir` 62 | and perform some metric evaluation. 63 | 64 | 65 | > [!NOTE] 66 | > 67 | > In the `coco_dataset` folder, 68 | > we've already retrieved 50 random images and their respective captions. 69 | > However, we'll guide you through getting these your own 70 | > if you want different images. 71 | 72 | 73 | # A quick overview of the contents of this folder 74 | 75 | You may be overwhelmed with the files that are in this folder. 76 | But don't be! 77 | We'll explain what each one does. 78 | 79 | - the `run.exs` and `manage_models.exs` 80 | are `Elixir` files that we'll use **to run our machine learning models**. 81 | The models are cached in the `models` folder. 82 | - the `coco_download.ipynb` file is a `Jupyter Notebook` file 83 | that **will allow you to download images from the [COCO Dataset](https://cocodataset.org/#home)**. 84 | You don't need to run this file because 85 | we've already downloaded 50 images and their captions 86 | for you beforehand - 87 | these images are located in the `coco_dataset` folder. 88 | However, if you *do want* to run this file, 89 | you need the `annotations` folder. 90 | This folder simply stores information of the captions 91 | of the images from the dataset, 92 | so you don't need to worry about it 🙂. 93 | - the `metrics.ipynb` file is also a `Jupyter Notebook` file 94 | that will process the results and create a table 95 | comparing each model. 96 | The notebook is documented with the metrics being used, 97 | so you can refer to it if you're confused 98 | on what each metric means! 99 | 100 | 101 | ## 0. Prerequisites 102 | 103 | Before starting, we need to set up our dev environment. 104 | We are going to: 105 | 106 | - execute the models **in `Elixir`**. 107 | - perform metric evaluation **in `Python`**. 108 | 109 | We're assuming you already have the `Elixir` environment set up 110 | after implementing the application. 111 | So we'll focus on getting the `Python` env set up 😃. 112 | 113 | First, install [**`Anaconda`**](https://www.anaconda.com/download). 114 | This program will allow us to create *virtual environments* 115 | in `Python`, each one being contained with their own dependencies. 116 | We will run our `Python` scripts 117 | (we'll use [`Jupyter Notebooks`](https://jupyter.org/), 118 | so it's preferred you install it or use `Visual Studio Code` 119 | to work with these) 120 | inside these environments. 121 | 122 | After installing `Anaconda`, 123 | we'll be able to run `conda` command in our terminal! 🎉 124 | 125 | 126 | ## 0.1 Create a virtual environment 127 | 128 | Let's create our virtual env. 129 | In your terminal, run: 130 | 131 | ```sh 132 | conda create -n --file requirements.txt 133 | ``` 134 | 135 | In ``, write any name you want. 
136 | In our case, we'll type `comparison`. 137 | The `--file` argument allows us to pass a `requirements.txt` file 138 | with a list of dependencies to install. 139 | We've provided this file inside [this folder](./requirements.txt). 140 | 141 | Install the needed dependencies when prompted 142 | by typing `Y`. 143 | After this, the env will be created. 144 | 145 | To *enter the env*, type: 146 | 147 | ```sh 148 | conda activate comparison 149 | ``` 150 | 151 | After this, 152 | you'll have entered in your newly created virtual env 153 | and now you can run commands within it! 154 | Every `Python` dependency you install 155 | will be made available **only in this virtual env**, 156 | not outside. 157 | 158 | 159 | ## 0.2 Running `Jupyter Notebooks` inside virtual env 160 | 161 | If you've installed `Jupyter Notebooks`, 162 | as long as you run it inside this virtual environment through the terminal, 163 | all its dependencies will be available inside the notebook. 164 | 165 | If you are using `Visual Studio Code`, 166 | when opening a file `.ipynb` (a `Jupyter Notebook` file), 167 | you will be able to choose the virtual env on the right side. 168 | You can choose the one you've created 169 | (in the following image, 170 | we've named our env `"cocodataset"`). 171 | 172 |

173 | 174 |

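If you are unsure whether the notebook really picked up the environment you created, a quick sanity check (plain Python, no extra packages) is to print the interpreter path from a cell; it should point inside your conda env (e.g. a path containing `envs/comparison`).

```python
# Sanity check: the kernel's interpreter should live inside the conda env
# you created (e.g. .../envs/comparison/bin/python).
import sys
print(sys.executable)
```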
175 | 176 | And that's it! 177 | You're ready to execute `Python` inside `Juypter Notebooks` 178 | to perform some metric evaluation! 179 | 180 | Let's kick this off! 🏃‍♂️ 181 | 182 | 183 | ## 1. *(Optional)* Downloading the COCO dataset images 184 | 185 | > [!NOTE] 186 | > 187 | > This section is entirely optional. 188 | > We've already provided images and captions 189 | > from the **COCO dataset** inside the `coco_dataset` folder. 190 | > 191 | > This chapter is only relevant to those 192 | > that want to experiment with *other* images. 193 | 194 | The COCO dataset can be accessed through 195 | [`cocoapi`](https://github.com/cocodataset/cocoapi). 196 | However, to get the images with their respective captions 197 | you have to put in some work. 198 | 199 | This process is a bit convoluted. 200 | In fact, you'd have to download the original 201 | `cocoapi` repo, 202 | create folders with the images and annotations 203 | and then run `make` to install the packages needed 204 | (more information on https://github.com/sliao-mi-luku/Image-Captioning#dataset). 205 | 206 | With this is mind, 207 | we've **simplified this process** and 208 | provided [`coco_download.ipynb`](./coco_download.ipynb) 209 | so you can fetch a random image and caption 210 | and download 50 different random images, 211 | if you want. 212 | 213 | Each executable block is documented, 214 | so you know what it does exactly. 215 | Don't forget: to use this, you will need to 216 | **run the notebook in the environment you've created**, 217 | since it has all the dependencies needed to run the notebook. 218 | 219 | > [!TIP] 220 | > 221 | > We are using the dataset from 2014 because 222 | > it provides a good variety of images. 223 | > However, if you want to experiment with 224 | > their other datasets, 225 | > you may do so in https://cocodataset.org/#download. 226 | > 227 | > As long as you're choosing a dataset that has caption annotations, 228 | > our `Jupyter Notebook` will work. 229 | 230 | 231 | # 2. Run `run.exs` 232 | 233 | The `run.exs` file is a standalone 234 | [`Elixir` script file ](https://thinkingelixir.com/2019-04-running-an-elixir-file-as-a-script/) 235 | that you can execute to make predictions 236 | based on any `Bumblebee`-supported model you want. 237 | 238 | To run the file, 239 | simply execute the following command: 240 | 241 | ```sh 242 | elixir run.exs 243 | ``` 244 | 245 | When you run this command, 246 | a `.csv` file will be created inside `coco_dataset` 247 | with the results of the benchmark of a given model. 248 | This new file will have information of the 249 | **execution time** and the **predicted caption**, 250 | with the file name being `"{model_name}_results.csv"`. 251 | 252 | **Every time you run the script, the `.csv` results file is overriden**. 253 | 254 | To run this file with different models, 255 | you only have to change a few parameters. 256 | If you open `run.exs`, 257 | inside the `Benchmark` module, 258 | you will find a comment block encompassed with 259 | `CHANGE YOUR SETTINGS HERE -----------------------------------`. 260 | Inside this code block, 261 | you can change: 262 | 263 | - the **image_width** of the image before being fed into the model. 264 | You want this value to be the same 265 | *as the same dimensions of the dataset the model was trained on*. 266 | The images will be redimensioned to this value 267 | whilst maintaining aspect ratio. 
268 | This step is important because 269 | it will **yield better results** 270 | and **improve performance whilst running the script**, 271 | since we're optimizing unnecessary data that the model 272 | would otherwise ignore. 273 | 274 | - the **model** being tested. 275 | If can change: 276 | - the `title`, which is just a label for the image. 277 | This title should not have the `/` character or any other that might 278 | make it look like a path. 279 | This is because this `title` is used when creating the results file. 280 | - the `name` of the model, 281 | which should coincide with the name of the repo in `HuggingFace`. 282 | (i.e. [`Salesforce/blip-image-captioning-large`](https://huggingface.co/Salesforce/blip-image-captioning-large)). 283 | - the `cache_path`, pertaining to the location where the model is downloaded 284 | and cached locally. 285 | You should only change the name of the folder 286 | (don't change `@models_folder_path`). 287 | - `load_featurizer`, `load_tokenizer` and `load_generation_config` 288 | allow you to load these parameters if the model needs it. 289 | We recommend checking [`Bumblebee's` documentation](https://hexdocs.pm/bumblebee/Bumblebee.Vision.html) 290 | to check if your model needs any of these. 291 | 292 | - the `extract_label` function. 293 | This function pattern-matches the output of the model. 294 | You should change it according to the output of the model 295 | so you can successfully retrieve the result. 296 | 297 | And these are all the changes you need! 298 | You can change these settings for each model you test 299 | and a new file with the results will be created for each one 300 | inside `coco_dataset`! 301 | 302 | 303 | ## 2.1 (Important!) Benchmarking different models 304 | 305 | When you make the above changes, 306 | we are assuming that 307 | you are using the 308 | [`Bumblebee.Vision.image_to_text/5`](https://hexdocs.pm/bumblebee/Bumblebee.Vision.html#image_to_text/5) 309 | function to create the serving. 310 | 311 | The default code for the script pertains to 312 | [`Salesforce/blip-image-captioning-base`](https://huggingface.co/Salesforce/blip-image-captioning-base). 313 | However, there are other models that `Bumblebee` might support 314 | from the Hugging Face repositories in https://huggingface.co/models?pipeline_tag=image-to-text&sort=downloads. 315 | 316 | Some models are not served from 317 | [`Bumblebee.Vision.image_to_text/5`](https://hexdocs.pm/bumblebee/Bumblebee.Vision.html#image_to_text/5). 318 | If you want to benchmark others 319 | (as long as they are supported from `Bumblebee`), 320 | you'll have to make additional changes. 321 | 322 | 323 | ### 2.1.1 `ResNet-50` 324 | 325 | For example, if you want to test the `resnet-50` model, 326 | you also have to change the `serving/0` function 327 | inside `manage_models.exs` 328 | so it uses [`Bumblebee.Vision.image_classification/3`](https://hexdocs.pm/bumblebee/Bumblebee.Vision.html#image_classification/3) 329 | instead (the only way to correctly build the `resnet-50` model serving with `Bumblebee`). 330 | 331 | ```elixir 332 | Bumblebee.Vision.image_classification( 333 | model.model_info, 334 | model.featurizer, 335 | top_k: 1, 336 | compile: [batch_size: 10], 337 | defn_options: [compiler: EXLA], 338 | preallocate_params: true 339 | ) 340 | ``` 341 | 342 | ### 2.1.2 `Salesforce/blip-image-captioning-large` 343 | 344 | Normally, for `BLIP` models, 345 | you would only have to change the settings that were mentioned 346 | in the previous section. 
347 | 348 | However, **at the time of writing**, 349 | this would result in an error if you wanted to 350 | use [`Salesforce/blip-image-captioning-large`](https://huggingface.co/Salesforce/blip-image-captioning-large). 351 | The bug, although fixed, has not yet been released. 352 | 353 | Therefore, for this model, 354 | you'd have to update the imports in `run.exs`. 355 | 356 | ```elixir 357 | {:bumblebee, git: "https://github.com/elixir-nx/bumblebee", branch: "main", override: true}, 358 | {:nx, git: "https://github.com/elixir-nx/nx.git", sparse: "nx", override: true}, 359 | {:exla, git: "https://github.com/elixir-nx/nx.git", sparse: "exla", override: true}, 360 | ``` 361 | 362 | 363 | For more information on this, 364 | check https://github.com/elixir-nx/bumblebee/issues/269#issuecomment-1865198005. 365 | 366 | 367 | 368 | 369 | # 3. Run `metrics.ipynb` 370 | 371 | Now that you have the `{model_name}_results.csv` 372 | generated by the above `Elixir` script, 373 | you can now run `metrics.ipynb`. 374 | 375 | This `Jupyter Notebook` will wrangle the data, 376 | process it 377 | and create the table with the metrics 378 | to evaluate each model that was benchmarked. 379 | 380 | The `Jupyter Notebook` is documented with each step. 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | -------------------------------------------------------------------------------- /_comparison/coco_dataset/10707.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/10707.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/117014.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/117014.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/154830.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/154830.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/168797.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/168797.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/169250.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/169250.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/179187.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/179187.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/185546.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/185546.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/192554.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/192554.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/193021.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/193021.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/200058.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/200058.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/222343.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/222343.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/235642.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/235642.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/249295.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/249295.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/256470.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/256470.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/26518.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/26518.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/274947.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/274947.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/306437.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/306437.jpg -------------------------------------------------------------------------------- 
/_comparison/coco_dataset/322762.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/322762.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/323507.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/323507.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/335099.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/335099.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/33650.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/33650.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/338847.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/338847.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/342958.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/342958.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/345125.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/345125.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/34657.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/34657.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/354572.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/354572.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/356828.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/356828.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/35807.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/35807.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/368242.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/368242.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/391255.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/391255.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/404642.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/404642.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/4069.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/4069.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/406932.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/406932.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/407646.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/407646.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/424293.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/424293.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/425361.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/425361.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/432146.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/432146.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/488573.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/488573.jpg -------------------------------------------------------------------------------- 
/_comparison/coco_dataset/499989.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/499989.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/515904.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/515904.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/519094.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/519094.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/527040.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/527040.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/540775.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/540775.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/547617.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/547617.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/552775.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/552775.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/64425.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/64425.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/75076.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/75076.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/87956.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/87956.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/90366.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/90366.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/93437.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dwyl/image-classifier/ebdf21e5aec76d35633e12a590e65fac17a5adb0/_comparison/coco_dataset/93437.jpg -------------------------------------------------------------------------------- /_comparison/coco_dataset/blip-image-captioning-base_results.csv: -------------------------------------------------------------------------------- 1 | image_id,time_in_microseconds,prediction 2 | 10707,10616571,a man playing a video game with a remote control 3 | 117014,4204543,a snowboarder is in mid air in the snow 4 | 154830,3870852,a man riding a bike down a street 5 | 168797,4034522,a man doing a trick on a skateboard 6 | 169250,3960603,a living room with a couch and a television 7 | 179187,4005360,a cloudy sky 8 | 185546,4127649,a bathroom with a toilet and a sink 9 | 192554,3866600,a bathroom with a sink and a mirror 10 | 193021,3839790,a kitchen with a refrigerator and a table 11 | 200058,3890470,a person on a snowboard in the snow 12 | 222343,5487423,a poster on the wall 13 | 235642,4565424,a wire fence 14 | 249295,4945551,a clock tower with a flag on top of it 15 | 256470,4999374,a horse is standing in the grass by the water 16 | 26518,5082765,a person is standing on a skateboard 17 | 274947,4980574,a red double decker bus 18 | 306437,4434137,a man riding a skateboard on a cement surface 19 | 322762,4390963,the elephant is standing 20 | 323507,4274194,a blue vase filled with yellow flowers on a wooden table 21 | 335099,4065509,a dog looking through a gate at a dog bowl 22 | 33650,3943589,a canopy over a bed 23 | 338847,4000244,a woman in a black dress and red tie 24 | 342958,3937181,a yellow kayak on a balcony overlooking the ocean 25 | 345125,3918008,a woman in a pink jacket talking on a cell 26 | 34657,3878472,three men sitting on a couch 27 | 354572,4085513,a person sitting on a stone statue 28 | 356828,4204811,a computer keyboard with a blue button on it 29 | 35807,4170361,a large elephant standing on a wet surface 30 | 368242,4093012,a boy flying a kite in a field 31 | 391255,4039245,a man and woman cutting a cake 32 | 404642,4038503,two hot dogs on a plate 33 | 4069,4156945,two people riding bikes 34 | 406932,4441285,a dog is looking in the side mirror of a car 35 | 407646,4039358,a man playing tennis 36 | 424293,4390354,a stop sign on a street corner with a stop sign 37 | 425361,4343673,a man sitting at a desk with a piece of pizza 38 | 432146,4013038,a person skiing down a snowy slope 39 | 488573,4274265,a red archway 40 | 499989,4048246,a gife standing in the grass 41 | 515904,3916022,a bathroom with a bathtub and a sink 42 | 519094,4457369,a cat is sitting on a chair with a scarf around its neck 43 | 527040,4506515,a table with a basket of bread and a newspaper 44 | 540775,4362543,a woman in a red shirt and black shorts playing tennis 45 | 547617,4173389,a tennis player is swinging at a ball 46 | 552775,4411786,a man in a kitchen with a stove and a microwave 47 | 64425,4211575,a traffic light 48 | 75076,4239243,a bench in the woods 49 | 87956,3982480,a man walking on the beach 50 | 90366,4100591,a cow and a dog are standing in the grass 51 | 93437,4326930,a man wearing a hat 52 | 
-------------------------------------------------------------------------------- /_comparison/coco_dataset/blip-image-captioning-large_results.csv: -------------------------------------------------------------------------------- 1 | image_id,time_in_microseconds,prediction 2 | 10707,20462743,they are playing a video game in the living room together 3 | 117014,11841946,there is a snowboarder that is jumping in the air 4 | 154830,13265613,there is a man riding a bike down the street on a bike 5 | 168797,12415384,arafed skateboarder doing a trick on a bench in the dark 6 | 169250,12035295,"there is a living room with a couch, television and a window" 7 | 179187,11987657,there is a sign on the side of the road that says no parking 8 | 185546,12267730,there is a bathroom with a toilet and a sink in it 9 | 192554,12156820,there is a urinal and a sink in a bathroom 10 | 193021,12208819,there is a kitchen with a table and chairs in it 11 | 200058,11970426,skier standing on a snowboard in the snow at a ski resort 12 | 222343,11762017,there is a poster on the wall above a toilet in a bathroom 13 | 235642,10199745,zebras standing in a line in a fenced in area 14 | 249295,9967461,there is a clock tower with a flag on the top of it 15 | 256470,11867798,arafed view of a horse ' s head from the back in a field 16 | 26518,12245676,someone is standing on a skateboard with their feet on it 17 | 274947,11773547,there are two red double decker buses that are parked on the street 18 | 306437,11779136,there is a man riding a skateboard on a cement block 19 | 322762,11869318,there is a large elephant standing in the water by the trees 20 | 323507,11909490,there is a blue vase with yellow flowers on a wooden table 21 | 335099,12112036,there is a dog that is looking out the window at another dog 22 | 33650,12056604,there is a bed with a canopy in a room with orange walls 23 | 338847,10409181,there is a woman in a black shirt and red tie 24 | 342958,9921738,surfers board on a balcony overlooking the beach and ocean 25 | 345125,11004133,smiling woman in pink jacket and sunglasses talking on cell phone 26 | 34657,12182907,three men sitting on a couch playing a video game 27 | 354572,11425574,there is a man sitting on a stone structure holding an umbrella 28 | 356828,9964207,there is a computer keyboard and mouse on a desk 29 | 35807,11535151,elephants walking in the rain at a zoo with people walking by 30 | 368242,11524058,there is a young boy flying a kite in a field 31 | 391255,10253626,bride and groom cutting their wedding cake with a sword 32 | 404642,9947578,there are two hot dogs on a plate with coleslaw on them 33 | 4069,11017163,they are two men riding bikes on a dirt road 34 | 406932,12247208,araffe dog looking out of a car window in the side mirror 35 | 407646,11832833,there is a man that is playing tennis on the court 36 | 424293,11886693,there is a stop sign on a pole on the corner of a street 37 | 425361,11944498,there is a man sitting at a desk with a glass of wine 38 | 432146,12013471,skier in red jacket skiing down a snowy slope with trees in background 39 | 488573,10321843,arafed red gate in the middle of a city street 40 | 499989,9964894,there is a giraffe standing in the tall grass near the trees 41 | 515904,11282493,there is a bathtub and a sink in a bathroom 42 | 519094,11790305,there is a cat that is sitting on a chair with its mouth open 43 | 527040,12173480,there are two birds sitting in a basket on a table 44 | 540775,11988141,there is a woman that is playing tennis on the court 45 | 
547617,12159086,tennis player in mid air after hitting the ball with his racket 46 | 552775,12709285,there is a man taking a muffin out of the oven 47 | 64425,12193171,there is a traffic light that has a no turn on red sign on it 48 | 75076,12116284,there is a bench in the middle of a forest with red leaves 49 | 87956,10392796,surfer walking on the beach with his board in hand 50 | 90366,10478422,araffe and dog sitting in front of a fake cow 51 | 93437,12014263,smiling elderly man in kitchen with a knife and fork 52 | -------------------------------------------------------------------------------- /_comparison/coco_dataset/captions.csv: -------------------------------------------------------------------------------- 1 | image_id,caption 2 | 527040,A bunch of birds sitting in a bread basket 3 | 527040,Two birds perched on a bread basked on a table 4 | 527040,Two finch birds standing next to a basket of bread 5 | 527040,Birds are standing in a basket on the table 6 | 527040,three birds sitting on a bread basket near a newspaper 7 | 335099,"Two small dogs are in front of a black iron gate, one of them looking up at something" 8 | 335099,Two dogs outside a fence sniff it 9 | 335099,two dogs on the porch peering through a wrought iron gate 10 | 335099,Two brown and white dogs on patio outside of a gate 11 | 335099,A pair of dogs looking through a glass barred patio door 12 | 368242,a person is flying a kite in a field 13 | 368242,The boy is running as he tries to fly his kite 14 | 368242,A person running with a kite in the tall grass 15 | 368242,A boy is flying a kite in a field 16 | 368242,A small boy flying a lite in a big grassy field 17 | 425361,An individual is capture in the stillness of the picture 18 | 425361,A man sitting at a desk with food holding a wine glass 19 | 425361,A man who is holding a wine glass 20 | 425361,A young man that is having some wine and something to eat 21 | 425361,A man at an office desk drinking a glass of wine 22 | 488573,A cat on a city street with people 23 | 488573,A car drives under a red Asian archway 24 | 488573,A car is passing under an Asian styled red arch 25 | 488573,The arches in an Asian city are painted bright red 26 | 488573,An intersection shows an expanse of empty road and then a car coming out from under a large arch that looks like a giant Chinese letter and stands between two buildings that stand at the forefront of am open walled walkway and retail venues 27 | 345125,A woman in pink coat and sunglasses talking on a phone 28 | 345125,A woman wearing sunglasses talking on a cell phone 29 | 345125,A young lady with a silly face is on her phone 30 | 345125,A woman wearing sunglasses talking on the telephone 31 | 345125,A woman wearing a pink jacket and talking on a phone 32 | 185546,A bathroom with a white toilet underneath a window 33 | 185546,View of a bathroom vanity looking into a separate room with a toilet and window 34 | 185546,"This bathroom contains a toilet, sink, and window" 35 | 185546,A home bathroom with the sink in a separate room 36 | 185546,Clean bathroom painted in pink and white paint 37 | 547617,A young person running to return the tennis ball to their opponent 38 | 547617,The tennis player has just hit the ball with the tennis racket 39 | 547617,A tennis player playing in a match who has just hit the ball 40 | 547617,A professional tennis player off the ground having just hit the ball 41 | 547617,A man swinging a racket at a tennis ball 42 | 249295,A Roman number clock tower near a municipal building 43 | 249295,there is a 
clock that is at the top of this structure 44 | 249295,A brick clock tower in front of a building 45 | 249295,A clock tower sits near a building and flagpole 46 | 249295,A tower that has a clock on the top of it 47 | 33650,A bedroom area with a draped bed and a desk 48 | 33650,A double bed with a canopy next to a window 49 | 33650,"A bed with a canopy made of netting, tied back with ribbons" 50 | 33650,A fancy bed with curtains in an orange room 51 | 33650,A bed is shown with a canopy over it 52 | 356828,A computer keyboard sitting on top of a desk 53 | 356828,A computer keyboard on a wood topped desk 54 | 356828,An out of focus picture of a computer keyboard 55 | 356828,The computer keyboard is sitting on the table 56 | 356828,a keyboard is laying on a wood desk 57 | 515904,A bathroom with a large tub next to a sink 58 | 515904,A bathroom with large faux marble tiles and a built in bathtub 59 | 515904,a black bathroom in someones house with a tiny tub 60 | 515904,A bathroom laid in gray marble looks cold and uninviting 61 | 515904,Bathtub in a very fancy stone tiled bathroom 62 | 274947,two red shuttle buses riding down a street next to a blue pole 63 | 274947,The double decker bus drives next to a normal size bus 64 | 274947,A couple of red double decker buss on a street 65 | 274947,a double decker bus rides next to another bus 66 | 274947,two buses one regular the other double decker on a city street 67 | 222343,a close up of a toilet with a poster on the wall 68 | 222343,A band poster is above a bathroom toilet 69 | 222343,A concert poster is hung above the toilet 70 | 222343,Someone posted a poster above a toilet in a public restroom 71 | 222343,Large poster on wall behind white commode in dark tiled bathroom 72 | 256470,A horse standing in the open field looking a the lake 73 | 256470,A horse looks away from the camera toward a lake 74 | 256470,A view from someone riding horseback in an open field with a pond on it 75 | 256470,A first-person view of riding a brown horse through a field 76 | 256470,"The view is from the back of a horse, riding in a dry, grassy field towards a pond on a sunny say" 77 | 406932,A dog hanging out of a side window on a car 78 | 406932,Dog looking out car window as reflected in side view mirror 79 | 406932,A dog with his head out of the window and tongue hanging as seen in a car mirror 80 | 406932,a dog look through a cars window 81 | 406932,A dog's reflection in a car door mirror 82 | 323507,a little vase of flowers that are yellow in a blue vase 83 | 323507,a close up of a vase with flowers in it on a wooden floor 84 | 323507,A blue vase filled with yellow tiny flowers 85 | 323507,Small yellow and orange flowers are arranged in a cobalt blue vase 86 | 323507,An opalescent blue vase holds a bouquet of flowers 87 | 168797,A man riding a skateboard on to of a wooden bench 88 | 168797,a man on a skate board does a trick on a bench 89 | 168797,A black and white photo of a person doing a skateboard trick 90 | 168797,a person jumping in the air on a skateboard 91 | 168797,The boy slides along the bench on his skateboard 92 | 4069,Black and white photo of two men riding bikes 93 | 4069,Two men are bicycling down a dirt road next to the woods 94 | 4069,A couple of people on some bikes riding 95 | 4069,Two people that are riding bikes on a path 96 | 4069,Two people riding on bicycles near a row of trees 97 | 235642,A herd of zebra standing next to each other on a grass field 98 | 235642,A group of zebras are standing in the grass 99 | 235642,A group of zebras 
standing in a field 100 | 235642,A herd of zebras is standing next to a wire fence 101 | 235642,a pack of 4 zebras standing by a fence in a field 102 | 117014,A snowboarder is in the middle of a jump through the air 103 | 117014,View from below of a person on snowboard in air against sky 104 | 117014,A person on a snowboard who is performing a jump 105 | 117014,A person performing a trick on a snowboard 106 | 117014,the person is on a snowboard flying over the mountain 107 | 75076,A wooden park bench sitting in the middle of a forest 108 | 75076,a wooden park bench under colorful tree 109 | 75076,THERE IS A BENCH THAT IS ON THE LEAVES 110 | 75076,A park bench beneath a tree with red leaves 111 | 75076,Picture of a park bench surround by trees with coloring leaves 112 | 10707,A man standing in a living room holding a Nintendo Wii game controller 113 | 10707,Three people with cups on the couch and one with remote standing 114 | 10707,A man holding a motion controlled video game controller 115 | 10707,Man with video game controller in living room with onlooker seated nearby 116 | 10707,A man playing a video game while two men sit on a couch 117 | 424293,A red stop sign mounted on the side of a light pole 118 | 424293,A stop sign on a short light pole at an empty intersection 119 | 424293,A stop sign is on a short light pole in the middle of the street 120 | 424293,Stop sign on a lamp post on a Y section of road 121 | 424293,There is a stop sign on a street pole 122 | 432146,A person riding skis down a snow covered slope 123 | 432146,A man in a red jacket is skiing through the snow 124 | 432146,A skier skiing down a slope of fresh snow 125 | 432146,A person riding on ski's down a hill 126 | 432146,A skier is headed down a slope on his skis 127 | 64425,Traffic signals and sign on pole at roadway intersection 128 | 64425,A picture of a sign on a traffic light in front of trees 129 | 64425,A traffic light with a no turn sign on it 130 | 64425,A traffic light with a No Turn On Red sign under 131 | 64425,A set of two street lights with a no turn on red sign 132 | 26518,someone that is standing on top of a skateboard 133 | 26518,a person standing on a skate board on its side 134 | 26518,A man standing on top of a skateboard 135 | 26518,A person's feet on the side of an upended skateboard 136 | 26518,A person stands on the side of their longboard 137 | 322762,An elephant standing in the dry grass at the edge of water 138 | 322762,An elephant eats in the grass in his habitat 139 | 322762,An elephant drinking water on the edge of a body of water 140 | 322762,A single elephant standing next to a body of water drinking 141 | 322762,An elephant has it's trunk in it's mouth standing in the grass 142 | 552775,A man pokes his head in front of an oven open to baking cookies 143 | 552775,The man is close to an oven full of cookies that are being prepared 144 | 552775,an image of man baking goods in the oven 145 | 552775,a man standing by an oven while making some cookies 146 | 552775,A man standing by an oven with the door open 147 | 169250,"A simple apartment living area, with a couch and a television" 148 | 169250,This is a plain and small living room with a sofa and TV 149 | 169250,A couch sitting in front of a TV near a window 150 | 169250,"A small, curtained living room during the day" 151 | 169250,This simple living room has black furniture and a TV 152 | 354572,People sitting under an umbrella looking at the scenery 153 | 354572,A person with a colorful umbrella sits on a fountain 154 | 354572,A man 
with a colored umbrella sits on a monument 155 | 354572,A man sis on a public area while holding an umbrella 156 | 354572,This is a photo of two people sitting on the ledge of a piece of city artwork 157 | 179187,A sign posted on the side of a highway 158 | 179187,An empty freeway has very dark clouds overhead 159 | 179187,two signs on a pole a car a street and some buildings 160 | 179187,Black and white of highway with a gas pump sign and arrow on a pole and shopping buildings behind 161 | 179187,A long road surrounded by tall buildings and street signs 162 | 192554,"A sink, soap, toilet and a mirror" 163 | 192554,A urinal and sink are positioned side by side in a small public bathroom 164 | 192554,A white sink sitting next to a white urinal 165 | 192554,A bathroom sink with soap on it and a urinal beside it 166 | 192554,A small public restroom with a white sink and urinal 167 | 404642,there are two hot dogs on a paper plate with toppings 168 | 404642,A man is sitting down with two sandwiches in his lap 169 | 404642,A plate with food on it on a person's lap 170 | 404642,Two buns holds meat and coleslaw as the sit on white papers 171 | 404642,Two hot dogs topped with cole slaw sits on a paper plate 172 | 93437,A shot of an elderly man inside a kitchen 173 | 93437,An old man is wearing an odd hat 174 | 93437,An older man is wearing a funny hat in his dining room 175 | 93437,A man in a jacket and hat looks at the camera 176 | 93437,An old man standing in a kitchen posing for a picture 177 | 407646,A man playing tennis who is about to return a serve 178 | 407646,A tennis player sets his racket to hit the ball 179 | 407646,Tennis player swinging at tennis ball with racket 180 | 407646,The look on the tennis player's face telegraphs a possible error 181 | 407646,A man on a tennis court who has just hit the tennis ball 182 | 87956,A man holding a surf board in his hands walking towards the beach 183 | 87956,A person with a surfboard on a beach 184 | 87956,A man with his surf board is approaching the water 185 | 87956,A man with a surfboard walking on a beach toward the water 186 | 87956,A lone man on a big empty beach with a surf board 187 | 342958,a surf board next to a table with chairs 188 | 342958,A yellow surf board sits on a balcony overlooking the ocean 189 | 342958,A surfboard that is leaning against a rail 190 | 342958,A yellow surfboard sitting on top of a wooden deck 191 | 342958,A surf board and a boogie board are behind a table on a deck over looking the beach 192 | 90366,A dog sitting on the grass looking at a fake cow that is lit up at night 193 | 90366,A dog looking at a statue of a cow 194 | 90366,A dog sitting in the grass watching a cow 195 | 90366,A black and white cow is looking through a fence 196 | 90366,A dog faces a light up cow ornament 197 | 306437,A man riding a skateboard up a flight of steps 198 | 306437,A boy doing a stunt on a skateboard 199 | 306437,A guy on a skateboard grinding on a short concrete platform 200 | 306437,A skateboarder tips his board up against the base of a sculpture 201 | 306437,The man is practicing his moves on his skateboard 202 | 200058,A person on a snowboard in the snow 203 | 200058,A snowboarder stands with one foot in the bindings on a snowy slope 204 | 200058,A woman standing next to a snow board on a snow covered slope 205 | 200058,a person riding a snow boad on a snowy surface 206 | 200058,A person standing on the snowboard on top of the snow 207 | 540775,A person holding a tennis racquet by a tennis court 208 | 540775,a tennis 
player with her hand near her head holding a tennis racket 209 | 540775,A tennis player makes a funny gesture after a play 210 | 540775,Male tennis player with flattened hand raised above head 211 | 540775,A tennis player questioning the call of an umpire 212 | 154830,Car waits as person on bike crosses the road 213 | 154830,A four lane street in the suburbs 214 | 154830,A bicyclist crossing the street as a car waits to turn 215 | 154830,A man with a backpack riding a bicycle by a traffic light 216 | 154830,A full view of a suburban city with people 217 | 34657,A group of guys on a couch watching tv 218 | 34657,Three men sitting on a couch next to each other 219 | 34657,Three guys sitting on the couch watching some television 220 | 34657,an image of three men sitting on the couch watching tv 221 | 34657,Three guys on a couch playing a video game 222 | 338847,A young girl wearing a red tie is adjusting her glasses 223 | 338847,A person with glasses and a tie in a room 224 | 338847,a woman wearing glasses a shirt and tie 225 | 338847,The man wearing a red tie is standing near an animal's plastic house 226 | 338847,The man in the red tie is putting on his glasses 227 | 519094,A grey and white cat next to a blanket and shelf with blue bottle 228 | 519094,Cat laying on pillow next to glass bottle 229 | 519094,The cat could be yawning or could be screaming 230 | 519094,The cat is angry while sitting on top of a pillow 231 | 519094,a cat is yawning while it sits inside the house 232 | 193021,A hall way with family photos on the wall leading to a kitchen and dining area with wood floors 233 | 193021,A kitchen with wood floors and wood cabinets 234 | 193021,"A fully furnished apartment with photos, a table, and a bookshelf amongst other things" 235 | 193021,There is a table and chairs in the middle of a kitchen 236 | 193021,The kitchen and dining room of a house 237 | 499989,Giraffe in its natural habitat snacking on a tree 238 | 499989,a giraffe grazing in the wild eating from a tree 239 | 499989,A large giraffe standing in a dry brush field 240 | 499989,an image of a giraffe among trees looking back 241 | 499989,A giraffe standing near a tree in a grassy area 242 | 35807,An elephant walking behind a man in a park 243 | 35807,an elephant walking on a paved path with trees in the background 244 | 35807,A person with an elephant following walking down a path 245 | 35807,an elephant walking behind a person on an asphalt pavement next to grass 246 | 35807,A person in a raincoat walks an elephant through a park 247 | 391255,A young woman with a sword standing next to a man over an orange cake 248 | 391255,A woman and man stand next to each other with a sword 249 | 391255,a man and a woman are holding a sword near a cake 250 | 391255,The two people are holding a sward to cut a cake 251 | 391255,This is a picture of husband and wife cutting a cake 252 | -------------------------------------------------------------------------------- /_comparison/coco_dataset/resnet-50_results.csv: -------------------------------------------------------------------------------- 1 | image_id,time_in_microseconds,prediction 2 | 10707,1949795,hair spray 3 | 117014,596715,ski 4 | 154830,282925,"traffic light, traffic signal, stoplight" 5 | 168797,588788,"bannister, banister, balustrade, balusters, handrail" 6 | 169250,288680,window shade 7 | 179187,586702,street sign 8 | 185546,547854,"medicine chest, medicine cabinet" 9 | 192554,298281,"medicine chest, medicine cabinet" 10 | 193021,310997,library 11 | 200058,291388,ski 12 | 
222343,299065,"washbasin, handbasin, washbowl, lavabo, wash-hand basin" 13 | 235642,276528,zebra 14 | 249295,414157,analog clock 15 | 256470,276553,French bulldog 16 | 26518,588966,mousetrap 17 | 274947,284341,"trolleybus, trolley coach, trackless trolley" 18 | 306437,322784,"pay-phone, pay-station" 19 | 322762,281707,"African elephant, Loxodonta africana" 20 | 323507,610119,"pot, flowerpot" 21 | 335099,533927,English setter 22 | 33650,573963,mosquito net 23 | 338847,327563,"academic gown, academic robe, judge's robe" 24 | 342958,274285,"seashore, coast, seacoast, sea-coast" 25 | 345125,280905,"sunglasses, dark glasses, shades" 26 | 34657,270652,"pajama, pyjama, pj's, jammies" 27 | 354572,584770,"stupa, tope" 28 | 356828,579434,"notebook, notebook computer" 29 | 35807,578473,"Indian elephant, Elephas maximus" 30 | 368242,275475,umbrella 31 | 391255,275337,"groom, bridegroom" 32 | 404642,604679,"hotdog, hot dog, red hot" 33 | 4069,585020,"mountain bike, all-terrain bike, off-roader" 34 | 406932,584881,car mirror 35 | 407646,459452,"racket, racquet" 36 | 424293,297047,"traffic light, traffic signal, stoplight" 37 | 425361,593017,wine bottle 38 | 432146,283281,ski 39 | 488573,529790,"gas pump, gasoline pump, petrol pump, island dispenser" 40 | 499989,304019,"cheetah, chetah, Acinonyx jubatus" 41 | 515904,290146,"tub, vat" 42 | 519094,274495,Norwich terrier 43 | 527040,363630,bulbul 44 | 540775,637872,"racket, racquet" 45 | 547617,332809,"racket, racquet" 46 | 552775,308652,waffle iron 47 | 64425,299392,"traffic light, traffic signal, stoplight" 48 | 75076,608454,park bench 49 | 87956,276873,"seashore, coast, seacoast, sea-coast" 50 | 90366,277039,ox 51 | 93437,601620,"bathing cap, swimming cap" 52 | -------------------------------------------------------------------------------- /_comparison/manage_models.exs: -------------------------------------------------------------------------------- 1 | defmodule Comparison.Models do 2 | @moduledoc """ 3 | Manages loading the modules when benchmarking models. 4 | It is inspired by the `App.Models` module in the Phoenix app. 5 | """ 6 | require Logger 7 | 8 | @doc """ 9 | Verifies and downloads the model according. 10 | You can optionally force it to re-download the model by passing `force_download?` 11 | """ 12 | def verify_and_download_model(model, force_download? \\ false) do 13 | case force_download? do 14 | true -> 15 | # Delete any cached pre-existing model 16 | File.rm_rf!(model.cache_path) 17 | # Download model 18 | download_model(model) 19 | 20 | false -> 21 | # Check if the model cache directory exists or if it's not empty. 22 | # If so, we download the model. 23 | model_location = Path.join(model.cache_path, "huggingface") 24 | 25 | if not File.exists?(model_location) or File.ls!(model_location) == [] do 26 | download_model(model) 27 | end 28 | end 29 | end 30 | 31 | @doc """ 32 | Serving function that serves the `Bumblebee` models used throughout the app. 33 | This function is meant to be called and served by `Nx`, 34 | like `Nx.Serving.run(serving, "The capital of [MASK] is Paris.")` 35 | 36 | This assumes the models that are being used exist locally. 37 | """ 38 | def serving(model) do 39 | model = load_offline_model_params(model) 40 | 41 | Bumblebee.Vision.image_to_text( 42 | model.model_info, 43 | model.featurizer, 44 | model.tokenizer, 45 | model.generation_config, 46 | compile: [batch_size: 10], 47 | defn_options: [compiler: EXLA], 48 | preallocate_params: true 49 | ) 50 | end 51 | 52 | # Loads the model from the cache folder. 
53 | # It will load the model and the respective featurizer, tokenizer and generation config if needed, 54 | # and return a map with all of these at the end. 55 | defp load_offline_model_params(model) do 56 | Logger.info("ℹ️ Loading #{model.name}...") 57 | 58 | # Loading model 59 | loading_settings = {:hf, model.name, cache_dir: model.cache_path, offline: true} 60 | {:ok, model_info} = Bumblebee.load_model(loading_settings) 61 | 62 | info = %{model_info: model_info} 63 | 64 | # Load featurizer, tokenizer and generation config if needed 65 | info = 66 | if(model.load_featurizer) do 67 | {:ok, featurizer} = Bumblebee.load_featurizer(loading_settings) 68 | Map.put(info, :featurizer, featurizer) 69 | else 70 | info 71 | end 72 | 73 | info = 74 | if(model.load_tokenizer) do 75 | {:ok, tokenizer} = Bumblebee.load_tokenizer(loading_settings) 76 | Map.put(info, :tokenizer, tokenizer) 77 | else 78 | info 79 | end 80 | 81 | info = 82 | if(model.load_generation_config) do 83 | {:ok, generation_config} = 84 | Bumblebee.load_generation_config(loading_settings) 85 | 86 | Map.put(info, :generation_config, generation_config) 87 | else 88 | info 89 | end 90 | 91 | # Return a map with the model and respective parameters. 92 | info 93 | end 94 | 95 | # Downloads the model according to a given %ModelInfo struct. 96 | # It will load the model and the respective featurizer, tokenizer and generation config if needed. 97 | defp download_model(model) do 98 | Logger.info("ℹ️ Downloading #{model.name}...") 99 | 100 | # Download model 101 | downloading_settings = {:hf, model.name, cache_dir: model.cache_path} 102 | Bumblebee.load_model(downloading_settings) 103 | 104 | # Download featurizer, tokenizer and generation config if needed 105 | if(model.load_featurizer) do 106 | Bumblebee.load_featurizer(downloading_settings) 107 | end 108 | 109 | if(model.load_tokenizer) do 110 | Bumblebee.load_tokenizer(downloading_settings) 111 | end 112 | 113 | if(model.load_generation_config) do 114 | Bumblebee.load_generation_config(downloading_settings) 115 | end 116 | end 117 | end 118 | -------------------------------------------------------------------------------- /_comparison/models/README.md: -------------------------------------------------------------------------------- 1 | This folder will hold the models that are being benchmarked.
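When `run.exs` is executed it calls `Comparison.Models.verify_and_download_model/1`, which expects the Bumblebee cache for each model to live under `<cache_path>/huggingface` inside this folder and skips the download on later runs. A minimal sketch, assuming the default BLIP base entry configured in `run.exs` (which defines the `ModelInfo` struct and requires `manage_models.exs`):

```elixir
model = %ModelInfo{
  title: "blip-image-captioning-base",
  name: "Salesforce/blip-image-captioning-base",
  cache_path: Path.join([File.cwd!(), "models", "blip-image-captioning-base"]),
  load_featurizer: true,
  load_tokenizer: true,
  load_generation_config: true
}

# Downloads only if models/blip-image-captioning-base/huggingface is missing or empty;
# pass `true` as the second argument to force a re-download.
Comparison.Models.verify_and_download_model(model)
```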
-------------------------------------------------------------------------------- /_comparison/requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: osx-arm64 4 | absl-py=2.0.0=pyhd8ed1ab_0 5 | appnope=0.1.3=pyhd8ed1ab_0 6 | asttokens=2.4.1=pyhd8ed1ab_0 7 | blas=1.0=openblas 8 | bottleneck=1.3.5=py312ha86b861_0 9 | brotli=1.0.9=h1a28f6b_7 10 | brotli-bin=1.0.9=h1a28f6b_7 11 | brotli-python=1.0.9=py312h313beb8_7 12 | bzip2=1.0.8=h620ffc9_4 13 | ca-certificates=2023.11.17=hf0a4a13_0 14 | certifi=2023.11.17=pyhd8ed1ab_0 15 | cffi=1.16.0=py312h80987f9_0 16 | charset-normalizer=2.0.4=pyhd3eb1b0_0 17 | click=8.1.7=unix_pyh707e725_0 18 | colorama=0.4.6=pyhd8ed1ab_0 19 | comm=0.1.4=pyhd8ed1ab_0 20 | contourpy=1.2.0=py312h48ca7d4_0 21 | cryptography=41.0.7=py312hd4332d6_0 22 | cycler=0.11.0=pyhd3eb1b0_0 23 | cython=3.0.0=py312h80987f9_0 24 | debugpy=1.6.7=py312h313beb8_0 25 | decorator=5.1.1=pyhd8ed1ab_0 26 | exceptiongroup=1.2.0=pyhd8ed1ab_0 27 | executing=2.0.1=pyhd8ed1ab_0 28 | expat=2.5.0=h313beb8_0 29 | fonttools=4.25.0=pyhd3eb1b0_0 30 | freetype=2.12.1=h1192e45_0 31 | giflib=5.2.1=h80987f9_3 32 | icu=73.2=hc8870d7_0 33 | idna=3.4=py312hca03da5_0 34 | imageio=2.33.1=pypi_0 35 | importlib-metadata=7.0.0=pyha770c72_0 36 | importlib_metadata=7.0.0=hd8ed1ab_0 37 | ipykernel=6.26.0=pyh3cd1d5f_0 38 | ipython=8.18.1=pyh707e725_3 39 | jedi=0.19.1=pyhd8ed1ab_0 40 | jiwer=3.0.3=pypi_0 41 | joblib=1.3.2=pyhd8ed1ab_0 42 | jpeg=9e=h80987f9_1 43 | jupyter_client=8.6.0=pyhd8ed1ab_0 44 | jupyter_core=5.5.0=py312hca03da5_0 45 | kiwisolver=1.4.4=py312h313beb8_0 46 | lazy-loader=0.3=pypi_0 47 | lcms2=2.12=hba8e193_0 48 | lerc=3.0=hc377ac9_0 49 | libbrotlicommon=1.0.9=h1a28f6b_7 50 | libbrotlidec=1.0.9=h1a28f6b_7 51 | libbrotlienc=1.0.9=h1a28f6b_7 52 | libcxx=14.0.6=h848a8c0_0 53 | libdeflate=1.17=h80987f9_1 54 | libffi=3.4.4=hca03da5_0 55 | libgfortran=5.0.0=11_3_0_hca03da5_28 56 | libgfortran5=11.3.0=h009349e_28 57 | libiconv=1.17=h0d3ecfb_2 58 | libopenblas=0.3.21=h269037a_0 59 | libpng=1.6.39=h80987f9_0 60 | libsodium=1.0.18=h27ca646_1 61 | libtiff=4.5.1=h313beb8_0 62 | libwebp=1.3.2=ha3663a8_0 63 | libwebp-base=1.3.2=h80987f9_0 64 | libxml2=2.10.4=h0dcf63f_1 65 | libxslt=1.1.37=h80987f9_1 66 | llvm-openmp=14.0.6=hc6e5704_0 67 | lxml=4.9.3=py312h50ffb84_0 68 | lz4-c=1.9.4=h313beb8_0 69 | matplotlib=3.8.0=py312hca03da5_0 70 | matplotlib-base=3.8.0=py312hd77ebd4_0 71 | matplotlib-inline=0.1.6=pyhd8ed1ab_0 72 | munkres=1.1.4=py_0 73 | ncurses=6.4=h313beb8_0 74 | nest-asyncio=1.5.8=pyhd8ed1ab_0 75 | networkx=3.2.1=pypi_0 76 | nltk=3.8.1=py312hca03da5_0 77 | numexpr=2.8.7=py312h0f3ea24_0 78 | numpy=1.26.2=py312h7f4fdc5_0 79 | numpy-base=1.26.2=py312he047099_0 80 | openjpeg=2.3.0=h7a6adac_2 81 | openssl=3.2.0=h0d3ecfb_1 82 | packaging=23.2=pyhd8ed1ab_0 83 | pandas=2.1.1=py312hd77ebd4_0 84 | parso=0.8.3=pyhd8ed1ab_0 85 | pexpect=4.8.0=pyh1a96a4e_2 86 | pickleshare=0.7.5=py_1003 87 | pillow=10.0.1=py312h3b245a6_0 88 | pip=23.3.1=py312hca03da5_0 89 | platformdirs=4.1.0=pyhd8ed1ab_0 90 | portalocker=1.4.0=py_0 91 | prompt-toolkit=3.0.41=pyha770c72_0 92 | psutil=5.9.0=py312h80987f9_0 93 | ptyprocess=0.7.0=pyhd3deb0d_0 94 | pure_eval=0.2.2=pyhd8ed1ab_0 95 | pycocotools=2.0.7=pypi_0 96 | pycparser=2.21=pyhd3eb1b0_0 97 | pygments=2.17.2=pyhd8ed1ab_0 98 | pyopenssl=23.2.0=py312hca03da5_0 99 | pyparsing=3.0.9=py312hca03da5_0 100 | pysocks=1.7.1=py312hca03da5_0 101 | 
python=3.12.0=h99e199e_0 102 | python-dateutil=2.8.2=pyhd8ed1ab_0 103 | python-tzdata=2023.3=pyhd3eb1b0_0 104 | pytz=2023.3.post1=py312hca03da5_0 105 | pyzmq=25.1.0=py312h313beb8_0 106 | rapidfuzz=3.5.2=pypi_0 107 | readline=8.2=h1a28f6b_0 108 | regex=2023.10.3=py312h80987f9_0 109 | requests=2.31.0=py312hca03da5_0 110 | rouge-score=0.1.2=pyhd8ed1ab_0 111 | sacrebleu=2.4.0=pyhd8ed1ab_0 112 | scikit-image=0.22.0=pypi_0 113 | scipy=1.11.4=pypi_0 114 | setuptools=68.0.0=py312hca03da5_0 115 | six=1.16.0=pyh6c4a22f_0 116 | sqlite=3.41.2=h80987f9_0 117 | stack_data=0.6.2=pyhd8ed1ab_0 118 | tabulate=0.9.0=pyhd8ed1ab_1 119 | tifffile=2023.12.9=pypi_0 120 | tk=8.6.12=hb8d0fd4_0 121 | tornado=6.3.3=py312h80987f9_0 122 | tqdm=4.66.1=pyhd8ed1ab_0 123 | traitlets=5.14.0=pyhd8ed1ab_0 124 | typing=3.10.0.0=pyhd8ed1ab_0 125 | typing_extensions=4.9.0=pyha770c72_0 126 | tzdata=2023c=h04d1e81_0 127 | urllib3=1.26.18=py312hca03da5_0 128 | wcwidth=0.2.12=pyhd8ed1ab_0 129 | wheel=0.41.2=py312hca03da5_0 130 | xz=5.4.5=h80987f9_0 131 | zeromq=4.3.4=hc377ac9_0 132 | zipp=3.17.0=pyhd8ed1ab_0 133 | zlib=1.2.13=h5a0b063_0 134 | zstd=1.5.5=hd90d995_0 135 | -------------------------------------------------------------------------------- /_comparison/run.exs: -------------------------------------------------------------------------------- 1 | # Install the needed dependencies 2 | Mix.install( 3 | [ 4 | # Models 5 | {:bumblebee, "~> 0.4.2"}, 6 | {:exla, "~> 0.6.4"}, 7 | {:nx, "~> 0.6.4 "}, 8 | # Image 9 | {:vix, "~> 0.25.0"}, 10 | # CSV parsing 11 | {:csv, "~> 3.2"} 12 | ], 13 | config: [nx: [default_backend: EXLA.Backend]] 14 | ) 15 | 16 | # Define the model information struct used for each model being benchmarked. 17 | defmodule ModelInfo do 18 | @doc """ 19 | Information regarding the model being loaded. 20 | It holds the name of the model repository and the directory it will be saved into. 21 | It also has booleans to load each model parameter at will - this is because some models (like BLIP) require featurizer, tokenizations and generation configuration. 22 | """ 23 | defstruct [ 24 | :title, 25 | :name, 26 | :cache_path, 27 | :load_featurizer, 28 | :load_tokenizer, 29 | :load_generation_config 30 | ] 31 | end 32 | 33 | # Benchmark module that when executed, will create a file with the results of the benchmark. 34 | defmodule Benchmark do 35 | alias Vix.Vips.Image, as: Vimage 36 | require Logger 37 | Code.require_file("manage_models.exs") 38 | 39 | # Path to the models folder 40 | @models_folder_path Path.join(File.cwd!(), "models") 41 | 42 | # CHANGE YOUR SETTINGS HERE ----------------------------------- 43 | 44 | # The width of the images the model was trained on. 45 | @image_width 640 46 | 47 | # Model information 48 | @model %ModelInfo{ 49 | # should not have "\" in the string, as this is used in the filename of the file when writing the results 50 | title: "blip-image-captioning-base", 51 | name: "Salesforce/blip-image-captioning-base", 52 | cache_path: Path.join(@models_folder_path, "blip-image-captioning-base"), 53 | load_featurizer: true, 54 | load_tokenizer: true, 55 | load_generation_config: true 56 | } 57 | # Function to extract the prediction from the model 58 | def extract_label(result) do 59 | %{results: [%{text: label}]} = result 60 | label 61 | end 62 | 63 | # CHANGE YOUR SETTINGS HERE ----------------------------------- 64 | 65 | # Run this to create a file to benchmark the models 66 | @doc """ 67 | Main function that runs the benchmark. 68 | It verifies if the models are cached. 
If not, they are downloaded. 69 | It retrieves the images and the captions from the "coco_dataset" folder 70 | and runs them through the model. 71 | It creates a file with the results of the benchmark with the format "{model_name}_results.csv" 72 | """ 73 | def main() do 74 | # We first verify if the model exists and we download accordingly --------- 75 | Comparison.Models.verify_and_download_model(@model) 76 | serving = Comparison.Models.serving(@model) 77 | 78 | # Retrieve 50 images from COCO dataset 79 | # and create a list of pre-processed VIPS images with the referring captions --------- 80 | coco_dataset_images_path = File.cwd!() |> Path.join("coco_dataset") |> Path.join("*.jpg") 81 | files = Path.wildcard(coco_dataset_images_path) 82 | 83 | # coco_dataset_captions = 84 | # File.stream!(File.cwd!() |> Path.join("coco_dataset") |> Path.join("captions.csv")) 85 | # |> CSV.decode!() 86 | # |> Enum.map(& &1) 87 | 88 | vips_images_with_captions = 89 | Enum.map(files, fn path -> 90 | # Processing image 91 | {:ok, thumbnail_vimage} = 92 | Vix.Vips.Operation.thumbnail(path, @image_width, size: :VIPS_SIZE_DOWN) 93 | 94 | {:ok, tensor} = pre_process_image(thumbnail_vimage) 95 | 96 | # Getting ID of image from path 97 | image_id = Path.basename(path, ".jpg") 98 | 99 | # Getting captions of the image from the COCO Dataset 100 | # captions_of_image = 101 | # Enum.filter(coco_dataset_captions, fn [id, _caption] = _x -> 102 | # image_id == id 103 | # end) 104 | # |> Enum.map(fn [_id, caption] -> caption end) 105 | 106 | %{id: image_id, tensor: tensor} 107 | end) 108 | 109 | # Run the prediction on all the images --------- 110 | 111 | # Open the results file and adding header 112 | results_file_path = 113 | File.cwd!() |> Path.join("coco_dataset") |> Path.join("#{@model.title}_results.csv") 114 | 115 | File.write!( 116 | results_file_path, 117 | "image_id,time_in_microseconds,prediction\r\n", 118 | [:write, :utf8] 119 | ) 120 | 121 | # Go over each image and make prediction 122 | Enum.each(vips_images_with_captions, fn image -> 123 | Logger.info("📊 Benchmarking image #{image.id}...") 124 | 125 | # Run the prediction 126 | {time_in_microseconds, prediction} = 127 | :timer.tc(fn -> 128 | extract_label(Nx.Serving.run(serving, image.tensor)) 129 | end) 130 | 131 | # Write the results to the file with "image_id, time_in_microseconds, prediction" 132 | row_to_append = 133 | [[image.id, time_in_microseconds, prediction]] 134 | |> CSV.encode(headers: false) 135 | |> Enum.take(3) 136 | |> Enum.join() 137 | 138 | File.write!(results_file_path, row_to_append, [:append, :write, :utf8]) 139 | end) 140 | end 141 | 142 | # Pre-processes a given Vix image so it's suitable for the model to consume. 143 | defp pre_process_image(%Vimage{} = image) do 144 | # If the image has an alpha channel, flatten it: 145 | {:ok, flattened_image} = 146 | case Vix.Vips.Image.has_alpha?(image) do 147 | true -> Vix.Vips.Operation.flatten(image) 148 | false -> {:ok, image} 149 | end 150 | 151 | # Convert the image to sRGB colourspace ---------------- 152 | {:ok, srgb_image} = Vix.Vips.Operation.colourspace(flattened_image, :VIPS_INTERPRETATION_sRGB) 153 | 154 | # Converting image to tensor ---------------- 155 | {:ok, tensor} = Vix.Vips.Image.write_to_tensor(srgb_image) 156 | 157 | # We reshape the tensor given a specific format. 158 | # In this case, we are using {height, width, channels/bands}. 
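# Illustration with hypothetical numbers: a landscape photo thumbnailed to @image_width = 640
# might arrive here as a %Vix.Tensor{} with shape {427, 640, 3}. The code below treats that
# tuple as {height, width, bands}, rebuilds it as an Nx tensor with matching axis names, and
# the result is what main/0 later passes to Nx.Serving.run/2.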
159 | %Vix.Tensor{data: binary, type: type, shape: {x, y, bands}} = tensor 160 | format = [:height, :width, :bands] 161 | shape = {x, y, bands} 162 | 163 | final_tensor = 164 | binary 165 | |> Nx.from_binary(type) 166 | |> Nx.reshape(shape, names: format) 167 | 168 | {:ok, final_tensor} 169 | end 170 | end 171 | 172 | # Runs the benchmark module 173 | # To change the model you want to use, 174 | # check the `Benchmark` module above and change the variables inside the `CHANGE YOUR SETTINGS HERE` comment blocks. 175 | Benchmark.main() 176 | -------------------------------------------------------------------------------- /assets/css/app.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss/base"; 2 | @import "tailwindcss/components"; 3 | @import "tailwindcss/utilities"; 4 | 5 | @import "../node_modules/toastify-js/src/toastify.css"; 6 | 7 | /* This file is for your main application CSS */ 8 | /* override browser default */ 9 | html, 10 | body { 11 | margin: 0; 12 | padding: 0; 13 | } 14 | 15 | /* use viewport-relative units to cover page fully */ 16 | body { 17 | height: 100vh; 18 | width: 100vw; 19 | } 20 | 21 | /* Override the container displays to both be shown on larger screens */ 22 | @media (min-width: 1024px) { 23 | #upload_container, #search_container { 24 | display: block !important; /* Override any inline styles */ 25 | } 26 | } -------------------------------------------------------------------------------- /assets/js/app.js: -------------------------------------------------------------------------------- 1 | // If you want to use Phoenix channels, run `mix help phx.gen.channel` 2 | // to get started and then uncomment the line below. 3 | // import "./user_socket.js" 4 | 5 | // You can include dependencies in two ways. 6 | // 7 | // The simplest option is to put them in assets/vendor and 8 | // import them using relative paths: 9 | // 10 | // import "../vendor/some-package.js" 11 | // 12 | // Alternatively, you can `npm install some-package --prefix assets` and import 13 | // them using a path starting with the package name: 14 | // 15 | // import "some-package" 16 | // 17 | 18 | // Include phoenix_html to handle method=PUT/DELETE in forms and buttons. 19 | import "phoenix_html"; 20 | // Establish Phoenix Socket and LiveView configuration. 21 | import { Socket } from "phoenix"; 22 | import { LiveSocket } from "phoenix_live_view"; 23 | import Toastify from "toastify-js"; 24 | import Audio from "./micro.js"; 25 | import topbar from "../vendor/topbar"; 26 | 27 | let Hooks = { Audio }; 28 | 29 | // Hook to track inactivity 30 | Hooks.ActivityTracker = { 31 | mounted() { 32 | // Set the inactivity duration in milliseconds 33 | const inactivityDuration = 8000; // 8 seconds 34 | 35 | // Set a variable to keep track of the timer and if the process to predict example image has already been sent 36 | let inactivityTimer; 37 | let processHasBeenSent = false; 38 | 39 | // We use the `mounted()` context to push the event. This is used in the `setTimeout` function below. 
40 | let ctx = this; 41 | 42 | // Function to reset the timer 43 | function resetInactivityTimer() { 44 | // Clear the previous timer 45 | clearTimeout(inactivityTimer); 46 | 47 | // Start a new timer 48 | inactivityTimer = setTimeout(() => { 49 | // Perform the desired action after the inactivity duration 50 | // For example, send a message to the Elixir process using Phoenix Socket 51 | if (!processHasBeenSent) { 52 | processHasBeenSent = true; 53 | ctx.pushEvent("show_examples", {}); 54 | } 55 | }, inactivityDuration); 56 | } 57 | 58 | // Call the function to start the timer initially 59 | resetInactivityTimer(); 60 | 61 | // Reset the timer whenever there is user activity 62 | document.addEventListener("mousemove", resetInactivityTimer); 63 | document.addEventListener("keydown", resetInactivityTimer); 64 | }, 65 | }; 66 | 67 | // Hook to show message toast 68 | Hooks.MessageToaster = { 69 | mounted() { 70 | this.handleEvent("toast", (payload) => { 71 | Toastify({ 72 | text: payload.message, 73 | gravity: "bottom", 74 | position: "right", 75 | style: { 76 | background: "linear-gradient(to right, #f27474, #ed87b5)", 77 | }, 78 | duration: 4000, 79 | }).showToast(); 80 | }); 81 | }, 82 | }; 83 | 84 | let csrfToken = document 85 | .querySelector("meta[name='csrf-token']") 86 | .getAttribute("content"); 87 | let liveSocket = new LiveSocket("/live", Socket, { 88 | hooks: Hooks, 89 | params: { _csrf_token: csrfToken }, 90 | }); 91 | 92 | // Toggles to show upload or semantic search containers 93 | // JavaScript to toggle visibility and styles 94 | document.getElementById('upload_option').addEventListener('click', function() { 95 | document.getElementById('upload_container').style.display = 'block'; 96 | document.getElementById('search_container').style.display = 'none'; 97 | 98 | document.getElementById('upload_option').classList.replace('bg-white', 'bg-blue-500'); 99 | document.getElementById('upload_option').classList.replace('text-gray-900', 'text-white'); 100 | document.getElementById('upload_option').classList.replace('hover:bg-gray-50', 'hover:bg-blue-600'); 101 | document.getElementById('upload_option').getElementsByTagName('svg')[0].classList.replace('text-gray-400', 'text-white'); 102 | 103 | document.getElementById('search_option').classList.replace('bg-blue-500', 'bg-white'); 104 | document.getElementById('search_option').classList.replace('text-white', 'text-gray-900'); 105 | document.getElementById('search_option').classList.replace('hover:bg-blue-600', 'hover:bg-gray-50'); 106 | document.getElementById('search_option').getElementsByTagName('svg')[0].classList.replace('text-white', 'text-gray-400'); 107 | }); 108 | 109 | document.getElementById('search_option').addEventListener('click', function() { 110 | document.getElementById('upload_container').style.display = 'none'; 111 | document.getElementById('search_container').style.display = 'block'; 112 | 113 | document.getElementById('search_option').classList.replace('bg-white', 'bg-blue-500'); 114 | document.getElementById('search_option').classList.replace('text-gray-900', 'text-white'); 115 | document.getElementById('search_option').classList.replace('hover:bg-gray-50', 'hover:bg-blue-600'); 116 | document.getElementById('search_option').getElementsByTagName('svg')[0].classList.replace('text-gray-400', 'text-white'); 117 | 118 | document.getElementById('upload_option').classList.replace('bg-blue-500', 'bg-white'); 119 | document.getElementById('upload_option').classList.replace('text-white', 'text-gray-900'); 120 | 
document.getElementById('upload_option').classList.replace('hover:bg-blue-600', 'hover:bg-gray-50'); 121 | document.getElementById('upload_option').getElementsByTagName('svg')[0].classList.replace('text-white', 'text-gray-400'); 122 | }); 123 | 124 | 125 | // Show progress bar on live navigation and form submits 126 | topbar.config({ barColors: { 0: "#29d" }, shadowColor: "rgba(0, 0, 0, .3)" }); 127 | window.addEventListener("phx:page-loading-start", (_info) => topbar.show(300)); 128 | window.addEventListener("phx:page-loading-stop", (_info) => topbar.hide()); 129 | 130 | // connect if there are any LiveViews on the page 131 | liveSocket.connect(); 132 | 133 | // expose liveSocket on window for web console debug logs and latency simulation: 134 | // >> liveSocket.enableDebug() 135 | // >> liveSocket.enableLatencySim(1000) // enabled for duration of browser session 136 | // >> liveSocket.disableLatencySim() 137 | window.liveSocket = liveSocket; 138 | -------------------------------------------------------------------------------- /assets/js/micro.js: -------------------------------------------------------------------------------- 1 | import toWav from "audiobuffer-to-wav"; 2 | 3 | export default { 4 | mounted() { 5 | let mediaRecorder, 6 | audioChunks = []; 7 | 8 | // Defining the elements and styles to be used during recording 9 | // and shown on the HTML. 10 | const recordButton = document.getElementById("record"), 11 | audioElement = document.getElementById("audio"), 12 | text = document.getElementById("text"), 13 | blue = ["bg-blue-500", "hover:bg-blue-700"], 14 | pulseGreen = ["bg-green-500", "hover:bg-green-700", "animate-pulse"]; 15 | 16 | _this = this; 17 | 18 | // Adding event listener for "click" event 19 | recordButton.addEventListener("click", () => { 20 | // Check if it's recording. 21 | // If it is, we stop the record and update the elements. 22 | if (mediaRecorder && mediaRecorder.state === "recording") { 23 | mediaRecorder.stop(); 24 | // audioChunks.getAudioTracks()[0].stop(); 25 | text.textContent = "Record"; 26 | } 27 | 28 | // Otherwise, it means the user wants to start recording. 29 | else { 30 | navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => { 31 | // Instantiate MediaRecorder 32 | mediaRecorder = new MediaRecorder(stream); 33 | mediaRecorder.start() 34 | 35 | // And update the elements 36 | recordButton.classList.remove(...blue); 37 | recordButton.classList.add(...pulseGreen); 38 | text.textContent = "Stop"; 39 | 40 | // Add "dataavailable" event handler 41 | mediaRecorder.addEventListener("dataavailable", (event) => { 42 | event.data.size > 0 && audioChunks.push(event.data); 43 | }); 44 | 45 | // Add "stop" event handler for when the recording stops. 
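// In summary, the handler below: builds a Blob from the recorded chunks, points the <audio>
// element at it so the user can replay the recording, decodes it through an AudioContext created
// at a 16 kHz sample rate (the file-size optimisation described below), re-encodes it as WAV via
// "audiobuffer-to-wav", and uploads the result with the LiveView hook's `this.upload`.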
46 | mediaRecorder.addEventListener("stop", async () => { 47 | const audioBlob = new Blob(audioChunks); 48 | 49 | // update the source of the Audio tag for the user to listen to his audio 50 | audioElement.src = URL.createObjectURL(audioBlob); 51 | 52 | // create an AudioContext with a sampleRate of 16000 53 | const audioContext = new AudioContext({ sampleRate: 16000 }); 54 | 55 | // We optimize the audio to reduce the size of the file whilst maintaining the necessary information for the model ----------- 56 | // async read the Blob as ArrayBuffer to feed the "decodeAudioData" 57 | const arrayBuffer = await audioBlob.arrayBuffer(); 58 | // decodes the ArrayBuffer into the AudioContext format 59 | const audioBuffer = await audioContext.decodeAudioData(arrayBuffer); 60 | // converts the AudioBuffer into a WAV format 61 | const wavBuffer = toWav(audioBuffer); 62 | // builds a Blob to pass to the Phoenix.JS.upload 63 | const wavBlob = new Blob([wavBuffer], { type: "audio/wav" }); 64 | 65 | 66 | // upload to the server via a chanel with the built-in Phoenix.JS.upload 67 | _this.upload("speech", [wavBlob]); 68 | // close the MediaRecorder instance 69 | mediaRecorder.stop(); 70 | 71 | // cleanups 72 | audioChunks = []; 73 | recordButton.classList.remove(...pulseGreen); 74 | recordButton.classList.add(...blue); 75 | }); 76 | }); 77 | } 78 | }); 79 | }, 80 | }; 81 | 82 | -------------------------------------------------------------------------------- /assets/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "assets", 3 | "lockfileVersion": 3, 4 | "requires": true, 5 | "packages": { 6 | "": { 7 | "dependencies": { 8 | "audiobuffer-to-wav": "^1.0.0", 9 | "toastify-js": "^1.12.0" 10 | } 11 | }, 12 | "node_modules/audiobuffer-to-wav": { 13 | "version": "1.0.0", 14 | "resolved": "https://registry.npmjs.org/audiobuffer-to-wav/-/audiobuffer-to-wav-1.0.0.tgz", 15 | "integrity": "sha512-CAoir4NRrAzAgYo20tEMiKZR84coE8bq/L+H2kwAaULVY4+0xySsEVtNT5raqpzmH6y0pqzY6EmoViLd9W8F/w==" 16 | }, 17 | "node_modules/toastify-js": { 18 | "version": "1.12.0", 19 | "resolved": "https://registry.npmjs.org/toastify-js/-/toastify-js-1.12.0.tgz", 20 | "integrity": "sha512-HeMHCO9yLPvP9k0apGSdPUWrUbLnxUKNFzgUoZp1PHCLploIX/4DSQ7V8H25ef+h4iO9n0he7ImfcndnN6nDrQ==" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /assets/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "audiobuffer-to-wav": "^1.0.0", 4 | "toastify-js": "^1.12.0" 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /assets/pnpm-lock.yaml: -------------------------------------------------------------------------------- 1 | lockfileVersion: '6.0' 2 | 3 | settings: 4 | autoInstallPeers: true 5 | excludeLinksFromLockfile: false 6 | 7 | dependencies: 8 | audiobuffer-to-wav: 9 | specifier: ^1.0.0 10 | version: 1.0.0 11 | toastify-js: 12 | specifier: ^1.12.0 13 | version: 1.12.0 14 | 15 | packages: 16 | 17 | /audiobuffer-to-wav@1.0.0: 18 | resolution: {integrity: sha512-CAoir4NRrAzAgYo20tEMiKZR84coE8bq/L+H2kwAaULVY4+0xySsEVtNT5raqpzmH6y0pqzY6EmoViLd9W8F/w==} 19 | dev: false 20 | 21 | /toastify-js@1.12.0: 22 | resolution: {integrity: sha512-HeMHCO9yLPvP9k0apGSdPUWrUbLnxUKNFzgUoZp1PHCLploIX/4DSQ7V8H25ef+h4iO9n0he7ImfcndnN6nDrQ==} 23 | dev: false 24 | -------------------------------------------------------------------------------- 
/assets/tailwind.config.js: -------------------------------------------------------------------------------- 1 | // See the Tailwind configuration guide for advanced usage 2 | // https://tailwindcss.com/docs/configuration 3 | 4 | const plugin = require("tailwindcss/plugin") 5 | 6 | module.exports = { 7 | content: [ 8 | "./js/**/*.js", 9 | "../lib/*_web.ex", 10 | "../lib/*_web/**/*.*ex" 11 | ], 12 | theme: { 13 | extend: { 14 | colors: { 15 | brand: "#FD4F00", 16 | } 17 | }, 18 | }, 19 | plugins: [ 20 | require("@tailwindcss/forms"), 21 | plugin(({addVariant}) => addVariant("phx-no-feedback", [".phx-no-feedback&", ".phx-no-feedback &"])), 22 | plugin(({addVariant}) => addVariant("phx-click-loading", [".phx-click-loading&", ".phx-click-loading &"])), 23 | plugin(({addVariant}) => addVariant("phx-submit-loading", [".phx-submit-loading&", ".phx-submit-loading &"])), 24 | plugin(({addVariant}) => addVariant("phx-change-loading", [".phx-change-loading&", ".phx-change-loading &"])) 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /assets/vendor/topbar.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @license MIT 3 | * topbar 2.0.0, 2023-02-04 4 | * https://buunguyen.github.io/topbar 5 | * Copyright (c) 2021 Buu Nguyen 6 | */ 7 | (function (window, document) { 8 | "use strict"; 9 | 10 | // https://gist.github.com/paulirish/1579671 11 | (function () { 12 | var lastTime = 0; 13 | var vendors = ["ms", "moz", "webkit", "o"]; 14 | for (var x = 0; x < vendors.length && !window.requestAnimationFrame; ++x) { 15 | window.requestAnimationFrame = 16 | window[vendors[x] + "RequestAnimationFrame"]; 17 | window.cancelAnimationFrame = 18 | window[vendors[x] + "CancelAnimationFrame"] || 19 | window[vendors[x] + "CancelRequestAnimationFrame"]; 20 | } 21 | if (!window.requestAnimationFrame) 22 | window.requestAnimationFrame = function (callback, element) { 23 | var currTime = new Date().getTime(); 24 | var timeToCall = Math.max(0, 16 - (currTime - lastTime)); 25 | var id = window.setTimeout(function () { 26 | callback(currTime + timeToCall); 27 | }, timeToCall); 28 | lastTime = currTime + timeToCall; 29 | return id; 30 | }; 31 | if (!window.cancelAnimationFrame) 32 | window.cancelAnimationFrame = function (id) { 33 | clearTimeout(id); 34 | }; 35 | })(); 36 | 37 | var canvas, 38 | currentProgress, 39 | showing, 40 | progressTimerId = null, 41 | fadeTimerId = null, 42 | delayTimerId = null, 43 | addEvent = function (elem, type, handler) { 44 | if (elem.addEventListener) elem.addEventListener(type, handler, false); 45 | else if (elem.attachEvent) elem.attachEvent("on" + type, handler); 46 | else elem["on" + type] = handler; 47 | }, 48 | options = { 49 | autoRun: true, 50 | barThickness: 3, 51 | barColors: { 52 | 0: "rgba(26, 188, 156, .9)", 53 | ".25": "rgba(52, 152, 219, .9)", 54 | ".50": "rgba(241, 196, 15, .9)", 55 | ".75": "rgba(230, 126, 34, .9)", 56 | "1.0": "rgba(211, 84, 0, .9)", 57 | }, 58 | shadowBlur: 10, 59 | shadowColor: "rgba(0, 0, 0, .6)", 60 | className: null, 61 | }, 62 | repaint = function () { 63 | canvas.width = window.innerWidth; 64 | canvas.height = options.barThickness * 5; // need space for shadow 65 | 66 | var ctx = canvas.getContext("2d"); 67 | ctx.shadowBlur = options.shadowBlur; 68 | ctx.shadowColor = options.shadowColor; 69 | 70 | var lineGradient = ctx.createLinearGradient(0, 0, canvas.width, 0); 71 | for (var stop in options.barColors) 72 | lineGradient.addColorStop(stop, 
options.barColors[stop]); 73 | ctx.lineWidth = options.barThickness; 74 | ctx.beginPath(); 75 | ctx.moveTo(0, options.barThickness / 2); 76 | ctx.lineTo( 77 | Math.ceil(currentProgress * canvas.width), 78 | options.barThickness / 2 79 | ); 80 | ctx.strokeStyle = lineGradient; 81 | ctx.stroke(); 82 | }, 83 | createCanvas = function () { 84 | canvas = document.createElement("canvas"); 85 | var style = canvas.style; 86 | style.position = "fixed"; 87 | style.top = style.left = style.right = style.margin = style.padding = 0; 88 | style.zIndex = 100001; 89 | style.display = "none"; 90 | if (options.className) canvas.classList.add(options.className); 91 | document.body.appendChild(canvas); 92 | addEvent(window, "resize", repaint); 93 | }, 94 | topbar = { 95 | config: function (opts) { 96 | for (var key in opts) 97 | if (options.hasOwnProperty(key)) options[key] = opts[key]; 98 | }, 99 | show: function (delay) { 100 | if (showing) return; 101 | if (delay) { 102 | if (delayTimerId) return; 103 | delayTimerId = setTimeout(() => topbar.show(), delay); 104 | } else { 105 | showing = true; 106 | if (fadeTimerId !== null) window.cancelAnimationFrame(fadeTimerId); 107 | if (!canvas) createCanvas(); 108 | canvas.style.opacity = 1; 109 | canvas.style.display = "block"; 110 | topbar.progress(0); 111 | if (options.autoRun) { 112 | (function loop() { 113 | progressTimerId = window.requestAnimationFrame(loop); 114 | topbar.progress( 115 | "+" + 0.05 * Math.pow(1 - Math.sqrt(currentProgress), 2) 116 | ); 117 | })(); 118 | } 119 | } 120 | }, 121 | progress: function (to) { 122 | if (typeof to === "undefined") return currentProgress; 123 | if (typeof to === "string") { 124 | to = 125 | (to.indexOf("+") >= 0 || to.indexOf("-") >= 0 126 | ? currentProgress 127 | : 0) + parseFloat(to); 128 | } 129 | currentProgress = to > 1 ? 1 : to; 130 | repaint(); 131 | return currentProgress; 132 | }, 133 | hide: function () { 134 | clearTimeout(delayTimerId); 135 | delayTimerId = null; 136 | if (!showing) return; 137 | showing = false; 138 | if (progressTimerId != null) { 139 | window.cancelAnimationFrame(progressTimerId); 140 | progressTimerId = null; 141 | } 142 | (function loop() { 143 | if (topbar.progress("+.1") >= 1) { 144 | canvas.style.opacity -= 0.05; 145 | if (canvas.style.opacity <= 0.05) { 146 | canvas.style.display = "none"; 147 | fadeTimerId = null; 148 | return; 149 | } 150 | } 151 | fadeTimerId = window.requestAnimationFrame(loop); 152 | })(); 153 | }, 154 | }; 155 | 156 | if (typeof module === "object" && typeof module.exports === "object") { 157 | module.exports = topbar; 158 | } else if (typeof define === "function" && define.amd) { 159 | define(function () { 160 | return topbar; 161 | }); 162 | } else { 163 | this.topbar = topbar; 164 | } 165 | }.call(this, window, document)); 166 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | # This file is responsible for configuring your application 2 | # and its dependencies with the aid of the Config module. 3 | # 4 | # This configuration file is loaded before any dependency and 5 | # is restricted to this project. 
6 | 7 | # General application configuration 8 | import Config 9 | 10 | # DB configuration 11 | config :app, 12 | ecto_repos: [App.Repo], 13 | generators: [timestamp_type: :utc_datetime] 14 | 15 | # Tells `NX` to use `EXLA` as backend 16 | # config :nx, default_backend: EXLA.Backend 17 | # needed to run on `Fly.io` 18 | config :nx, :default_backend, {EXLA.Backend, client: :host} 19 | 20 | # Configures the endpoint 21 | config :app, AppWeb.Endpoint, 22 | url: [host: "localhost"], 23 | render_errors: [ 24 | formats: [html: AppWeb.ErrorHTML, json: AppWeb.ErrorJSON], 25 | layout: false 26 | ], 27 | pubsub_server: App.PubSub, 28 | live_view: [signing_salt: "euyclMQ2"] 29 | 30 | # Configure esbuild (the version is required) 31 | config :esbuild, 32 | version: "0.18.6", 33 | default: [ 34 | args: 35 | ~w(js/app.js --bundle --target=es2017 --outdir=../priv/static/assets --external:/fonts/* --external:/images/*), 36 | cd: Path.expand("../assets", __DIR__), 37 | env: %{"NODE_PATH" => Path.expand("../deps", __DIR__)} 38 | ] 39 | 40 | # Configure tailwind (the version is required) 41 | config :tailwind, 42 | version: "3.2.4", 43 | default: [ 44 | args: ~w( 45 | --config=tailwind.config.js 46 | --input=css/app.css 47 | --output=../priv/static/assets/app.css 48 | ), 49 | cd: Path.expand("../assets", __DIR__) 50 | ] 51 | 52 | # Configures Elixir's Logger 53 | config :logger, :console, 54 | format: "$time $metadata[$level] $message\n", 55 | metadata: [:request_id] 56 | 57 | # Use Jason for JSON parsing in Phoenix 58 | config :phoenix, :json_library, Jason 59 | 60 | # Import environment specific config. This must remain at the bottom 61 | # of this file so it overrides the configuration defined above. 62 | import_config "#{config_env()}.exs" 63 | 64 | # When deploying to `fly.io`, you can delete this or leave it in. 65 | # It only makes sense to set it to `true` if you're changing models 66 | # in deployment. 67 | # 68 | # So, you run `fly deploy` with this set to `true`. 69 | # After deploying, you set it to `false` and deploy it again, 70 | # so the application doesn't download the model again on every restart. 71 | config :app, 72 | models_cache_dir: ".bumblebee" 73 | -------------------------------------------------------------------------------- /config/dev.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | # Configure your database 4 | config :app, App.Repo, 5 | username: "postgres", 6 | password: "postgres", 7 | hostname: "localhost", 8 | database: "app_dev", 9 | stacktrace: true, 10 | show_sensitive_data_on_connection_error: true, 11 | pool_size: 10 12 | 13 | # For development, we disable any cache and enable 14 | # debugging and code reloading. 15 | # 16 | # The watchers configuration can be used to run external 17 | # watchers to your application. For example, we use it 18 | # with esbuild to bundle .js and .css sources. 19 | config :app, AppWeb.Endpoint, 20 | # Binding to loopback ipv4 address prevents access from other machines. 21 | # Change to `ip: {0, 0, 0, 0}` to allow access from other machines. 
22 | http: [ip: {127, 0, 0, 1}, port: 4000], 23 | check_origin: false, 24 | code_reloader: true, 25 | debug_errors: true, 26 | secret_key_base: "btTsEy6WVagm+4u+ZrbwVg6F48ZfgpePZx70twE9SSyPZKkZHiaYa77bFUWV4Vw5", 27 | watchers: [ 28 | esbuild: {Esbuild, :install_and_run, [:default, ~w(--sourcemap=inline --watch)]}, 29 | tailwind: {Tailwind, :install_and_run, [:default, ~w(--watch)]} 30 | ] 31 | 32 | # ## SSL Support 33 | # 34 | # In order to use HTTPS in development, a self-signed 35 | # certificate can be generated by running the following 36 | # Mix task: 37 | # 38 | # mix phx.gen.cert 39 | # 40 | # Run `mix help phx.gen.cert` for more information. 41 | # 42 | # The `http:` config above can be replaced with: 43 | # 44 | # https: [ 45 | # port: 4001, 46 | # cipher_suite: :strong, 47 | # keyfile: "priv/cert/selfsigned_key.pem", 48 | # certfile: "priv/cert/selfsigned.pem" 49 | # ], 50 | # 51 | # If desired, both `http:` and `https:` keys can be 52 | # configured to run both http and https servers on 53 | # different ports. 54 | 55 | # Watch static and templates for browser reloading. 56 | config :app, AppWeb.Endpoint, 57 | live_reload: [ 58 | patterns: [ 59 | ~r"priv/static/.*(js|css|png|jpeg|jpg|gif|svg)$", 60 | ~r"lib/app_web/(controllers|live|components)/.*(ex|heex)$" 61 | ] 62 | ] 63 | 64 | # Enable dev routes for dashboard and mailbox 65 | config :app, dev_routes: true 66 | 67 | # Do not include metadata nor timestamps in development logs 68 | config :logger, :console, format: "[$level] $message\n" 69 | 70 | # Set a higher stacktrace during development. Avoid configuring such 71 | # in production as building large stacktraces may be expensive. 72 | config :phoenix, :stacktrace_depth, 20 73 | 74 | # Initialize plugs at runtime for faster development compilation 75 | config :phoenix, :plug_init_mode, :runtime 76 | -------------------------------------------------------------------------------- /config/prod.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | # For production, don't forget to configure the url host 4 | # to something meaningful, Phoenix uses this information 5 | # when generating URLs. 6 | 7 | # Note we also include the path to a cache manifest 8 | # containing the digested version of static files. This 9 | # manifest is generated by the `mix phx.digest` task, 10 | # which you should run after static files are built and 11 | # before starting your production server. 12 | config :app, AppWeb.Endpoint, cache_static_manifest: "priv/static/cache_manifest.json" 13 | 14 | # Do not print debug messages in production 15 | config :logger, level: :info 16 | 17 | # Runtime production configuration, including reading 18 | # of environment variables, is done on config/runtime.exs. 19 | -------------------------------------------------------------------------------- /config/runtime.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | # config/runtime.exs is executed for all environments, including 4 | # during releases. It is executed after compilation and before the 5 | # system starts, so it is typically used to load production configuration 6 | # and secrets from environment variables or elsewhere. Do not define 7 | # any compile-time configuration in here, as it won't be applied. 8 | # The block below contains prod specific runtime configuration. 
9 | 10 | # ## Using releases 11 | # 12 | # If you use `mix release`, you need to explicitly enable the server 13 | # by passing the PHX_SERVER=true when you start it: 14 | # 15 | # PHX_SERVER=true bin/app start 16 | # 17 | # Alternatively, you can use `mix phx.gen.release` to generate a `bin/server` 18 | # script that automatically sets the env var above. 19 | if System.get_env("PHX_SERVER") do 20 | config :app, AppWeb.Endpoint, server: true 21 | end 22 | 23 | if config_env() == :prod do 24 | database_url = 25 | System.get_env("DATABASE_URL") || 26 | raise """ 27 | environment variable DATABASE_URL is missing. 28 | For example: ecto://USER:PASS@HOST/DATABASE 29 | """ 30 | 31 | maybe_ipv6 = if System.get_env("ECTO_IPV6") in ~w(true 1), do: [:inet6], else: [] 32 | 33 | config :app, App.Repo, 34 | # ssl: true, 35 | url: database_url, 36 | pool_size: String.to_integer(System.get_env("POOL_SIZE") || "10"), 37 | socket_options: maybe_ipv6 38 | 39 | # The secret key base is used to sign/encrypt cookies and other secrets. 40 | # A default value is used in config/dev.exs and config/test.exs but you 41 | # want to use a different value for prod and you most likely don't want 42 | # to check this value into version control, so we use an environment 43 | # variable instead. 44 | secret_key_base = 45 | System.get_env("SECRET_KEY_BASE") || 46 | raise """ 47 | environment variable SECRET_KEY_BASE is missing. 48 | You can generate one by calling: mix phx.gen.secret 49 | """ 50 | 51 | host = System.get_env("PHX_HOST") || "example.com" 52 | port = String.to_integer(System.get_env("PORT") || "4000") 53 | 54 | config :app, :dns_cluster_query, System.get_env("DNS_CLUSTER_QUERY") 55 | 56 | config :app, AppWeb.Endpoint, 57 | url: [host: host, port: 443, scheme: "https"], 58 | http: [ 59 | # Enable IPv6 and bind on all interfaces. 60 | # Set it to {0, 0, 0, 0, 0, 0, 0, 1} for local network only access. 61 | # See the documentation on https://hexdocs.pm/plug_cowboy/Plug.Cowboy.html 62 | # for details about using IPv6 vs IPv4 and loopback vs public addresses. 63 | ip: {0, 0, 0, 0, 0, 0, 0, 0}, 64 | port: port 65 | ], 66 | secret_key_base: secret_key_base 67 | 68 | # ## SSL Support 69 | # 70 | # To get SSL working, you will need to add the `https` key 71 | # to your endpoint configuration: 72 | # 73 | # config :app, AppWeb.Endpoint, 74 | # https: [ 75 | # ..., 76 | # port: 443, 77 | # cipher_suite: :strong, 78 | # keyfile: System.get_env("SOME_APP_SSL_KEY_PATH"), 79 | # certfile: System.get_env("SOME_APP_SSL_CERT_PATH") 80 | # ] 81 | # 82 | # The `cipher_suite` is set to `:strong` to support only the 83 | # latest and more secure SSL ciphers. This means old browsers 84 | # and clients may not be supported. You can set it to 85 | # `:compatible` for wider support. 86 | # 87 | # `:keyfile` and `:certfile` expect an absolute path to the key 88 | # and cert in disk or a relative path inside priv, for example 89 | # "priv/ssl/server.key". For all supported SSL configuration 90 | # options, see https://hexdocs.pm/plug/Plug.SSL.html#configure/1 91 | # 92 | # We also recommend setting `force_ssl` in your endpoint, ensuring 93 | # no data is ever sent via http, always redirecting to https: 94 | # 95 | # config :app, AppWeb.Endpoint, 96 | # force_ssl: [hsts: true] 97 | # 98 | # Check `Plug.SSL` for all available options in `force_ssl`. 
99 | end 100 | -------------------------------------------------------------------------------- /config/test.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | # Configure your database 4 | # 5 | # The MIX_TEST_PARTITION environment variable can be used 6 | # to provide built-in test partitioning in CI environment. 7 | # Run `mix help test` for more information. 8 | config :app, App.Repo, 9 | username: "postgres", 10 | password: "postgres", 11 | hostname: "localhost", 12 | database: "app_test#{System.get_env("MIX_TEST_PARTITION")}", 13 | pool: Ecto.Adapters.SQL.Sandbox, 14 | pool_size: 10 15 | 16 | # We don't run a server during test. If one is required, 17 | # you can enable the server option below. 18 | config :app, AppWeb.Endpoint, 19 | http: [ip: {127, 0, 0, 1}, port: 4002], 20 | secret_key_base: "d422JqbVTXef5vPy90SakC4QcPN76fRi6wLm+pUnC09eFxWUjPbTKe0dVmpGpI5N", 21 | server: false 22 | 23 | # Print only warnings and errors during test 24 | config :logger, level: :warning 25 | 26 | # Initialize plugs at runtime for faster test compilation 27 | config :phoenix, :plug_init_mode, :runtime 28 | 29 | # App configuration 30 | config :app, 31 | start_genserver: false, 32 | knnindex_indices_test: true, 33 | use_test_models: true 34 | -------------------------------------------------------------------------------- /coveralls.json: -------------------------------------------------------------------------------- 1 | { 2 | "skip_files": [ 3 | "test/", 4 | "lib/app.ex", 5 | "lib/app/application.ex", 6 | "lib/app/models.ex", 7 | "lib/app_web.ex", 8 | "lib/app/repo.ex", 9 | "lib/app/release.ex", 10 | "lib/app_web/views/app_view.ex", 11 | "lib/app_web/views/init_view.ex", 12 | "lib/app_web/views/layout_view.ex", 13 | "lib/app_web/views/error_helpers.ex", 14 | "lib/app_web/components", 15 | "lib/app_web/endpoint.ex", 16 | "lib/app_web/telemetry.ex" 17 | ] 18 | } -------------------------------------------------------------------------------- /fly.toml: -------------------------------------------------------------------------------- 1 | # fly.toml app configuration file generated for imgai on 2024-03-11T18:20:28Z 2 | # 3 | # See https://fly.io/docs/reference/configuration/ for information about how to use this file. 4 | # 5 | 6 | app = 'imgai' 7 | primary_region = 'mad' 8 | kill_signal = 'SIGTERM' 9 | swap_size_mb = 512 10 | 11 | [build] 12 | 13 | [deploy] 14 | release_command = '/app/bin/migrate' 15 | 16 | [env] 17 | PHX_HOST = 'imgai.fly.dev' 18 | PORT = '8080' 19 | 20 | [[mounts]] 21 | source = 'models' 22 | destination = '/app/bin/.bumblebee' 23 | 24 | [http_service] 25 | internal_port = 8080 26 | force_https = true 27 | auto_stop_machines = true 28 | auto_start_machines = true 29 | min_machines_running = 0 30 | processes = ['app'] 31 | 32 | [http_service.concurrency] 33 | type = 'connections' 34 | hard_limit = 1000 35 | soft_limit = 1000 36 | 37 | [[vm]] 38 | size = 'performance-4x' 39 | -------------------------------------------------------------------------------- /hnswlib/README.md: -------------------------------------------------------------------------------- 1 | # Notes on HNSWLib binding for Elixir 2 | 3 | ## Semantic search example on how to use `HNSWLib` 4 | 5 | The code below can be run in an IEX session 6 | or in a Livebook. 
7 | 8 | ```bash 9 | elixir hnswlib.exs 10 | ``` 11 | 12 | We use the small model [sentence-transformers/paraphrase-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2) 13 | from `Hugging Face` to compute embeddings from text 14 | and run a [semantic search](https://sbert.net/examples/applications/semantic-search/README.html). 15 | 16 | This model produces embeddings of dimension [384](https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2/blob/main/config.json). 17 | 18 | ### Dependencies 19 | 20 | ```elixir 21 | Mix.install([ 22 | {:bumblebee, "~> 0.5.0"}, 23 | {:exla, "~> 0.7.0"}, 24 | {:nx, "~> 0.7.0"}, 25 | {:hnswlib, "~> 0.1.5"}, 26 | ]) 27 | 28 | Nx.global_default_backend(EXLA.Backend) 29 | ``` 30 | 31 | ### Instantiate the `hnswlib` index 32 | 33 | #### Metric 34 | 35 | You need to endow the vector space with one of the following metrics by setting the `space` argument from the list: 36 | 37 | `[:l2, :ip, :cosine]` 38 | 39 | > the first is the standard Euclidean metric, the second the inner product, and the third the pseudo-metric "cosine similarity". 40 | 41 | We set the dimension (`dim`) to **384**. 42 | We first use the `:l2` norm to build the `hnswlib` index: 43 | 44 | ```elixir 45 | {:ok, index} = 46 | HNSWLib.Index.new( 47 | _space = :l2, 48 | # ^^^ the chosen metric 49 | _dim = 384, 50 | _max_elements = 200 51 | ) 52 | ``` 53 | 54 | ### `Nx.Serving` 55 | 56 | We build the `Nx.Serving` for our model: 57 | it downloads the model files from the Hugging Face Hub. 58 | 59 | ```elixir 60 | transformer = "sentence-transformers/paraphrase-MiniLM-L6-v2" 61 | 62 | {:ok, %{model: _, params: _} = model_info} = 63 | Bumblebee.load_model({:hf, transformer}) 64 | 65 | {:ok, tokenizer} = 66 | Bumblebee.load_tokenizer({:hf, transformer}) 67 | 68 | serving = 69 | Bumblebee.Text.TextEmbedding.text_embedding( 70 | model_info, 71 | tokenizer, 72 | defn_options: [compiler: EXLA] 73 | ) 74 | ``` 75 | 76 | ### Compute embeddings and add to the index 77 | 78 | We check that our index is instantiated and empty: 79 | 80 | ```elixir 81 | HNSWLib.Index.get_current_count(index) 82 | #{:ok, 0} 83 | ``` 84 | 85 | We compute our first embedding for the word "short": 86 | 87 | ```elixir 88 | input = "short" 89 | # you compute the embedding 90 | %{embedding: data} = 91 | Nx.Serving.run(serving, input) 92 | ``` 93 | 94 | You get: 95 | 96 | ```elixir 97 | %{ 98 | embedding: #Nx.Tensor< 99 | f32[384] 100 | [-0.013410531915724277, 0.07099384069442749, -0.013070221990346909,...] 101 | } 102 | ``` 103 | 104 | You then append the embedding to your Index: 105 | 106 | ```elixir 107 | :ok = HNSWLib.Index.add_items(index, data) 108 | 109 | HNSWLib.Index.save_index(index, "my_index.bin") 110 | #{:ok, 1} 111 | ``` 112 | 113 | You should see a file `"my_index.bin"` in your current directory. 114 | 115 | When you append entries one by one, you can get the current count of the Index with: 116 | 117 | ```elixir 118 | HNSWLib.Index.get_current_count(index) 119 | ``` 120 | 121 | This means you can persist the index position to uniquely identify an item. 122 | 123 | > You can also enter a batch of items. You will only get back the last index. This means that you may need to persist the embedding if you want to identify the input in this case.
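As a minimal sketch of that bookkeeping (assuming the `serving` defined above; the `demo_index`, the `add_and_track` helper and the plain map used as storage are illustrative and not part of the `HNSWLib` API):

```elixir
# A throwaway index just for this sketch, so the index built above stays untouched.
{:ok, demo_index} = HNSWLib.Index.new(:l2, 384, 200)

# Adds one text to the index and records the position it landed on,
# so a position returned later by knn_query can be mapped back to the text.
add_and_track = fn mapping, text ->
  %{embedding: data} = Nx.Serving.run(serving, text)
  :ok = HNSWLib.Index.add_items(demo_index, data)
  {:ok, count} = HNSWLib.Index.get_current_count(demo_index)
  # positions are zero-based, so the item just added sits at count - 1
  Map.put(mapping, count - 1, text)
end

mapping = Enum.reduce(["short", "tall"], %{}, fn text, acc -> add_and_track.(acc, text) end)
#=> %{0 => "short", 1 => "tall"}
```

In this repository the same idea is persisted in the database instead of a map: each image row keeps a pointer to its entry in the index (the `idx` column of the `images` table).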
124 | 125 | Let's enter another entry: 126 | 127 | ```elixir 128 | input = "tall" 129 | # you get an embedding 130 | %{embedding: data} = 131 | Nx.Serving.run(serving, input) 132 | 133 | # you add the embedding to the Index 134 | :ok = HNSWLib.Index.add_items(index, data) 135 | 136 | HNSWLib.Index.save_index(index, "my_index.bin") 137 | 138 | HNSWLib.Index.get_current_count(index) 139 | #{:ok, 2} 140 | ``` 141 | 142 | ### KNN search 143 | 144 | You now run a `knn_query` from a text input - converted into an embedding - to look for the closest element present in the Index. 145 | 146 | Let's find the closest item in the Index to the input "small". 147 | We expect to get "short", the first item. 148 | 149 | ```elixir 150 | input = "small" 151 | # you compute the embedding of your query 152 | %{embedding: query_data} = 153 | Nx.Serving.run(serving, input) 154 | 155 | {:ok, labels, _d} = 156 | HNSWLib.Index.knn_query( 157 | index, 158 | query_data, 159 | k: 1 160 | ) 161 | ``` 162 | 163 | You should get: 164 | 165 | ```elixir 166 | {:ok, 167 | #Nx.Tensor< 168 | u64[1][1] 169 | EXLA.Backend 170 | [ 171 | [0] 172 | ] 173 | >, 174 | #Nx.Tensor< 175 | f32[1][1] 176 | EXLA.Backend 177 | [ 178 | [0.2972676455974579] 179 | ] 180 | >} 181 | ``` 182 | 183 | This means that the nearest neighbour of the given input has the index 0 in the Index. 184 | This corresponds to the **first** entry "short". 185 | 186 | We can recover the embedding to compare: 187 | 188 | ```elixir 189 | {:ok, data} = 190 | HNSWLib.Index.get_items( 191 | index, 192 | Nx.to_flat_list(labels[0]) 193 | ) 194 | 195 | hd(data) |> Nx.from_binary(:f32) |> Nx.stack() 196 | ``` 197 | 198 | The result is: 199 | 200 | ```elixir 201 | #Nx.Tensor< 202 | f32[1][384] 203 | EXLA.Backend 204 | [ 205 | [-0.013410531915724277, 0.07099384069442749, -0.013070221990346909,...] 206 | ] 207 | ``` 208 | 209 | As expected, we recovered the first embedding. 210 | 211 | ### Change the norm 212 | 213 | The model has been trained using cosine similarity, so let's now use the `:cosine` space. 214 | 215 | ```elixir 216 | {:ok, index} = 217 | HNSWLib.Index.new( 218 | _space = :cosine, 219 | # ^^^ the chosen metric 220 | _dim = 384, 221 | _max_elements = 200 222 | ) 223 | ``` 224 | 225 | We get the embedding: 226 | 227 | ```elixir 228 | #Nx.Tensor< 229 | f32[384] 230 | [-0.013410531915724277, 0.07099384069442749, -0.013070221990346909,...] 231 | 232 | ``` 233 | 234 | When we run the knn search, we find again the same "nearest neighbour", with of course a different distance. 235 | 236 | ```elixir 237 | #Nx.Tensor< 238 | u64[1][1] 239 | EXLA.Backend 240 | [ 241 | [0] 242 | ] 243 | >, 244 | #Nx.Tensor< 245 | f32[1][1] 246 | EXLA.Backend 247 | [ 248 | [0.06562089920043945] 249 | ] 250 | >} 251 | ``` 252 | 253 | The recovered embedding is however different: 254 | 255 | ```elixir 256 | #Nx.Tensor< 257 | f32[1][384] 258 | EXLA.Backend 259 | [ 260 | [-0.008871854282915592, 0.04696659371256828, -0.00864671915769577,...]
261 | ``` 262 | 263 | The reason is that the "sentence-transformer" model uses different settings than `Bumblebee`'s default settings: 264 | 265 | ```py 266 | SentenceTransformer( 267 | (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 268 | (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False}) 269 | ) 270 | ``` 271 | 272 | It performs "mean_tokens_pooling" and "normalizes" the vectors [see here](https://www.sbert.net/docs/package_reference/models.html#sentence_transformers.models.Pooling) with `class sentence_transformers.models.Pooling` and `class sentence_transformers.models.Normalize`. 273 | 274 | [This blog post confirms this](https://samrat.me/blog/til-creating-sentence-transformers-embeddings-from-bumblebee). 275 | 276 | To recover the embedding, we can: 277 | 278 | - **normalize** the tensors with the transformation: 279 | 280 | ```elixir 281 | %{embedding: t_small} = 282 | Nx.Serving.run(serving, "short") 283 | 284 | n_small = 285 | Nx.divide(t_small, Nx.LinAlg.norm(t_small)) 286 | 287 | HNSWLib.Index.add_items(index, n_small) 288 | ``` 289 | 290 | - or just change the options of [Bumblebee.Text.text_embedding/3](https://hexdocs.pm/bumblebee/0.4.0/Bumblebee.Text.html#text_embedding/3): 291 | 292 | ```elixir 293 | serving = 294 | Bumblebee.Text.TextEmbedding.text_embedding( 295 | model_info, 296 | tokenizer, 297 | defn_options: [compiler: EXLA], 298 | embedding_processor: :l2_norm, 299 | output_pool: :mean_pooling, 300 | output_attribute: :hidden_state 301 | ) 302 | ``` 303 | 304 | When we just normalize the vectors, we recover the exact same vector. 305 | When we change the `Bumblebee` settings, the recovered embedding is almost identical: 306 | 307 | ```elixir 308 | [-0.03144508972764015, 0.12630629539489746, 0.018703171983361244,...] 309 | ``` 310 | 311 | ## Notes on vector spaces 312 | 313 | A vector space of embeddings can be equipped with a (Euclidean) _inner product_. If $u=(u_1,\dots,u_n)$ and $v=(v_1,\dots,v_n)$ are two embeddings, the (Euclidean) inner product is defined as: 314 | 315 | $< u,v >=u_1v_1+\cdots+u_nv_n$ 316 | 317 | This inner product induces a Euclidean _norm_: 318 | 319 | $||u|| = \sqrt{< u,u >} = \sqrt{u_1^2+\cdots+u_n^2}$ 320 | 321 | Let $u_v$ be the perpendicular projection of $u$ on $v$. Then: 322 | 323 | $< u, v > = < u_v,v > = ||u||\cdot ||v|| \cos\widehat{u,v}$ 324 | 325 | The value below is known as the _cosine similarity_: 326 | 327 | $< \frac{u}{||u||},\frac{v}{||v||} > = \cos\widehat{u,v}$. 328 | 329 | You will remark that the norm of the rescaled embedding $\frac{1}{||u||}u$ is 1. We say that the embedding is $L_2$-normalised. 330 | 331 | The previous formula shows that the inner product of normalised (aka unit) embeddings is the `cosine` of the angle between these "normalised" embeddings. 332 | 333 | > Source: 334 | 335 | _Note that this is not a distance._ 336 | 337 | The norm in turn induces a _distance_: 338 | $d(u,v) = ||u-v||$ 339 | 340 | By definition, 341 | $||u-v||^2 = < u-v,u-v >$. 342 | 343 | By developing, we obtain: 344 | 345 | $||u-v||^2 = ||u||^2+||v||^2-2< u,v >$ 346 | 347 | Consider now **two normalised** vectors. We have: 348 | $\frac12||u-v||^2=1-\cos\widehat{u,v} = d_c(u,v)$ 349 | 350 | This is commonly known as the **cosine distance** _when the embeddings are normalised_. It ranges from 0 to 2. Note that it is not a true distance metric.
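As a quick numerical sanity check of this identity (a self-contained sketch using `Nx` only, with two small arbitrary vectors rather than real embeddings):

```elixir
# Two L2-normalised vectors.
u = Nx.tensor([1.0, 2.0, 2.0]) |> then(&Nx.divide(&1, Nx.LinAlg.norm(&1)))
v = Nx.tensor([2.0, 1.0, 2.0]) |> then(&Nx.divide(&1, Nx.LinAlg.norm(&1)))

# 1 - cos(u,v), i.e. the cosine distance.
cosine_distance = Nx.subtract(1, Nx.dot(u, v))

# Half of the squared Euclidean distance between the same two vectors.
half_squared_l2 = Nx.subtract(u, v) |> Nx.LinAlg.norm() |> Nx.pow(2) |> Nx.divide(2)

{Nx.to_number(cosine_distance), Nx.to_number(half_squared_l2)}
#=> both are ~0.111, equal up to floating-point error
```

Up to implementation details, this is also the quantity reported as the distance by the `:cosine` knn query above.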
351 | 352 | Finally, note that since we are dealing with finite dimensional vector spaces, all the norms are equivalent (in some precise mathematical way). This means that the limit points are always the same. However, the values of the distances can be quite different, and a "clusterisation" process can give significantly different results. 353 | -------------------------------------------------------------------------------- /hnswlib/hnwslib.exs: -------------------------------------------------------------------------------- 1 | Mix.install([ 2 | {:bumblebee, "~> 0.5.0"}, 3 | {:exla, "~> 0.7.0"}, 4 | {:nx, "~> 0.7.0 "}, 5 | {:hnswlib, "~> 0.1.5"} 6 | ]) 7 | 8 | Nx.global_default_backend(EXLA.Backend) 9 | 10 | IO.puts "Loading the model..................................." 11 | transformer = "sentence-transformers/paraphrase-MiniLM-L6-v2" 12 | {:ok, %{model: _model, params: _params} = model_info} = 13 | Bumblebee.load_model({:hf, transformer}) 14 | 15 | {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, transformer}) 16 | serving = Bumblebee.Text.TextEmbedding.text_embedding( 17 | model_info, 18 | tokenizer, 19 | defn_options: [compiler: EXLA], 20 | embedding_processor: :l2_norm 21 | # output_pool: :mean_pooling, 22 | # output_attribute: :hidden_state, 23 | ) 24 | 25 | # keys = model_info.params |> Map.keys() 26 | # keys |> dbg() 27 | 28 | IO.puts "" 29 | IO.puts "======================================================" 30 | IO.puts "Norm: :l2" 31 | {:ok, index} = HNSWLib.Index.new(_space = :l2, _dim = 384, _max_elements = 200) 32 | IO.puts "" 33 | IO.puts "Compute embedding for: 'short' ----------------------" 34 | %{embedding: t_short} = Nx.Serving.run(serving, "short") |> dbg() 35 | HNSWLib.Index.add_items(index, t_short) 36 | HNSWLib.Index.get_current_count(index) |> dbg() 37 | 38 | IO.puts "Compute embedding for: 'tall' ----------------------" 39 | %{embedding: t_tall} = Nx.Serving.run(serving, "tall") 40 | HNSWLib.Index.add_items(index, t_tall) 41 | HNSWLib.Index.get_current_count(index) |> dbg() 42 | 43 | IO.puts "Compute embedding for: 'average' ----------------------" 44 | %{embedding: t_tall} = Nx.Serving.run(serving, "average") 45 | HNSWLib.Index.add_items(index, t_tall) 46 | HNSWLib.Index.get_current_count(index) |> dbg() 47 | 48 | IO.puts "Compute KNN search for: 'small' ----------------------" 49 | %{embedding: data} = Nx.Serving.run(serving, "small") 50 | {:ok, labels, distances} = HNSWLib.Index.knn_query(index, data, k: 1) 51 | idx = Nx.to_flat_list(labels[0]) 52 | d = Nx.to_flat_list(distances[0]) 53 | %{found_index: hd(idx)+1, distance_to_closeset: hd(d)} |> dbg() 54 | {:ok, dt} = HNSWLib.Index.get_items(index, idx) 55 | recovered = hd(dt) |> Nx.from_binary(:f32) |> Nx.stack() 56 | IO.puts "Check the recovered embedding at the found index is the embedding of 'short' " 57 | {recovered, t_short} |> dbg() 58 | 59 | 60 | IO.puts "" 61 | IO.puts "======================================================" 62 | IO.puts "Norm: :cosine" 63 | {:ok, index} = HNSWLib.Index.new(_space = :cosine, _dim = 384, _max_elements = 200) 64 | IO.puts "" 65 | IO.puts "No normalisation" 66 | IO.puts "" 67 | IO.puts "Compute embedding for: 'short' ----------------------" 68 | %{embedding: t_small} = Nx.Serving.run(serving, "short") |> dbg() 69 | HNSWLib.Index.add_items(index, t_small) 70 | HNSWLib.Index.get_current_count(index) |> dbg() 71 | 72 | IO.puts "Compute embedding for: 'tall' ----------------------" 73 | %{embedding: t_tall} = Nx.Serving.run(serving, "tall") 74 | 
HNSWLib.Index.add_items(index, t_tall) 75 | HNSWLib.Index.get_current_count(index) |> dbg() 76 | 77 | IO.puts "Compute embedding for: 'average' ----------------------" 78 | %{embedding: t_avg} = Nx.Serving.run(serving, "average") 79 | HNSWLib.Index.add_items(index, t_avg) 80 | HNSWLib.Index.get_current_count(index) |> dbg() 81 | 82 | IO.puts "KNN search for: 'small' ----------------------" 83 | %{embedding: data} = Nx.Serving.run(serving, "small") 84 | {:ok, labels, distances} = HNSWLib.Index.knn_query(index, data, k: 1) |> dbg() 85 | idx = Nx.to_flat_list(labels[0]) 86 | d = Nx.to_flat_list(distances[0]) 87 | %{found_index: hd(idx)+1, distance_to_closeset: hd(d)} |> dbg() 88 | {:ok, dt} = HNSWLib.Index.get_items(index, idx) 89 | recovered_from_index = hd(dt) |> Nx.from_binary(:f32) |> Nx.stack() 90 | IO.puts "Check the recovered embedding at the found index is the embedding of 'small' " 91 | {recovered_from_index, t_small} |> dbg() 92 | 93 | 94 | IO.puts "" 95 | IO.puts "======================================================" 96 | IO.puts "Norm: :cosine" 97 | {:ok, index} = HNSWLib.Index.new(_space = :cosine, _dim = 384, _max_elements = 200) 98 | IO.puts "" 99 | IO.puts "Normalize the tensors" 100 | IO.puts "" 101 | IO.puts "Compute embedding for: 'short' ----------------------" 102 | %{embedding: t_small} = Nx.Serving.run(serving, "short") 103 | n_small = Nx.divide(t_small, Nx.LinAlg.norm(t_small)) |>dbg() 104 | HNSWLib.Index.add_items(index, n_small) 105 | HNSWLib.Index.get_current_count(index) |> dbg() 106 | 107 | IO.puts "Compute embedding for: 'tall' ----------------------" 108 | %{embedding: t_tall} = Nx.Serving.run(serving, "tall") 109 | n_tall = Nx.divide(t_tall, Nx.LinAlg.norm(t_tall)) 110 | HNSWLib.Index.add_items(index, n_tall) 111 | HNSWLib.Index.get_current_count(index) |> dbg() 112 | 113 | IO.puts "Compute embedding for: 'average' ----------------------" 114 | %{embedding: t_tall} = Nx.Serving.run(serving, "average") 115 | n_avg = Nx.divide(t_tall, Nx.LinAlg.norm(t_tall)) 116 | HNSWLib.Index.add_items(index, n_avg) 117 | HNSWLib.Index.get_current_count(index) |> dbg() 118 | 119 | IO.puts "KNN search for: 'small' ----------------------" 120 | %{embedding: data} = Nx.Serving.run(serving, "small") 121 | n_data = Nx.divide(data, Nx.LinAlg.norm(data)) 122 | {:ok, labels, distances} = HNSWLib.Index.knn_query(index, n_data, k: 1) 123 | idx = Nx.to_flat_list(labels[0]) 124 | d = Nx.to_flat_list(distances[0]) 125 | %{found_index: hd(idx)+1, distance_to_closeset: hd(d)} |> dbg() 126 | {:ok, dt} = HNSWLib.Index.get_items(index, idx) 127 | recovered_from_index = hd(dt) |> Nx.from_binary(:f32) |> Nx.stack() 128 | IO.puts "Check the recovered embedding at the found index is the embedding of 'small' " 129 | {recovered_from_index, n_small} |> dbg() 130 | -------------------------------------------------------------------------------- /lib/app.ex: -------------------------------------------------------------------------------- 1 | defmodule App do 2 | @moduledoc """ 3 | App keeps the contexts that define your domain 4 | and business logic. 5 | 6 | Contexts are also responsible for managing your data, regardless 7 | if it comes from the database, an external API or others. 
8 | """ 9 | end 10 | -------------------------------------------------------------------------------- /lib/app/application.ex: -------------------------------------------------------------------------------- 1 | defmodule App.Application do 2 | # See https://hexdocs.pm/elixir/Application.html 3 | # for more information on OTP Applications 4 | @moduledoc false 5 | require Logger 6 | use Application 7 | 8 | @upload_dir Application.app_dir(:app, ["priv", "static", "uploads"]) 9 | 10 | @saved_index if Application.compile_env(:app, :knnindex_indices_test, false), 11 | do: Path.join(@upload_dir, "indexes_test.bin"), 12 | else: Path.join(@upload_dir, "indexes.bin") 13 | 14 | def check_models_on_startup do 15 | App.Models.verify_and_download_models() 16 | |> case do 17 | {:error, msg} -> 18 | Logger.error("⚠️ #{msg}") 19 | System.stop(0) 20 | 21 | :ok -> 22 | Logger.info("ℹ️ Models: ✅") 23 | :ok 24 | end 25 | end 26 | 27 | @impl true 28 | def start(_type, _args) do 29 | :ok = check_models_on_startup() 30 | 31 | children = [ 32 | # Start the Telemetry supervisor 33 | AppWeb.Telemetry, 34 | # Setup DB 35 | App.Repo, 36 | # Start the PubSub system 37 | {Phoenix.PubSub, name: App.PubSub}, 38 | # Nx serving for the embedding 39 | {Nx.Serving, serving: App.Models.embedding(), name: Embedding, batch_size: 1}, 40 | # Nx serving for Speech-to-Text 41 | {Nx.Serving, 42 | serving: 43 | if Application.get_env(:app, :use_test_models) == true do 44 | App.Models.audio_serving_test() 45 | else 46 | App.Models.audio_serving() 47 | end, 48 | name: Whisper}, 49 | # Nx serving for image captioning (image classifier in tests) 50 | {Nx.Serving, 51 | serving: 52 | if Application.get_env(:app, :use_test_models) == true do 53 | App.Models.caption_serving_test() 54 | else 55 | App.Models.caption_serving() 56 | end, 57 | name: ImageClassifier}, 58 | {GenMagic.Server, name: :gen_magic}, 59 | 60 | # Start the Task supervisor 61 | {Task.Supervisor, name: App.TaskSupervisor}, 62 | # Start the Endpoint (http/https) 63 | AppWeb.Endpoint 64 | # Start a worker by calling: App.Worker.start_link(arg) 65 | # {App.Worker, arg} 66 | ] 67 | 68 | # We only start the HNSWLib Index GenServer here when we're not testing. 69 | # Because this GenServer needs the database to be seeded first, 70 | # we skip it in the test environment. 71 | # When testing, you need to spawn this process manually (it is done in the test_helper.exs file). 72 | children = 73 | if Application.get_env(:app, :start_genserver, true) == true do 74 | Enum.concat(children, [{App.KnnIndex, [space: :cosine, index: @saved_index]}]) 75 | else 76 | children 77 | end 78 | 79 | # See https://hexdocs.pm/elixir/Supervisor.html 80 | # for other strategies and supported options 81 | opts = [strategy: :one_for_one, name: App.Supervisor] 82 | Supervisor.start_link(children, opts) 83 | end 84 | 85 | # Tell Phoenix to update the endpoint configuration 86 | # whenever the application is updated.
87 | @impl true 88 | def config_change(changed, _new, removed) do 89 | AppWeb.Endpoint.config_change(changed, removed) 90 | :ok 91 | end 92 | end 93 | -------------------------------------------------------------------------------- /lib/app/hnswlib_index.ex: -------------------------------------------------------------------------------- 1 | defmodule App.HnswlibIndex do 2 | use Ecto.Schema 3 | alias App.HnswlibIndex 4 | 5 | require Logger 6 | 7 | @moduledoc """ 8 | Ecto schema to save the HNSWLib Index file into a singleton table 9 | with utility functions 10 | """ 11 | 12 | schema "hnswlib_index" do 13 | field(:file, :binary) 14 | field(:lock_version, :integer, default: 1) 15 | end 16 | 17 | def changeset(struct \\ %__MODULE__{}, params \\ %{}) do 18 | struct 19 | |> Ecto.Changeset.cast(params, [:id, :file]) 20 | |> Ecto.Changeset.optimistic_lock(:lock_version) 21 | |> Ecto.Changeset.validate_required([:id]) 22 | end 23 | 24 | @doc """ 25 | Tries to load index from DB. 26 | If the table is empty, it creates a new one. 27 | If the table is not empty but there's no file, an index is created from scratch. 28 | If there's one, we use it and load it to be used throughout the application. 29 | """ 30 | def maybe_load_index_from_db(space, dim, max_elements) do 31 | # Check if the table has an entry 32 | App.Repo.get_by(HnswlibIndex, id: 1) 33 | |> case do 34 | # If the table is empty 35 | nil -> 36 | Logger.info("ℹ️ No index file found in DB. Creating new one...") 37 | create(space, dim, max_elements) 38 | 39 | # If the table is not empty but has no file 40 | response when response.file == nil -> 41 | Logger.info("ℹ️ Empty index file in DB. Recreating one...") 42 | 43 | # Purge the table and create a new file row in it 44 | App.Repo.delete_all(App.HnswlibIndex) 45 | create(space, dim, max_elements) 46 | 47 | # If the table is not empty and has a file 48 | index_db -> 49 | Logger.info("ℹ️ Index file found in DB. Loading it...") 50 | 51 | # We get the path of the index 52 | with path <- App.KnnIndex.index_path(), 53 | # Save the file on disk 54 | :ok <- File.write(path, index_db.file), 55 | # And load it 56 | {:ok, index} <- HNSWLib.Index.load_index(space, dim, path) do 57 | {:ok, index, index_db} 58 | end 59 | end 60 | end 61 | 62 | defp create(space, dim, max_elements) do 63 | # Inserting the row in the table 64 | {:ok, schema} = 65 | HnswlibIndex.changeset(%__MODULE__{}, %{id: 1}) 66 | |> App.Repo.insert() 67 | 68 | # Creates index 69 | {:ok, index} = 70 | HNSWLib.Index.new(space, dim, max_elements) 71 | 72 | # Builds index for testing only 73 | if Application.get_env(:app, :use_test_models, false) do 74 | empty_index = 75 | Application.app_dir(:app, ["priv", "static", "uploads"]) 76 | |> Path.join("indexes_empty.bin") 77 | 78 | HNSWLib.Index.save_index(index, empty_index) 79 | end 80 | 81 | {:ok, index, schema} 82 | end 83 | end 84 | -------------------------------------------------------------------------------- /lib/app/image.ex: -------------------------------------------------------------------------------- 1 | defmodule App.Image do 2 | use Ecto.Schema 3 | require Logger 4 | 5 | @moduledoc """ 6 | Ecto schema for the table Images and 7 | utility functions. 
8 | """ 9 | 10 | @primary_key {:id, :id, autogenerate: true} 11 | schema "images" do 12 | field(:description, :string) 13 | field(:width, :integer) 14 | field(:url, :string) 15 | field(:height, :integer) 16 | field(:idx, :integer) 17 | field(:sha1, :string) 18 | 19 | timestamps(type: :utc_datetime) 20 | end 21 | 22 | def changeset(image, params \\ %{}) do 23 | image 24 | |> Ecto.Changeset.cast(params, [:url, :description, :width, :height, :idx, :sha1]) 25 | |> Ecto.Changeset.validate_required([:width, :height]) 26 | |> Ecto.Changeset.unique_constraint(:sha1, name: :images_sha1_index) 27 | |> Ecto.Changeset.unique_constraint(:idx, name: :images_idx_index) 28 | end 29 | 30 | @doc """ 31 | Inserts a new image into the database. 32 | Returns `{:ok, image}` if the image was inserted correctly. 33 | Returns `{:error, reason}` if the image was not inserted correctly. 34 | """ 35 | def insert(params) do 36 | App.Image.changeset(%App.Image{}, params) 37 | |> App.Repo.insert() 38 | end 39 | 40 | @doc """ 41 | Calculates the SHA1 of a given binary 42 | """ 43 | def calc_sha1(file_binary) do 44 | :crypto.hash(:sha, file_binary) 45 | |> Base.encode16() 46 | end 47 | 48 | @doc """ 49 | Returns `{:ok, image}` or `nil` if the given sha1 is saved into the database Image table. 50 | """ 51 | def check_sha1(sha1) when is_binary(sha1) do 52 | App.Repo.get_by(App.Image, %{sha1: sha1}) 53 | |> case do 54 | nil -> 55 | nil 56 | 57 | %App.Image{} = image -> 58 | {:ok, image} 59 | end 60 | end 61 | 62 | @doc """ 63 | Uploads the given image to S3. 64 | Returns {:ok, response} if the upload is successful. 65 | Returns {:error, reason} if the upload fails. 66 | """ 67 | def upload_image_to_s3(file_path, mimetype) do 68 | extension = MIME.extensions(mimetype) |> Enum.at(0) 69 | 70 | # Upload to Imgup - https://github.com/dwyl/imgup 71 | upload_response = 72 | HTTPoison.post( 73 | "https://imgup.fly.dev/api/images", 74 | {:multipart, 75 | [ 76 | { 77 | :file, 78 | file_path, 79 | {"form-data", [name: "image", filename: "#{Path.basename(file_path)}.#{extension}"]}, 80 | [{"Content-Type", mimetype}] 81 | } 82 | ]}, 83 | [] 84 | ) 85 | 86 | # Process the response and return error if there was a problem uploading the image 87 | case upload_response do 88 | # In case it's successful 89 | {:ok, %HTTPoison.Response{status_code: 200, body: body}} -> 90 | %{"url" => url, "compressed_url" => _} = Jason.decode!(body) 91 | {:ok, url} 92 | 93 | # In case it returns HTTP 400 with specific reason it failed 94 | {:ok, %HTTPoison.Response{status_code: 400, body: body}} -> 95 | %{"errors" => %{"detail" => reason}} = Jason.decode!(body) 96 | {:error, reason} 97 | 98 | # In case the request fails for whatever other reason 99 | {:error, %HTTPoison.Error{reason: reason}} -> 100 | {:error, reason} 101 | end 102 | end 103 | 104 | @doc """ 105 | Check file type via magic number. It uses a GenServer running the `C` lib "libmagic". 106 | Returns {:ok, %{mime_type: mime_type}} if the file type is accepted. 107 | Otherwise, {:error, reason}. 108 | """ 109 | def gen_magic_eval(path, accepted_mime) do 110 | # Perform the magic evaluation. 111 | GenMagic.Server.perform(:gen_magic, path) 112 | |> case do 113 | # In case it fails, return reason. 114 | {:error, reason} -> 115 | {:error, reason} 116 | 117 | # If it succeeds, we check if it's an accepted mime type. 
118 | {:ok, 119 | %GenMagic.Result{ 120 | mime_type: mime, 121 | encoding: "binary", 122 | content: _content 123 | }} -> 124 | if Enum.member?(accepted_mime, mime), 125 | do: {:ok, %{mime_type: mime}}, 126 | else: {:error, "Not accepted mime type."} 127 | 128 | # In case the evaluation fails and it's not acceptable. 129 | {:ok, %GenMagic.Result{} = res} -> 130 | Logger.warning("⚠️ MIME type error: #{inspect(res)}") 131 | {:error, "Not acceptable."} 132 | end 133 | end 134 | end 135 | -------------------------------------------------------------------------------- /lib/app/knn_index.ex: -------------------------------------------------------------------------------- 1 | defmodule App.KnnIndex do 2 | use GenServer 3 | 4 | @moduledoc """ 5 | A GenServer to load and handle the Index file for HNSWLib. 6 | It loads the index from the FileSystem if existing or from the table HnswlibIndex. 7 | It creates an new one if no Index file is found in the FileSystem 8 | and if the table HnswlibIndex is empty. 9 | It holds the index and the App.Image singleton table in the state. 10 | """ 11 | 12 | require Logger 13 | 14 | @dim 384 15 | @max_elements 200 16 | @upload_dir Application.app_dir(:app, ["priv", "static", "uploads"]) 17 | @saved_index if Application.compile_env(:app, :knnindex_indices_test, false), 18 | do: Path.join(@upload_dir, "indexes_test.bin"), 19 | else: Path.join(@upload_dir, "indexes.bin") 20 | 21 | # Client API ------------------ 22 | def start_link(args) do 23 | :ok = File.mkdir_p!(@upload_dir) 24 | GenServer.start_link(__MODULE__, args, name: __MODULE__) 25 | end 26 | 27 | def index_path do 28 | @saved_index 29 | end 30 | 31 | def save_index_to_db do 32 | GenServer.call(__MODULE__, :save_index_to_db) 33 | end 34 | 35 | def get_count do 36 | GenServer.call(__MODULE__, :get_count) 37 | end 38 | 39 | def add_item(embedding) do 40 | GenServer.call(__MODULE__, {:add_item, embedding}) 41 | end 42 | 43 | def knn_search(input) do 44 | GenServer.call(__MODULE__, {:knn_search, input}) 45 | end 46 | 47 | def not_empty_index do 48 | GenServer.call(__MODULE__, :not_empty) 49 | end 50 | 51 | # --------------------------------------------------- 52 | @impl true 53 | def init(args) do 54 | # Trying to load the index file 55 | index_path = Keyword.fetch!(args, :index) 56 | space = Keyword.fetch!(args, :space) 57 | 58 | case File.exists?(index_path) do 59 | # If the index file doesn't exist in the FileSystem, 60 | # we try to load it from the database. 61 | false -> 62 | {:ok, index, index_schema} = 63 | App.HnswlibIndex.maybe_load_index_from_db(space, @dim, @max_elements) 64 | 65 | {:ok, {index, index_schema, space}} 66 | 67 | # If the index file exists in the FileSystem, 68 | # we compare it the existing DB table and check for incoherences. 69 | true -> 70 | Logger.info("ℹ️ Index file found on disk. Let's compare it with the database...") 71 | 72 | App.Repo.get_by(App.HnswlibIndex, id: 1) 73 | |> case do 74 | nil -> 75 | {:stop, 76 | {:error, 77 | "Error comparing the index file with the one on the database. Incoherence on table."}} 78 | 79 | schema -> 80 | check_integrity(index_path, schema, space) 81 | end 82 | end 83 | end 84 | 85 | defp check_integrity(path, schema, space) do 86 | # We check the count of the images in the database and the total count of the index. 
87 | with db_count <- 88 | App.Repo.all(App.Image) |> length(), 89 | {:ok, index} <- 90 | HNSWLib.Index.load_index(space, @dim, path), 91 | {:ok, index_count} <- 92 | HNSWLib.Index.get_current_count(index), 93 | true <- 94 | index_count == db_count do 95 | Logger.info("ℹ️ Integrity: ✅") 96 | {:ok, {index, schema, space}} 97 | 98 | # If it fails, we return an error. 99 | else 100 | false -> 101 | {:stop, 102 | {:error, "Integrity error. The count of images from index differs from the database."}} 103 | 104 | {:error, msg} -> 105 | Logger.error("⚠️ #{msg}") 106 | {:stop, {:error, msg}} 107 | end 108 | end 109 | 110 | @impl true 111 | def handle_call(:save_index_to_db, _, {index, index_schema, space} = state) do 112 | # We read the index file and try to update the index on the table as well. 113 | File.read(@saved_index) 114 | |> case do 115 | {:ok, file} -> 116 | {:ok, updated_schema} = 117 | index_schema 118 | |> App.HnswlibIndex.changeset(%{file: file}) 119 | |> App.Repo.update() 120 | 121 | {:reply, {:ok, updated_schema}, {index, updated_schema, space}} 122 | 123 | {:error, msg} -> 124 | {:reply, {:error, msg}, state} 125 | end 126 | end 127 | 128 | def handle_call(:get_count, _, {index, _, _} = state) do 129 | {:ok, count} = HNSWLib.Index.get_current_count(index) 130 | {:reply, count, state} 131 | end 132 | 133 | def handle_call({:add_item, embedding}, _, {index, _, _} = state) do 134 | # We add the new item to the index and update it. 135 | with :ok <- 136 | HNSWLib.Index.add_items(index, embedding), 137 | {:ok, idx} <- 138 | HNSWLib.Index.get_current_count(index), 139 | :ok <- 140 | HNSWLib.Index.save_index(index, @saved_index) do 141 | {:reply, {:ok, idx}, state} 142 | else 143 | {:error, msg} -> 144 | {:reply, {:error, msg}, state} 145 | end 146 | end 147 | 148 | def handle_call({:knn_search, nil}, _, state) do 149 | {:reply, {:error, "No index found"}, state} 150 | end 151 | 152 | def handle_call({:knn_search, input}, _, {index, _, _} = state) do 153 | # We search for the nearest neighbors of the input embedding. 154 | case HNSWLib.Index.knn_query(index, input, k: 1) do 155 | {:ok, labels, _distances} -> 156 | response = 157 | labels[0] 158 | |> Nx.to_flat_list() 159 | |> hd() 160 | |> then(fn idx -> 161 | App.Repo.get_by(App.Image, %{idx: idx + 1}) 162 | end) 163 | 164 | # TODO: add threshold on "distances" 165 | {:reply, response, state} 166 | 167 | {:error, msg} -> 168 | {:reply, {:error, msg}, state} 169 | end 170 | end 171 | 172 | def handle_call(:not_empty, _, {index, _, _} = state) do 173 | case HNSWLib.Index.get_current_count(index) do 174 | {:ok, 0} -> 175 | Logger.warning("⚠️ Empty index.") 176 | {:reply, :error, state} 177 | 178 | {:ok, _} -> 179 | {:reply, :ok, state} 180 | end 181 | end 182 | end 183 | -------------------------------------------------------------------------------- /lib/app/models.ex: -------------------------------------------------------------------------------- 1 | defmodule ModelInfo do 2 | @moduledoc """ 3 | Information regarding the model being loaded. 4 | It holds the name of the model repository and the directory it will be saved into. 5 | It also has booleans to load each model parameter at will - this is because some models (like BLIP) require featurizer, tokenizations and generation configuration. 6 | """ 7 | 8 | defstruct [:name, :cache_path, :load_featurizer, :load_tokenizer, :load_generation_config] 9 | end 10 | 11 | defmodule App.Models do 12 | @moduledoc """ 13 | Manages loading the modules and their location according to env. 
14 | """ 15 | require Logger 16 | 17 | # IMPORTANT: This should be the same directory as defined in the `Dockerfile` 18 | # where the models will be downloaded into. 19 | @models_folder_path Application.compile_env!(:app, :models_cache_dir) 20 | 21 | # Embedding------- 22 | @embedding_model %ModelInfo{ 23 | name: "sentence-transformers/paraphrase-MiniLM-L6-v2", 24 | cache_path: Path.join(@models_folder_path, "paraphrase-MiniLM-L6-v2"), 25 | load_featurizer: false, 26 | load_tokenizer: true, 27 | load_generation_config: true 28 | } 29 | # Captioning -- 30 | @captioning_test_model %ModelInfo{ 31 | name: "microsoft/resnet-50", 32 | cache_path: Path.join(@models_folder_path, "resnet-50"), 33 | load_featurizer: true 34 | } 35 | 36 | @captioning_prod_model %ModelInfo{ 37 | name: "Salesforce/blip-image-captioning-base", 38 | cache_path: Path.join(@models_folder_path, "blip-image-captioning-base"), 39 | load_featurizer: true, 40 | load_tokenizer: true, 41 | load_generation_config: true 42 | } 43 | 44 | # Audio transcription -- 45 | @audio_test_model %ModelInfo{ 46 | name: "openai/whisper-small", 47 | cache_path: Path.join(@models_folder_path, "whisper-small"), 48 | load_featurizer: true, 49 | load_tokenizer: true, 50 | load_generation_config: true 51 | } 52 | 53 | @audio_prod_model %ModelInfo{ 54 | name: "openai/whisper-small", 55 | cache_path: Path.join(@models_folder_path, "whisper-small"), 56 | load_featurizer: true, 57 | load_tokenizer: true, 58 | load_generation_config: true 59 | } 60 | 61 | def extract_captioning_test_label(result) do 62 | %{predictions: [%{label: label}]} = result 63 | label 64 | end 65 | 66 | def extract_captioning_prod_label(result) do 67 | %{results: [%{text: label}]} = result 68 | label 69 | end 70 | 71 | @doc """ 72 | Verifies and downloads the models according to configuration 73 | and if they are already cached locally or not. 74 | 75 | The models that are downloaded are hardcoded in this function. 76 | """ 77 | def verify_and_download_models() do 78 | { 79 | Application.get_env(:app, :force_models_download, false), 80 | Application.get_env(:app, :use_test_models, false) 81 | } 82 | |> case do 83 | {true, true} -> 84 | # Delete any cached pre-existing models 85 | File.rm_rf!(@models_folder_path) 86 | 87 | with :ok <- download_model(@captioning_test_model), 88 | :ok <- download_model(@embedding_model), 89 | :ok <- download_model(@audio_test_model) do 90 | :ok 91 | else 92 | {:error, msg} -> {:error, msg} 93 | end 94 | 95 | {true, false} -> 96 | # Delete any cached pre-existing models 97 | File.rm_rf!(@models_folder_path) 98 | 99 | with :ok <- download_model(@captioning_prod_model), 100 | :ok <- download_model(@audio_prod_model), 101 | :ok <- download_model(@embedding_model) do 102 | :ok 103 | else 104 | {:error, msg} -> {:error, msg} 105 | end 106 | 107 | {false, false} -> 108 | # Check if the prod model cache directory exists or if it's not empty. 109 | # If so, we download the prod models. 110 | 111 | with :ok <- check_folder_and_download(@captioning_prod_model), 112 | :ok <- check_folder_and_download(@audio_prod_model), 113 | :ok <- check_folder_and_download(@embedding_model) do 114 | :ok 115 | else 116 | {:error, msg} -> {:error, msg} 117 | end 118 | 119 | {false, true} -> 120 | # Check if the test model cache directory exists or if it's not empty. 121 | # If so, we download the test models. 
122 | 123 | with :ok <- check_folder_and_download(@captioning_test_model), 124 | :ok <- check_folder_and_download(@audio_test_model), 125 | :ok <- check_folder_and_download(@embedding_model) do 126 | :ok 127 | else 128 | {:error, msg} -> {:error, msg} 129 | end 130 | end 131 | end 132 | 133 | @doc """ 134 | Loads the embedding model. 135 | """ 136 | def embedding() do 137 | load_offline_model(@embedding_model) 138 | |> then(fn response -> 139 | case response do 140 | {:ok, model} -> 141 | %Nx.Serving{} = 142 | Bumblebee.Text.TextEmbedding.text_embedding( 143 | model.model_info, 144 | model.tokenizer, 145 | defn_options: [compiler: EXLA], 146 | preallocate_params: true 147 | ) 148 | 149 | {:error, msg} -> 150 | {:error, msg} 151 | end 152 | end) 153 | end 154 | 155 | @doc """ 156 | Serving function that serves the `Bumblebee` captioning model used throughout the app. 157 | This function is meant to be called and served by `Nx` in `lib/app/application.ex`. 158 | 159 | This assumes the models that are being used exist locally, in the @models_folder_path. 160 | """ 161 | def caption_serving do 162 | load_offline_model(@captioning_prod_model) 163 | |> then(fn response -> 164 | case response do 165 | {:ok, model} -> 166 | %Nx.Serving{} = 167 | Bumblebee.Vision.image_to_text( 168 | model.model_info, 169 | model.featurizer, 170 | model.tokenizer, 171 | model.generation_config, 172 | compile: [batch_size: 1], 173 | defn_options: [compiler: EXLA], 174 | # needed to run on `Fly.io` 175 | preallocate_params: true 176 | ) 177 | 178 | {:error, msg} -> 179 | {:error, msg} 180 | end 181 | end) 182 | end 183 | 184 | @doc """ 185 | Serving function that serves the `Bumblebee` audio transcription model used throughout the app. 186 | """ 187 | def audio_serving do 188 | load_offline_model(@audio_prod_model) 189 | |> then(fn response -> 190 | case response do 191 | {:ok, model} -> 192 | %Nx.Serving{} = 193 | Bumblebee.Audio.speech_to_text_whisper( 194 | model.model_info, 195 | model.featurizer, 196 | model.tokenizer, 197 | model.generation_config, 198 | chunk_num_seconds: 30, 199 | task: :transcribe, 200 | defn_options: [compiler: EXLA], 201 | preallocate_params: true 202 | ) 203 | 204 | {:error, msg} -> 205 | {:error, msg} 206 | end 207 | end) 208 | end 209 | 210 | @doc """ 211 | Serving function for tests only. It uses a test audio transcription model. 212 | """ 213 | def audio_serving_test do 214 | load_offline_model(@audio_test_model) 215 | |> then(fn response -> 216 | case response do 217 | {:ok, model} -> 218 | %Nx.Serving{} = 219 | Bumblebee.Audio.speech_to_text_whisper( 220 | model.model_info, 221 | model.featurizer, 222 | model.tokenizer, 223 | model.generation_config, 224 | chunk_num_seconds: 30, 225 | task: :transcribe, 226 | defn_options: [compiler: EXLA], 227 | preallocate_params: true 228 | ) 229 | 230 | {:error, msg} -> 231 | {:error, msg} 232 | end 233 | end) 234 | end 235 | 236 | @doc """ 237 | Serving function for tests only. It uses a test captioning model. 238 | This function is meant to be called and served by `Nx` in `lib/app/application.ex`. 239 | 240 | This assumes the models that are being used exist locally, in the @models_folder_path. 
241 | """ 242 | def caption_serving_test do 243 | load_offline_model(@captioning_test_model) 244 | |> then(fn response -> 245 | case response do 246 | {:ok, model} -> 247 | %Nx.Serving{} = 248 | Bumblebee.Vision.image_classification( 249 | model.model_info, 250 | model.featurizer, 251 | top_k: 1, 252 | compile: [batch_size: 10], 253 | defn_options: [compiler: EXLA], 254 | # needed to run on `Fly.io` 255 | preallocate_params: true 256 | ) 257 | 258 | {:error, msg} -> 259 | {:error, msg} 260 | end 261 | end) 262 | end 263 | 264 | # Loads the models from the cache folder. 265 | # It will load the model and the respective the featurizer, tokenizer and generation config if needed, 266 | # and return a map with all of these at the end. 267 | defp load_offline_model(model) do 268 | Logger.info("ℹ️ Loading #{model.name}...") 269 | 270 | # Loading model 271 | loading_settings = {:hf, model.name, cache_dir: model.cache_path, offline: true} 272 | 273 | Bumblebee.load_model(loading_settings) 274 | |> case do 275 | {:ok, model_info} -> 276 | info = %{model_info: model_info} 277 | 278 | # Load featurizer, tokenizer and generation config if needed 279 | info = 280 | if Map.get(model, :load_featurizer) do 281 | {:ok, featurizer} = Bumblebee.load_featurizer(loading_settings) 282 | Map.put(info, :featurizer, featurizer) 283 | else 284 | info 285 | end 286 | 287 | info = 288 | if Map.get(model, :load_tokenizer) do 289 | {:ok, tokenizer} = Bumblebee.load_tokenizer(loading_settings) 290 | Map.put(info, :tokenizer, tokenizer) 291 | else 292 | info 293 | end 294 | 295 | info = 296 | if Map.get(model, :load_generation_config) do 297 | {:ok, generation_config} = 298 | Bumblebee.load_generation_config(loading_settings) 299 | 300 | Map.put(info, :generation_config, generation_config) 301 | else 302 | info 303 | end 304 | 305 | # Return a map with the model and respective parameters. 306 | {:ok, info} 307 | 308 | {:error, msg} -> 309 | {:error, msg} 310 | end 311 | end 312 | 313 | # Downloads the pre-trained models according to a given %ModelInfo struct. 314 | # It will load the model and the respective the featurizer, tokenizer and generation config if needed. 315 | defp download_model(model) do 316 | Logger.info("ℹ️ Downloading #{model.name}...") 317 | 318 | # Download model 319 | downloading_settings = {:hf, model.name, cache_dir: model.cache_path} 320 | 321 | # Download featurizer, tokenizer and generation config if needed 322 | Bumblebee.load_model(downloading_settings) 323 | |> case do 324 | {:ok, _} -> 325 | if Map.get(model, :load_featurizer) do 326 | {:ok, _} = Bumblebee.load_featurizer(downloading_settings) 327 | end 328 | 329 | if Map.get(model, :load_tokenizer) do 330 | {:ok, _} = Bumblebee.load_tokenizer(downloading_settings) 331 | end 332 | 333 | if Map.get(model, :load_generation_config) do 334 | {:ok, _} = Bumblebee.load_generation_config(downloading_settings) 335 | end 336 | 337 | :ok 338 | 339 | {:error, msg} -> 340 | {:error, msg} 341 | end 342 | end 343 | 344 | # Checks if the folder exists and downloads the model if it doesn't. 
345 | def check_folder_and_download(model) do 346 | :ok = File.mkdir_p!(@models_folder_path) 347 | 348 | model_location = 349 | Path.join(model.cache_path, "huggingface") 350 | 351 | if File.ls(model_location) == {:error, :enoent} or File.ls(model_location) == {:ok, []} do 352 | download_model(model) 353 | |> case do 354 | :ok -> :ok 355 | {:error, msg} -> {:error, msg} 356 | end 357 | else 358 | Logger.info("ℹ️ No download needed: #{model.name}") 359 | :ok 360 | end 361 | end 362 | end 363 | -------------------------------------------------------------------------------- /lib/app/release.ex: -------------------------------------------------------------------------------- 1 | defmodule App.Release do 2 | @moduledoc """ 3 | Used for executing DB release tasks when run in production without Mix 4 | installed. 5 | """ 6 | @app :app 7 | 8 | def migrate do 9 | load_app() 10 | 11 | for repo <- repos() do 12 | {:ok, _, _} = Ecto.Migrator.with_repo(repo, &Ecto.Migrator.run(&1, :up, all: true)) 13 | end 14 | end 15 | 16 | def rollback(repo, version) do 17 | load_app() 18 | {:ok, _, _} = Ecto.Migrator.with_repo(repo, &Ecto.Migrator.run(&1, :down, to: version)) 19 | end 20 | 21 | defp repos do 22 | Application.fetch_env!(@app, :ecto_repos) 23 | end 24 | 25 | defp load_app do 26 | Application.load(@app) 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /lib/app/repo.ex: -------------------------------------------------------------------------------- 1 | defmodule App.Repo do 2 | use Ecto.Repo, 3 | otp_app: :app, 4 | adapter: Ecto.Adapters.Postgres 5 | end 6 | -------------------------------------------------------------------------------- /lib/app_web.ex: -------------------------------------------------------------------------------- 1 | defmodule AppWeb do 2 | @moduledoc """ 3 | The entrypoint for defining your web interface, such 4 | as controllers, components, channels, and so on. 5 | 6 | This can be used in your application as: 7 | 8 | use AppWeb, :controller 9 | use AppWeb, :html 10 | 11 | The definitions below will be executed for every controller, 12 | component, etc, so keep them short and clean, focused 13 | on imports, uses and aliases. 14 | 15 | Do NOT define functions inside the quoted expressions 16 | below. Instead, define additional modules and import 17 | those modules here. 
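  For example (a sketch — `AppWeb.MyHelpers` is a hypothetical module, not part of
  this codebase), shared view helpers would live in their own module and be imported
  from `html_helpers/0` below, instead of being defined inside the quoted block:

      defmodule AppWeb.MyHelpers do
        def format_bytes(bytes) when is_integer(bytes), do: "#{bytes} B"
      end

      # then, inside html_helpers/0:
      #   import AppWeb.MyHelpers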
18 | """ 19 | 20 | def static_paths, do: ~w(assets fonts images favicon.ico robots.txt) 21 | 22 | def router do 23 | quote do 24 | use Phoenix.Router, helpers: false 25 | 26 | # Import common connection and controller functions to use in pipelines 27 | import Plug.Conn 28 | import Phoenix.Controller 29 | import Phoenix.LiveView.Router 30 | end 31 | end 32 | 33 | def channel do 34 | quote do 35 | use Phoenix.Channel 36 | end 37 | end 38 | 39 | def controller do 40 | quote do 41 | use Phoenix.Controller, 42 | formats: [:html, :json], 43 | layouts: [html: AppWeb.Layouts] 44 | 45 | import Plug.Conn 46 | 47 | unquote(verified_routes()) 48 | end 49 | end 50 | 51 | def live_view do 52 | quote do 53 | use Phoenix.LiveView, 54 | layout: {AppWeb.Layouts, :app} 55 | 56 | unquote(html_helpers()) 57 | end 58 | end 59 | 60 | def live_component do 61 | quote do 62 | use Phoenix.LiveComponent 63 | 64 | unquote(html_helpers()) 65 | end 66 | end 67 | 68 | def html do 69 | quote do 70 | use Phoenix.Component 71 | 72 | # Import convenience functions from controllers 73 | import Phoenix.Controller, 74 | only: [get_csrf_token: 0, view_module: 1, view_template: 1] 75 | 76 | # Include general helpers for rendering HTML 77 | unquote(html_helpers()) 78 | end 79 | end 80 | 81 | defp html_helpers do 82 | quote do 83 | # HTML escaping functionality 84 | import Phoenix.HTML 85 | # Core UI components and translation 86 | import AppWeb.CoreComponents 87 | 88 | # Shortcut for generating JS commands 89 | alias Phoenix.LiveView.JS 90 | 91 | # Routes generation with the ~p sigil 92 | unquote(verified_routes()) 93 | end 94 | end 95 | 96 | def verified_routes do 97 | quote do 98 | use Phoenix.VerifiedRoutes, 99 | endpoint: AppWeb.Endpoint, 100 | router: AppWeb.Router, 101 | statics: AppWeb.static_paths() 102 | end 103 | end 104 | 105 | @doc """ 106 | When used, dispatch to the appropriate controller/view/etc. 107 | """ 108 | defmacro __using__(which) when is_atom(which) do 109 | apply(__MODULE__, which, []) 110 | end 111 | end 112 | -------------------------------------------------------------------------------- /lib/app_web/components/layouts.ex: -------------------------------------------------------------------------------- 1 | defmodule AppWeb.Layouts do 2 | use AppWeb, :html 3 | 4 | embed_templates "layouts/*" 5 | end 6 | -------------------------------------------------------------------------------- /lib/app_web/components/layouts/app.html.heex: -------------------------------------------------------------------------------- 1 |
2 | <.flash_group flash={@flash} /> 3 | <%= @inner_content %> 4 |
5 | -------------------------------------------------------------------------------- /lib/app_web/components/layouts/root.html.heex: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | <.live_title suffix=" · Phoenix Framework"> 8 | <%= assigns[:page_title] || "App" %> 9 | 10 | 11 | 13 | 14 | 15 |
16 | <%= @inner_content %> 17 | 18 | 19 | -------------------------------------------------------------------------------- /lib/app_web/components/spinner.ex: -------------------------------------------------------------------------------- 1 | defmodule AppWeb.Spinner do 2 | use Phoenix.Component 3 | 4 | @moduledoc """ 5 | Stateless component to display a spinner. 6 | Takes a boolean `spin` as assign 7 | """ 8 | 9 | attr :spin, :boolean, default: false 10 | 11 | def spin(assigns) do 12 | ~H""" 13 |
14 |
15 |
16 |
17 |
18 |
19 | """ 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /lib/app_web/endpoint.ex: -------------------------------------------------------------------------------- 1 | defmodule AppWeb.Endpoint do 2 | use Phoenix.Endpoint, otp_app: :app 3 | 4 | # The session will be stored in the cookie and signed, 5 | # this means its contents can be read but not tampered with. 6 | # Set :encryption_salt if you would also like to encrypt it. 7 | @session_options [ 8 | store: :cookie, 9 | key: "_app_key", 10 | signing_salt: "DbbfLLfE", 11 | same_site: "Lax" 12 | ] 13 | 14 | socket "/live", Phoenix.LiveView.Socket, websocket: [connect_info: [session: @session_options]] 15 | 16 | # Serve at "/" the static files from "priv/static" directory. 17 | # 18 | # You should set gzip to true if you are running phx.digest 19 | # when deploying your static files in production. 20 | plug Plug.Static, 21 | at: "/", 22 | from: :app, 23 | gzip: false, 24 | only: AppWeb.static_paths() 25 | 26 | # Code reloading can be explicitly enabled under the 27 | # :code_reloader configuration of your endpoint. 28 | if code_reloading? do 29 | socket "/phoenix/live_reload/socket", Phoenix.LiveReloader.Socket 30 | plug Phoenix.LiveReloader 31 | plug Phoenix.CodeReloader 32 | end 33 | 34 | plug Plug.RequestId 35 | plug Plug.Telemetry, event_prefix: [:phoenix, :endpoint] 36 | 37 | plug Plug.Parsers, 38 | parsers: [:urlencoded, :multipart, :json], 39 | pass: ["*/*"], 40 | json_decoder: Phoenix.json_library() 41 | 42 | plug Plug.MethodOverride 43 | plug Plug.Head 44 | plug Plug.Session, @session_options 45 | plug AppWeb.Router 46 | end 47 | -------------------------------------------------------------------------------- /lib/app_web/live/page_live.html.heex: -------------------------------------------------------------------------------- 1 |