├── priv └── categories │ ├── no_person_or_person.json │ ├── coco_with_voc_labels_categories.json │ ├── coco_categories.json │ └── imagenet_v2_categories.json ├── config ├── prod.exs ├── test.exs ├── dev.exs ├── config.exs └── runtime.exs ├── test ├── test_helper.exs ├── assets │ ├── cat.jpg │ ├── results │ │ └── style_transfer │ │ │ ├── cat_candy.gt │ │ │ ├── cat_mosaic.gt │ │ │ ├── cat_udnie.gt │ │ │ ├── cat_princess.gt │ │ │ ├── cat_candy_fast.gt │ │ │ ├── cat_mosaic_fast.gt │ │ │ ├── cat_udnie_fast.gt │ │ │ └── cat_princess_fast.gt │ └── categories.json ├── ex_vision │ ├── object_detection │ │ ├── fasterrcnn_resnet50_fpn_test.exs │ │ └── ssdlite320_mobilenetv3_test.exs │ ├── instance_segmentation │ │ └── maskrcnn_resnet50_fpn_v2_test.exs │ ├── semantic_segmentation │ │ └── deep_lab_v3_mobilenet_v3_test.exs │ ├── classification │ │ ├── mobilenet_v3_small_test.exs │ │ ├── squeezenet1_1_test.exs │ │ ├── efficientnet_v2_l_test.exs │ │ ├── efficientnet_v2_m_test.exs │ │ └── efficientnet_v2_s_test.exs │ ├── keypoint_detection │ │ └── keypointrcnn_resnet50_fpn_test.exs │ ├── style_transfer │ │ └── style_transfer_test.exs │ ├── cache_test.exs │ └── utils_test.exs └── support │ └── exvision │ ├── test_utils.ex │ └── model │ └── case.ex ├── .formatter.exs ├── .editorconfig ├── lib └── ex_vision │ ├── ex_vision.ex │ ├── types.ex │ ├── types │ ├── metadata.ex │ ├── bbox.ex │ ├── bboxwithmask.ex │ └── bboxwithkeypoints.ex │ ├── utils │ └── macros.ex │ ├── classification │ ├── squeezenet1_1.ex │ ├── efficientnet_v2_l.ex │ ├── mobilenet_v3_small.ex │ ├── efficientnet_v2_m.ex │ ├── efficientnet_v2_s.ex │ └── generic_classifier.ex │ ├── object_detection │ ├── ssdlite320_mobilenetv3.ex │ ├── fasterrcnn_resnet50_fpn.ex │ └── generic_detector.ex │ ├── model │ ├── definition │ │ ├── parts │ │ │ └── with_categories.ex │ │ └── ortex.ex │ └── definition.ex │ ├── semantic_segmentation │ └── deep_lab_v3_mobilenet_v3.ex │ ├── instance_segmentation │ └── maskrcnn_resnet50_fpn_v2.ex │ ├── 
style_transfer │ └── style_transfer.ex │ ├── keypoint_detection │ └── keypointrcnn_resnet50_fpn.ex │ ├── model.ex │ ├── utils.ex │ └── cache.ex ├── .gitattributes ├── devcontainer.json ├── .github └── workflows │ └── elixir.yml ├── python └── exports │ ├── deep_lab_v3.py │ ├── classification.py │ ├── instance_segmentation.py │ ├── object_detection.py │ └── keypoint_detection.py ├── .gitignore ├── README.md ├── mix.exs ├── examples ├── 2-usage-as-nx-serving.livemd ├── 3-membrane.livemd └── 1-basic-tutorial.livemd ├── .credo.exs ├── LICENSE └── mix.lock /priv/categories/no_person_or_person.json: -------------------------------------------------------------------------------- 1 | ["no person", "person"] -------------------------------------------------------------------------------- /config/prod.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | config :logger, level: :info 4 | -------------------------------------------------------------------------------- /config/test.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | config :ex_vision, cache_path: "models" 4 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | Mimic.copy(Req) 2 | 3 | ExUnit.start(capture_log: true) 4 | -------------------------------------------------------------------------------- /test/assets/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/cat.jpg -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | [ 2 | inputs: [ 3 | "{lib,test,config}/**/*.{ex,exs}", 4 | ".formatter.exs", 5 | "*.exs" 6 | 
] 7 | ] 8 | -------------------------------------------------------------------------------- /config/dev.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | config :ortex, Ortex.Native, features: ["coreml"] 4 | 5 | config :ex_vision, cache_path: "models" 6 | -------------------------------------------------------------------------------- /test/assets/results/style_transfer/cat_candy.gt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_candy.gt -------------------------------------------------------------------------------- /test/assets/results/style_transfer/cat_mosaic.gt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_mosaic.gt -------------------------------------------------------------------------------- /test/assets/results/style_transfer/cat_udnie.gt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_udnie.gt -------------------------------------------------------------------------------- /test/assets/results/style_transfer/cat_princess.gt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_princess.gt -------------------------------------------------------------------------------- /test/assets/results/style_transfer/cat_candy_fast.gt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_candy_fast.gt 
-------------------------------------------------------------------------------- /test/assets/results/style_transfer/cat_mosaic_fast.gt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_mosaic_fast.gt -------------------------------------------------------------------------------- /test/assets/results/style_transfer/cat_udnie_fast.gt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_udnie_fast.gt -------------------------------------------------------------------------------- /test/assets/results/style_transfer/cat_princess_fast.gt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_princess_fast.gt -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_size = 2 7 | indent_style = space 8 | insert_final_newline = true 9 | max_line_length = 100 10 | tab_width = 2 11 | trim_trailing_whitespace = true 12 | -------------------------------------------------------------------------------- /config/config.exs: -------------------------------------------------------------------------------- 1 | import Config 2 | 3 | config :nx, default_backend: EXLA.Backend 4 | config :logger, level: :debug 5 | 6 | config :ex_vision, 7 | server_url: URI.new!("https://ai.swmansion.com/exvision/files") 8 | 9 | import_config "#{config_env()}.exs" 10 | -------------------------------------------------------------------------------- /config/runtime.exs: 
-------------------------------------------------------------------------------- 1 | import Config 2 | 3 | config :ex_vision, 4 | server_url: 5 | "EX_VISION_HOSTING_URI" 6 | |> System.get_env("https://ai.swmansion.com/exvision/files") 7 | |> URI.new!(), 8 | cache_path: System.get_env("EX_VISION_CACHE_DIR", "/tmp/ex_vision/cache") 9 | -------------------------------------------------------------------------------- /lib/ex_vision/ex_vision.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision do 2 | @moduledoc false 3 | use Application 4 | 5 | @impl true 6 | def start(_type, _args) do 7 | children = [{ExVision.Cache, name: ExVision.Cache}] 8 | Supervisor.start_link(children, strategy: :one_for_one) 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/ex_vision/types.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Types do 2 | @moduledoc """ 3 | A collection of commonly used types in ExVision 4 | """ 5 | 6 | @typedoc """ 7 | Type describing image size as a two element tuple `{width, height}` 8 | """ 9 | @type image_size_t() :: {width :: number(), height :: number()} 10 | end 11 | -------------------------------------------------------------------------------- /priv/categories/coco_with_voc_labels_categories.json: -------------------------------------------------------------------------------- 1 | [ 2 | "__background__", 3 | "aeroplane", 4 | "bicycle", 5 | "bird", 6 | "boat", 7 | "bottle", 8 | "bus", 9 | "car", 10 | "cat", 11 | "chair", 12 | "cow", 13 | "diningtable", 14 | "dog", 15 | "horse", 16 | "motorbike", 17 | "person", 18 | "pottedplant", 19 | "sheep", 20 | "sofa", 21 | "train", 22 | "tvmonitor" 23 | ] 24 | -------------------------------------------------------------------------------- /test/ex_vision/object_detection/fasterrcnn_resnet50_fpn_test.exs: 
-------------------------------------------------------------------------------- 1 | defmodule ExVision.ObjectDetection.FasterRCNN_ResNet50_FPN_Test do 2 | use ExVision.Model.Case, module: ExVision.ObjectDetection.FasterRCNN_ResNet50_FPN 3 | use ExVision.TestUtils 4 | alias ExVision.Types.BBox 5 | 6 | @impl true 7 | def test_inference_result(result) do 8 | assert [%BBox{x1: 135, y1: 22, label: :cat, score: score}] = result 9 | assert_floats_equal(score, 1.0) 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /test/ex_vision/object_detection/ssdlite320_mobilenetv3_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.ObjectDetection.Ssdlite320_MobileNetv3Test do 2 | use ExVision.Model.Case, module: ExVision.ObjectDetection.Ssdlite320_MobileNetv3 3 | use ExVision.TestUtils 4 | 5 | alias ExVision.Types.BBox 6 | 7 | @impl true 8 | def test_inference_result(result) do 9 | assert [%BBox{x1: 132, y1: 12, label: :cat, score: score}] = result 10 | assert_floats_equal(score, 1.0) 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /lib/ex_vision/types/metadata.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Types.ImageMetadata do 2 | @moduledoc """ 3 | Type describing image metadata that is being passed to `ExVision.Model.Implementation` callbacks. 4 | """ 5 | 6 | @enforce_keys [:original_size] 7 | defstruct @enforce_keys 8 | 9 | @typedoc """ 10 | Type describing image metadata that is being passed to `ExVision.Model.Implementation` callbacks. 
11 | 12 | - `original_size` - gives the original size of originally loaded image 13 | """ 14 | @type t() :: %__MODULE__{ 15 | original_size: ExVision.Types.image_size_t() 16 | } 17 | end 18 | -------------------------------------------------------------------------------- /test/ex_vision/instance_segmentation/maskrcnn_resnet50_fpn_v2_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2_Test do 2 | use ExVision.Model.Case, module: ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2 3 | use ExVision.TestUtils 4 | alias ExVision.Types.BBoxWithMask 5 | 6 | @impl true 7 | def test_inference_result(result) do 8 | assert [%BBoxWithMask{x1: 129, y1: 15, label: :cat, score: score, mask: mask}] = result 9 | assert_floats_equal(score, 1.0) 10 | 11 | assert_floats_equal(nx_mean(mask), 0.37) 12 | end 13 | 14 | defp nx_mean(t), do: t |> Nx.mean() |> Nx.to_number() 15 | end 16 | -------------------------------------------------------------------------------- /test/ex_vision/semantic_segmentation/deep_lab_v3_mobilenet_v3_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3Test do 2 | use ExVision.Model.Case, module: ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3 3 | use ExVision.TestUtils 4 | 5 | @impl true 6 | def test_inference_result(result) do 7 | assert %{cat: cat, __background__: background} = result, 8 | "The result doesn't contain required classes" 9 | 10 | assert_floats_equal(nx_mean(cat) + nx_mean(background), 1.0) 11 | assert_floats_equal(nx_mean(cat), 0.36) 12 | end 13 | 14 | defp nx_mean(t), do: t |> Nx.mean() |> Nx.to_number() 15 | end 16 | -------------------------------------------------------------------------------- /lib/ex_vision/utils/macros.ex: -------------------------------------------------------------------------------- 1 | defmodule 
ExVision.Utils.Macros do 2 | @moduledoc false 3 | defmacro defunimplemented(function, options \\ []) do 4 | options = 5 | Keyword.validate!(options, 6 | with_impl: false, 7 | message: "This function is not implemented" 8 | ) 9 | 10 | quote do 11 | if unquote(options[:with_impl]) do 12 | @impl true 13 | end 14 | 15 | # credo:disable-for-next-line 16 | def unquote(function) do 17 | raise RuntimeError, message: unquote(options[:message]) 18 | end 19 | end 20 | end 21 | 22 | defmacro __using__(_opts) do 23 | quote do 24 | import ExVision.Utils.Macros, only: :macros 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /lib/ex_vision/classification/squeezenet1_1.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.SqueezeNet1_1 do 2 | @moduledoc """ 3 | An object classifier based on SqueezeNet1_1. 4 | Exported from `torchvision`. 5 | Weights from Imagenet 1k. 6 | """ 7 | use ExVision.Model.Definition.Ortex, 8 | model: "squeezenet1_1_classifier.onnx", 9 | categories: "priv/categories/imagenet_v2_categories.json" 10 | 11 | use ExVision.Classification.GenericClassifier 12 | 13 | @impl true 14 | def preprocessing(image, _metadata) do 15 | image 16 | |> ExVision.Utils.resize({224, 224}) 17 | |> NxImage.normalize( 18 | Nx.f32([0.485, 0.456, 0.406]), 19 | Nx.f32([0.229, 0.224, 0.225]), 20 | channels: :first 21 | ) 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /test/ex_vision/classification/mobilenet_v3_small_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.MobileNetV3Test do 2 | @moduledoc false 3 | use ExVision.Model.Case, module: ExVision.Classification.MobileNetV3Small 4 | use ExVision.TestUtils 5 | 6 | @expected_result "test/assets/results/classification/mobilenet_v3_small.json" 7 | |> File.read!() 8 | |> 
Jason.decode!() 9 | |> Map.new(fn {k, v} -> {ExVision.Utils.normalize_category_name(k), v} end) 10 | 11 | @impl true 12 | def test_inference_result(result) do 13 | assert_float_dicts_equal(@expected_result, result) 14 | 15 | top_result = Enum.max_by(result, &elem(&1, 1)) 16 | assert {:tabby, _pred} = top_result 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /test/ex_vision/classification/squeezenet1_1_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.SqueezeNet1_1_Test do 2 | @moduledoc false 3 | use ExVision.Model.Case, module: ExVision.Classification.SqueezeNet1_1 4 | use ExVision.TestUtils 5 | 6 | @expected_result "test/assets/results/classification/squeezenet1_1.json" 7 | |> File.read!() 8 | |> Jason.decode!() 9 | |> Map.new(fn {k, v} -> {ExVision.Utils.normalize_category_name(k), v} end) 10 | 11 | @impl true 12 | def test_inference_result(result) do 13 | assert_float_dicts_equal(@expected_result, result, 0.21) 14 | 15 | top_result = Enum.max_by(result, &elem(&1, 1)) 16 | assert {:egyptian_cat, _pred} = top_result 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /lib/ex_vision/classification/efficientnet_v2_l.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.EfficientNet_V2_L do 2 | @moduledoc """ 3 | An object classifier based on EfficientNet_V2_L. 4 | Exported from `torchvision`. 5 | Weights from Imagenet 1k. 
6 | """ 7 | use ExVision.Model.Definition.Ortex, 8 | model: "efficientnet_v2_l_classifier.onnx", 9 | categories: "priv/categories/imagenet_v2_categories.json" 10 | 11 | use ExVision.Classification.GenericClassifier 12 | 13 | @impl true 14 | def preprocessing(image, _metadata) do 15 | image 16 | |> ExVision.Utils.resize({480, 480}) 17 | |> NxImage.normalize( 18 | Nx.f32([0.5, 0.5, 0.5]), 19 | Nx.f32([0.5, 0.5, 0.5]), 20 | channels: :first 21 | ) 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /lib/ex_vision/classification/mobilenet_v3_small.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.MobileNetV3Small do 2 | @moduledoc """ 3 | An object classifier based on MobileNetV3 Small. 4 | Exported from `torchvision`. 5 | Weights from Imagenet 1k. 6 | """ 7 | use ExVision.Model.Definition.Ortex, 8 | model: "mobilenetv3small-classifier.onnx", 9 | categories: "priv/categories/imagenet_v2_categories.json" 10 | 11 | use ExVision.Classification.GenericClassifier 12 | 13 | @impl true 14 | def preprocessing(image, _metadata) do 15 | image 16 | |> ExVision.Utils.resize({224, 224}) 17 | |> NxImage.normalize( 18 | Nx.f32([0.485, 0.456, 0.406]), 19 | Nx.f32([0.229, 0.224, 0.225]), 20 | channels: :first 21 | ) 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /test/ex_vision/classification/efficientnet_v2_l_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.EfficientNet_V2_L_Test do 2 | @moduledoc false 3 | use ExVision.Model.Case, module: ExVision.Classification.EfficientNet_V2_L 4 | use ExVision.TestUtils 5 | 6 | @expected_result "test/assets/results/classification/efficientnet_v2_l.json" 7 | |> File.read!() 8 | |> Jason.decode!() 9 | |> Map.new(fn {k, v} -> {ExVision.Utils.normalize_category_name(k), v} end) 10 | 11 | @impl
true 12 | def test_inference_result(result) do 13 | assert_float_dicts_equal(@expected_result, result) 14 | 15 | top_result = Enum.max_by(result, &elem(&1, 1)) 16 | assert {:egyptian_cat, _pred} = top_result 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /test/ex_vision/classification/efficientnet_v2_m_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.EfficientNet_V2_M_Test do 2 | @moduledoc false 3 | use ExVision.Model.Case, module: ExVision.Classification.EfficientNet_V2_M 4 | use ExVision.TestUtils 5 | 6 | @expected_result "test/assets/results/classification/efficientnet_v2_m.json" 7 | |> File.read!() 8 | |> Jason.decode!() 9 | |> Map.new(fn {k, v} -> {ExVision.Utils.normalize_category_name(k), v} end) 10 | 11 | @impl true 12 | def test_inference_result(result) do 13 | assert_float_dicts_equal(@expected_result, result) 14 | 15 | top_result = Enum.max_by(result, &elem(&1, 1)) 16 | assert {:egyptian_cat, _pred} = top_result 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /test/ex_vision/classification/efficientnet_v2_s_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.EfficientNet_V2_S_Test do 2 | @moduledoc false 3 | use ExVision.Model.Case, module: ExVision.Classification.EfficientNet_V2_S 4 | use ExVision.TestUtils 5 | 6 | @expected_result "test/assets/results/classification/efficientnet_v2_s.json" 7 | |> File.read!() 8 | |> Jason.decode!() 9 | |> Map.new(fn {k, v} -> {ExVision.Utils.normalize_category_name(k), v} end) 10 | 11 | @impl true 12 | def test_inference_result(result) do 13 | assert_float_dicts_equal(@expected_result, result) 14 | 15 | top_result = Enum.max_by(result, &elem(&1, 1)) 16 | assert {:egyptian_cat, _pred} = top_result 17 | end 18 | end 19 | 
-------------------------------------------------------------------------------- /lib/ex_vision/classification/efficientnet_v2_m.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.EfficientNet_V2_M do 2 | @moduledoc """ 3 | An object classifier based on EfficientNet_V2_M. 4 | Exported from `torchvision`. 5 | Weights from Imagenet 1k. 6 | """ 7 | use ExVision.Model.Definition.Ortex, 8 | model: "efficientnet_v2_m_classifier.onnx", 9 | categories: "priv/categories/imagenet_v2_categories.json" 10 | 11 | use ExVision.Classification.GenericClassifier 12 | 13 | @impl true 14 | def preprocessing(image, _metadata) do 15 | image 16 | |> ExVision.Utils.resize({480, 480}) 17 | |> NxImage.normalize( 18 | Nx.f32([0.485, 0.456, 0.406]), 19 | Nx.f32([0.229, 0.224, 0.225]), 20 | channels: :first 21 | ) 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /lib/ex_vision/classification/efficientnet_v2_s.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.EfficientNet_V2_S do 2 | @moduledoc """ 3 | An object classifier based on EfficientNet_V2_S. 4 | Exported from `torchvision`. 5 | Weights from Imagenet 1k. 
6 | """ 7 | use ExVision.Model.Definition.Ortex, 8 | model: "efficientnet_v2_s_classifier.onnx", 9 | categories: "priv/categories/imagenet_v2_categories.json" 10 | 11 | use ExVision.Classification.GenericClassifier 12 | 13 | @impl true 14 | def preprocessing(image, _metadata) do 15 | image 16 | |> ExVision.Utils.resize({384, 384}) 17 | |> NxImage.normalize( 18 | Nx.f32([0.485, 0.456, 0.406]), 19 | Nx.f32([0.229, 0.224, 0.225]), 20 | channels: :first 21 | ) 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /lib/ex_vision/object_detection/ssdlite320_mobilenetv3.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.ObjectDetection.Ssdlite320_MobileNetv3 do 2 | @moduledoc """ 3 | SSDLite320 object detector with MobileNetV3 Large architecture, exported from torchvision. 4 | """ 5 | use ExVision.Model.Definition.Ortex, 6 | model: "ssdlite320_mobilenet_v3_large_object_detector.onnx", 7 | categories: "priv/categories/coco_categories.json" 8 | 9 | use ExVision.ObjectDetection.GenericDetector 10 | 11 | require Logger 12 | 13 | @impl true 14 | def load(options \\ []) do 15 | if Keyword.has_key?(options, :batch_size) do 16 | Logger.warning( 17 | "`:batch_size` was given, but this model can only process batch of size 1. Overriding" 18 | ) 19 | end 20 | 21 | options 22 | |> Keyword.put(:batch_size, 1) 23 | |> default_model_load() 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/ex_vision/object_detection/fasterrcnn_resnet50_fpn.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.ObjectDetection.FasterRCNN_ResNet50_FPN do 2 | @moduledoc """ 3 | FasterRCNN object detector with ResNet50 backbone and FPN detection head, exported from torchvision.
4 | """ 5 | use ExVision.Model.Definition.Ortex, 6 | model: "fasterrcnn_resnet50_fpn_object_detector.onnx", 7 | categories: "priv/categories/coco_categories.json" 8 | 9 | use ExVision.ObjectDetection.GenericDetector 10 | 11 | require Logger 12 | 13 | @impl true 14 | def load(options \\ []) do 15 | if Keyword.has_key?(options, :batch_size) do 16 | Logger.warning( 17 | "`:batch_size` was given, but this model can only process batch of size 1. Overriding" 18 | ) 19 | end 20 | 21 | options 22 | |> Keyword.put(:batch_size, 1) 23 | |> default_model_load() 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | models/**/*.onnx filter=lfs diff=lfs merge=lfs -text 2 | models/deeplab_v3_mobilenetv3_segmentation.onnx filter=lfs diff=lfs merge=lfs -text 3 | models/maskrcnn_resnet50_fpn_v2_instance_segmentation.onnx filter=lfs diff=lfs merge=lfs -text 4 | models/keypointrcnn_resnet50_fpn_keypoint_detector.onnx filter=lfs diff=lfs merge=lfs -text 5 | models/fasterrcnn_resnet50_fpn_object_detector.onnx filter=lfs diff=lfs merge=lfs -text 6 | models/mobilenetv3small-classifier.onnx filter=lfs diff=lfs merge=lfs -text 7 | models/efficientnet_v2_s_classifier.onnx filter=lfs diff=lfs merge=lfs -text 8 | models/efficientnet_v2_m_classifier.onnx filter=lfs diff=lfs merge=lfs -text 9 | models/efficientnet_v2_l_classifier.onnx filter=lfs diff=lfs merge=lfs -text 10 | models/squeezenet1_1_classifier.onnx filter=lfs diff=lfs merge=lfs -text 11 | models/ssdlite320_mobilenet_v3_large_object_detector.onnx filter=lfs diff=lfs merge=lfs -text 12 | -------------------------------------------------------------------------------- /test/assets/categories.json: -------------------------------------------------------------------------------- 1 | ["__background__", "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
"boat", "traffic light", "fire hydrant", "N/A", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "N/A", "backpack", "umbrella", "N/A", "N/A", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "N/A", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "N/A", "dining table", "N/A", "N/A", "toilet", "N/A", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "N/A", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"] -------------------------------------------------------------------------------- /lib/ex_vision/model/definition/parts/with_categories.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Model.Definition.Parts.WithCategories do 2 | @moduledoc false 3 | require Logger 4 | alias ExVision.Utils 5 | 6 | defmacro __using__(options) do 7 | options = Keyword.validate!(options, [:name, :categories]) 8 | 9 | unless is_nil(options |> Keyword.fetch!(:categories)) do 10 | categories = options |> Keyword.fetch!(:categories) |> Utils.load_categories() 11 | spec = categories |> Enum.uniq() |> Bunch.Typespec.enum_to_alternative() 12 | 13 | quote do 14 | require Bunch.Typespec 15 | 16 | @typedoc """ 17 | Type describing all categories recognised by #{unquote(options[:name])} 18 | """ 19 | @type category_t() :: unquote(spec) 20 | 21 | @doc """ 22 | Returns a list of all categories recognised by #{unquote(options[:name])} 23 | """ 24 | @spec categories() :: [category_t()] 25 | def categories(), do: unquote(categories) 26 | end 27 | end 28 | end 29 | end 30 | 
-------------------------------------------------------------------------------- /lib/ex_vision/semantic_segmentation/deep_lab_v3_mobilenet_v3.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3 do 2 | @moduledoc """ 3 | A semantic segmentation model for MobileNetV3 Backbone. Exported from torchvision. 4 | """ 5 | use ExVision.Model.Definition.Ortex, 6 | model: "deeplab_v3_mobilenetv3_segmentation.onnx", 7 | categories: "priv/categories/coco_with_voc_labels_categories.json" 8 | 9 | @type output_t() :: %{category_t() => Nx.Tensor.t()} 10 | 11 | @impl true 12 | def preprocessing(img, _metdata) do 13 | ExVision.Utils.resize(img, {224, 224}) 14 | end 15 | 16 | @impl true 17 | def postprocessing(%{"output" => out}, metadata) do 18 | cls_per_pixel = 19 | out 20 | |> Nx.backend_transfer() 21 | |> NxImage.resize(metadata.original_size, channels: :first) 22 | |> Nx.squeeze() 23 | |> Axon.Activations.softmax(axis: [0]) 24 | |> Nx.argmax(axis: 0) 25 | 26 | categories() 27 | |> Enum.with_index() 28 | |> Map.new(fn {category, i} -> 29 | {category, cls_per_pixel |> Nx.equal(i)} 30 | end) 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /test/ex_vision/keypoint_detection/keypointrcnn_resnet50_fpn_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPNTest do 2 | use ExVision.Model.Case, module: ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPN 3 | use ExVision.TestUtils 4 | alias ExVision.Types.BBoxWithKeypoints 5 | 6 | @impl true 7 | def test_inference_result(result) do 8 | assert [ 9 | %BBoxWithKeypoints{ 10 | x1: 113, 11 | y1: 15, 12 | label: :person, 13 | score: score1, 14 | keypoints: keypoints 15 | }, 16 | %BBoxWithKeypoints{ 17 | x1: 141, 18 | y1: 167, 19 | label: :person, 20 | score: score2 21 | } 22 | ] = result 23 | 
24 | assert_floats_equal(score1, 0.46) 25 | assert_floats_equal(score2, 0.29) 26 | 27 | assert max_keypoint_score(keypoints) < 5 28 | end 29 | 30 | defp max_keypoint_score(keypoints) do 31 | keypoints |> Enum.map(fn {_name, %{score: score}} -> score end) |> Enum.max() 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/ubuntu 3 | { 4 | "name": "Ubuntu", 5 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile 6 | "image": "mcr.microsoft.com/devcontainers/base:jammy", 7 | "features": { 8 | "ghcr.io/devcontainers/features/python:1": { 9 | "installTools": true, 10 | "version": "3.11" 11 | }, 12 | "ghcr.io/devcontainers/features/rust:1": { 13 | "version": "1.76", 14 | "profile": "minimal" 15 | }, 16 | "ghcr.io/devcontainers-contrib/features/elixir-asdf:2": { 17 | "elixirVersion": "1.16.2", 18 | "erlangVersion": "26.0" 19 | } 20 | } 21 | 22 | // Features to add to the dev container. More info: https://containers.dev/features. 23 | // "features": {}, 24 | 25 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 26 | // "forwardPorts": [], 27 | 28 | // Use 'postCreateCommand' to run commands after the container is created. 29 | // "postCreateCommand": "uname -a", 30 | 31 | // Configure tool-specific properties. 32 | // "customizations": {}, 33 | 34 | // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
35 | // "remoteUser": "root" 36 | } 37 | -------------------------------------------------------------------------------- /lib/ex_vision/classification/generic_classifier.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Classification.GenericClassifier do 2 | @moduledoc false 3 | 4 | # Contains a default implementation of post processing for TorchVision classifiers 5 | # To use: `use ExVision.Classification.GenericClassifier` 6 | 7 | alias ExVision.Types.ImageMetadata 8 | 9 | @typep output_t() :: %{atom() => number()} 10 | 11 | @spec postprocessing(map(), ImageMetadata.t(), [atom()]) :: output_t() 12 | def postprocessing(%{"output" => scores}, _metadata, categories) do 13 | scores 14 | |> Nx.backend_transfer() 15 | |> Nx.flatten() 16 | |> Axon.Activations.softmax(axis: [0]) 17 | |> Nx.to_flat_list() 18 | |> then(&Enum.zip(categories, &1)) 19 | |> Map.new() 20 | end 21 | 22 | defmacro __using__(_opts) do 23 | quote do 24 | @typedoc """ 25 | A type describing the output of a classification model as a mapping of category to probability. 
26 | """ 27 | @type output_t() :: %{category_t() => number()} 28 | 29 | @impl true 30 | @spec postprocessing(map(), ExVision.Types.ImageMetadata.t()) :: output_t() 31 | def postprocessing(output, metadata) do 32 | ExVision.Classification.GenericClassifier.postprocessing(output, metadata, categories()) 33 | end 34 | 35 | defoverridable postprocessing: 2 36 | end 37 | end 38 | end 39 | -------------------------------------------------------------------------------- /.github/workflows/elixir.yml: -------------------------------------------------------------------------------- 1 | name: Elixir CI 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | pull_request: 7 | branches: ["main"] 8 | 9 | permissions: 10 | contents: read 11 | 12 | env: 13 | MIX_ENV: test 14 | 15 | jobs: 16 | build: 17 | name: Build and test 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Elixir 23 | uses: erlef/setup-beam@61e01a43a562a89bfc54c7f9a378ff67b03e4a21 # v1.16.0 24 | with: 25 | elixir-version: "1.16.2" # [Required] Define the Elixir version 26 | otp-version: "26.0" # [Required] Define the Erlang/OTP version 27 | - name: brndnmtthws/rust-action-rustup 28 | uses: brndnmtthws/rust-action-rustup@v1.0.0 29 | - name: Restore dependencies cache 30 | uses: actions/cache@v3 31 | with: 32 | path: deps 33 | key: ${{ runner.os }}-mix-${{ hashFiles('**/mix.lock') }} 34 | restore-keys: ${{ runner.os }}-mix- 35 | - name: Install dependencies 36 | run: mix deps.get && mix deps.compile 37 | - name: Checks if compiles without warning 38 | run: mix compile --warnings-as-errors 39 | - name: Run tests 40 | run: mix test 41 | - name: Run Credo 42 | run: mix credo 43 | - name: Test formatting 44 | run: mix format --check-formatted 45 | -------------------------------------------------------------------------------- /python/exports/deep_lab_v3.py: -------------------------------------------------------------------------------- 1 | from torchvision.models.segmentation 
import ( 2 | deeplabv3_mobilenet_v3_large, 3 | DeepLabV3_MobileNet_V3_Large_Weights, 4 | ) 5 | import torch 6 | import json 7 | from pathlib import Path 8 | 9 | base_dir = Path("models/segmentation/deeplab_v3_mobilenetv3") 10 | base_dir.mkdir(parents=True, exist_ok=True) 11 | 12 | model_file = base_dir / "model.onnx" 13 | categories_file = base_dir / "categories.json" 14 | 15 | weights = DeepLabV3_MobileNet_V3_Large_Weights.DEFAULT 16 | model = deeplabv3_mobilenet_v3_large(weights=weights) 17 | model.eval() 18 | 19 | categories = weights.meta["categories"] 20 | transforms = weights.transforms() 21 | 22 | with open(categories_file, "w") as f: 23 | json.dump(categories, f) 24 | 25 | onnx_input = torch.rand(1, 3, 224, 224) 26 | 27 | 28 | from torchvision.io.image import read_image 29 | 30 | cat = read_image("examples/files/cat.jpg") 31 | batch = transforms(cat).unsqueeze(0) 32 | outputs = model(batch) 33 | 34 | torch.onnx.export( 35 | model, 36 | batch, 37 | str(model_file), 38 | verbose=False, 39 | input_names=["input"], 40 | output_names=["output", "aux"], 41 | dynamic_axes={ 42 | "input": {0: "batch_size", 2: "width", 3: "height"}, 43 | "output": {0: "batch_size", 2: "width", 3: "height"}, 44 | }, 45 | export_params=True, 46 | ) 47 | 48 | print(transforms) 49 | print(batch.shape) 50 | print(outputs) 51 | -------------------------------------------------------------------------------- /lib/ex_vision/types/bbox.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Types.BBox do 2 | @moduledoc """ 3 | A struct describing the bounding box returned by the object detection model. 4 | """ 5 | 6 | @enforce_keys [:x1, :y1, :x2, :y2, :label, :score] 7 | defstruct @enforce_keys 8 | 9 | @typedoc """ 10 | A type describing the Bounding Box object. 11 | 12 | Bounding box is a rectangle encompassing the region. 13 | When used in object detectors, this box will describe the location of the object in the image. 
14 | 15 | - `x1` - x component of the upper left corner 16 | - `y1` - y component of the upper left corner 17 | - `x2` - x component of the lower right corner 18 | - `y2` - y component of the lower right corner 19 | - `score` - confidence of the prediction 20 | - `label` - label assigned to this bounding box 21 | """ 22 | @type t(label_t) :: %__MODULE__{ 23 | x1: number(), 24 | y1: number(), 25 | y2: number(), 26 | x2: number(), 27 | label: label_t, 28 | score: number() 29 | } 30 | 31 | @typedoc """ 32 | Exactly like `t:t/1`, but doesn't put any constraints on the `label` field. 33 | """ 34 | @type t() :: t(term()) 35 | 36 | @doc """ 37 | Returns the width of the bounding box, i.e. the absolute difference between `x2` and `x1`. 38 | """ 39 | @spec width(t()) :: number() 40 | def width(%__MODULE__{x1: x1, x2: x2}), do: abs(x2 - x1) 41 | 42 | @doc """ 43 | Returns the height of the bounding box, i.e. the absolute difference between `y2` and `y1`. 44 | """ 45 | @spec height(t()) :: number() 46 | def height(%__MODULE__{y1: y1, y2: y2}), do: abs(y2 - y1) 47 | end 48 | -------------------------------------------------------------------------------- /priv/categories/coco_categories.json: -------------------------------------------------------------------------------- 1 | [ 2 | "__background__", 3 | "person", 4 | "bicycle", 5 | "car", 6 | "motorcycle", 7 | "airplane", 8 | "bus", 9 | "train", 10 | "truck", 11 | "boat", 12 | "traffic light", 13 | "fire hydrant", 14 | "N/A", 15 | "stop sign", 16 | "parking meter", 17 | "bench", 18 | "bird", 19 | "cat", 20 | "dog", 21 | "horse", 22 | "sheep", 23 | "cow", 24 | "elephant", 25 | "bear", 26 | "zebra", 27 | "giraffe", 28 | "N/A", 29 | "backpack", 30 | "umbrella", 31 | "N/A", 32 | "N/A", 33 | "handbag", 34 | "tie", 35 | "suitcase", 36 | "frisbee", 37 | "skis", 38 | "snowboard", 39 | "sports ball", 40 | "kite", 41 | "baseball bat", 42 | "baseball glove", 43 | "skateboard", 44 | "surfboard", 45 | "tennis racket", 46 | "bottle", 47 | "N/A", 48 | "wine glass", 49 | "cup", 50 | "fork", 51 | "knife", 52 | "spoon", 53 | "bowl", 54 | "banana", 55 | "apple",
56 | "sandwich", 57 | "orange", 58 | "broccoli", 59 | "carrot", 60 | "hot dog", 61 | "pizza", 62 | "donut", 63 | "cake", 64 | "chair", 65 | "couch", 66 | "potted plant", 67 | "bed", 68 | "N/A", 69 | "dining table", 70 | "N/A", 71 | "N/A", 72 | "toilet", 73 | "N/A", 74 | "tv", 75 | "laptop", 76 | "mouse", 77 | "remote", 78 | "keyboard", 79 | "cell phone", 80 | "microwave", 81 | "oven", 82 | "toaster", 83 | "sink", 84 | "refrigerator", 85 | "N/A", 86 | "book", 87 | "clock", 88 | "vase", 89 | "scissors", 90 | "teddy bear", 91 | "hair drier", 92 | "toothbrush" 93 | ] 94 | -------------------------------------------------------------------------------- /lib/ex_vision/types/bboxwithmask.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Types.BBoxWithMask do 2 | @moduledoc """ 3 | A struct describing the bounding box with mask returned by the instance segmentation model. 4 | """ 5 | 6 | @enforce_keys [ 7 | :x1, 8 | :y1, 9 | :x2, 10 | :y2, 11 | :label, 12 | :score, 13 | :mask 14 | ] 15 | defstruct @enforce_keys 16 | 17 | @typedoc """ 18 | A type describing the Bounding Box with Mask object. 19 | 20 | Bounding box is a rectangle encompassing the region. 21 | When used in instance segmentation, this box will describe the location of the object in the image. 22 | Additionally, a binary mask represents the instance segmentation of the object. 
23 | 24 | - `x1` - x component of the upper left corner 25 | - `y1` - y component of the upper left corner 26 | - `x2` - x component of the lower right corner 27 | - `y2` - y component of the lower right corner 28 | - `score` - confidence of the prediction 29 | - `label` - label assigned to this bounding box 30 | - `mask` - binary mask 31 | """ 32 | @type t(label_t) :: %__MODULE__{ 33 | x1: number(), 34 | y1: number(), 35 | y2: number(), 36 | x2: number(), 37 | label: label_t, 38 | score: number(), 39 | mask: Nx.Tensor.t() 40 | } 41 | 42 | @typedoc """ 43 | Exactly like `t:t/1`, but doesn't put any constraints on the `label` field. 44 | """ 45 | @type t() :: t(term()) 46 | 47 | @doc """ 48 | Returns the width of the bounding box, i.e. the absolute difference between `x2` and `x1`. 49 | """ 50 | @spec width(t()) :: number() 51 | def width(%__MODULE__{x1: x1, x2: x2}), do: abs(x2 - x1) 52 | 53 | @doc """ 54 | Returns the height of the bounding box, i.e. the absolute difference between `y2` and `y1`. 55 | """ 56 | @spec height(t()) :: number() 57 | def height(%__MODULE__{y1: y1, y2: y2}), do: abs(y2 - y1) 58 | end 59 | -------------------------------------------------------------------------------- /test/ex_vision/style_transfer/style_transfer_test.exs: -------------------------------------------------------------------------------- 1 | defmodule TestConfiguration do 2 | @spec configuration() :: %{} 3 | def configuration do 4 | %{ 5 | ExVision.StyleTransfer.CandyTest => [ 6 | module: ExVision.StyleTransfer.Candy, 7 | gt_file: "cat_candy.gt" 8 | ], 9 | ExVision.StyleTransfer.CandyFastTest => [ 10 | module: ExVision.StyleTransfer.CandyFast, 11 | gt_file: "cat_candy_fast.gt" 12 | ], 13 | ExVision.StyleTransfer.PrincessTest => [ 14 | module: ExVision.StyleTransfer.Princess, 15 | gt_file: "cat_princess.gt" 16 | ], 17 | ExVision.StyleTransfer.PrincessFastTest => [ 18 | module: ExVision.StyleTransfer.PrincessFast, 19 | gt_file: "cat_princess_fast.gt" 20 | ], 21 | ExVision.StyleTransfer.UdnieTest => [ 22 | module: ExVision.StyleTransfer.Udnie, 23 | gt_file: "cat_udnie.gt" 24 | ], 25 |
ExVision.StyleTransfer.UdnieFastTest => [ 26 | module: ExVision.StyleTransfer.UdnieFast, 27 | gt_file: "cat_udnie_fast.gt" 28 | ], 29 | ExVision.StyleTransfer.MosaicTest => [ 30 | module: ExVision.StyleTransfer.Mosaic, 31 | gt_file: "cat_mosaic.gt" 32 | ], 33 | ExVision.StyleTransfer.MosaicFastTest => [ 34 | module: ExVision.StyleTransfer.MosaicFast, 35 | gt_file: "cat_mosaic_fast.gt" 36 | ] 37 | } 38 | end 39 | end 40 | 41 | for {module, opts} <- TestConfiguration.configuration() do 42 | defmodule module do 43 | use ExVision.Model.Case, module: unquote(opts[:module]) 44 | use ExVision.TestUtils 45 | 46 | @impl true 47 | def test_inference_result(result) do 48 | expected_result = 49 | "test/assets/results/style_transfer/#{unquote(opts[:gt_file])}" 50 | |> File.read!() 51 | |> Nx.deserialize() 52 | 53 | assert_tensors_equal(result, expected_result, 5, 0.05) 54 | end 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /test/support/exvision/test_utils.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.TestUtils do 2 | @moduledoc false 3 | 4 | import ExUnit.Assertions, only: :macros 5 | 6 | @default_delta 0.05 7 | 8 | @doc """ 9 | Compares two floats by ensuring that the distance between them is smaller than the specified delta 10 | """ 11 | @spec float_eq(float(), float(), float()) :: boolean() 12 | def float_eq(a, b, delta \\ @default_delta) do 13 | abs(a - b) < delta 14 | end 15 | 16 | @typedoc """ 17 | Type describing a dictionary whose values are floats 18 | """ 19 | @type float_dict_t() :: %{any() => float()} 20 | # Two maps are equal when every key of either map is present in both and each pair of values passes float_eq/3. 21 | @spec float_dict_eq(float_dict_t(), float_dict_t(), number()) :: boolean() 22 | def float_dict_eq(a, b, delta \\ @default_delta) do 23 | keys = MapSet.new(Map.keys(a) ++ Map.keys(b)) 24 | 25 | Enum.reduce(keys, true, fn key, acc -> 26 | a = a[key] 27 | b = b[key] 28 | 29 | acc and not is_nil(a) and not is_nil(b) and float_eq(a, b, delta) 30 |
end) 31 | end 32 | 33 | defmacro assert_floats_equal(a, b, delta \\ @default_delta) do 34 | quote do 35 | assert ExVision.TestUtils.float_eq(unquote(a), unquote(b), unquote(delta)) 36 | end 37 | end 38 | 39 | defmacro assert_float_dicts_equal(a, b, delta \\ @default_delta) do 40 | quote do 41 | assert ExVision.TestUtils.float_dict_eq(unquote(a), unquote(b), unquote(delta)) 42 | end 43 | end 44 | 45 | defmacro assert_tensors_equal(a, b, delta \\ @default_delta, relative_delta \\ 0.0) do 46 | quote do 47 | value_condition = 48 | unquote(a) 49 | |> Nx.all_close(unquote(b), atol: unquote(delta), rtol: unquote(relative_delta)) 50 | |> Nx.reduce_min() 51 | |> Nx.to_number() == 1 52 | 53 | equal_on_count = 54 | unquote(a) 55 | |> Nx.equal(unquote(b)) 56 | |> Nx.as_type(:u64) 57 | |> Nx.reduce(0, fn x, y -> Nx.add(x, y) end) 58 | |> Nx.to_number() 59 | 60 | number_count = unquote(a) |> Nx.shape() |> Tuple.product() 61 | proportional_condition = equal_on_count / number_count > 0.99 62 | 63 | assert value_condition or proportional_condition 64 | end 65 | end 66 | 67 | defmacro __using__(_opts) do 68 | quote do 69 | import ExVision.TestUtils, only: :macros 70 | end 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /lib/ex_vision/instance_segmentation/maskrcnn_resnet50_fpn_v2.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2 do 2 | @moduledoc """ 3 | An instance segmentation model with a ResNet-50-FPN backbone. Exported from torchvision. 
4 | """ 5 | use ExVision.Model.Definition.Ortex, 6 | model: "maskrcnn_resnet50_fpn_v2_instance_segmentation.onnx", 7 | categories: "priv/categories/coco_categories.json" 8 | 9 | import ExVision.Utils 10 | 11 | require Logger 12 | 13 | alias ExVision.Types.BBoxWithMask 14 | 15 | @type output_t() :: [BBoxWithMask.t()] 16 | 17 | @impl true 18 | def load(options \\ []) do 19 | if Keyword.has_key?(options, :batch_size) do 20 | Logger.warning( 21 | "`:batch_size` was given, but this model can only process batch of size 1. Overriding" 22 | ) 23 | end 24 | # Whatever the caller passed, the batch size is forced to 1 before loading. 25 | options 26 | |> Keyword.put(:batch_size, 1) 27 | |> default_model_load() 28 | end 29 | 30 | @impl true 31 | def preprocessing(img, _metadata) do 32 | ExVision.Utils.resize(img, {224, 224}) 33 | end 34 | 35 | @impl true 36 | def postprocessing( 37 | %{ 38 | "boxes_unsqueezed" => bboxes, 39 | "labels_unsqueezed" => labels, 40 | "masks_unsqueezed" => masks, 41 | "scores_unsqueezed" => scores 42 | }, 43 | metadata 44 | ) do 45 | categories = categories() 46 | 47 | {h, w} = metadata.original_size 48 | scale_x = w / 224 49 | scale_y = h / 224 50 | 51 | bboxes = scale_and_listify_bbox(bboxes, Nx.f32([scale_x, scale_y, scale_x, scale_y])) 52 | 53 | scores = squeeze_and_listify(scores) 54 | labels = squeeze_and_listify(labels) 55 | 56 | masks = 57 | masks 58 | |> Nx.backend_transfer() 59 | |> Nx.squeeze(axes: [0, 2]) 60 | |> NxImage.resize(metadata.original_size, channels: :first) 61 | |> Nx.round() 62 | |> Nx.as_type(:s64) 63 | |> Nx.to_list() 64 | 65 | [bboxes, labels, scores, masks] 66 | |> Enum.zip() 67 | |> Enum.filter(fn {_bbox, _label, score, _mask} -> score > 0.1 end) 68 | |> Enum.map(fn {[x1, y1, x2, y2], label, score, mask} -> 69 | %BBoxWithMask{ 70 | x1: x1, 71 | y1: y1, 72 | x2: x2, 73 | y2: y2, 74 | label: Enum.at(categories, label), 75 | score: score, 76 | mask: Nx.tensor(mask) 77 | } 78 | end) 79 | end 80 | end 81 | --------------------------------------------------------------------------------
/test/ex_vision/cache_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.CacheTest do 2 | use ExUnit.Case, async: false 3 | use Mimic 4 | 5 | alias ExVision.Cache 6 | 7 | @moduletag :tmp_dir 8 | 9 | setup ctx do 10 | files = 11 | Map.get(ctx, :files, %{ 12 | "/test" => rand_string(256) 13 | }) 14 | 15 | set_mimic_global() 16 | 17 | stub(Req, :get, fn 18 | %URI{host: "mock_server", port: 8000, path: path}, options -> 19 | options = Keyword.validate!(options, [:raw, :into]) 20 | 21 | case Map.fetch(files, path) do 22 | {:ok, content} -> 23 | body = Enum.into([content], options[:into]) 24 | {:ok, %Req.Response{status: 200, body: body}} 25 | 26 | :error -> 27 | # Req seems to be saving the file anyway 28 | body = Enum.into([""], options[:into]) 29 | {:ok, %Req.Response{status: 404, body: body}} 30 | end 31 | 32 | _uri, _options -> 33 | {:error, %Mint.TransportError{reason: :connection_failed}} 34 | end) 35 | 36 | [files: files] 37 | end 38 | 39 | setup %{tmp_dir: tmp_dir} do 40 | {:ok, _cache} = 41 | Cache.start_link( 42 | name: MyCache, 43 | server_url: URI.new!("http://mock_server:8000"), 44 | cache_path: tmp_dir 45 | ) 46 | 47 | :ok 48 | end 49 | 50 | test "Can download the file", ctx do 51 | [{path, expected_contents}] = Enum.to_list(ctx.files) 52 | expected_path = Path.join(ctx.tmp_dir, path) 53 | assert {:ok, ^expected_path} = Cache.lazy_get(MyCache, path) 54 | verify_download(expected_path, expected_contents) 55 | end 56 | 57 | test "will fail if server is unreachable" do 58 | url = "http://localhost:9999" 59 | {:ok, c} = Cache.start_link(server_url: url, name: nil) 60 | 61 | assert {:error, :connection_failed} = Cache.lazy_get(c, "/test") 62 | assert {:error, :connection_failed} = Cache.lazy_get(c, "/test") 63 | end 64 | 65 | test "will fail if we request file that doesn't exist" do 66 | assert {:error, :doesnt_exist} = Cache.lazy_get(MyCache, "/idk") 67 | assert {:error, :doesnt_exist} = 
Cache.lazy_get(MyCache, "/idk") 68 | end 69 | 70 | defp verify_download(path, expected_contents) do 71 | assert File.exists?(path) 72 | assert not File.dir?(path) 73 | assert File.read!(path) == expected_contents 74 | end 75 | 76 | defp rand_string(length), do: :crypto.strong_rand_bytes(length) 77 | end 78 | -------------------------------------------------------------------------------- /lib/ex_vision/object_detection/generic_detector.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.ObjectDetection.GenericDetector do 2 | @moduledoc false 3 | 4 | # Contains a default implementation of pre and post processing for TorchVision detectors 5 | # To use: `use ExVision.ObjectDetection.GenericDetector` 6 | 7 | import ExVision.Utils 8 | 9 | require Logger 10 | 11 | alias ExVision.Types.{BBox, ImageMetadata} 12 | 13 | @typep output_t() :: [BBox.t()] 14 | 15 | @spec preprocessing(Nx.Tensor.t(), ImageMetadata.t()) :: Nx.Tensor.t() 16 | def preprocessing(img, _metadata) do 17 | ExVision.Utils.resize(img, {224, 224}) 18 | end 19 | 20 | @spec postprocessing(map(), ImageMetadata.t(), [atom()]) :: output_t() 21 | def postprocessing( 22 | %{ 23 | "boxes_unsqueezed" => bboxes, 24 | "scores_unsqueezed" => scores, 25 | "labels_unsqueezed" => labels 26 | }, 27 | metadata, 28 | categories 29 | ) do 30 | {h, w} = metadata.original_size 31 | scale_x = w / 224 32 | scale_y = h / 224 33 | 34 | bboxes = scale_and_listify_bbox(bboxes, Nx.f32([scale_x, scale_y, scale_x, scale_y])) 35 | 36 | scores = squeeze_and_listify(scores) 37 | labels = squeeze_and_listify(labels) 38 | 39 | [bboxes, scores, labels] 40 | |> Enum.zip() 41 | |> Enum.filter(fn {_bbox, score, _label} -> score > 0.1 end) 42 | |> Enum.map(fn {[x1, y1, x2, y2], score, label} -> 43 | %BBox{ 44 | x1: x1, 45 | x2: x2, 46 | y1: y1, 47 | y2: y2, 48 | score: score, 49 | label: Enum.at(categories, label) 50 | } 51 | end) 52 | end 53 | 54 | defmacro __using__(_opts) do 55 | quote do 
56 | @typedoc """ 57 | A type describing output of `run/2` as a list of a bounding boxes. 58 | 59 | Each bounding box describes the location of the object indicated by the `label`. 60 | It also provides the `score` field marking the probability of the prediction. 61 | Bounding boxes with very low scores should most likely be ignored. 62 | """ 63 | @type output_t() :: [BBox.t()] 64 | 65 | @impl true 66 | defdelegate preprocessing(image, metadata), to: ExVision.ObjectDetection.GenericDetector 67 | 68 | @impl true 69 | @spec postprocessing(map(), ExVision.Types.ImageMetadata.t()) :: output_t() 70 | def postprocessing(output, metadata) do 71 | ExVision.ObjectDetection.GenericDetector.postprocessing(output, metadata, categories()) 72 | end 73 | 74 | defoverridable preprocessing: 2, postprocessing: 2 75 | end 76 | end 77 | end 78 | -------------------------------------------------------------------------------- /lib/ex_vision/types/bboxwithkeypoints.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Types.BBoxWithKeypoints do 2 | @moduledoc """ 3 | A struct describing the bounding box with keypoints returned by the keypoint detection model. 4 | """ 5 | 6 | @enforce_keys [ 7 | :x1, 8 | :y1, 9 | :x2, 10 | :y2, 11 | :label, 12 | :score, 13 | :keypoints 14 | ] 15 | defstruct @enforce_keys 16 | 17 | @typedoc """ 18 | A type describing the Bounding Box object. 19 | 20 | Bounding box is a rectangle encompassing the region. 21 | When used in object detectors, this box will describe the location of the object in the image. 22 | It also includes keypoints. Each keypoint has a predefined atom as its name. 
23 | 24 | - `x1` - x component of the upper left corner 25 | - `y1` - y component of the upper left corner 26 | - `x2` - x component of the lower right corner 27 | - `y2` - y component of the lower right corner 28 | - `label` - label assigned to this bounding box 29 | - `score` - confidence of the prediction 30 | - `keypoints` - a map where keys are predefined names (represented as atoms) denoting the specific keypoints (body parts). The values associated with each key are another map, which contains the following: 31 | - `:x`: The x-coordinate of the keypoint 32 | - `:y`: The y-coordinate of the keypoint 33 | - `:score`: The confidence score of the predicted keypoint 34 | 35 | Keypoint atom names include: 36 | - `:nose` 37 | - `:left_eye` 38 | - `:right_eye` 39 | - `:left_ear` 40 | - `:right_ear` 41 | - `:left_shoulder` 42 | - `:right_shoulder` 43 | - `:left_elbow` 44 | - `:right_elbow` 45 | - `:left_wrist` 46 | - `:right_wrist` 47 | - `:left_hip` 48 | - `:right_hip` 49 | - `:left_knee` 50 | - `:right_knee` 51 | - `:left_ankle` 52 | - `:right_ankle` 53 | """ 54 | @type t(label_t) :: %__MODULE__{ 55 | x1: number(), 56 | y1: number(), 57 | y2: number(), 58 | x2: number(), 59 | label: label_t, 60 | score: number(), 61 | keypoints: %{ 62 | atom() => %{ 63 | x: number(), 64 | y: number(), 65 | score: number() 66 | } 67 | } 68 | } 69 | 70 | @typedoc """ 71 | Exactly like `t:t/1`, but doesn't put any constraints on the `label` field. 72 | """ 73 | @type t() :: t(term()) 74 | 75 | @doc """ 76 | Returns the width of the bounding box, i.e. the absolute difference between `x2` and `x1`. 77 | """ 78 | @spec width(t()) :: number() 79 | def width(%__MODULE__{x1: x1, x2: x2}), do: abs(x2 - x1) 80 | 81 | @doc """ 82 | Returns the height of the bounding box, i.e. the absolute difference between `y2` and `y1`. 83 | """ 84 | @spec height(t()) :: number() 85 | def height(%__MODULE__{y1: y1, y2: y2}), do: abs(y2 - y1) 86 | end 87 | -------------------------------------------------------------------------------- /test/support/exvision/model/case.ex:
-------------------------------------------------------------------------------- 1 | defmodule ExVision.Model.Case do 2 | @moduledoc false 3 | @img_path "test/assets/cat.jpg" 4 | 5 | @callback test_inference_result(result :: any()) :: any() 6 | 7 | defmacro __using__(opts) do 8 | opts = Keyword.validate!(opts, [:module]) 9 | 10 | quote do 11 | use ExUnit.Case, async: true 12 | # use ExVision.TestUtils.MockCacheServer 13 | @behaviour ExVision.Model.Case 14 | 15 | setup_all do 16 | {:ok, model} = unquote(opts[:module]).load() 17 | [model: model] 18 | end 19 | 20 | test "load/0", %{model: model} do 21 | assert model 22 | end 23 | 24 | test "inference", %{model: model} do 25 | model 26 | |> unquote(opts[:module]).run(unquote(@img_path)) 27 | |> test_inference_result() 28 | end 29 | 30 | test "inference for batch", %{model: model} do 31 | model 32 | |> unquote(opts[:module]).run([unquote(@img_path), unquote(@img_path)]) 33 | |> Enum.each(&test_inference_result/1) 34 | end 35 | 36 | test "child_spec/1" do 37 | assert spec = unquote(opts[:module]).child_spec() 38 | end 39 | 40 | describe "stateful/process workflow" do 41 | setup ctx do 42 | name = String.to_atom("#{__MODULE__}#{ctx[:test]}") 43 | model = ctx[:model] 44 | 45 | {:ok, _supervisor} = 46 | Supervisor.start_link( 47 | [unquote(opts[:module]).child_spec(name: name)], 48 | strategy: :one_for_one 49 | ) 50 | 51 | [name: name] 52 | end 53 | 54 | test "inference", %{name: name} do 55 | name 56 | |> unquote(opts[:module]).batched_run(unquote(@img_path)) 57 | |> test_inference_result() 58 | end 59 | 60 | test "inference for batch", %{name: name} do 61 | name 62 | |> unquote(opts[:module]).batched_run([unquote(@img_path), unquote(@img_path)]) 63 | |> Enum.each(&test_inference_result/1) 64 | end 65 | end 66 | 67 | test "stateful/process workflow accepts options" do 68 | options = [ 69 | name: __MODULE__.TestProcess1, 70 | batch_size: 8, 71 | batch_timeout: 10, 72 | partitions: true 73 | ] 74 | 75 | child_spec = 
{unquote(opts[:module]), options} 76 | 77 | assert {:ok, _supervisor} = 78 | Supervisor.start_link([child_spec], strategy: :one_for_one, restarts: :none) 79 | 80 | assert unquote(opts[:module]).batched_run( 81 | __MODULE__.TestProcess1, 82 | unquote(@img_path) 83 | ) 84 | end 85 | end 86 | end 87 | end 88 | -------------------------------------------------------------------------------- /lib/ex_vision/style_transfer/style_transfer.ex: -------------------------------------------------------------------------------- 1 | defmodule Configuration do 2 | @moduledoc false 3 | 4 | @low_resolution {400, 300} 5 | @high_resolution {640, 480} 6 | 7 | @spec configuration() :: %{} 8 | def configuration do 9 | %{ 10 | ExVision.StyleTransfer.Candy => [model: "candy.onnx", resolution: @high_resolution], 11 | ExVision.StyleTransfer.CandyFast => [model: "candy_fast.onnx", resolution: @low_resolution], 12 | ExVision.StyleTransfer.Princess => [model: "princess.onnx", resolution: @high_resolution], 13 | ExVision.StyleTransfer.PrincessFast => [ 14 | model: "princess_fast.onnx", 15 | resolution: @low_resolution 16 | ], 17 | ExVision.StyleTransfer.Udnie => [model: "udnie.onnx", resolution: @high_resolution], 18 | ExVision.StyleTransfer.UdnieFast => [model: "udnie_fast.onnx", resolution: @low_resolution], 19 | ExVision.StyleTransfer.Mosaic => [model: "mosaic.onnx", resolution: @high_resolution], 20 | ExVision.StyleTransfer.MosaicFast => [ 21 | model: "mosaic_fast.onnx", 22 | resolution: @low_resolution 23 | ] 24 | } 25 | end 26 | end 27 | 28 | for {module, opts} <- Configuration.configuration() do 29 | defmodule module do 30 | @moduledoc """ 31 | #{module} is a custom style transfer model optimised for devices with low computational capabilities and CPU inference. 
32 | """ 33 | use ExVision.Model.Definition.Ortex, model: unquote(opts[:model]) 34 | 35 | require Logger 36 | 37 | @typedoc """ 38 | A type consisting of output tensor (stylized image tensor) from style transfer models of shape {#{Enum.join(Tuple.to_list(opts[:resolution]) ++ [3], ", ")}}. 39 | """ 40 | @type output_t() :: Nx.Tensor.t() 41 | 42 | @impl true 43 | def load(options \\ []) do 44 | if Keyword.has_key?(options, :batch_size) do 45 | Logger.warning( 46 | "`:batch_size` was given, but this model can only process batch of size 1. Overriding" 47 | ) 48 | end 49 | # Whatever the caller passed, the batch size is forced to 1 before loading. 50 | options 51 | |> Keyword.put(:batch_size, 1) 52 | |> default_model_load() 53 | end 54 | 55 | @impl true 56 | def preprocessing(img, _metadata) do 57 | img |> ExVision.Utils.resize(unquote(opts[:resolution])) |> Nx.divide(255.0) 58 | end 59 | 60 | @impl true 61 | def postprocessing( 62 | stylized_frame, 63 | metadata 64 | ) do 65 | {h, w} = unquote(opts[:resolution]) 66 | 67 | stylized_frame["55"] 68 | |> Nx.reshape({3, h, w}, names: [:channel, :height, :width]) 69 | |> NxImage.resize(metadata.original_size, channels: :first, method: :bilinear) 70 | |> Nx.clip(0.0, 255.0) 71 | |> Nx.as_type(:u8) 72 | |> Nx.transpose(axes: [1, 2, 0]) 73 | end 74 | end 75 | end 76 | -------------------------------------------------------------------------------- /python/exports/classification.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from torchvision.transforms.functional import to_tensor, resize 3 | import torch 4 | import json 5 | from pathlib import Path 6 | from PIL import Image 7 | 8 | 9 | def export(model_builder, Model_Weights, input_shape): 10 | base_dir = Path(f"models/classification/{model_builder.__name__}") 11 | base_dir.mkdir(parents=True, exist_ok=True) 12 | 13 | model_file = base_dir / "model.onnx" 14 | categories_file = base_dir / "categories.json" 15 | 16 | weights = Model_Weights.DEFAULT 17 | model =
model_builder(weights=weights) 18 | model.eval() 19 | 20 | categories = [x.lower().replace(" ", "_") 21 | for x in weights.meta["categories"]] 22 | transforms = weights.transforms() 23 | 24 | with open(categories_file, "w") as f: 25 | json.dump(categories, f) 26 | 27 | onnx_input = to_tensor(Image.open("test/assets/cat.jpg")).unsqueeze(0) 28 | onnx_input = resize(onnx_input, input_shape) 29 | onnx_input = transforms(onnx_input) 30 | 31 | torch.onnx.export( 32 | model, 33 | onnx_input, 34 | str(model_file), 35 | verbose=False, 36 | input_names=["input"], 37 | output_names=["output"], 38 | dynamic_axes={ 39 | "input": {0: "batch_size"}, 40 | "output": {0: "batch_size"} 41 | }, 42 | export_params=True, 43 | ) 44 | 45 | expected_output: torch.Tensor = model(onnx_input) 46 | expected_output = expected_output.softmax(dim=1) 47 | 48 | result = dict(zip(categories, expected_output[0].tolist())) 49 | 50 | file = Path( 51 | f"test/assets/results/classification/{model_builder.__name__}.json" 52 | ) 53 | file.parent.mkdir(exist_ok=True, parents=True) 54 | 55 | with file.open("w") as f: 56 | json.dump(result, f) 57 | 58 | 59 | parser = argparse.ArgumentParser() 60 | parser.add_argument("model") 61 | args = parser.parse_args() 62 | 63 | match(args.model): 64 | case "mobilenet_v3_small": 65 | from torchvision.models import mobilenet_v3_small, MobileNet_V3_Small_Weights 66 | export(mobilenet_v3_small, MobileNet_V3_Small_Weights, [224, 224]) 67 | case "efficientnet_v2_s": 68 | from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights 69 | export(efficientnet_v2_s, EfficientNet_V2_S_Weights, [384, 384]) 70 | case "efficientnet_v2_m": 71 | from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights 72 | export(efficientnet_v2_m, EfficientNet_V2_M_Weights, [480, 480]) 73 | case "efficientnet_v2_l": 74 | from torchvision.models import efficientnet_v2_l, EfficientNet_V2_L_Weights 75 | export(efficientnet_v2_l, EfficientNet_V2_L_Weights, [480, 480]) 76 | 
case "squeezenet1_1": 77 | from torchvision.models import squeezenet1_1, SqueezeNet1_1_Weights 78 | export(squeezenet1_1, SqueezeNet1_1_Weights, [224, 224]) 79 | case _: 80 | print("Model not found") 81 | -------------------------------------------------------------------------------- /lib/ex_vision/keypoint_detection/keypointrcnn_resnet50_fpn.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPN do 2 | @moduledoc """ 3 | Keypoint R-CNN model with a ResNet-50-FPN backbone, exported from torchvision. 4 | """ 5 | use ExVision.Model.Definition.Ortex, 6 | model: "keypointrcnn_resnet50_fpn_keypoint_detector.onnx", 7 | categories: "priv/categories/no_person_or_person.json" 8 | 9 | import ExVision.Utils 10 | 11 | require Logger 12 | 13 | alias ExVision.Types.BBoxWithKeypoints 14 | 15 | @type output_t() :: [BBoxWithKeypoints.t()] 16 | 17 | @keypoints_names [ 18 | :nose, 19 | :left_eye, 20 | :right_eye, 21 | :left_ear, 22 | :right_ear, 23 | :left_shoulder, 24 | :right_shoulder, 25 | :left_elbow, 26 | :right_elbow, 27 | :left_wrist, 28 | :right_wrist, 29 | :left_hip, 30 | :right_hip, 31 | :left_knee, 32 | :right_knee, 33 | :left_ankle, 34 | :right_ankle 35 | ] 36 | 37 | @impl true 38 | def load(options \\ []) do 39 | if Keyword.has_key?(options, :batch_size) do 40 | Logger.warning( 41 | "`:batch_size` was given, but this model can only process batch of size 1.
Overriding" 42 | ) 43 | end 44 | 45 | options 46 | |> Keyword.put(:batch_size, 1) 47 | |> default_model_load() 48 | end 49 | 50 | @impl true 51 | def preprocessing(img, _metadata) do 52 | ExVision.Utils.resize(img, {224, 224}) 53 | end 54 | 55 | @impl true 56 | def postprocessing( 57 | %{ 58 | "boxes_unsqueezed" => bboxes, 59 | "scores_unsqueezed" => scores, 60 | "labels_unsqueezed" => labels, 61 | "keypoints_unsqueezed" => keypoints_list, 62 | "keypoints_scores_unsqueezed" => keypoints_scores_list 63 | }, 64 | metadata 65 | ) do 66 | categories = categories() 67 | 68 | {h, w} = metadata.original_size 69 | scale_x = w / 224 70 | scale_y = h / 224 71 | 72 | bboxes = scale_and_listify_bbox(bboxes, Nx.f32([scale_x, scale_y, scale_x, scale_y])) 73 | 74 | scores = squeeze_and_listify(scores) 75 | labels = squeeze_and_listify(labels) 76 | 77 | keypoints_list = scale_and_listify_bbox(keypoints_list, Nx.tensor([scale_x, scale_y, 1])) 78 | 79 | keypoints_scores_list = squeeze_and_listify(keypoints_scores_list) 80 | 81 | [bboxes, scores, labels, keypoints_list, keypoints_scores_list] 82 | |> Enum.zip() 83 | |> Enum.filter(fn {_bbox, score, _label, _keypoints, _keypoints_scores} -> score > 0.1 end) 84 | |> Enum.map(fn {[x1, y1, x2, y2], score, label, keypoints, keypoints_scores} -> 85 | keypoints = 86 | [keypoints, keypoints_scores] 87 | |> Enum.zip() 88 | |> Enum.map(fn {[x, y, _w], keypoint_score} -> %{x: x, y: y, score: keypoint_score} end) 89 | 90 | %BBoxWithKeypoints{ 91 | x1: x1, 92 | x2: x2, 93 | y1: y1, 94 | y2: y2, 95 | score: score, 96 | label: Enum.at(categories, label), 97 | keypoints: [@keypoints_names, keypoints] |> Enum.zip() |> Map.new() 98 | } 99 | end) 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /lib/ex_vision/model.ex: -------------------------------------------------------------------------------- 1 | defprotocol ExVision.Model do 2 | @moduledoc """ 3 | A protocol describing a generic ExVision 
model. 4 | """ 5 | 6 | @typedoc """ 7 | A type describing a single element that can be processed by ExVision's models 8 | """ 9 | @type model_input_t() :: Path.t() | Nx.Tensor.t() | Vix.Vips.Image.t() 10 | 11 | @typedoc """ 12 | A typespec defining ExVision's model input, either as a single `t:model_input_t/0` or a list. 13 | """ 14 | @type input_t() :: model_input_t() | [model_input_t()] 15 | 16 | @typedoc """ 17 | A generic type indicating a model output. For details on each model, refer to its own `output_t()` definition. 18 | """ 19 | @type output_t() :: any() 20 | 21 | @doc """ 22 | Starts and links the model as a process (process workflow) 23 | """ 24 | @spec start_link(t(), keyword()) :: GenServer.on_start() 25 | def start_link(model, options \\ []) 26 | 27 | @doc """ 28 | A function used to submit input for inference (inline variant). 29 | """ 30 | @spec run(t(), input_t()) :: output_t() | [output_t()] 31 | def run(model, input) 32 | 33 | @doc """ 34 | Function used to submit the input for inference in a process setting when the model is served as a process. 35 | """ 36 | @spec batched_run(t(), input_t()) :: output_t() 37 | def batched_run(model, input) 38 | 39 | @spec as_serving(t()) :: Nx.Serving.t() 40 | def as_serving(model) 41 | end 42 | 43 | defimpl ExVision.Model, for: Any do 44 | require Logger 45 | 46 | def run(model, input) when is_list(input) do 47 | model |> as_serving() |> Nx.Serving.run(input) 48 | end 49 | 50 | def run(model, input) do 51 | model 52 | |> run([input]) 53 | |> hd() 54 | end 55 | 56 | def start_link(model, options \\ []) do 57 | options 58 | |> validate_start_link_options!(name: process_name(model)) 59 | |> Keyword.put(:serving, as_serving(model)) 60 | |> Nx.Serving.start_link() 61 | end 62 | 63 | def batched_run(model, input) do 64 | Logger.warning(""" 65 | Calling batched_run/2 at the ExVision.Model struct can lead to undefined behaviour. 66 | Referencing the already running process by name is preffered.
67 | """) 68 | 69 | model 70 | |> process_name() 71 | |> ExVision.Utils.batched_run(input) 72 | end 73 | 74 | def as_serving(%{serving: serving}), do: serving 75 | 76 | defp process_name(%module{}), do: module 77 | 78 | defp validate_start_link_options!(options, extras) do 79 | spec = 80 | [ 81 | :partitions, 82 | :batch_timeout, 83 | :distribution_weight, 84 | :shutdown, 85 | :hibernate_after, 86 | :spawn_opt, 87 | :name 88 | ] -- Keyword.keys(extras) 89 | 90 | Keyword.validate!( 91 | options, 92 | spec ++ extras 93 | ) 94 | end 95 | end 96 | 97 | defimpl ExVision.Model, for: Atom do 98 | use ExVision.Utils.Macros 99 | defunimplemented(run(_model, _input), with_impl: true) 100 | defunimplemented(start_link(_model, _opts), with_impl: true) 101 | defunimplemented(as_serving(_model), with_impl: true) 102 | 103 | @impl true 104 | def batched_run(module, input) do 105 | ExVision.Utils.batched_run(module, input) 106 | end 107 | end 108 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | compile_commands.json 2 | .gdb_history 3 | bundlex.sh 4 | bundlex.bat 5 | 6 | # Dir generated by tmp_dir ExUnit tag 7 | /tmp/ 8 | 9 | # Created by https://www.gitignore.io/api/c,vim,linux,macos,elixir,windows,visualstudiocode 10 | # Edit at https://www.gitignore.io/?templates=c,vim,linux,macos,elixir,windows,visualstudiocode 11 | 12 | ### C ### 13 | # Prerequisites 14 | *.d 15 | 16 | # Object files 17 | *.o 18 | *.ko 19 | *.obj 20 | *.elf 21 | 22 | # Linker output 23 | *.ilk 24 | *.map 25 | *.exp 26 | 27 | # Precompiled Headers 28 | *.gch 29 | *.pch 30 | 31 | # Libraries 32 | *.lib 33 | *.a 34 | *.la 35 | *.lo 36 | 37 | # Shared objects (inc. 
Windows DLLs) 38 | *.dll 39 | *.so 40 | *.so.* 41 | *.dylib 42 | 43 | # Executables 44 | *.exe 45 | *.out 46 | *.app 47 | *.i*86 48 | *.x86_64 49 | *.hex 50 | 51 | # Debug files 52 | *.dSYM/ 53 | *.su 54 | *.idb 55 | *.pdb 56 | 57 | # Kernel Module Compile Results 58 | *.mod* 59 | *.cmd 60 | .tmp_versions/ 61 | modules.order 62 | Module.symvers 63 | Mkfile.old 64 | dkms.conf 65 | 66 | ### Elixir ### 67 | /_build 68 | /cover 69 | /deps 70 | /doc 71 | /.fetch 72 | erl_crash.dump 73 | *.ez 74 | *.beam 75 | /config/*.secret.exs 76 | .elixir_ls/ 77 | 78 | ### Elixir Patch ### 79 | 80 | ### Linux ### 81 | *~ 82 | 83 | # temporary files which can be created if a process still has a handle open of a deleted file 84 | .fuse_hidden* 85 | 86 | # KDE directory preferences 87 | .directory 88 | 89 | # Linux trash folder which might appear on any partition or disk 90 | .Trash-* 91 | 92 | # .nfs files are created when an open file is removed but is still being accessed 93 | .nfs* 94 | 95 | ### macOS ### 96 | # General 97 | .DS_Store 98 | .AppleDouble 99 | .LSOverride 100 | 101 | # Icon must end with two \r 102 | Icon 103 | 104 | # Thumbnails 105 | ._* 106 | 107 | # Files that might appear in the root of a volume 108 | .DocumentRevisions-V100 109 | .fseventsd 110 | .Spotlight-V100 111 | .TemporaryItems 112 | .Trashes 113 | .VolumeIcon.icns 114 | .com.apple.timemachine.donotpresent 115 | 116 | # Directories potentially created on remote AFP share 117 | .AppleDB 118 | .AppleDesktop 119 | Network Trash Folder 120 | Temporary Items 121 | .apdisk 122 | 123 | ### Vim ### 124 | # Swap 125 | [._]*.s[a-v][a-z] 126 | [._]*.sw[a-p] 127 | [._]s[a-rt-v][a-z] 128 | [._]ss[a-gi-z] 129 | [._]sw[a-p] 130 | 131 | # Session 132 | Session.vim 133 | Sessionx.vim 134 | 135 | # Temporary 136 | .netrwhist 137 | # Auto-generated tag files 138 | tags 139 | # Persistent undo 140 | [._]*.un~ 141 | 142 | ### VisualStudioCode ### 143 | .vscode/* 144 | !.vscode/settings.json 145 | !.vscode/tasks.json 146 | 
!.vscode/launch.json 147 | !.vscode/extensions.json 148 | 149 | ### VisualStudioCode Patch ### 150 | # Ignore all local history of files 151 | .history 152 | 153 | ### Windows ### 154 | # Windows thumbnail cache files 155 | Thumbs.db 156 | Thumbs.db:encryptable 157 | ehthumbs.db 158 | ehthumbs_vista.db 159 | 160 | # Dump file 161 | *.stackdump 162 | 163 | # Folder config file 164 | [Dd]esktop.ini 165 | 166 | # Recycle Bin used on file shares 167 | $RECYCLE.BIN/ 168 | 169 | # Windows Installer files 170 | *.cab 171 | *.msi 172 | *.msix 173 | *.msm 174 | *.msp 175 | 176 | # Windows shortcuts 177 | *.lnk 178 | 179 | # End of https://www.gitignore.io/api/c,vim,linux,macos,elixir,windows,visualstudiocode 180 | models/ 181 | -------------------------------------------------------------------------------- /python/exports/instance_segmentation.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from torchvision.transforms.functional import to_tensor, resize 3 | import torch 4 | import json 5 | from pathlib import Path 6 | import onnx 7 | from onnx import helper, TensorProto 8 | from PIL import Image 9 | 10 | 11 | def export(model_builder, Model_Weights): 12 | base_dir = Path(f"models/instance_segmentation/{model_builder.__name__}") 13 | base_dir.mkdir(parents=True, exist_ok=True) 14 | 15 | model_file = base_dir / "model.onnx" 16 | categories_file = base_dir / "categories.json" 17 | 18 | weights = Model_Weights.DEFAULT 19 | model = model_builder(weights=weights) 20 | model.eval() 21 | 22 | categories = weights.meta["categories"] 23 | transforms = weights.transforms() 24 | 25 | with open(categories_file, "w") as f: 26 | json.dump(categories, f) 27 | 28 | onnx_input = to_tensor(Image.open("test/assets/cat.jpg")).unsqueeze(0) 29 | onnx_input = resize(onnx_input, [224, 224]) 30 | onnx_input = transforms(onnx_input) 31 | 32 | torch.onnx.export( 33 | model, 34 | onnx_input, 35 | str(model_file), 36 | verbose=False, 37 | 
input_names=["input"], 38 | output_names=["boxes", "labels", "scores", "masks"], 39 | dynamic_axes={ 40 | "boxes": {0: "detections"}, 41 | "labels": {0: "detections"}, 42 | "scores": {0: "detections"}, 43 | "masks": {0: "detections"}, 44 | }, 45 | export_params=True, 46 | ) 47 | 48 | model = onnx.load(str(model_file)) 49 | 50 | prev_names = ["boxes", "labels", "scores", "masks"] 51 | 52 | nodes = [] 53 | for data in prev_names: 54 | axes_init = helper.make_tensor( 55 | name=data+"_axes", 56 | data_type=TensorProto.INT64, 57 | dims=[1], 58 | vals=[0] 59 | ) 60 | model.graph.initializer.append(axes_init) 61 | 62 | node = helper.make_node( 63 | op_type="Unsqueeze", 64 | inputs=[data, data+"_axes"], 65 | outputs=[data+"_unsqueezed"] 66 | ) 67 | nodes.append(node) 68 | 69 | model.graph.node.extend(nodes) 70 | 71 | new_outputs = [] 72 | for data in prev_names: 73 | match data: 74 | case "boxes": 75 | shape = [1, None, 4] 76 | case "masks": 77 | shape = [1, None, 1, 224, 224] 78 | case _: 79 | shape = [1, None] 80 | 81 | new_output = helper.make_tensor_value_info( 82 | name=data+"_unsqueezed", 83 | elem_type=TensorProto.INT64 if data == "labels" else TensorProto.FLOAT, 84 | shape=shape 85 | ) 86 | new_outputs.append(new_output) 87 | 88 | model.graph.output.extend(new_outputs) 89 | 90 | for data in prev_names: 91 | old_output = next(i for i in model.graph.output if i.name == data) 92 | model.graph.output.remove(old_output) 93 | 94 | onnx.save(model, str(model_file)) 95 | 96 | 97 | parser = argparse.ArgumentParser() 98 | parser.add_argument("model") 99 | args = parser.parse_args() 100 | 101 | match(args.model): 102 | case "maskrcnn_resnet50_fpn_v2": 103 | from torchvision.models.detection import maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights 104 | export(maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights) 105 | case _: 106 | print("Model not found") 107 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # ExVision 2 | 3 | [![Hex.pm](https://img.shields.io/hexpm/v/ex_vision.svg)](https://hex.pm/packages/ex_vision) 4 | [![API Docs](https://img.shields.io/badge/api-docs-yellow.svg?style=flat)](https://hexdocs.pm/ex_vision) 5 | 6 | ExVision is a collection of vision-related AI models delivered as a ready-to-use package with an easy-to-understand API. 7 | ExVision will take care of all necessary input transformations internally and return the result in a sensible format. 8 | 9 | ExVision models are powered by [Ortex](https://www.github.com/elixir-nx/ortex). 10 | 11 | ## Usage 12 | 13 | In order to use the model, you need to first load it 14 | 15 | ```elixir 16 | alias ExVision.Classification.MobileNetV3Small 17 | 18 | {:ok, model} = MobileNetV3Small.load() #=> {:ok, %MobileNetV3Small{}} 19 | ``` 20 | 21 | After that, the model is available for inference. 22 | ExVision will take care of all necessary input transformations and convert the output to a format that makes sense. 23 | 24 | ```elixir 25 | MobileNetV3Small.run(model, "example/files/cat.jpg") #=> %{cat: 0.98, dog: 0.01, car: 0.00, ...} 26 | ``` 27 | 28 | ExVision is also capable of accepting tensors and images as input: 29 | 30 | ```elixir 31 | cat = Image.open!("example/files/cat.jpg") 32 | {:ok, cat_tensor} = Image.to_nx(cat) 33 | MobileNetV3Small.run(model, cat) #=> %{cat: 0.98, dog: 0.01, car: 0.00, ...} 34 | MobileNetV3Small.run(model, cat_tensor) #=> %{cat: 0.98, dog: 0.01, car: 0.00, ...} 35 | ``` 36 | 37 | ### Usage in process workflow 38 | 39 | All ExVision models are implemented using `Nx.Serving`. 40 | They are therefore compatible with the process workflow.
41 | 42 | You can start a model's process: 43 | 44 | ```elixir 45 | {:ok, pid} = MobileNetV3Small.start_link(name: MyModel) 46 | ``` 47 | 48 | or start it under the supervision tree 49 | 50 | ```elixir 51 | {:ok, _supervisor_pid} = Supervisor.start_link([ 52 | {MobileNetV3Small, name: MyModel} 53 | ], strategy: :one_for_one) 54 | ``` 55 | 56 | After starting, it's immediately available for inference using the `batched_run/2` function. 57 | 58 | ```elixir 59 | MobileNetV3Small.batched_run(MyModel, cat) #=> %{cat: 0.98, dog: 0.01, car: 0.00, ...} 60 | ``` 61 | 62 | ## Installation 63 | 64 | The package can be installed by adding `ex_vision` to your list of dependencies in `mix.exs`: 65 | 66 | ```elixir 67 | def deps do 68 | [ 69 | {:ex_vision, "~> 0.4.0"} 70 | ] 71 | end 72 | ``` 73 | 74 | In order to compile, ExVision **requires Rust and Cargo** to be installed on your system. 75 | 76 | ## Current Timeline 77 | 78 | We have identified a set of models that we would like to support. 79 | If the model that you would like to use is missing, feel free to open an issue, express interest in an existing one or contribute the model directly.
80 | 81 | - [x] Classification 82 | - [x] MobileNetV3 Small 83 | - [x] EfficientNetV2 84 | - [x] SqueezeNet 85 | - [x] Object detection 86 | - [x] SSDLite320 - MobileNetV3 Large backbone 87 | - [x] FasterRCNN ResNet50 FPN 88 | - [x] Semantic segmentation 89 | - [x] DeepLabV3 - MobileNetV3 90 | - [x] Instance segmentation 91 | - [x] Mask R-CNN 92 | - [x] Keypoint Detection 93 | - [x] Keypoint R-CNN 94 | 95 | ## Copyright and License 96 | 97 | Copyright 2024, [Software Mansion](https://swmansion.com/?utm_source=git&utm_medium=readme&utm_campaign=ex_vision) 98 | 99 | [![Software Mansion](https://logo.swmansion.com/logo?color=white&variant=desktop&width=200&tag=membrane-github)](https://swmansion.com/?utm_source=git&utm_medium=readme&utm_campaign=ex_vision) 100 | 101 | Licensed under the [Apache License, Version 2.0](LICENSE) 102 | -------------------------------------------------------------------------------- /python/exports/object_detection.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from torchvision.transforms.functional import to_tensor, resize 3 | import torch 4 | import json 5 | from pathlib import Path 6 | import onnx 7 | from onnx import helper, TensorProto 8 | from PIL import Image 9 | 10 | 11 | def export(model_builder, Model_Weights, output_names): 12 | base_dir = Path(f"models/object_detection/{model_builder.__name__}") 13 | base_dir.mkdir(parents=True, exist_ok=True) 14 | 15 | model_file = base_dir / "model.onnx" 16 | categories_file = base_dir / "categories.json" 17 | 18 | weights = Model_Weights.DEFAULT 19 | model = model_builder(weights=weights) 20 | model.eval() 21 | 22 | categories = weights.meta["categories"] 23 | transforms = weights.transforms() 24 | 25 | with open(categories_file, "w") as f: 26 | json.dump(categories, f) 27 | 28 | onnx_input = to_tensor(Image.open("test/assets/cat.jpg")).unsqueeze(0) 29 | onnx_input = resize(onnx_input, [224, 224]) 30 | onnx_input = 
transforms(onnx_input) 31 | 32 | torch.onnx.export( 33 | model, 34 | onnx_input, 35 | str(model_file), 36 | verbose=False, 37 | input_names=["input"], 38 | output_names=output_names, 39 | dynamic_axes={ 40 | "boxes": {0: "detections"}, 41 | "labels": {0: "detections"}, 42 | "scores": {0: "detections"}, 43 | }, 44 | export_params=True, 45 | ) 46 | 47 | model = onnx.load(str(model_file)) 48 | 49 | nodes = [] 50 | for output_name in output_names: 51 | axes_init = helper.make_tensor( 52 | name=output_name+"_axes", 53 | data_type=TensorProto.INT64, 54 | dims=[1], 55 | vals=[0] 56 | ) 57 | model.graph.initializer.append(axes_init) 58 | 59 | node = helper.make_node( 60 | op_type="Unsqueeze", 61 | inputs=[output_name, output_name+"_axes"], 62 | outputs=[output_name+"_unsqueezed"] 63 | ) 64 | nodes.append(node) 65 | 66 | model.graph.node.extend(nodes) 67 | 68 | new_outputs = [] 69 | for output_name in output_names: 70 | new_output = helper.make_tensor_value_info( 71 | name=output_name+"_unsqueezed", 72 | elem_type=TensorProto.INT64 if output_name == "labels" else TensorProto.FLOAT, 73 | shape=[1, None, 4] if output_name == "boxes" else [1, None] 74 | ) 75 | new_outputs.append(new_output) 76 | 77 | model.graph.output.extend(new_outputs) 78 | 79 | for output_name in output_names: 80 | old_output = next( 81 | i for i in model.graph.output if i.name == output_name) 82 | model.graph.output.remove(old_output) 83 | 84 | onnx.save(model, str(model_file)) 85 | 86 | 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument("model") 89 | args = parser.parse_args() 90 | 91 | match(args.model): 92 | case "fasterrcnn_resnet50_fpn": 93 | from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights 94 | export( 95 | fasterrcnn_resnet50_fpn, 96 | FasterRCNN_ResNet50_FPN_Weights, 97 | ["boxes", "labels", "scores"] 98 | ) 99 | case "ssdlite320_mobilenet_v3_large": 100 | from torchvision.models.detection import ssdlite320_mobilenet_v3_large, 
SSDLite320_MobileNet_V3_Large_Weights 101 | export( 102 | ssdlite320_mobilenet_v3_large, 103 | SSDLite320_MobileNet_V3_Large_Weights, 104 | ["boxes", "scores", "labels"] 105 | ) 106 | case _: 107 | print("Model not found") 108 | -------------------------------------------------------------------------------- /python/exports/keypoint_detection.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from torchvision.transforms.functional import to_tensor, resize 3 | import torch 4 | import json 5 | from pathlib import Path 6 | import onnx 7 | from onnx import helper, TensorProto 8 | from PIL import Image 9 | 10 | 11 | def export(model_builder, Model_Weights): 12 | base_dir = Path(f"models/keypoint_detection/{model_builder.__name__}") 13 | base_dir.mkdir(parents=True, exist_ok=True) 14 | 15 | model_file = base_dir / "model.onnx" 16 | categories_file = base_dir / "categories.json" 17 | 18 | weights = Model_Weights.DEFAULT 19 | model = model_builder(weights=weights) 20 | model.eval() 21 | 22 | categories = weights.meta["categories"] 23 | transforms = weights.transforms() 24 | 25 | with open(categories_file, "w") as f: 26 | json.dump(categories, f) 27 | 28 | onnx_input = to_tensor(Image.open("test/assets/cat.jpg")).unsqueeze(0) 29 | onnx_input = resize(onnx_input, [224, 224]) 30 | onnx_input = transforms(onnx_input) 31 | 32 | torch.onnx.export( 33 | model, 34 | onnx_input, 35 | str(model_file), 36 | verbose=False, 37 | input_names=["input"], 38 | output_names=["boxes", "labels", "scores", 39 | "keypoints", "keypoints_scores"], 40 | dynamic_axes={ 41 | "boxes": {0: "detections"}, 42 | "labels": {0: "detections"}, 43 | "scores": {0: "detections"}, 44 | "keypoints": {0: "detections"}, 45 | "keypoints_scores": {0: "detections"} 46 | }, 47 | export_params=True, 48 | ) 49 | 50 | output_names = ["boxes", "labels", "scores", 51 | "keypoints", "keypoints_scores"] 52 | 53 | model = onnx.load(str(model_file)) 54 | 55 | nodes = [] 
56 | for output_name in output_names: 57 | axes_init = helper.make_tensor( 58 | name=output_name+"_axes", 59 | data_type=TensorProto.INT64, 60 | dims=[1], 61 | vals=[0] 62 | ) 63 | model.graph.initializer.append(axes_init) 64 | 65 | node = helper.make_node( 66 | op_type="Unsqueeze", 67 | inputs=[output_name, output_name+"_axes"], 68 | outputs=[output_name+"_unsqueezed"] 69 | ) 70 | nodes.append(node) 71 | 72 | model.graph.node.extend(nodes) 73 | 74 | new_outputs = [] 75 | for output_name in output_names: 76 | match output_name: 77 | case "boxes": 78 | shape = [1, None, 4] 79 | case "keypoints": 80 | shape = [1, None, 17, 3] 81 | case "keypoints_scores": 82 | shape = [1, None, 17] 83 | case _: 84 | shape = [1, None] 85 | 86 | new_output = helper.make_tensor_value_info( 87 | name=output_name+"_unsqueezed", 88 | elem_type=TensorProto.INT64 if output_name == "labels" else TensorProto.FLOAT, 89 | shape=shape 90 | ) 91 | new_outputs.append(new_output) 92 | 93 | model.graph.output.extend(new_outputs) 94 | 95 | for output_name in output_names: 96 | old_output = next( 97 | i for i in model.graph.output if i.name == output_name) 98 | model.graph.output.remove(old_output) 99 | 100 | onnx.save(model, str(model_file)) 101 | 102 | 103 | parser = argparse.ArgumentParser() 104 | parser.add_argument("model") 105 | args = parser.parse_args() 106 | 107 | match(args.model): 108 | case "keypointrcnn_resnet50_fpn": 109 | from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights 110 | export(keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights) 111 | case _: 112 | print("Model not found") 113 | -------------------------------------------------------------------------------- /lib/ex_vision/model/definition.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Model.Definition do 2 | @moduledoc """ 3 | A module describing the behaviour that MUST be implemented by all ExVision models. 
4 | """ 5 | 6 | require Bunch.Typespec 7 | 8 | @callback load(keyword()) :: {:ok, ExVision.Model.t()} | {:error, reason :: atom()} 9 | @callback run(ExVision.Model.t(), ExVision.Model.input_t()) :: any() 10 | @callback batched_run(atom(), ExVision.Model.input_t()) :: any() 11 | @callback child_spec(keyword()) :: Supervisor.child_spec() 12 | @callback start_link(keyword()) :: GenServer.on_start() 13 | 14 | defp module_to_name(module), 15 | do: 16 | module 17 | |> Module.split() 18 | |> List.last() 19 | |> String.split("_") 20 | |> Enum.map_join(" ", fn <<first::binary-size(1), rest::binary>> -> 21 | String.upcase(first) <> rest 22 | end) 23 | 24 | defmacro __using__(options) do 25 | Application.ensure_all_started(:req) 26 | 27 | options = 28 | Keyword.validate!(options, 29 | categories: nil, 30 | name: module_to_name(__CALLER__.module) 31 | ) 32 | 33 | quote do 34 | # conditional definition based on whether `categories` option is present has to be moved inside __using__ macro 35 | # here is the explanation: https://cocoa-research.works/2022/10/conditional-compliation-with-if-and-use-in-elixir/ 36 | use ExVision.Model.Definition.Parts.WithCategories, unquote(options) 37 | 38 | @behaviour ExVision.Model.Definition 39 | 40 | @derive [ExVision.Model] 41 | @enforce_keys [:serving] 42 | defstruct [:serving] 43 | 44 | @typedoc """ 45 | An instance of the #{__MODULE__} 46 | """ 47 | @opaque t() :: %__MODULE__{serving: Nx.Serving.t()} 48 | 49 | @impl true 50 | def start_link(options \\ []) do 51 | {start_link_options, load_options} = 52 | Keyword.split(options, [ 53 | :partitions, 54 | :batch_timeout, 55 | :distribution_weight, 56 | :shutdown, 57 | :hibernate_after, 58 | :spawn_opt, 59 | :name 60 | ]) 61 | 62 | with {:ok, model} <- load(load_options), 63 | do: ExVision.Model.start_link(model, start_link_options) 64 | end 65 | 66 | @doc """ 67 | Same as `load/1`, but raises an error on failure.
68 | """ 69 | @spec load!(keyword()) :: t() 70 | def load!(opts \\ []) do 71 | case load(opts) do 72 | {:ok, model} -> 73 | model 74 | 75 | {:error, reason} -> 76 | require Logger 77 | 78 | Logger.error( 79 | "Failed to load model #{unquote(options[:name])} due to #{inspect(reason)}" 80 | ) 81 | 82 | raise "Failed to load model" 83 | end 84 | end 85 | 86 | @impl true 87 | @doc """ 88 | Immediately applies the model to the given input, in the scope of the current process. 89 | """ 90 | @spec run(t(), ExVision.Model.input_t()) :: output_t() | [output_t()] 91 | defdelegate run(model, input), to: ExVision.Model 92 | 93 | @doc """ 94 | Submits the input for inference to the process running the Nx.Serving for this model. 95 | """ 96 | @impl true 97 | @spec batched_run(atom(), ExVision.Model.input_t()) :: output_t() | [output_t()] 98 | def batched_run(name \\ __MODULE__, input), do: ExVision.Model.batched_run(name, input) 99 | 100 | @impl true 101 | @spec child_spec(keyword()) :: Supervisor.child_spec() 102 | def child_spec(options \\ []) do 103 | {child_spec_opts, start_link_options} = Keyword.split(options, [:id]) 104 | child_spec_opts = Keyword.validate!(child_spec_opts, id: __MODULE__) 105 | 106 | %{ 107 | id: child_spec_opts[:id], 108 | start: {__MODULE__, :start_link, [start_link_options]} 109 | } 110 | end 111 | 112 | defoverridable run: 2, 113 | batched_run: 2, 114 | child_spec: 1, 115 | child_spec: 0, 116 | start_link: 0, 117 | start_link: 1 118 | end 119 | end 120 | end 121 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Mixfile do 2 | use Mix.Project 3 | 4 | @version "0.4.0" 5 | @github_url "https://github.com/software-mansion-labs/ex_vision/" 6 | 7 | def project do 8 | [ 9 | app: :ex_vision, 10 | version: @version, 11 | elixir: "~> 1.16", 12 | elixirc_paths: elixirc_paths(Mix.env()), 13 | start_permanent:
Mix.env() == :prod, 14 | deps: deps(), 15 | dialyzer: dialyzer(), 16 | 17 | # hex 18 | description: "A collection of ONNX vision AI models with wrappers based on Ortex", 19 | package: package(), 20 | 21 | # docs 22 | name: "Ex Vision", 23 | source_url: @github_url, 24 | docs: docs(), 25 | homepage_url: "https://hexdocs.pm/ex_vision" 26 | ] 27 | end 28 | 29 | def application do 30 | [ 31 | included_applications: [:ex_vision], 32 | mod: {ExVision, []}, 33 | extra_applications: [] 34 | ] 35 | end 36 | 37 | defp elixirc_paths(:test), do: ["lib", "test/support"] 38 | defp elixirc_paths(_env), do: ["lib"] 39 | 40 | defp deps do 41 | [ 42 | # TODO: change the `>= 0.0.0` dependencies to concrete versions 43 | {:nx, ">= 0.0.0"}, 44 | {:ortex, ">= 0.0.0"}, 45 | {:nx_image, "~> 0.1.2"}, 46 | {:bunch, "~> 1.6", runtime: false}, 47 | {:axon, "~> 0.6.1"}, 48 | {:exla, ">= 0.0.0"}, 49 | {:image, ">= 0.0.0"}, 50 | {:req, ">= 0.0.0"}, 51 | {:mimic, "~> 1.7", only: :test}, 52 | {:ex_doc, ">= 0.0.0", only: :dev, runtime: false}, 53 | {:dialyxir, ">= 0.0.0", only: :dev, runtime: false}, 54 | {:credo, ">= 0.0.0", only: [:dev, :test], runtime: false} 55 | ] 56 | end 57 | 58 | defp dialyzer() do 59 | opts = [ 60 | flags: [:error_handling] 61 | ] 62 | 63 | if System.get_env("CI") == "true" do 64 | # Store PLTs in cacheable directory for CI 65 | [plt_local_path: "priv/plts", plt_core_path: "priv/plts"] ++ opts 66 | else 67 | opts 68 | end 69 | end 70 | 71 | defp package do 72 | [ 73 | maintainers: ["Software Mansion"], 74 | licenses: ["Apache-2.0"], 75 | links: %{ 76 | "GitHub" => @github_url, 77 | "Software Mansion" => "https://www.swmansion.com" 78 | } 79 | ] 80 | end 81 | 82 | @tutorials Path.wildcard("examples/*.livemd") 83 | defp docs do 84 | [ 85 | main: "readme", 86 | extras: [ 87 | "README.md", 88 | "LICENSE" 89 | | @tutorials 90 | ], 91 | groups_for_extras: [ 92 | Tutorials: @tutorials 93 | ], 94 | groups_for_modules: [ 95 | Models: [ 96 | ExVision.Classification.MobileNetV3Small, 
97 | ExVision.Classification.EfficientNet_V2_S, 98 | ExVision.Classification.EfficientNet_V2_M, 99 | ExVision.Classification.EfficientNet_V2_L, 100 | ExVision.Classification.SqueezeNet1_1, 101 | ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3, 102 | ExVision.StyleTransfer.Candy, 103 | ExVision.StyleTransfer.CandyFast, 104 | ExVision.StyleTransfer.Udnie, 105 | ExVision.StyleTransfer.UdnieFast, 106 | ExVision.StyleTransfer.Mosaic, 107 | ExVision.StyleTransfer.MosaicFast, 108 | ExVision.StyleTransfer.Princess, 109 | ExVision.StyleTransfer.PrincessFast, 110 | ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2, 111 | ExVision.ObjectDetection.Ssdlite320_MobileNetv3, 112 | ExVision.ObjectDetection.FasterRCNN_ResNet50_FPN, 113 | ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPN 114 | ], 115 | Types: [ 116 | ExVision.Types, 117 | ExVision.Types.BBox, 118 | ExVision.Types.BBoxWithKeypoints, 119 | ExVision.Types.BBoxWithMask, 120 | ExVision.Types.ImageMetadata 121 | ], 122 | "Protocols and Behaviours": [ 123 | ExVision.Model, 124 | ExVision.Model.Definition, 125 | ExVision.Model.Definition.Ortex 126 | ] 127 | ], 128 | nest_modules_by_prefix: [ 129 | ExVision.Model, 130 | ExVision.Model.Definition, 131 | ExVision.Types, 132 | ExVision.Classification, 133 | ExVision.SemanticSegmentation, 134 | ExVision.StyleTransfer, 135 | ExVision.InstanceSegmentation, 136 | ExVision.ObjectDetection, 137 | ExVision.KeypointDetection 138 | ], 139 | formatters: ["html"], 140 | source_ref: "v#{@version}" 141 | ] 142 | end 143 | end 144 | -------------------------------------------------------------------------------- /examples/2-usage-as-nx-serving.livemd: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Usage in production - process workflow 4 | 5 | ```elixir 6 | Mix.install( 7 | [ 8 | :ex_vision, 9 | :exla, 10 | :kino, 11 | :nx, 12 | :kino_bumblebee 13 | ], 14 | config: [nx: [default_backend: EXLA.Backend]] 15 | ) 16 | ``` 17 | 18 | ## A 
word of introduction - what problem are we solving? 19 | 20 | Deploying an AI model in a production environment can be quite difficult to get right. In order to ensure efficient resource usage and high throughput, one needs to consider the following: 21 | 22 | * creating a cluster of GPU enabled machines, effectively creating an AI-microservice. That comes with all of the associated challenges of service discovery and API implementation 23 | * Even if the cluster is not necessary, most of the time running one model instance per user is not a viable option, as loading the model takes a long time and that approach wastes a lot of potential of your hardware 24 | * intelligently batching requests from different sources, to get the most out of your GPU's concurrency potential, while also preventing the delay from mounting up while waiting for other requests to complete the batch 25 | * Critical error handling 26 | 27 | ### The solution 28 | 29 | Fortunately, the Elixir ecosystem features an amazing, prebuilt solution to most of these problems in the form of [`Nx.Serving`](https://hexdocs.pm/nx/Nx.Serving.html). ExVision's models are all implemented using `Nx.Serving` underneath. In fact, our `ExVision.Model.run/2` and `ExVision.Model.batched_run/2` make use of the matching `Nx.Serving.run/2` and `Nx.Serving.batched_run/2` respectively. 30 | 31 | This approach allows us to make use of the built-in intelligent batching and the ability to be run as a standalone process provided by `Nx.Serving` out of the box. 32 | 33 | In fact, we even expose `ExVision.Model.as_serving/1` that will extract the ExVision internal struct and expose the underlying `Nx.Serving`. 34 | 35 | ## Basic usage example 36 | 37 | In this section, we will showcase running ExVision's models in the process workflow, but we will not attempt to explain every single detail of `Nx.Serving`, as this part of ExVision's API is just a thin convenience wrapper on top of it.
38 | 39 | If you want to dig deeper, we would encourage consulting the [`Nx.Serving` stateful/process workflow documentation](https://hexdocs.pm/nx/Nx.Serving.html#module-stateful-process-workflow). 40 | 41 | ### What we're building 42 | 43 | In this example we will build a simple interactive app performing the classification of the uploaded image. 44 | 45 | ### Starting the model using the process workflow 46 | 47 | In order to start the model process, just add it to your supervision tree. It is recommended that this process is started somewhere at the top of the tree. For all available options, please refer to the Nx documentation on [`Nx.Serving.start_link/1`](https://hexdocs.pm/nx/Nx.Serving.html#start_link/1). 48 | 49 | If not explicitly provided, ExVision models will by default take their module name as the process name. 50 | 51 | ```elixir 52 | alias ExVision.Classification.MobileNetV3Small, as: Model 53 | 54 | children = [ 55 | {Model, batch_size: 8, batch_timeout: 500} 56 | ] 57 | 58 | {:ok, _pid} = Supervisor.start_link(children, strategy: :one_for_one) 59 | Kino.nothing() 60 | ``` 61 | 62 | And just like that, our model is now ready and available for inference for our entire cluster. We can call on it like this: 63 | 64 | 65 | 66 | ```elixir 67 | input = Nx.iota({3, 1920, 1280}) 68 | Model.batched_run(input) 69 | ``` 70 | 71 | The only difference when compared to an inline workflow, from the perspective of the user, is the necessity to use `batched_run/2` instead of `run/2`. 72 | 73 | This time we didn't need to provide the model argument for `batched_run/2`. 74 | That is because we didn't specify the `:name` option when adding our model to the supervision tree and we relied on the default name assigned by ExVision, which is the name of the model's module.
75 | If you assigned a custom name to the model, you can pass it as the first argument to `batched_run/2`: 76 | 77 | 78 | 79 | ```elixir 80 | Model.batched_run(MyModel, input) 81 | ``` 82 | 83 | ## Creating an example app 84 | 85 | Now that we have a model instantiated and we know how to call on it, let's create an example app performing image classification. 86 | 87 | We will make use of the [`Kino`](https://github.com/livebook-dev/kino) library to read the input image and to display the classification results. 88 | 89 | ```elixir 90 | form = Kino.Control.form([image: Kino.Input.image("Image", format: :jpeg)], submit: "Submit") 91 | frame = Kino.Frame.new() 92 | 93 | Kino.listen(form, fn %{data: %{image: %{file_ref: ref}}, origin: origin} -> 94 | input = Kino.Input.file_path(ref) 95 | result = Model.batched_run(input) 96 | 97 | result 98 | |> Enum.sort_by(fn {_label, score} -> score end, :desc) 99 | |> Enum.take(10) 100 | |> Kino.Bumblebee.ScoredList.new() 101 | |> then(&Kino.Frame.render(frame, &1, to: origin)) 102 | |> dbg() 103 | end) 104 | 105 | Kino.Layout.grid([form, frame], columns: 2) 106 | ``` 107 | 108 | ## Next steps 109 | 110 | After completing this tutorial you can check out our [Using ExVision with Membrane](3-membrane.livemd) tutorial. 111 | -------------------------------------------------------------------------------- /lib/ex_vision/model/definition/ortex.ex: -------------------------------------------------------------------------------- 1 | defmodule ExVision.Model.Definition.Ortex do 2 | @moduledoc """ 3 | A generic implementation of the `ExVision.Model.Definition` for Ortex based models. 4 | """ 5 | 6 | # TODO: improve the documentation here 7 | 8 | require Logger 9 | 10 | alias ExVision.Types.ImageMetadata 11 | 12 | @doc """ 13 | A callback used to apply preprocessing for your model. 14 | 15 | The requirements for that will differ depending on the model used.
16 | """ 17 | @callback preprocessing(Nx.Tensor.t(), ImageMetadata.t()) :: Nx.Tensor.t() 18 | 19 | @doc """ 20 | A callback used to apply postprocessing to the output of the ONNX model. 21 | 22 | In this callback, you should transform the output to match your desired format. 23 | """ 24 | @callback postprocessing(map(), ImageMetadata.t()) :: ExVision.Model.output_t() 25 | 26 | @typedoc """ 27 | A type describing ONNX provider that can be used with ExVision. 28 | 29 | For some providers, it may be necessary to use the local version of `libonnxruntime` and provide some configuration option. 30 | For details, please consult [Ortex documentaiton](https://hexdocs.pm/ortex/Ortex.html#load/3) 31 | """ 32 | @type provider_t() :: :cpu | :coreml | :cpu 33 | 34 | @typedoc """ 35 | A type describing all options possible to use with the default implementation of the `load/0` function. 36 | 37 | - `:cache_path` - specifies a caching directory for this model. 38 | - `:providers` - a list of desired providers, sorted by preference. Onnx will attempt to use the first available provider. If none of the provided is available, onnx will fallback to `:cpu`. Default: `[:cpu]` 39 | - `:batch_size` - specifies a default batch size for this instance. Default: `1`. 
40 | """ 41 | @type load_option_t() :: 42 | {:cache_path, Path.t()} 43 | | {:providers, [provider_t()]} 44 | | {:batch_size, pos_integer()} 45 | 46 | defmacrop get_client_preprocessing(module) do 47 | quote do 48 | fn input -> 49 | images = ExVision.Utils.load_image(input) 50 | 51 | metadata = 52 | Enum.map( 53 | images, 54 | &%ExVision.Types.ImageMetadata{ 55 | original_size: ExVision.Utils.image_size(&1) 56 | } 57 | ) 58 | 59 | batch = 60 | images 61 | |> Enum.zip(metadata) 62 | |> Enum.map(fn {image, metadata} -> unquote(module).preprocessing(image, metadata) end) 63 | |> Nx.Batch.stack() 64 | 65 | {batch, metadata} 66 | end 67 | end 68 | end 69 | 70 | defmacrop get_client_postprocessing(module, output_names) do 71 | quote do 72 | fn {result, _server_metadata}, metadata -> 73 | result 74 | |> split_onnx_result(unquote(output_names)) 75 | |> Enum.zip(metadata) 76 | |> Enum.map(fn {result, metadata} -> unquote(module).postprocessing(result, metadata) end) 77 | end 78 | end 79 | end 80 | 81 | @doc """ 82 | Loads the ONNX model and attaches the `Nx.Serving` to callbacks defined in the module 83 | """ 84 | @spec load_ortex_model(module(), Path.t(), [load_option_t()]) :: 85 | {:ok, ExVision.Model.t()} | {:error, atom()} 86 | def load_ortex_model(module, model_path, options) do 87 | with {:ok, options} <- 88 | Keyword.validate(options, 89 | batch_size: 1, 90 | providers: [:cpu] 91 | ), 92 | {:ok, path} <- ExVision.Cache.lazy_get(ExVision.Cache, model_path), 93 | {:ok, model} <- do_load_model(path, options[:providers]) do 94 | output_names = ExVision.Utils.onnx_output_names(model) 95 | 96 | model 97 | |> then(&Nx.Serving.new(Ortex.Serving, &1)) 98 | |> Nx.Serving.batch_size(options[:batch_size]) 99 | |> Nx.Serving.client_preprocessing(get_client_preprocessing(module)) 100 | |> Nx.Serving.client_postprocessing(get_client_postprocessing(module, output_names)) 101 | |> then(&{:ok, struct!(module, serving: &1)}) 102 | end 103 | end 104 | 105 | defp do_load_model(path, 
providers) do 106 | try do 107 | {:ok, Ortex.load(path, providers)} 108 | rescue 109 | e in RuntimeError -> 110 | require Logger 111 | Logger.error("Failed to load model from `#{inspect(path)}` due to #{inspect(e)}") 112 | {:error, :onnx_load_failure} 113 | end 114 | end 115 | 116 | defp split_onnx_result(tuple, outputs) do 117 | tuple 118 | |> Tuple.to_list() 119 | |> Enum.map(fn x -> 120 | # Do a backend transfer and also return a list of batches here 121 | x |> Nx.backend_transfer() |> Nx.to_batched(1) 122 | end) 123 | |> Enum.zip() 124 | |> Enum.map(fn parts -> 125 | parts |> Tuple.to_list() |> then(&Enum.zip(outputs, &1)) |> Enum.into(%{}) 126 | end) 127 | end 128 | 129 | @type using_option_t() :: {:base_dir, Path.t()} | {:name, String.t()} 130 | @spec __using__([using_option_t()]) :: Macro.t() 131 | defmacro __using__(opts) do 132 | {opts, generic_opts} = Keyword.split(opts, [:model]) 133 | opts = Keyword.validate!(opts, [:model]) 134 | model_path = Keyword.fetch!(opts, :model) 135 | 136 | quote do 137 | use ExVision.Model.Definition, unquote(generic_opts) 138 | @behaviour ExVision.Model.Definition.Ortex 139 | 140 | @doc """ 141 | Creates the model instance 142 | """ 143 | @impl true 144 | @spec load([ExVision.Model.Definition.Ortex.load_option_t()]) :: 145 | {:ok, t()} | {:error, reason :: atom()} 146 | def load(options \\ []) do 147 | default_model_load(options) 148 | end 149 | 150 | defp default_model_load(options) do 151 | ExVision.Model.Definition.Ortex.load_ortex_model(__MODULE__, unquote(model_path), options) 152 | end 153 | 154 | @impl true 155 | def postprocessing(result, _metdata), do: result 156 | 157 | @impl true 158 | def preprocessing(image, _metadata), do: image 159 | 160 | defoverridable load: 0, load: 1, preprocessing: 2, postprocessing: 2 161 | end 162 | end 163 | end 164 | -------------------------------------------------------------------------------- /lib/ex_vision/utils.ex: 
--------------------------------------------------------------------------------
defmodule ExVision.Utils do
  @moduledoc false

  require Nx
  require Image
  alias ExVision.Types

  @type channel_spec_t() :: :first | :last
  @type pixel_size_t() :: 8 | 16 | 32 | 64
  @type pixel_type_t() :: {:u | :f, pixel_size_t()}
  @type load_image_option_t() ::
          {:pixel_type, pixel_type_t()}
          | {:channel_spec, channel_spec_t()}

  # Loads the input (a path, a `Vix.Vips.Image`, a tensor, a `Nx.Batch` or a list of those)
  # into a flat list of rank 3 tensors, converted to the requested pixel type
  # (default: `{:f, 32}`) and channel layout (default: `:first`).
  @spec load_image(ExVision.Model.input_t(), [load_image_option_t()]) :: [Nx.Tensor.t()]
  def load_image(image, options \\ []) do
    options = Keyword.validate!(options, pixel_type: {:f, 32}, channel_spec: :first)

    image
    |> read_image()
    |> List.flatten()
    |> Enum.map(fn tensor ->
      tensor
      |> convert_pixel_type(options[:pixel_type])
      |> convert_channel_spec(options[:channel_spec])
    end)
  end

  # Transposes a rank 3 image tensor so that its channel axis is `:first` or `:last`.
  # The tensor is returned untouched when it already uses the `target` layout.
  @spec convert_channel_spec(Nx.Tensor.t(), channel_spec_t()) :: Nx.Tensor.t()
  def convert_channel_spec(tensor, target) do
    current_spec = guess_channel_spec(tensor)

    cond do
      current_spec == target -> tensor
      target == :first -> Nx.transpose(tensor, axes: [2, 0, 1])
      target == :last -> Nx.transpose(tensor, axes: [1, 2, 0])
    end
  end

  # Infers the layout from the position of the axis of size 3.
  # Channel-first patterns are tried first, so they win for ambiguous shapes like `{3, 3, 3}`.
  @spec guess_channel_spec(Nx.Tensor.t()) :: channel_spec_t()
  defp guess_channel_spec(tensor) do
    case Nx.shape(tensor) do
      {3, _w, _h} -> :first
      {_batch, 3, _w, _h} -> :first
      {_w, _h, 3} -> :last
      {_batch, _w, _h, 3} -> :last
      shape -> raise "Failed to infer channel spec for shape #{inspect(shape)}"
    end
  end

  # Converts the tensor to the given pixel type.
  # Unsigned integers are treated as values in 0..255 and floats as values in 0..1, so
  # crossing between the two families rescales by 255. `nil` leaves the tensor untouched.
  @spec convert_pixel_type(Nx.Tensor.t(), pixel_type_t() | nil) :: Nx.Tensor.t()
  def convert_pixel_type(tensor, {:f, _size} = target) do
    case Nx.type(tensor) do
      {:f, _} -> Nx.as_type(tensor, target)
      {:u, _} -> tensor |> Nx.divide(255) |> Nx.as_type(target)
    end
  end

  def convert_pixel_type(tensor, {:u, _size} = target) do
    case Nx.type(tensor) do
      ^target ->
        tensor

      {:u, _size} ->
        Nx.as_type(tensor, target)

      {:f, _size} ->
        # Cast explicitly instead of recursing: the product is still a float, so a recursive
        # call would take this very branch again and never terminate.
        # Rounding first avoids truncating values such as 254.9999 down to 254.
        tensor |> Nx.multiply(255) |> Nx.round() |> Nx.as_type(target)
    end
  end

  def convert_pixel_type(tensor, nil), do: tensor

  # Normalizes every supported input into a (possibly nested) list of rank 3 tensors.
  # `load_image/2` is responsible for flattening the result.
  @spec read_image(ExVision.Model.input_t()) :: [Nx.Tensor.t()]
  defp read_image(%Nx.Batch{} = batch), do: read_image(batch.stack)

  defp read_image(list) when is_list(list) do
    Enum.map(list, &read_image/1)
  end

  defp read_image(%Vix.Vips.Image{} = image) do
    image |> Image.to_nx!() |> read_image()
  end

  defp read_image(x) when Nx.is_tensor(x) do
    ensure_rank_3(x)
  end

  defp read_image(x) when is_binary(x) do
    x |> Image.open!() |> read_image()
  end

  # Splits a rank 4 (batched) tensor into a list of rank 3 tensors
  # and wraps a rank 3 tensor into a single element list
  defp ensure_rank_3(tensor) do
    case Nx.rank(tensor) do
      3 -> [tensor]
      4 -> tensor |> Nx.to_batched(1) |> Enum.map(&Nx.squeeze(&1, axes: [0]))
      other -> raise "Received unexpected tensor of rank #{other}"
    end
  end

  # Resizes the image: a number resizes the shorter edge (`NxImage.resize_short/3`),
  # while a tuple is passed to `NxImage.resize/3` as the exact target size
  @type resize_spec_t() :: number() | Types.image_size_t()
  @spec resize(Nx.Tensor.t(), resize_spec_t()) :: Nx.Tensor.t()
  def resize(tensor, size) when is_number(size) do
    NxImage.resize_short(tensor, size, channels: guess_channel_spec(tensor))
  end

  def resize(tensor, size) when is_tuple(size) do
    NxImage.resize(tensor, size, channels: guess_channel_spec(tensor))
  end

  # Returns the size of the image as `{height, width}`.
  # For tensors those are the two non-channel axes, in the order they appear in the shape.
  @spec image_size(Vix.Vips.Image.t() | Nx.Tensor.t()) :: Types.image_size_t()
  def image_size(%Vix.Vips.Image{} = image), do: {Image.height(image), Image.width(image)}

  def image_size(t) when Nx.is_tensor(t) do
    case t |> Nx.squeeze() |> Nx.shape() do
      {3, h, w} -> {h, w}
      {h, w, 3} -> {h, w}
    end
  end

  # Reads a JSON array of category names and normalizes each of them into an atom
  @spec load_categories(Path.t()) :: [atom()]
  def load_categories(path) do
    path
    |> File.read!()
    |> Jason.decode!()
    |> Enum.map(&normalize_category_name/1)
  end

  # Downcases the name and replaces spaces, apostrophes and dashes with underscores
  @spec normalize_category_name(String.t()) :: atom()
  def normalize_category_name(name),
    do: name |> String.downcase() |> String.replace(~r(\ |\'|\-), "_") |> String.to_atom()

  @spec onnx_result_backend_transfer(tuple()) :: tuple()
  def onnx_result_backend_transfer(tuple),
    do: tuple |> Tuple.to_list() |> Enum.map(&Nx.backend_transfer/1) |> List.to_tuple()

  # Returns the shape of the model input named "input".
  # `Ortex.Native.show_session/1` is destructured the same way as in `onnx_output_names/1`:
  # a `{inputs, outputs}` tuple whose entries are `{name, type, shape}` tuples.
  @spec onnx_input_shape(Ortex.Model.t()) :: tuple()
  def onnx_input_shape(%Ortex.Model{reference: r}) do
    {inputs, _outputs} = Ortex.Native.show_session(r)

    {"input", "Float32", shape} =
      Enum.find(inputs, fn {name, _type, _shape} -> name == "input" end)

    List.to_tuple(shape)
  end

  # Returns the names of all model outputs, in the order reported by the ONNX session
  @spec onnx_output_names(Ortex.Model.t()) :: [String.t()]
  def onnx_output_names(%Ortex.Model{reference: r}) do
    {_inputs, outputs} = Ortex.Native.show_session(r)

    Enum.map(outputs, fn {name, _type, _shape} -> name end)
  end

  # Runs the inference on the serving process registered as `process_name`.
  # A list yields a list of results, anything else is wrapped and yields a single result.
  @spec batched_run(atom(), ExVision.Model.input_t()) :: ExVision.Model.output_t()
  def batched_run(process_name, input) when is_list(input) do
    Nx.Serving.batched_run(process_name, input)
  end

  def batched_run(process_name, input) do
    process_name |> batched_run([input]) |> hd()
  end

  # Drops the leading axis of `bbox`, multiplies it by `scales` and returns rounded integers
  @spec scale_and_listify_bbox(Nx.Tensor.t(), Nx.Tensor.t()) :: [integer()]
  def scale_and_listify_bbox(bbox, scales) do
    bbox
    |> Nx.squeeze(axes: [0])
    |> Nx.multiply(scales)
    |> Nx.round()
    |> Nx.as_type(:s64)
    |> Nx.to_list()
  end

  @spec squeeze_and_listify(Nx.Tensor.t()) :: [number()]
  def squeeze_and_listify(batched_value) do
    batched_value |> Nx.squeeze(axes: [0]) |> Nx.to_list()
  end
end
-------------------------------------------------------------------------------- /lib/ex_vision/cache.ex: 
--------------------------------------------------------------------------------
defmodule ExVision.Cache do
  @moduledoc false
  # Module responsible for handling model file caching
  #
  # State:
  #   * `downloads` - maps a relative file path to the set of callers awaiting that file
  #   * `refs` - maps the reference of a running download task to the relative file path
  #   * `server_url`, `cache_path` - where to download from and where to store the files

  use GenServer
  require Logger

  @type lazy_get_option_t() :: {:force, boolean()}

  @doc """
  Lazily evaluate the path from the cache directory.
  It will only download the file if it's missing or the `force: true` option is given.

  Blocks until the file is available. If a download of the same file is already
  in progress, the caller is queued and receives the result of that download.
  """
  @spec lazy_get(term() | pid(), Path.t(), options :: [lazy_get_option_t()]) ::
          {:ok, Path.t()} | {:error, reason :: atom()}
  def lazy_get(server, path, options \\ []) do
    with {:ok, options} <- Keyword.validate(options, force: false),
         do: GenServer.call(server, {:download, path, options}, :infinity)
  end

  @spec start_link(keyword()) :: GenServer.on_start()
  def start_link(opts) do
    {init_args, opts} = Keyword.split(opts, [:server_url, :cache_path])
    GenServer.start_link(__MODULE__, init_args, opts)
  end

  @impl true
  def init(opts) do
    opts = Keyword.validate!(opts, cache_path: get_cache_path(), server_url: get_server_url())

    with {:ok, server_url} <- URI.new(opts[:server_url]),
         :ok <- File.mkdir_p(opts[:cache_path]) do
      {:ok,
       %{
         downloads: %{},
         server_url: server_url,
         cache_path: opts[:cache_path],
         refs: %{}
       }}
    end
  end

  @impl true
  def handle_call({:download, cache_path, options}, from, state) do
    file_path = Path.join(state.cache_path, cache_path)

    updated_downloads =
      Map.update(state.downloads, cache_path, MapSet.new([from]), &MapSet.put(&1, from))

    cond do
      # The file is being downloaded already: queue the caller, it will be answered by `emit/3`
      Map.has_key?(state.downloads, cache_path) ->
        {:noreply, %{state | downloads: updated_downloads}}

      # Serve from the cache only if the file is there AND the caller did not force a download.
      # (This used to be `exists? or force`, which answered with a possibly non-existent
      # path and never re-downloaded anything when `force: true` was given.)
      File.exists?(file_path) && !options[:force] ->
        {:reply, {:ok, file_path}, state}

      true ->
        ref = do_create_download_job(cache_path, state)

        {:noreply,
         %{state | downloads: updated_downloads, refs: Map.put(state.refs, ref, cache_path)}}
    end
  end

  # Result of a download task
  @impl true
  def handle_info({ref, result}, state) when is_reference(ref) do
    # The task has delivered its result, so its `:DOWN` notification is no longer needed
    Process.demonitor(ref, [:flush])
    {:noreply, emit(result, ref, state)}
  end

  def handle_info({:DOWN, _ref, :process, _pid, :normal}, state) do
    {:noreply, state}
  end

  def handle_info({:DOWN, ref, :process, _pid, reason}, state) do
    Logger.error("Task #{inspect(ref)} has crashed due to #{inspect(reason)}")
    {:noreply, emit({:error, reason}, ref, state)}
  end

  def handle_info(msg, state) do
    Logger.warning("Received an unknown message #{inspect(msg)}. Ignoring")
    {:noreply, state}
  end

  # Replies with `message` to everyone awaiting the file downloaded by the task `ref`
  # and forgets both the task and the download. Unknown references are a no-op.
  defp emit(message, ref, state) do
    path = state.refs[ref]

    state.downloads
    |> Map.get(path, [])
    |> Enum.each(&GenServer.reply(&1, message))

    %{state | refs: Map.delete(state.refs, ref), downloads: Map.delete(state.downloads, path)}
  end

  # Starts the download in a separate task and returns the reference of that task.
  # NOTE(review): `Task.async/1` links the task to this process, so unless exits are
  # trapped, a crashing task takes the cache server down with it - confirm this is intended.
  defp do_create_download_job(path, %{server_url: server_url, cache_path: cache_path}) do
    target_file_path = Path.join(cache_path, path)
    download_url = URI.append_path(server_url, ensure_leading_slash(path))

    %Task{ref: ref} = Task.async(fn -> download_file(download_url, target_file_path) end)

    ref
  end

  @default_cache_path Application.compile_env(:ex_vision, :cache_path, "/tmp/ex_vision/cache")
  defp get_cache_path() do
    Application.get_env(:ex_vision, :cache_path, @default_cache_path)
  end

  @default_server_url Application.compile_env(
                        :ex_vision,
                        :server_url,
                        URI.new!("https://ai.swmansion.com/exvision/files")
                      )
  defp get_server_url() do
    Application.get_env(:ex_vision, :server_url, @default_server_url)
  end

  # Downloads the file to `<cache_path>.unconfirmed` and renames it to `cache_path` once
  # the download has succeeded, so that a partial download is never taken for a cached file
  @spec download_file(URI.t(), Path.t()) ::
          {:ok, Path.t()} | {:error, reason :: any()}
  defp download_file(url, cache_path) do
    with :ok <- cache_path |> Path.dirname() |> File.mkdir_p(),
         tmp_file_path = cache_path <> ".unconfirmed",
         tmp_file = File.stream!(tmp_file_path),
         :ok <- do_download_file(url, tmp_file),
         :ok <- validate_download(tmp_file_path),
         :ok <- File.rename(tmp_file_path, cache_path) do
      {:ok, cache_path}
    end
  end

  # `URI.append_path/2` requires the appended path to start with a slash
  defp ensure_leading_slash("/" <> _rest = path), do: path
  defp ensure_leading_slash(path), do: "/" <> path

  defp validate_download(path) do
    if File.exists?(path),
      do: :ok,
      else: {:error, :download_failed}
  end

  # Streams the response body into `target_file`. On failure, the target file is removed.
  @spec do_download_file(URI.t(), File.Stream.t()) :: :ok | {:error, reason :: any()}
  defp do_download_file(%URI{} = url, %File.Stream{path: target_file_path} = target_file) do
    Logger.debug("Downloading file from `#{url}` and saving to `#{target_file_path}`")

    case make_get_request(url, raw: true, into: target_file) do
      {:ok, _resp} ->
        :ok

      {:error, reason} = error ->
        Logger.error("Failed to download the file due to #{inspect(reason)}")
        # Best effort cleanup: the file may not have been created at all
        File.rm(target_file_path)
        error
    end
  end

  # Performs the GET request, treating every response other than 200 as an error
  defp make_get_request(url, options) do
    case Req.get(url, options) do
      {:ok, %Req.Response{status: 200}} = resp ->
        resp

      {:ok, %Req.Response{status: 404}} ->
        {:error, :doesnt_exist}

      {:ok, %Req.Response{status: status}} ->
        Logger.warning("Request has failed with status #{status}")
        {:error, :server_error}

      {:error, %Mint.TransportError{reason: reason}} ->
        {:error, reason}

      {:error, _error} ->
        {:error, :connection_failed}
    end
  end
end
-------------------------------------------------------------------------------- /test/ex_vision/utils_test.exs: -------------------------------------------------------------------------------- 1 | 
defmodule ExVision.UtilsTest do
  use ExUnit.Case, async: true
  alias ExVision.Utils

  @img_path Path.join(__DIR__, "../assets/cat.jpg")
  @categories_path Path.join(__DIR__, "../assets/categories.json")

  # `Nx.equal/2` returns a tensor and a tensor is always truthy, so `assert Nx.equal(a, b)`
  # can never fail. The element-wise result has to be reduced to a number first.
  defp assert_tensors_equal(left, right) do
    assert Nx.shape(left) == Nx.shape(right)

    all_equal = left |> Nx.equal(right) |> Nx.all() |> Nx.to_number()
    assert all_equal == 1
  end

  describe "load_image/1 loads from" do
    test "path" do
      assert [img] = Utils.load_image(@img_path)
      assert Nx.shape(img) == {3, 360, 543}
      assert Nx.type(img) == {:f, 32}
    end

    test ":image library image" do
      img = Image.open!(@img_path)
      assert [img_from_image] = Utils.load_image(img)
      assert [img_from_path] = Utils.load_image(@img_path)
      assert_tensors_equal(img_from_image, img_from_path)
    end

    test "Nx.Tensor" do
      tensor = @img_path |> Image.open!() |> Image.to_nx!(shape: :hwc)
      assert [img_from_tensor] = Utils.load_image(tensor)
      assert [img_from_path] = Utils.load_image(@img_path)
      assert_tensors_equal(img_from_tensor, img_from_path)
    end

    test "batched Nx.Tensor" do
      tensor =
        @img_path
        |> Image.open!()
        |> Image.to_nx!(shape: :hwc)
        |> Stream.duplicate(2)
        |> Enum.to_list()
        |> Nx.stack()

      tensor
      |> Utils.load_image()
      |> Enum.each(fn img -> assert Nx.shape(img) == {3, 360, 543} end)
    end

    test "list of differently sized tensors" do
      input = [
        Nx.iota({3, 10, 20}, type: :f32),
        Nx.iota({3, 20, 10}, type: :f32)
      ]

      input
      |> Utils.load_image()
      |> Enum.zip(input)
      |> Enum.each(fn {loaded, original} -> assert_tensors_equal(loaded, original) end)
    end

    test "list of tensors" do
      tensor =
        @img_path
        |> Image.open!()
        |> Image.to_nx!(shape: :hwc)
        |> Stream.duplicate(2)
        |> Enum.to_list()

      tensor
      |> Utils.load_image()
      |> Enum.each(fn img -> assert Nx.shape(img) == {3, 360, 543} end)
    end

    test "list of paths" do
      assert [a, b] = img = Utils.load_image([@img_path, @img_path])

      Enum.each(img, fn img ->
        assert Nx.shape(img) == {3, 360, 543}
        assert Nx.type(img) == {:f, 32}
      end)

      assert_tensors_equal(a, b)
    end

    test "Nx.Batch" do
      tensor =
        @img_path
        |> Image.open!()
        |> Image.to_nx!(shape: :hwc)

      serving = Nx.Serving.new(fn opts -> Nx.Defn.jit(fn a -> a end, opts) end)
      batch = Nx.Batch.stack([tensor, tensor])
      batch = Nx.Serving.run(serving, batch)
      output = Utils.load_image(batch)

      Enum.each(output, fn img ->
        assert Nx.shape(img) == {3, 360, 543}
      end)
    end
  end

  describe "load_image/2 handles option to" do
    test "channel spec change to :last" do
      tensor_first = Nx.iota({3, 128, 256}, type: :f32)
      assert [tensor_last] = Utils.load_image(tensor_first, channel_spec: :last)
      assert Nx.shape(tensor_last) == {128, 256, 3}

      # Loading a tensor that is already channel-last must leave it untouched
      assert [tensor_new_last] = Utils.load_image(tensor_last, channel_spec: :last)
      assert_tensors_equal(tensor_last, tensor_new_last)
    end

    test "channel spec :last for an image loaded from path" do
      assert [img] = Utils.load_image(@img_path, channel_spec: :last)
      assert Nx.shape(img) == {360, 543, 3}
    end

    test "pixel format change" do
      for t <- [{:u, 8}, {:f, 16}] do
        assert [img] = Utils.load_image(@img_path, pixel_type: t)
        assert Nx.type(img) == t, "assertion failed for #{inspect(t)}"
      end
    end
  end

  test "load_categories/1" do
    expected_categories = ~w[
      __background__ person bicycle car motorcycle airplane bus train truck boat
      traffic_light fire_hydrant n/a stop_sign parking_meter bench bird cat dog horse
      sheep cow elephant bear zebra giraffe n/a backpack umbrella n/a
      n/a handbag tie suitcase frisbee skis snowboard sports_ball kite baseball_bat
      baseball_glove skateboard surfboard tennis_racket bottle n/a wine_glass cup fork knife
      spoon bowl banana apple sandwich orange broccoli carrot hot_dog pizza
      donut cake chair couch potted_plant bed n/a dining_table n/a n/a
      toilet n/a tv laptop mouse remote keyboard cell_phone microwave oven
      toaster sink refrigerator n/a book clock vase scissors teddy_bear hair_drier
      toothbrush
    ]a

    assert Utils.load_categories(@categories_path) == expected_categories
  end

  describe "convert_channel_spec/2" do
    test "converts :last to :first" do
      input = Nx.iota({1, 2, 3})
      assert input |> Utils.convert_channel_spec(:first) |> Nx.shape() == {3, 1, 2}
    end

    test "converts :first to :last" do
      input = Nx.iota({3, 1, 2})
      assert input |> Utils.convert_channel_spec(:last) |> Nx.shape() == {1, 2, 3}
    end
  end
end
-------------------------------------------------------------------------------- /.credo.exs: -------------------------------------------------------------------------------- 1 | # This file contains the configuration for Credo and you are probably reading 2 | # this after creating it with `mix credo.gen.config`. 
3 | # 4 | # If you find anything wrong or unclear in this file, please report an 5 | # issue on GitHub: https://github.com/rrrene/credo/issues 6 | # 7 | %{ 8 | # 9 | # You can have as many configs as you like in the `configs:` field. 10 | configs: [ 11 | %{ 12 | # 13 | # Run any config using `mix credo -C `. If no config name is given 14 | # "default" is used. 15 | # 16 | name: "default", 17 | # 18 | # These are the files included in the analysis: 19 | files: %{ 20 | # 21 | # You can give explicit globs or simply directories. 22 | # In the latter case `**/*.{ex,exs}` will be used. 23 | # 24 | included: [ 25 | "lib/", 26 | "src/", 27 | "test/", 28 | "web/", 29 | "apps/*/lib/", 30 | "apps/*/src/", 31 | "apps/*/test/", 32 | "apps/*/web/" 33 | ], 34 | excluded: [~r"/_build/", ~r"/deps/", ~r"/node_modules/"] 35 | }, 36 | # 37 | # Load and configure plugins here: 38 | # 39 | plugins: [], 40 | # 41 | # If you create your own checks, you must specify the source files for 42 | # them here, so they can be loaded by Credo before running the analysis. 43 | # 44 | requires: [], 45 | # 46 | # If you want to enforce a style guide and need a more traditional linting 47 | # experience, you can change `strict` to `true` below: 48 | # 49 | strict: false, 50 | # 51 | # To modify the timeout for parsing files, change this value: 52 | # 53 | parse_timeout: 5000, 54 | # 55 | # If you want to use uncolored output by default, you can change `color` 56 | # to `false` below: 57 | # 58 | color: true, 59 | # 60 | # You can customize the parameters of any check by adding a second element 61 | # to the tuple. 
62 | # 63 | # To disable a check put `false` as second element: 64 | # 65 | # {Credo.Check.Design.DuplicatedCode, false} 66 | # 67 | checks: [ 68 | # 69 | ## Consistency Checks 70 | # 71 | {Credo.Check.Consistency.ExceptionNames, []}, 72 | {Credo.Check.Consistency.LineEndings, []}, 73 | {Credo.Check.Consistency.ParameterPatternMatching, []}, 74 | {Credo.Check.Consistency.SpaceAroundOperators, []}, 75 | {Credo.Check.Consistency.SpaceInParentheses, []}, 76 | {Credo.Check.Consistency.TabsOrSpaces, []}, 77 | 78 | # 79 | ## Design Checks 80 | # 81 | # You can customize the priority of any check 82 | # Priority values are: `low, normal, high, higher` 83 | # 84 | {Credo.Check.Design.AliasUsage, 85 | [priority: :low, if_nested_deeper_than: 2, if_called_more_often_than: 0]}, 86 | # You can also customize the exit_status of each check. 87 | # If you don't want TODO comments to cause `mix credo` to fail, just 88 | # set this value to 0 (zero). 89 | # 90 | {Credo.Check.Design.TagTODO, [exit_status: 0]}, 91 | {Credo.Check.Design.TagFIXME, []}, 92 | 93 | # 94 | ## Readability Checks 95 | # 96 | {Credo.Check.Readability.AliasOrder, [priority: :normal]}, 97 | {Credo.Check.Readability.FunctionNames, []}, 98 | {Credo.Check.Readability.LargeNumbers, []}, 99 | {Credo.Check.Readability.MaxLineLength, [priority: :low, max_length: 120]}, 100 | {Credo.Check.Readability.ModuleAttributeNames, []}, 101 | {Credo.Check.Readability.ModuleDoc, []}, 102 | {Credo.Check.Readability.ModuleNames, false}, 103 | {Credo.Check.Readability.ParenthesesInCondition, []}, 104 | {Credo.Check.Readability.ParenthesesOnZeroArityDefs, parens: true}, 105 | {Credo.Check.Readability.PredicateFunctionNames, []}, 106 | {Credo.Check.Readability.PreferImplicitTry, []}, 107 | {Credo.Check.Readability.RedundantBlankLines, []}, 108 | {Credo.Check.Readability.Semicolons, []}, 109 | {Credo.Check.Readability.SpaceAfterCommas, []}, 110 | {Credo.Check.Readability.StringSigils, []}, 111 | 
{Credo.Check.Readability.TrailingBlankLine, []}, 112 | {Credo.Check.Readability.TrailingWhiteSpace, []}, 113 | {Credo.Check.Readability.UnnecessaryAliasExpansion, []}, 114 | {Credo.Check.Readability.VariableNames, []}, 115 | {Credo.Check.Readability.WithSingleClause, false}, 116 | 117 | # 118 | ## Refactoring Opportunities 119 | # 120 | {Credo.Check.Refactor.CondStatements, []}, 121 | {Credo.Check.Refactor.CyclomaticComplexity, []}, 122 | {Credo.Check.Refactor.FunctionArity, []}, 123 | {Credo.Check.Refactor.LongQuoteBlocks, []}, 124 | {Credo.Check.Refactor.MapInto, false}, 125 | {Credo.Check.Refactor.MatchInCondition, []}, 126 | {Credo.Check.Refactor.NegatedConditionsInUnless, []}, 127 | {Credo.Check.Refactor.NegatedConditionsWithElse, []}, 128 | {Credo.Check.Refactor.Nesting, [max_nesting: 3]}, 129 | {Credo.Check.Refactor.UnlessWithElse, []}, 130 | {Credo.Check.Refactor.WithClauses, []}, 131 | 132 | # 133 | ## Warnings 134 | # 135 | {Credo.Check.Warning.BoolOperationOnSameValues, []}, 136 | {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []}, 137 | {Credo.Check.Warning.IExPry, []}, 138 | {Credo.Check.Warning.IoInspect, []}, 139 | {Credo.Check.Warning.LazyLogging, false}, 140 | {Credo.Check.Warning.MixEnv, []}, 141 | {Credo.Check.Warning.OperationOnSameValues, []}, 142 | {Credo.Check.Warning.OperationWithConstantResult, []}, 143 | {Credo.Check.Warning.RaiseInsideRescue, []}, 144 | {Credo.Check.Warning.UnusedEnumOperation, []}, 145 | {Credo.Check.Warning.UnusedFileOperation, []}, 146 | {Credo.Check.Warning.UnusedKeywordOperation, []}, 147 | {Credo.Check.Warning.UnusedListOperation, []}, 148 | {Credo.Check.Warning.UnusedPathOperation, []}, 149 | {Credo.Check.Warning.UnusedRegexOperation, []}, 150 | {Credo.Check.Warning.UnusedStringOperation, []}, 151 | {Credo.Check.Warning.UnusedTupleOperation, []}, 152 | {Credo.Check.Warning.UnsafeExec, []}, 153 | 154 | # 155 | # Checks scheduled for next check update (opt-in for now, just replace `false` with `[]`) 156 | 157 | # 158 
| # Controversial and experimental checks (opt-in, just replace `false` with `[]`) 159 | # 160 | {Credo.Check.Readability.StrictModuleLayout, 161 | priority: :normal, order: ~w/shortdoc moduledoc behaviour use import require alias/a}, 162 | {Credo.Check.Consistency.MultiAliasImportRequireUse, false}, 163 | {Credo.Check.Consistency.UnusedVariableNames, force: :meaningful}, 164 | {Credo.Check.Design.DuplicatedCode, []}, 165 | {Credo.Check.Readability.AliasAs, false}, 166 | {Credo.Check.Readability.MultiAlias, []}, 167 | {Credo.Check.Readability.Specs, []}, 168 | {Credo.Check.Readability.SinglePipe, false}, 169 | {Credo.Check.Readability.WithCustomTaggedTuple, false}, 170 | {Credo.Check.Refactor.ABCSize, false}, 171 | {Credo.Check.Refactor.AppendSingleItem, false}, 172 | {Credo.Check.Refactor.DoubleBooleanNegation, false}, 173 | {Credo.Check.Refactor.ModuleDependencies, false}, 174 | {Credo.Check.Refactor.NegatedIsNil, false}, 175 | {Credo.Check.Refactor.PipeChainStart, []}, 176 | {Credo.Check.Refactor.VariableRebinding, false}, 177 | {Credo.Check.Warning.LeakyEnvironment, []}, 178 | {Credo.Check.Warning.MapGetUnsafePass, []}, 179 | {Credo.Check.Warning.UnsafeToAtom, false} 180 | 181 | # 182 | # Custom checks can be created using `mix credo.gen.check`. 
183 | # 184 | ] 185 | } 186 | ] 187 | } 188 | -------------------------------------------------------------------------------- /examples/3-membrane.livemd: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Using ExVision with Membrane 4 | 5 | ```elixir 6 | Mix.install( 7 | [ 8 | :ex_vision, 9 | :image, 10 | :membrane_core, 11 | :membrane_file_plugin, 12 | :membrane_flv_plugin, 13 | :membrane_h26x_plugin, 14 | :membrane_h264_ffmpeg_plugin, 15 | :membrane_ffmpeg_swscale_plugin, 16 | {:membrane_mp4_plugin, "~> 0.34.2"}, 17 | :kino, 18 | :kino_membrane 19 | ], 20 | config: [ 21 | nx: [default_backend: EXLA.Backend] 22 | ] 23 | ) 24 | ``` 25 | 26 | ## Introduction 27 | 28 | In this example we will showcase ExVision by integrating it into media processing pipeline using [Membrane Framework](https://membrane.stream). This livebook can be treated as a tutorial on this process. 29 | 30 | ### Prerequisites 31 | 32 | * We will be using [Membrane Framework](https://membrane.stream), so basic familiarity with this framework is highly recommended 33 | * Basic familiarity with ExVision 34 | 35 | ### Contents of this tutorial 36 | 37 | You will learn how to write a [Membrane Filter element](https://membrane.stream/learn/get_started_with_membrane/3) that makes use of one of the ExVision's models, using an example of object detection. 38 | 39 | In particular, we will implement a bird detector. 40 | 41 | ## Integrate with Membrane 42 | 43 | The main part of integrating with Membrane is implementing a Filter - an element which is responsible for applying a transformation on each frame in the stream. 44 | 45 | But before we dive into the code, here are a few tips that will make it both easier to understand and easier to modify for your own usecase: 46 | 47 | * It's useful to constrain an accepted format on input and output pads to `%Membrane.RawVideo{pixel_format: :RGB}`. 
48 | 49 | This format is equivalent to a stream of raw frames in RGB format, which is what most models are trained to accept. By setting this constraint, Membrane will be able to perform a sanity check to highlight errors some obvious errors in the processing pipeline. 50 | 51 | * Model should be loaded in the `handle_setup/2` callback and stored in the element state. 52 | 53 | It may be tempting to initialize the model in `handle_init/2` but it will delay the initialization of the pipeline, as it runs in the pipeline process, not the element process 54 | 55 | ### Writing the Membrane Element 56 | 57 | With that knowledge, let's implement the Membrane Filter that will be responsible for: 58 | 59 | 1. initialization of the detection model 60 | 2. feeding the frames through the detector 61 | 3. Drawing the boxes indicating the detected birds in the resulting image, using the `:image` library 62 | 63 | ```elixir 64 | defmodule Membrane.ExVision.Detector do 65 | use Membrane.Filter 66 | 67 | alias ExVision.ObjectDetection.Ssdlite320_MobileNetv3, as: Model 68 | alias ExVision.Types.BBox 69 | 70 | # Define both input and output pads 71 | # On both, we want to have raw image in RGB 72 | def_input_pad(:input, 73 | accepted_format: %Membrane.RawVideo{pixel_format: :RGB}, 74 | flow_control: :auto 75 | ) 76 | 77 | def_output_pad(:output, 78 | accepted_format: %Membrane.RawVideo{pixel_format: :RGB}, 79 | flow_control: :auto 80 | ) 81 | 82 | defmodule State do 83 | @moduledoc """ 84 | A struct describing the state of the detector element 85 | """ 86 | defstruct [:detector] 87 | 88 | @type t() :: %__MODULE__{ 89 | detector: Model.t() | nil 90 | } 91 | end 92 | 93 | @impl true 94 | def handle_init(_ctx, _opts) do 95 | {[], %State{}} 96 | end 97 | 98 | # Model initialization should be performed in this callback 99 | @impl true 100 | def handle_setup(_ctx, state) do 101 | # due to the quirk in Nx.Serving, all servings need to be registered, 102 | # as it's impossible to make a call 
to Nx.Serving using PID 103 | # Generate a random process name 104 | name = 105 | 10 106 | |> :crypto.strong_rand_bytes() 107 | |> then(&"#{&1}") 108 | |> :base64.encode() 109 | |> String.to_atom() 110 | 111 | {:ok, _pid} = Model.start_link(name: name) 112 | 113 | {[], %State{state | detector: name}} 114 | end 115 | 116 | # The frames will be received in this callback 117 | @impl true 118 | def handle_buffer(:input, buffer, ctx, %State{detector: detector} = state) do 119 | tensor = buffer_to_tensor(buffer, ctx.pads.input.stream_format) 120 | {:ok, image} = Image.from_nx(tensor) 121 | 122 | # Run inference and filter out unlikely bounding boxes 123 | predictions = 124 | detector 125 | |> Model.batched_run(tensor) 126 | # keep only the bounding boxes labelled as birds with a score above 0.3 127 | |> Enum.filter(fn %BBox{score: score, label: label} -> score > 0.3 and label == :bird end) 128 | 129 | # For each bounding box, represent it as a rectangle in the image 130 | image = 131 | Enum.reduce(predictions, image, fn %BBox{} = prediction, image -> 132 | image 133 | |> Image.Draw.rect!( 134 | prediction.x1, 135 | prediction.y1, 136 | BBox.width(prediction), 137 | BBox.height(prediction), 138 | fill: false, 139 | color: :red, 140 | stroke_width: 5 141 | ) 142 | end) 143 | 144 | # Emit the resulting buffer 145 | {[buffer: {:output, fill_buffer_with_image(image, buffer)}], state} 146 | end 147 | 148 | # Utility function that will convert the buffer payload into a {height, width, 3} tensor of u8 values 149 | defp buffer_to_tensor(%Membrane.Buffer{payload: payload}, %Membrane.RawVideo{ 150 | width: w, 151 | height: h 152 | }) do 153 | payload 154 | |> Nx.from_binary(:u8) 155 | |> Nx.reshape({h, w, 3}, names: [:height, :width, :colors]) 156 | end 157 | 158 | # Replaces the payload of the Membrane Buffer with the image contents 159 | # This way, we're maintaining the buffer metadata, ex. 
the timestamps 160 | defp fill_buffer_with_image(image, buffer) do 161 | image |> Image.to_nx!(shape: :hwc) |> Nx.to_binary() |> then(&%{buffer | payload: &1}) 162 | end 163 | end 164 | ``` 165 | 166 | ### Create the processing pipeline 167 | 168 | 169 | 170 | Now that we have a Membrane Filter implemented, the next step is to define a processing pipeline. 171 | 172 | In this case, we will read the video from the file, feed it through our `Detector` element and then transform it back into a video in `.mp4` format. 173 | 174 | The details of this process can be a little complicated. That said, in simple terms, we're going to: 175 | 176 | 1. read the file 177 | 2. Parse the MP4 structure and extract the video from it 178 | 3. Decode the video to achieve raw frames (images) and convert them to RGB 179 | 4. **Apply our `Detector` module** 180 | 5. Encode our images to H264 181 | 6. Put our resulting video into the MP4 container 182 | 7. Save the result to the file 183 | 184 | ```elixir 185 | defmodule Pipeline do 186 | use Membrane.Pipeline 187 | 188 | @impl true 189 | def handle_init(_ctx, {input_file, output_file}) do 190 | structure = 191 | child(%Membrane.File.Source{ 192 | chunk_size: 1024, 193 | location: input_file, 194 | seekable?: true 195 | }) 196 | |> child(:demuxer, %Membrane.MP4.Demuxer.ISOM{optimize_for_non_fast_start?: true}) 197 | |> via_out(Pad.ref(:output, 1)) 198 | |> child(%Membrane.H264.Parser{ 199 | output_stream_structure: :annexb 200 | }) 201 | |> child(Membrane.H264.FFmpeg.Decoder) 202 | |> child(%Membrane.FFmpeg.SWScale.PixelFormatConverter{format: :RGB}) 203 | |> child(Membrane.ExVision.Detector) 204 | |> child(%Membrane.FFmpeg.SWScale.PixelFormatConverter{format: :I420}) 205 | |> child(%Membrane.H264.FFmpeg.Encoder{profile: :baseline}) 206 | |> child(%Membrane.H264.Parser{ 207 | output_stream_structure: :avc1 208 | }) 209 | |> child(Membrane.MP4.Muxer.ISOM) 210 | |> child(:sink, %Membrane.File.Sink{ 211 | location: output_file 212 | }) 213 | 
214 | {[spec: structure], %{}} 215 | end 216 | 217 | # Terminate the process after the processing is finished 218 | @impl true 219 | def handle_element_end_of_stream(:sink, :input, _ctx, state) do 220 | Membrane.Pipeline.terminate(self(), asynchronous?: true) 221 | {[], state} 222 | end 223 | 224 | @impl true 225 | def handle_element_end_of_stream(_element, _pad, _ctx, state), do: {[], state} 226 | end 227 | ``` 228 | 229 | You're welcome to run the inference on your own file, but please keep in mind that this pipeline is specific to MP4 files containing H264 video and no audio stream; it will not work on other types of files. 230 | 231 | ## Run inference 232 | 233 | We have written the Filter responsible for applying our model and the full processing pipeline! It's time to make use of it. Let's download our input file first: 234 | 235 | ```elixir 236 | {:ok, input_file} = ExVision.Cache.lazy_get(ExVision.Cache, "assets/example.mp4") 237 | ``` 238 | 239 | Define the location of our output file: 240 | 241 | ```elixir 242 | output_file = Path.join("/tmp", "#{DateTime.utc_now()}.mp4") 243 | ``` 244 | 245 | And finally, execute our pipeline 246 | 247 | ```elixir 248 | {:ok, _supervisor_pid, pipeline_pid} = 249 | Membrane.Pipeline.start(Pipeline, {input_file, output_file}) 250 | ``` 251 | 252 | ## Download the results 253 | 254 | The pipeline is running in a separate process, therefore the previous call wasn't blocking. Our output file is not ready until the pipeline finishes and therefore terminates. 
255 | 256 | In order to get notified about the pipeline terminating, we will make use of `Process.monitor/1` 257 | 258 | 259 | 260 | ```elixir 261 | monitor = Process.monitor(pipeline_pid) 262 | 263 | {time, _result} = 264 | :timer.tc(fn -> 265 | receive do 266 | {:DOWN, ^monitor, :process, _pid, _reson} -> :ok 267 | end 268 | end) 269 | 270 | Kino.Text.new("Operation took #{time / 1_000_000} seconds") 271 | ``` 272 | 273 | After the cell above has finished evaluating, our output file should already be all ready. 274 | 275 | Let's write some code to fetch it from the notebook. 276 | 277 | 278 | 279 | ```elixir 280 | content_btn = 281 | Kino.Download.new(fn -> File.read!(output_file) end, 282 | label: "Download the video", 283 | filename: "video.mp4" 284 | ) 285 | 286 | delete_btn = Kino.Control.button("Delete the file permanently") 287 | no_file_msg = Kino.Text.new("The file doesn't exist") 288 | 289 | Kino.listen(delete_btn, fn _data -> 290 | File.rm!(output_file) 291 | Kino.render(no_file_msg) 292 | end) 293 | 294 | if File.exists?(output_file), 295 | do: Kino.Layout.grid([content_btn, delete_btn], gap: 10), 296 | else: no_file_msg 297 | ``` 298 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2020 Software Mansion 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /examples/1-basic-tutorial.livemd: -------------------------------------------------------------------------------- 1 | 2 | 3 | # ExVision walkthrough 4 | 5 | ```elixir 6 | Mix.install( 7 | [ 8 | :ex_vision, 9 | :kino, 10 | :kino_bumblebee, 11 | :stb_image, 12 | :exla, 13 | :image 14 | ], 15 | config: [ 16 | nx: [default_backend: EXLA.Backend] 17 | ] 18 | ) 19 | ``` 20 | 21 | ## ExVision introduction 22 | 23 | **This Livebook will only work when the repository is cloned locally** 24 | 25 | 26 | 27 | ExVision is a collection of models with easy to use API and descriptive output formats. 28 | It uses [Ortex](https://www.github.com/elixir-nx/ortex) under the hood to run it's predefined models. 29 | 30 | The main objective of ExVision is ease of use. This sacrifices some control over the model but allows you to get started using predefined models in seconds. That approach should allow an average Elixir Developer to quickly introduce some AI into their app, just like that. 
31 | 32 | 33 | 34 | ```elixir 35 | alias ExVision.Classification.MobileNetV3Small, as: Classifier 36 | alias ExVision.ObjectDetection.FasterRCNN_ResNet50_FPN, as: ObjectDetector 37 | alias ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3, as: SemanticSegmentation 38 | alias ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2, as: InstanceSegmentation 39 | alias ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPN, as: KeypointDetector 40 | 41 | {:ok, classifier} = Classifier.load() 42 | {:ok, object_detector} = ObjectDetector.load() 43 | {:ok, semantic_segmentation} = SemanticSegmentation.load() 44 | {:ok, instance_segmentation} = InstanceSegmentation.load() 45 | {:ok, keypoint_detector} = KeypointDetector.load() 46 | 47 | Kino.nothing() 48 | ``` 49 | 50 | At this point the models are loaded and ready for inference. 51 | 52 | ExVision handles multiple types of input: 53 | 54 | * file path 55 | * pre-loaded Nx tensors, in both interleaved and planar formats 56 | * Evision matrices. 57 | 58 | Under the hood, all of these formats will be converted to Nx's Tensors and normalized for inference by the given model. 59 | 60 | 61 | 62 | ### Output formats 63 | 64 | A big point of ExVision over using the models directly has to be documentation and intuitive outputs. 
Hence, models return the following types: 65 | 66 | * Classifier - a mapping of each category to its probability: [`%{category_t() => number()}`](http://localhost:55556/ExVision.Classification.MobileNetV3Small.html#t:output_t/0) 67 | * Object Detector - a list of bounding boxes: [`list(BBox.t())`](http://localhost:55556/ExVision.ObjectDetection.Ssdlite320_MobileNetv3.BBox.html) 68 | * Semantic Segmentation - a mapping of category to boolean tensor determining if the pixel is part of the mask for the given class: [`%{category_t() => Nx.Tensor.t()}`](http://localhost:55556/ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3.html#t:output_t/0) 69 | * Instance Segmentation - a list of bounding boxes with mask: [`list(BBoxWithMask.t())`](http://localhost:55556/ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2.html#t:output_t/0) 70 | * Keypoint Detector - a list of bounding boxes with keypoints: [`list(BBoxWithKeypoints.t())`](http://localhost:55556/ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPN.html#t:output_t/0) 71 | 72 | 73 | 74 | ### Example inference 75 | 76 | Let's put it into practice and run some predictions on a sample image of a cat. 77 | This code is intentionally using some calls to the `dbg/1` macro in order to aid with the understanding of these formats. 78 | 79 | 80 | 81 | Let's start with loading our test suspect. For this purpose, we have defined a helper function that will automatically load some default images if you don't specify any. 82 | 83 | ```elixir 84 | defmodule ImageHandler do 85 | def get(input, default_image) do 86 | img_path = 87 | case Kino.Input.read(input) do 88 | nil -> 89 | {:ok, file} = ExVision.Cache.lazy_get(ExVision.Cache, default_image) 90 | file 91 | 92 | %{file_ref: image} -> 93 | Kino.Input.file_path(image) 94 | end 95 | 96 | Image.open!(img_path) 97 | end 98 | end 99 | ``` 100 | 101 | In the next cell, you can provide your own image that will be used as an example in this notebook. 
If you don't have anything handy, we're also providing a default image of a cat. 102 | 103 | 104 | 105 | ```elixir 106 | input = Kino.Input.image("Image to evaluate", format: :jpeg) 107 | ``` 108 | 109 | 110 | 111 | ```elixir 112 | image = ImageHandler.get(input, "cat.jpg") 113 | ``` 114 | 115 | ### Image classification 116 | 117 | Image classification is the process of assigning the image a category that best describes the contents of that image. For example, when given an image of a cat, an image classifier should predict that the image should be assigned to the `:cat` class. 118 | 119 | The output format of a classifier is a dictionary that maps each category that the model knows to its probability. In most cases, that means that you will get a lot of categories with near zero probability and that's on purpose. Where possible, we don't want to make ExVision feel too much like magic. You're still doing AI, we're just handling the input and output format conversions. 120 | 121 | Usually however, the class with the highest probability is the category you should assign. However, if there are multiple classes with comparatively high probabilities, this may indicate that the model has no idea and it's actually not a prediction at all. 122 | 123 | #### Code example 124 | 125 | In this example, we will try to find out the most likely class that the provided image could belong to. In order to do this, we will: 126 | 127 | 1. Use the image classifier to gather predictions 128 | 2. Sort the predictions 129 | 3. Take 10 of the most likely ones 130 | 4. 
Plot the results 131 | 132 | 133 | 134 | ```elixir 135 | predictions = 136 | image 137 | # run inference 138 | |> then(&Classifier.run(classifier, &1)) 139 | # sort the dictionary by the probability of the prediction 140 | |> Enum.sort_by(fn {_label, score} -> score end, :desc) 141 | # Only include a few of the most likely predictions in the output 142 | |> Enum.take(10) 143 | |> dbg() 144 | 145 | [{top_prediction, _score} | _rest] = predictions 146 | 147 | # Kino rendering stuff, not important 148 | scored_list = Kino.Bumblebee.ScoredList.new(predictions) 149 | 150 | Kino.Layout.grid( 151 | [ 152 | image, 153 | Kino.Layout.grid([Kino.Text.new("Class probabilities"), scored_list]) 154 | ], 155 | columns: 2, 156 | gap: 25 157 | ) 158 | ``` 159 | 160 | ### Object detection 161 | 162 | In object detection, we're trying to locate the objects in the image. The format of the output in this case should provide a lot of clarification: it's a list of bounding boxes, each of which indicates the area of the image in which, according to the model, an object of the specified class is located. Each bounding box is also assigned a score, which can be interpreted as the certainty of the detection. 163 | 164 | By default, ExVision will discard extremely low probability bounding boxes (with scores lower than 0.1), as they are just noise. 165 | 166 | #### Code example 167 | 168 | In this example, we will draw a rectangle around the biggest object in the image. In order to do this, we will perform the following operations: 169 | 170 | 1. Use the object detector to get the bounding boxes 171 | 2. Find the bounding box with the biggest total area 172 | 3. 
Draw a rectangle around the region indicated by that bounding box 173 | 174 | 175 | 176 | ```elixir 177 | alias ExVision.Types.BBox 178 | 179 | # apply the model 180 | prediction = 181 | image 182 | |> then(&ObjectDetector.run(object_detector, &1)) 183 | # Find the biggest object by area 184 | |> Enum.max_by(&(BBox.width(&1) * BBox.height(&1))) 185 | |> dbg() 186 | 187 | # Render an image 188 | Image.Draw.rect!( 189 | image, 190 | prediction.x1, 191 | prediction.y1, 192 | BBox.width(prediction), 193 | BBox.height(prediction), 194 | fill: false, 195 | color: :red, 196 | stroke_width: 5 197 | ) 198 | ``` 199 | 200 | ## Semantic segmentation 201 | 202 | The goal of semantic segmentation is to generate per-pixel masks stating if the object of the given class is in the corresponding pixel. 203 | 204 | In ExVision, the output of semantic segmentation models is a mapping of category to a binary per-pixel mask. In contrast to previous models, we're not getting scores. Each pixel is always assigned the most probable class. 205 | 206 | ### Code example 207 | 208 | In this example, we will feed the image to the semantic segmentation model and inspect some of the masks provided by the model. 
209 | 210 | 211 | 212 | ```elixir 213 | nx_image = Image.to_nx!(image) 214 | uniform_black = 0 |> Nx.broadcast(Nx.shape(nx_image)) |> Nx.as_type(Nx.type(nx_image)) 215 | 216 | predictions = 217 | image 218 | |> then(&SemanticSegmentation.run(semantic_segmentation, &1)) 219 | # Filter out masks covering less than 5% of the total image area 220 | |> Enum.filter(fn {_label, mask} -> 221 | mask |> Nx.mean() |> Nx.to_number() > 0.05 222 | end) 223 | |> dbg() 224 | 225 | predictions 226 | |> Enum.map(fn {label, mask} -> 227 | # expand the mask to cover all channels 228 | mask = Nx.broadcast(mask, Nx.shape(nx_image), axes: [0, 1]) 229 | 230 | # Cut out the mask from the original image 231 | image = Nx.select(mask, nx_image, uniform_black) 232 | image = Nx.as_type(image, :u8) 233 | 234 | Kino.Layout.grid([ 235 | label |> Atom.to_string() |> Kino.Text.new(), 236 | Kino.Image.new(image) 237 | ]) 238 | end) 239 | |> Kino.Layout.grid(columns: 2) 240 | ``` 241 | 242 | ## Instance segmentation 243 | 244 | The objective of instance segmentation is to not only identify objects within an image on a per-pixel basis but also differentiate each specific object of the same class. 245 | 246 | In ExVision, the output of instance segmentation models includes a bounding box with a label and a score (similar to object detection), and a binary mask for every instance detected in the image. 247 | 248 | Extremely low probability detections (with scores lower than 0.1) will be discarded by ExVision, as they are just noise. 249 | 250 | ### Code example 251 | 252 | In the following example, we will pass an image through the instance segmentation model and examine the individual instance masks recognized by the model. 
253 | 254 | ```elixir 255 | alias ExVision.Types.BBoxWithMask 256 | 257 | nx_image = Image.to_nx!(image) 258 | uniform_black = 0 |> Nx.broadcast(Nx.shape(nx_image)) |> Nx.as_type(Nx.type(nx_image)) 259 | 260 | predictions = 261 | image 262 | |> then(&InstanceSegmentation.run(instance_segmentation, &1)) 263 | # Get most likely predictions from the output 264 | |> Enum.filter(fn %BBoxWithMask{score: score} -> score > 0.8 end) 265 | |> dbg() 266 | 267 | predictions 268 | |> Enum.map(fn %BBoxWithMask{label: label, mask: mask} -> 269 | # expand the mask to cover all channels 270 | mask = Nx.broadcast(mask, Nx.shape(nx_image), axes: [0, 1]) 271 | 272 | # Cut out the mask from the original image 273 | image = Nx.select(mask, nx_image, uniform_black) 274 | image = Nx.as_type(image, :u8) 275 | 276 | Kino.Layout.grid([ 277 | label |> Atom.to_string() |> Kino.Text.new(), 278 | Kino.Image.new(image) 279 | ]) 280 | end) 281 | |> Kino.Layout.grid(columns: 2) 282 | ``` 283 | 284 | ## Keypoint detection 285 | 286 | In keypoint detection, we're trying to locate specific keypoints in the image. ExVision returns the output as a list of bounding boxes (similar to object detection) with named keypoints. Each keypoint consists of x, y coordinates and a score which is the model's certainty of that keypoint. 287 | 288 | ExVision will discard extremely low probability detections (with scores lower than 0.1), as they are just noise. 289 | 290 | 291 | 292 | The KeypointRCNN_ResNet50_FPN model is commonly used for detecting human body parts in images. To illustrate this, let's begin by importing an image that features people. 293 | 294 | ```elixir 295 | image = ImageHandler.get(input, "people.jpg") 296 | ``` 297 | 298 | #### Code example 299 | 300 | In this example, we will draw keypoints for every detection with a high enough score returned by the model; additionally, we will draw a bounding box around them. 
301 | 302 | ```elixir 303 | alias ExVision.Types.BBoxWithKeypoints 304 | # Draws the keypoints, skeleton and bounding box of every prediction scoring above 0.8 onto `image` 305 | # define the skeleton pose as pairs of keypoint names; each pair is later joined by a line 306 | connections = [ 307 | # face 308 | {:nose, :left_eye}, 309 | {:nose, :right_eye}, 310 | {:left_eye, :right_eye}, 311 | {:left_eye, :left_ear}, 312 | {:right_eye, :right_ear}, 313 | 314 | # left arm 315 | {:left_wrist, :left_elbow}, 316 | {:left_elbow, :left_shoulder}, 317 | 318 | # right arm 319 | {:right_wrist, :right_elbow}, 320 | {:right_elbow, :right_shoulder}, 321 | 322 | # torso 323 | {:left_shoulder, :right_shoulder}, 324 | {:left_shoulder, :left_hip}, 325 | {:right_shoulder, :right_hip}, 326 | {:left_hip, :right_hip}, 327 | {:left_shoulder, :left_ear}, 328 | {:right_shoulder, :right_ear}, 329 | 330 | # left leg 331 | {:left_ankle, :left_knee}, 332 | {:left_knee, :left_hip}, 333 | 334 | # right leg 335 | {:right_ankle, :right_knee}, 336 | {:right_knee, :right_hip} 337 | ] 338 | 339 | # run the keypoint detector on the image 340 | predictions = 341 | image 342 | |> then(&KeypointDetector.run(keypoint_detector, &1)) 343 | # keep only the predictions whose score is above 0.8 344 | |> Enum.filter(fn %BBoxWithKeypoints{score: score} -> score > 0.8 end) 345 | |> dbg() 346 | 347 | predictions 348 | |> Enum.reduce(image, fn prediction, image_acc -> 349 | # draw every keypoint of this prediction as a red circle 350 | image_acc = 351 | prediction.keypoints 352 | |> Enum.reduce(image_acc, fn {_key, %{x: x, y: y}}, acc -> 353 | Image.Draw.circle!(acc, x, y, 2, color: :red) 354 | end) 355 | 356 | # draw a red line between the two keypoints of every connection 357 | image_acc = 358 | connections 359 | |> Enum.reduce(image_acc, fn {from, to}, acc -> 360 | %{x: x1, y: y1} = prediction.keypoints[from] 361 | %{x: x2, y: y2} = prediction.keypoints[to] 362 | 363 | Image.Draw.line!(acc, x1, y1, x2, y2, color: :red) 364 | end) 365 | 366 | # draw the bounding box as a red, unfilled rectangle; its result becomes the accumulator for the next prediction 367 | Image.Draw.rect!( 368 | image_acc, 369 | prediction.x1, 370 | prediction.y1, 371 | BBoxWithKeypoints.width(prediction), 372 | BBoxWithKeypoints.height(prediction), 373 | fill: false, 374 | color: :red, 375 | stroke_width: 2 
376 | ) 377 | end) 378 | ``` 379 | 380 | ## Next steps 381 | 382 | After completing this tutorial you can also check out our next tutorial focusing on using models in production in process workflow [here](2-usage-as-nx-serving.livemd) 383 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "axon": {:hex, :axon, "0.6.1", "1d042fdba1c1b4413a3d65800524feebd1bc8ed218f8cdefe7a97510c3f427f3", [:mix], [{:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:kino_vega_lite, "~> 0.1.7", [hex: :kino_vega_lite, repo: "hexpm", optional: true]}, {:nx, "~> 0.6.0 or ~> 0.7.0", [hex: :nx, repo: "hexpm", optional: false]}, {:polaris, "~> 0.1", [hex: :polaris, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1.1", [hex: :table_rex, repo: "hexpm", optional: true]}], "hexpm", "d6b0ae2f0dd284f6bf702edcab71e790d6c01ca502dd06c4070836554f5a48e1"}, 3 | "bunch": {:hex, :bunch, "1.6.1", "5393d827a64d5f846092703441ea50e65bc09f37fd8e320878f13e63d410aec7", [:mix], [], "hexpm", "286cc3add551628b30605efbe2fca4e38cc1bea89bcd0a1a7226920b3364fe4a"}, 4 | "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, 5 | "castore": {:hex, :castore, "1.0.7", "b651241514e5f6956028147fe6637f7ac13802537e895a724f90bf3e36ddd1dd", [:mix], [], "hexpm", "da7785a4b0d2a021cd1292a60875a784b6caef71e76bf4917bdee1f390455cf5"}, 6 | "cc_precompiler": {:hex, :cc_precompiler, "0.1.10", "47c9c08d8869cf09b41da36538f62bc1abd3e19e41701c2cea2675b53c704258", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "f6e046254e53cd6b41c6bacd70ae728011aa82b2742a80d6e2214855c6e06b22"}, 7 | "coerce": {:hex, :coerce, "1.0.1", "211c27386315dc2894ac11bc1f413a0e38505d808153367bd5c6e75a4003d096", [:mix], [], "hexpm", 
"b44a691700f7a1a15b4b7e2ff1fa30bebd669929ac8aa43cffe9e2f8bf051cf1"}, 8 | "complex": {:hex, :complex, "0.5.0", "af2d2331ff6170b61bb738695e481b27a66780e18763e066ee2cd863d0b1dd92", [:mix], [], "hexpm", "2683bd3c184466cfb94fad74cbfddfaa94b860e27ad4ca1bffe3bff169d91ef1"}, 9 | "credo": {:hex, :credo, "1.7.7", "771445037228f763f9b2afd612b6aa2fd8e28432a95dbbc60d8e03ce71ba4446", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8bc87496c9aaacdc3f90f01b7b0582467b69b4bd2441fe8aae3109d843cc2f2e"}, 10 | "dialyxir": {:hex, :dialyxir, "1.4.3", "edd0124f358f0b9e95bfe53a9fcf806d615d8f838e2202a9f430d59566b6b53b", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "bf2cfb75cd5c5006bec30141b131663299c661a864ec7fbbc72dfa557487a986"}, 11 | "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, 12 | "elixir_make": {:hex, :elixir_make, "0.8.4", "4960a03ce79081dee8fe119d80ad372c4e7badb84c493cc75983f9d3bc8bde0f", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:certifi, "~> 2.0", [hex: :certifi, repo: "hexpm", optional: true]}], "hexpm", "6e7f1d619b5f61dfabd0a20aa268e575572b542ac31723293a4c1a567d5ef040"}, 13 | "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, 14 | "evision": {:hex, :evision, "0.1.38", "f8b23ad685c3ebd70969a3457027b5c74b5bc8dc51588661c516098c3240b92d", [:make, :mix, :rebar3], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.7", [hex: :elixir_make, repo: 
"hexpm", optional: false]}, {:kino, "~> 0.11", [hex: :kino, repo: "hexpm", optional: true]}, {:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: false]}, {:progress_bar, "~> 2.0 or ~> 3.0", [hex: :progress_bar, repo: "hexpm", optional: true]}], "hexpm", "f9302547d76c5e4ad7022ffdc76be13e33c990fdd67ad2af203f24ab5d3aee20"}, 15 | "ex_doc": {:hex, :ex_doc, "0.32.1", "21e40f939515373bcdc9cffe65f3b3543f05015ac6c3d01d991874129d173420", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "5142c9db521f106d61ff33250f779807ed2a88620e472ac95dc7d59c380113da"}, 16 | "exla": {:hex, :exla, "0.7.2", "8ac573093df8e5e6b36845beeb3f5a0ea92b05082bf2fa4678f80170cfc887f6", [:make, :mix], [{:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:nx, "~> 0.7.1", [hex: :nx, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:xla, "~> 0.6.0", [hex: :xla, repo: "hexpm", optional: false]}], "hexpm", "d061ea87858415e5585cbd4b7bdae5489000339519a2c6a7f51eb0defd73b588"}, 17 | "file_system": {:hex, :file_system, "1.0.1", "79e8ceaddb0416f8b8cd02a0127bdbababe7bf4a23d2a395b983c1f8b3f73edd", [:mix], [], "hexpm", "4414d1f38863ddf9120720cd976fce5bdde8e91d8283353f0e31850fa89feb9e"}, 18 | "finch": {:hex, :finch, "0.18.0", "944ac7d34d0bd2ac8998f79f7a811b21d87d911e77a786bc5810adb75632ada4", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 
1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "69f5045b042e531e53edc2574f15e25e735b522c37e2ddb766e15b979e03aa65"}, 19 | "hpax": {:hex, :hpax, "0.2.0", "5a58219adcb75977b2edce5eb22051de9362f08236220c9e859a47111c194ff5", [:mix], [], "hexpm", "bea06558cdae85bed075e6c036993d43cd54d447f76d8190a8db0dc5893fa2f1"}, 20 | "image": {:hex, :image, "0.44.0", "e8eea9398abbed12b7784e786f26a5c839a00bcddd8f2f8ba12adf7e227beb9f", [:mix], [{:bumblebee, "~> 0.3", [hex: :bumblebee, repo: "hexpm", optional: true]}, {:evision, "~> 0.1.33", [hex: :evision, repo: "hexpm", optional: true]}, {:exla, "~> 0.5", [hex: :exla, repo: "hexpm", optional: true]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: true]}, {:kino, "~> 0.11", [hex: :kino, repo: "hexpm", optional: true]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: true]}, {:nx_image, "~> 0.1", [hex: :nx_image, repo: "hexpm", optional: true]}, {:phoenix_html, "~> 2.1 or ~> 3.2 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:plug, "~> 1.13", [hex: :plug, repo: "hexpm", optional: true]}, {:req, "~> 0.4", [hex: :req, repo: "hexpm", optional: true]}, {:rustler, "> 0.0.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:sweet_xml, "~> 0.7", [hex: :sweet_xml, repo: "hexpm", optional: false]}, {:vix, "~> 0.23", [hex: :vix, repo: "hexpm", optional: false]}], "hexpm", "cd00a3de4d7a40a2cb1ca72b9852b0d81701793414af8babf4d33dbeb6de0f6f"}, 21 | "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, 22 | "makeup": {:hex, :makeup, "1.1.1", 
"fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"}, 23 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, 24 | "makeup_erlang": {:hex, :makeup_erlang, "0.1.5", "e0ff5a7c708dda34311f7522a8758e23bfcd7d8d8068dc312b5eb41c6fd76eba", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "94d2e986428585a21516d7d7149781480013c56e30c6a233534bedf38867a59a"}, 25 | "membrane_core": {:hex, :membrane_core, "1.0.0", "1b543aefd952283be1f2a215a1db213aa4d91222722ba03cd35280622f1905ee", [:mix], [{:bunch, "~> 1.6", [hex: :bunch, repo: "hexpm", optional: false]}, {:qex, "~> 0.3", [hex: :qex, repo: "hexpm", optional: false]}, {:ratio, "~> 3.0", [hex: :ratio, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "352c90fd0a29942143c4bf7a727cc05c632e323f50a1a4e99321b1e8982f1533"}, 26 | "mime": {:hex, :mime, "2.0.5", "dc34c8efd439abe6ae0343edbb8556f4d63f178594894720607772a041b04b02", [:mix], [], "hexpm", "da0d64a365c45bc9935cc5c8a7fc5e49a0e0f9932a761c55d6c52b142780a05c"}, 27 | "mimic": {:hex, :mimic, "1.7.4", "cd2772ffbc9edefe964bc668bfd4059487fa639a5b7f1cbdf4fd22946505aa4f", [:mix], [], "hexpm", "437c61041ecf8a7fae35763ce89859e4973bb0666e6ce76d75efc789204447c3"}, 28 | "mint": {:hex, :mint, "1.6.0", "88a4f91cd690508a04ff1c3e28952f322528934be541844d54e0ceb765f01d5e", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 
0.2.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "3c5ae85d90a5aca0a49c0d8b67360bbe407f3b54f1030a111047ff988e8fefaa"}, 29 | "nimble_options": {:hex, :nimble_options, "1.1.0", "3b31a57ede9cb1502071fade751ab0c7b8dbe75a9a4c2b5bbb0943a690b63172", [:mix], [], "hexpm", "8bbbb3941af3ca9acc7835f5655ea062111c9c27bcac53e004460dfd19008a99"}, 30 | "nimble_ownership": {:hex, :nimble_ownership, "0.3.1", "99d5244672fafdfac89bfad3d3ab8f0d367603ce1dc4855f86a1c75008bce56f", [:mix], [], "hexpm", "4bf510adedff0449a1d6e200e43e57a814794c8b5b6439071274d248d272a549"}, 31 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, 32 | "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, 33 | "numbers": {:hex, :numbers, "5.2.4", "f123d5bb7f6acc366f8f445e10a32bd403c8469bdbce8ce049e1f0972b607080", [:mix], [{:coerce, "~> 1.0", [hex: :coerce, repo: "hexpm", optional: false]}, {:decimal, "~> 1.9 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "eeccf5c61d5f4922198395bf87a465b6f980b8b862dd22d28198c5e6fab38582"}, 34 | "nx": {:hex, :nx, "0.7.2", "7f6f6584585e49ffbf81769e7ccc2d01c5639074e399c1f94adc2b509869673e", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "e2c0680066eec5af8b8ef00c99e9bf40a0d08d8b2bbba77f59f801ec54a3f90e"}, 35 | "nx_image": {:hex, :nx_image, "0.1.2", "0c6e3453c1dc30fc80c723a54861204304cebc8a89ed3b806b972c73ee5d119d", [:mix], [{:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "9161863c42405ddccb6dbbbeae078ad23e30201509cc804b3b3a7c9e98764b81"}, 36 | "ortex": {:hex, :ortex, "0.1.9", 
"a9b14552ef6058961a3e300f973a51887328a13c2ffa6f2cad1b0785f9c7e73c", [:mix], [{:nx, "~> 0.6", [hex: :nx, repo: "hexpm", optional: false]}, {:rustler, "~> 0.29.0", [hex: :rustler, repo: "hexpm", optional: false]}], "hexpm", "5201b9aa8e22a86f3a04e819266bfd1c5a8194f0c51f917c1d0cffe8bdbb76d8"}, 37 | "phoenix_html": {:hex, :phoenix_html, "4.1.1", "4c064fd3873d12ebb1388425a8f2a19348cef56e7289e1998e2d2fa758aa982e", [:mix], [], "hexpm", "f2f2df5a72bc9a2f510b21497fd7d2b86d932ec0598f0210fed4114adc546c6f"}, 38 | "poison": {:hex, :poison, "5.0.0", "d2b54589ab4157bbb82ec2050757779bfed724463a544b6e20d79855a9e43b24", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "11dc6117c501b80c62a7594f941d043982a1bd05a1184280c0d9166eb4d8d3fc"}, 39 | "polaris": {:hex, :polaris, "0.1.0", "dca61b18e3e801ecdae6ac9f0eca5f19792b44a5cb4b8d63db50fc40fc038d22", [:mix], [{:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "13ef2b166650e533cb24b10e2f3b8ab4f2f449ba4d63156e8c569527f206e2c2"}, 40 | "qex": {:hex, :qex, "0.5.1", "0d82c0f008551d24fffb99d97f8299afcb8ea9cf99582b770bd004ed5af63fd6", [:mix], [], "hexpm", "935a39fdaf2445834b95951456559e9dc2063d0a055742c558a99987b38d6bab"}, 41 | "ratio": {:hex, :ratio, "3.0.2", "60a5976872a4dc3d873ecc57eed1738589e99d1094834b9c935b118231297cfb", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}, {:numbers, "~> 5.2.0", [hex: :numbers, repo: "hexpm", optional: false]}], "hexpm", "3a13ed5a30ad0bfd7e4a86bf86d93d2b5a06f5904417d38d3f3ea6406cdfc7bb"}, 42 | "req": {:hex, :req, "0.4.14", "103de133a076a31044e5458e0f850d5681eef23dfabf3ea34af63212e3b902e2", [:mix], [{:aws_signature, "~> 0.3.2", [hex: :aws_signature, repo: "hexpm", optional: true]}, {:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, 
repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:nimble_ownership, "~> 0.2.0 or ~> 0.3.0", [hex: :nimble_ownership, repo: "hexpm", optional: false]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "2ddd3d33f9ab714ced8d3c15fd03db40c14dbf129003c4a3eb80fac2cc0b1b08"}, 43 | "rustler": {:hex, :rustler, "0.29.1", "880f20ae3027bd7945def6cea767f5257bc926f33ff50c0d5d5a5315883c084d", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "109497d701861bfcd26eb8f5801fe327a8eef304f56a5b63ef61151ff44ac9b6"}, 44 | "stb_image": {:hex, :stb_image, "0.6.8", "c68768e85045b2f40afe376cafa7d3fff491404366659bc2a01abe78d2ef9c4a", [:make, :mix], [{:cc_precompiler, "~> 0.1.0", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.8.2", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: true]}], "hexpm", "6688b8a8dd3db718bb3fccf267cb7132d2193fd62441603b08539515fedfa8d6"}, 45 | "sweet_xml": {:hex, :sweet_xml, "0.7.4", "a8b7e1ce7ecd775c7e8a65d501bc2cd933bff3a9c41ab763f5105688ef485d08", [:mix], [], "hexpm", "e7c4b0bdbf460c928234951def54fe87edf1a170f6896675443279e2dbeba167"}, 46 | "telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"}, 47 | "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"}, 48 | "vix": {:hex, :vix, "0.27.0", "c9d6be17abe6fd1b3daed52964331c67ff1f980ea188499d8ac5e723cf215576", [:make, :mix], [{:castore, 
"~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:cc_precompiler, "~> 0.1.4 or ~> 0.2", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.7.3 or ~> 0.8", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}], "hexpm", "ae4ba5bb9882753396baadfff93b6cab5d4275b13751fd49723591eb116f373a"}, 49 | "xla": {:hex, :xla, "0.6.0", "67bb7695efa4a23b06211dc212de6a72af1ad5a9e17325e05e0a87e4c241feb8", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "dd074daf942312c6da87c7ed61b62fb1a075bced157f1cc4d47af2d7c9f44fb7"}, 50 | } 51 | -------------------------------------------------------------------------------- /priv/categories/imagenet_v2_categories.json: -------------------------------------------------------------------------------- 1 | [ 2 | "tench", 3 | "goldfish", 4 | "great white shark", 5 | "tiger shark", 6 | "hammerhead", 7 | "electric ray", 8 | "stingray", 9 | "cock", 10 | "hen", 11 | "ostrich", 12 | "brambling", 13 | "goldfinch", 14 | "house finch", 15 | "junco", 16 | "indigo bunting", 17 | "robin", 18 | "bulbul", 19 | "jay", 20 | "magpie", 21 | "chickadee", 22 | "water ouzel", 23 | "kite", 24 | "bald eagle", 25 | "vulture", 26 | "great grey owl", 27 | "European fire salamander", 28 | "common newt", 29 | "eft", 30 | "spotted salamander", 31 | "axolotl", 32 | "bullfrog", 33 | "tree frog", 34 | "tailed frog", 35 | "loggerhead", 36 | "leatherback turtle", 37 | "mud turtle", 38 | "terrapin", 39 | "box turtle", 40 | "banded gecko", 41 | "common iguana", 42 | "American chameleon", 43 | "whiptail", 44 | "agama", 45 | "frilled lizard", 46 | "alligator lizard", 47 | "Gila monster", 48 | "green lizard", 49 | "African chameleon", 50 | "Komodo dragon", 51 | "African crocodile", 52 | "American alligator", 53 | "triceratops", 54 | "thunder snake", 55 | "ringneck snake", 56 | "hognose snake", 57 | "green snake", 58 | 
"king snake", 59 | "garter snake", 60 | "water snake", 61 | "vine snake", 62 | "night snake", 63 | "boa constrictor", 64 | "rock python", 65 | "Indian cobra", 66 | "green mamba", 67 | "sea snake", 68 | "horned viper", 69 | "diamondback", 70 | "sidewinder", 71 | "trilobite", 72 | "harvestman", 73 | "scorpion", 74 | "black and gold garden spider", 75 | "barn spider", 76 | "garden spider", 77 | "black widow", 78 | "tarantula", 79 | "wolf spider", 80 | "tick", 81 | "centipede", 82 | "black grouse", 83 | "ptarmigan", 84 | "ruffed grouse", 85 | "prairie chicken", 86 | "peacock", 87 | "quail", 88 | "partridge", 89 | "African grey", 90 | "macaw", 91 | "sulphur-crested cockatoo", 92 | "lorikeet", 93 | "coucal", 94 | "bee eater", 95 | "hornbill", 96 | "hummingbird", 97 | "jacamar", 98 | "toucan", 99 | "drake", 100 | "red-breasted merganser", 101 | "goose", 102 | "black swan", 103 | "tusker", 104 | "echidna", 105 | "platypus", 106 | "wallaby", 107 | "koala", 108 | "wombat", 109 | "jellyfish", 110 | "sea anemone", 111 | "brain coral", 112 | "flatworm", 113 | "nematode", 114 | "conch", 115 | "snail", 116 | "slug", 117 | "sea slug", 118 | "chiton", 119 | "chambered nautilus", 120 | "Dungeness crab", 121 | "rock crab", 122 | "fiddler crab", 123 | "king crab", 124 | "American lobster", 125 | "spiny lobster", 126 | "crayfish", 127 | "hermit crab", 128 | "isopod", 129 | "white stork", 130 | "black stork", 131 | "spoonbill", 132 | "flamingo", 133 | "little blue heron", 134 | "American egret", 135 | "bittern", 136 | "crane bird", 137 | "limpkin", 138 | "European gallinule", 139 | "American coot", 140 | "bustard", 141 | "ruddy turnstone", 142 | "red-backed sandpiper", 143 | "redshank", 144 | "dowitcher", 145 | "oystercatcher", 146 | "pelican", 147 | "king penguin", 148 | "albatross", 149 | "grey whale", 150 | "killer whale", 151 | "dugong", 152 | "sea lion", 153 | "Chihuahua", 154 | "Japanese spaniel", 155 | "Maltese dog", 156 | "Pekinese", 157 | "Shih-Tzu", 158 | "Blenheim spaniel", 
159 | "papillon", 160 | "toy terrier", 161 | "Rhodesian ridgeback", 162 | "Afghan hound", 163 | "basset", 164 | "beagle", 165 | "bloodhound", 166 | "bluetick", 167 | "black-and-tan coonhound", 168 | "Walker hound", 169 | "English foxhound", 170 | "redbone", 171 | "borzoi", 172 | "Irish wolfhound", 173 | "Italian greyhound", 174 | "whippet", 175 | "Ibizan hound", 176 | "Norwegian elkhound", 177 | "otterhound", 178 | "Saluki", 179 | "Scottish deerhound", 180 | "Weimaraner", 181 | "Staffordshire bullterrier", 182 | "American Staffordshire terrier", 183 | "Bedlington terrier", 184 | "Border terrier", 185 | "Kerry blue terrier", 186 | "Irish terrier", 187 | "Norfolk terrier", 188 | "Norwich terrier", 189 | "Yorkshire terrier", 190 | "wire-haired fox terrier", 191 | "Lakeland terrier", 192 | "Sealyham terrier", 193 | "Airedale", 194 | "cairn", 195 | "Australian terrier", 196 | "Dandie Dinmont", 197 | "Boston bull", 198 | "miniature schnauzer", 199 | "giant schnauzer", 200 | "standard schnauzer", 201 | "Scotch terrier", 202 | "Tibetan terrier", 203 | "silky terrier", 204 | "soft-coated wheaten terrier", 205 | "West Highland white terrier", 206 | "Lhasa", 207 | "flat-coated retriever", 208 | "curly-coated retriever", 209 | "golden retriever", 210 | "Labrador retriever", 211 | "Chesapeake Bay retriever", 212 | "German short-haired pointer", 213 | "vizsla", 214 | "English setter", 215 | "Irish setter", 216 | "Gordon setter", 217 | "Brittany spaniel", 218 | "clumber", 219 | "English springer", 220 | "Welsh springer spaniel", 221 | "cocker spaniel", 222 | "Sussex spaniel", 223 | "Irish water spaniel", 224 | "kuvasz", 225 | "schipperke", 226 | "groenendael", 227 | "malinois", 228 | "briard", 229 | "kelpie", 230 | "komondor", 231 | "Old English sheepdog", 232 | "Shetland sheepdog", 233 | "collie", 234 | "Border collie", 235 | "Bouvier des Flandres", 236 | "Rottweiler", 237 | "German shepherd", 238 | "Doberman", 239 | "miniature pinscher", 240 | "Greater Swiss Mountain dog", 241 
| "Bernese mountain dog", 242 | "Appenzeller", 243 | "EntleBucher", 244 | "boxer", 245 | "bull mastiff", 246 | "Tibetan mastiff", 247 | "French bulldog", 248 | "Great Dane", 249 | "Saint Bernard", 250 | "Eskimo dog", 251 | "malamute", 252 | "Siberian husky", 253 | "dalmatian", 254 | "affenpinscher", 255 | "basenji", 256 | "pug", 257 | "Leonberg", 258 | "Newfoundland", 259 | "Great Pyrenees", 260 | "Samoyed", 261 | "Pomeranian", 262 | "chow", 263 | "keeshond", 264 | "Brabancon griffon", 265 | "Pembroke", 266 | "Cardigan", 267 | "toy poodle", 268 | "miniature poodle", 269 | "standard poodle", 270 | "Mexican hairless", 271 | "timber wolf", 272 | "white wolf", 273 | "red wolf", 274 | "coyote", 275 | "dingo", 276 | "dhole", 277 | "African hunting dog", 278 | "hyena", 279 | "red fox", 280 | "kit fox", 281 | "Arctic fox", 282 | "grey fox", 283 | "tabby", 284 | "tiger cat", 285 | "Persian cat", 286 | "Siamese cat", 287 | "Egyptian cat", 288 | "cougar", 289 | "lynx", 290 | "leopard", 291 | "snow leopard", 292 | "jaguar", 293 | "lion", 294 | "tiger", 295 | "cheetah", 296 | "brown bear", 297 | "American black bear", 298 | "ice bear", 299 | "sloth bear", 300 | "mongoose", 301 | "meerkat", 302 | "tiger beetle", 303 | "ladybug", 304 | "ground beetle", 305 | "long-horned beetle", 306 | "leaf beetle", 307 | "dung beetle", 308 | "rhinoceros beetle", 309 | "weevil", 310 | "fly", 311 | "bee", 312 | "ant", 313 | "grasshopper", 314 | "cricket", 315 | "walking stick", 316 | "cockroach", 317 | "mantis", 318 | "cicada", 319 | "leafhopper", 320 | "lacewing", 321 | "dragonfly", 322 | "damselfly", 323 | "admiral", 324 | "ringlet", 325 | "monarch", 326 | "cabbage butterfly", 327 | "sulphur butterfly", 328 | "lycaenid", 329 | "starfish", 330 | "sea urchin", 331 | "sea cucumber", 332 | "wood rabbit", 333 | "hare", 334 | "Angora", 335 | "hamster", 336 | "porcupine", 337 | "fox squirrel", 338 | "marmot", 339 | "beaver", 340 | "guinea pig", 341 | "sorrel", 342 | "zebra", 343 | "hog", 344 | "wild 
boar", 345 | "warthog", 346 | "hippopotamus", 347 | "ox", 348 | "water buffalo", 349 | "bison", 350 | "ram", 351 | "bighorn", 352 | "ibex", 353 | "hartebeest", 354 | "impala", 355 | "gazelle", 356 | "Arabian camel", 357 | "llama", 358 | "weasel", 359 | "mink", 360 | "polecat", 361 | "black-footed ferret", 362 | "otter", 363 | "skunk", 364 | "badger", 365 | "armadillo", 366 | "three-toed sloth", 367 | "orangutan", 368 | "gorilla", 369 | "chimpanzee", 370 | "gibbon", 371 | "siamang", 372 | "guenon", 373 | "patas", 374 | "baboon", 375 | "macaque", 376 | "langur", 377 | "colobus", 378 | "proboscis monkey", 379 | "marmoset", 380 | "capuchin", 381 | "howler monkey", 382 | "titi", 383 | "spider monkey", 384 | "squirrel monkey", 385 | "Madagascar cat", 386 | "indri", 387 | "Indian elephant", 388 | "African elephant", 389 | "lesser panda", 390 | "giant panda", 391 | "barracouta", 392 | "eel", 393 | "coho", 394 | "rock beauty", 395 | "anemone fish", 396 | "sturgeon", 397 | "gar", 398 | "lionfish", 399 | "puffer", 400 | "abacus", 401 | "abaya", 402 | "academic gown", 403 | "accordion", 404 | "acoustic guitar", 405 | "aircraft carrier", 406 | "airliner", 407 | "airship", 408 | "altar", 409 | "ambulance", 410 | "amphibian", 411 | "analog clock", 412 | "apiary", 413 | "apron", 414 | "ashcan", 415 | "assault rifle", 416 | "backpack", 417 | "bakery", 418 | "balance beam", 419 | "balloon", 420 | "ballpoint", 421 | "Band Aid", 422 | "banjo", 423 | "bannister", 424 | "barbell", 425 | "barber chair", 426 | "barbershop", 427 | "barn", 428 | "barometer", 429 | "barrel", 430 | "barrow", 431 | "baseball", 432 | "basketball", 433 | "bassinet", 434 | "bassoon", 435 | "bathing cap", 436 | "bath towel", 437 | "bathtub", 438 | "beach wagon", 439 | "beacon", 440 | "beaker", 441 | "bearskin", 442 | "beer bottle", 443 | "beer glass", 444 | "bell cote", 445 | "bib", 446 | "bicycle-built-for-two", 447 | "bikini", 448 | "binder", 449 | "binoculars", 450 | "birdhouse", 451 | "boathouse", 452 | 
"bobsled", 453 | "bolo tie", 454 | "bonnet", 455 | "bookcase", 456 | "bookshop", 457 | "bottlecap", 458 | "bow", 459 | "bow tie", 460 | "brass", 461 | "brassiere", 462 | "breakwater", 463 | "breastplate", 464 | "broom", 465 | "bucket", 466 | "buckle", 467 | "bulletproof vest", 468 | "bullet train", 469 | "butcher shop", 470 | "cab", 471 | "caldron", 472 | "candle", 473 | "cannon", 474 | "canoe", 475 | "can opener", 476 | "cardigan", 477 | "car mirror", 478 | "carousel", 479 | "carpenter's kit", 480 | "carton", 481 | "car wheel", 482 | "cash machine", 483 | "cassette", 484 | "cassette player", 485 | "castle", 486 | "catamaran", 487 | "CD player", 488 | "cello", 489 | "cellular telephone", 490 | "chain", 491 | "chainlink fence", 492 | "chain mail", 493 | "chain saw", 494 | "chest", 495 | "chiffonier", 496 | "chime", 497 | "china cabinet", 498 | "Christmas stocking", 499 | "church", 500 | "cinema", 501 | "cleaver", 502 | "cliff dwelling", 503 | "cloak", 504 | "clog", 505 | "cocktail shaker", 506 | "coffee mug", 507 | "coffeepot", 508 | "coil", 509 | "combination lock", 510 | "computer keyboard", 511 | "confectionery", 512 | "container ship", 513 | "convertible", 514 | "corkscrew", 515 | "cornet", 516 | "cowboy boot", 517 | "cowboy hat", 518 | "cradle", 519 | "crane", 520 | "crash helmet", 521 | "crate", 522 | "crib", 523 | "Crock Pot", 524 | "croquet ball", 525 | "crutch", 526 | "cuirass", 527 | "dam", 528 | "desk", 529 | "desktop computer", 530 | "dial telephone", 531 | "diaper", 532 | "digital clock", 533 | "digital watch", 534 | "dining table", 535 | "dishrag", 536 | "dishwasher", 537 | "disk brake", 538 | "dock", 539 | "dogsled", 540 | "dome", 541 | "doormat", 542 | "drilling platform", 543 | "drum", 544 | "drumstick", 545 | "dumbbell", 546 | "Dutch oven", 547 | "electric fan", 548 | "electric guitar", 549 | "electric locomotive", 550 | "entertainment center", 551 | "envelope", 552 | "espresso maker", 553 | "face powder", 554 | "feather boa", 555 | "file", 556 | 
"fireboat", 557 | "fire engine", 558 | "fire screen", 559 | "flagpole", 560 | "flute", 561 | "folding chair", 562 | "football helmet", 563 | "forklift", 564 | "fountain", 565 | "fountain pen", 566 | "four-poster", 567 | "freight car", 568 | "French horn", 569 | "frying pan", 570 | "fur coat", 571 | "garbage truck", 572 | "gasmask", 573 | "gas pump", 574 | "goblet", 575 | "go-kart", 576 | "golf ball", 577 | "golfcart", 578 | "gondola", 579 | "gong", 580 | "gown", 581 | "grand piano", 582 | "greenhouse", 583 | "grille", 584 | "grocery store", 585 | "guillotine", 586 | "hair slide", 587 | "hair spray", 588 | "half track", 589 | "hammer", 590 | "hamper", 591 | "hand blower", 592 | "hand-held computer", 593 | "handkerchief", 594 | "hard disc", 595 | "harmonica", 596 | "harp", 597 | "harvester", 598 | "hatchet", 599 | "holster", 600 | "home theater", 601 | "honeycomb", 602 | "hook", 603 | "hoopskirt", 604 | "horizontal bar", 605 | "horse cart", 606 | "hourglass", 607 | "iPod", 608 | "iron", 609 | "jack-o'-lantern", 610 | "jean", 611 | "jeep", 612 | "jersey", 613 | "jigsaw puzzle", 614 | "jinrikisha", 615 | "joystick", 616 | "kimono", 617 | "knee pad", 618 | "knot", 619 | "lab coat", 620 | "ladle", 621 | "lampshade", 622 | "laptop", 623 | "lawn mower", 624 | "lens cap", 625 | "letter opener", 626 | "library", 627 | "lifeboat", 628 | "lighter", 629 | "limousine", 630 | "liner", 631 | "lipstick", 632 | "Loafer", 633 | "lotion", 634 | "loudspeaker", 635 | "loupe", 636 | "lumbermill", 637 | "magnetic compass", 638 | "mailbag", 639 | "mailbox", 640 | "maillot", 641 | "maillot tank suit", 642 | "manhole cover", 643 | "maraca", 644 | "marimba", 645 | "mask", 646 | "matchstick", 647 | "maypole", 648 | "maze", 649 | "measuring cup", 650 | "medicine chest", 651 | "megalith", 652 | "microphone", 653 | "microwave", 654 | "military uniform", 655 | "milk can", 656 | "minibus", 657 | "miniskirt", 658 | "minivan", 659 | "missile", 660 | "mitten", 661 | "mixing bowl", 662 | "mobile home", 
663 | "Model T", 664 | "modem", 665 | "monastery", 666 | "monitor", 667 | "moped", 668 | "mortar", 669 | "mortarboard", 670 | "mosque", 671 | "mosquito net", 672 | "motor scooter", 673 | "mountain bike", 674 | "mountain tent", 675 | "mouse", 676 | "mousetrap", 677 | "moving van", 678 | "muzzle", 679 | "nail", 680 | "neck brace", 681 | "necklace", 682 | "nipple", 683 | "notebook", 684 | "obelisk", 685 | "oboe", 686 | "ocarina", 687 | "odometer", 688 | "oil filter", 689 | "organ", 690 | "oscilloscope", 691 | "overskirt", 692 | "oxcart", 693 | "oxygen mask", 694 | "packet", 695 | "paddle", 696 | "paddlewheel", 697 | "padlock", 698 | "paintbrush", 699 | "pajama", 700 | "palace", 701 | "panpipe", 702 | "paper towel", 703 | "parachute", 704 | "parallel bars", 705 | "park bench", 706 | "parking meter", 707 | "passenger car", 708 | "patio", 709 | "pay-phone", 710 | "pedestal", 711 | "pencil box", 712 | "pencil sharpener", 713 | "perfume", 714 | "Petri dish", 715 | "photocopier", 716 | "pick", 717 | "pickelhaube", 718 | "picket fence", 719 | "pickup", 720 | "pier", 721 | "piggy bank", 722 | "pill bottle", 723 | "pillow", 724 | "ping-pong ball", 725 | "pinwheel", 726 | "pirate", 727 | "pitcher", 728 | "plane", 729 | "planetarium", 730 | "plastic bag", 731 | "plate rack", 732 | "plow", 733 | "plunger", 734 | "Polaroid camera", 735 | "pole", 736 | "police van", 737 | "poncho", 738 | "pool table", 739 | "pop bottle", 740 | "pot", 741 | "potter's wheel", 742 | "power drill", 743 | "prayer rug", 744 | "printer", 745 | "prison", 746 | "projectile", 747 | "projector", 748 | "puck", 749 | "punching bag", 750 | "purse", 751 | "quill", 752 | "quilt", 753 | "racer", 754 | "racket", 755 | "radiator", 756 | "radio", 757 | "radio telescope", 758 | "rain barrel", 759 | "recreational vehicle", 760 | "reel", 761 | "reflex camera", 762 | "refrigerator", 763 | "remote control", 764 | "restaurant", 765 | "revolver", 766 | "rifle", 767 | "rocking chair", 768 | "rotisserie", 769 | "rubber 
eraser", 770 | "rugby ball", 771 | "rule", 772 | "running shoe", 773 | "safe", 774 | "safety pin", 775 | "saltshaker", 776 | "sandal", 777 | "sarong", 778 | "sax", 779 | "scabbard", 780 | "scale", 781 | "school bus", 782 | "schooner", 783 | "scoreboard", 784 | "screen", 785 | "screw", 786 | "screwdriver", 787 | "seat belt", 788 | "sewing machine", 789 | "shield", 790 | "shoe shop", 791 | "shoji", 792 | "shopping basket", 793 | "shopping cart", 794 | "shovel", 795 | "shower cap", 796 | "shower curtain", 797 | "ski", 798 | "ski mask", 799 | "sleeping bag", 800 | "slide rule", 801 | "sliding door", 802 | "slot", 803 | "snorkel", 804 | "snowmobile", 805 | "snowplow", 806 | "soap dispenser", 807 | "soccer ball", 808 | "sock", 809 | "solar dish", 810 | "sombrero", 811 | "soup bowl", 812 | "space bar", 813 | "space heater", 814 | "space shuttle", 815 | "spatula", 816 | "speedboat", 817 | "spider web", 818 | "spindle", 819 | "sports car", 820 | "spotlight", 821 | "stage", 822 | "steam locomotive", 823 | "steel arch bridge", 824 | "steel drum", 825 | "stethoscope", 826 | "stole", 827 | "stone wall", 828 | "stopwatch", 829 | "stove", 830 | "strainer", 831 | "streetcar", 832 | "stretcher", 833 | "studio couch", 834 | "stupa", 835 | "submarine", 836 | "suit", 837 | "sundial", 838 | "sunglass", 839 | "sunglasses", 840 | "sunscreen", 841 | "suspension bridge", 842 | "swab", 843 | "sweatshirt", 844 | "swimming trunks", 845 | "swing", 846 | "switch", 847 | "syringe", 848 | "table lamp", 849 | "tank", 850 | "tape player", 851 | "teapot", 852 | "teddy", 853 | "television", 854 | "tennis ball", 855 | "thatch", 856 | "theater curtain", 857 | "thimble", 858 | "thresher", 859 | "throne", 860 | "tile roof", 861 | "toaster", 862 | "tobacco shop", 863 | "toilet seat", 864 | "torch", 865 | "totem pole", 866 | "tow truck", 867 | "toyshop", 868 | "tractor", 869 | "trailer truck", 870 | "tray", 871 | "trench coat", 872 | "tricycle", 873 | "trimaran", 874 | "tripod", 875 | "triumphal arch", 876 
| "trolleybus", 877 | "trombone", 878 | "tub", 879 | "turnstile", 880 | "typewriter keyboard", 881 | "umbrella", 882 | "unicycle", 883 | "upright", 884 | "vacuum", 885 | "vase", 886 | "vault", 887 | "velvet", 888 | "vending machine", 889 | "vestment", 890 | "viaduct", 891 | "violin", 892 | "volleyball", 893 | "waffle iron", 894 | "wall clock", 895 | "wallet", 896 | "wardrobe", 897 | "warplane", 898 | "washbasin", 899 | "washer", 900 | "water bottle", 901 | "water jug", 902 | "water tower", 903 | "whiskey jug", 904 | "whistle", 905 | "wig", 906 | "window screen", 907 | "window shade", 908 | "Windsor tie", 909 | "wine bottle", 910 | "wing", 911 | "wok", 912 | "wooden spoon", 913 | "wool", 914 | "worm fence", 915 | "wreck", 916 | "yawl", 917 | "yurt", 918 | "web site", 919 | "comic book", 920 | "crossword puzzle", 921 | "street sign", 922 | "traffic light", 923 | "book jacket", 924 | "menu", 925 | "plate", 926 | "guacamole", 927 | "consomme", 928 | "hot pot", 929 | "trifle", 930 | "ice cream", 931 | "ice lolly", 932 | "French loaf", 933 | "bagel", 934 | "pretzel", 935 | "cheeseburger", 936 | "hotdog", 937 | "mashed potato", 938 | "head cabbage", 939 | "broccoli", 940 | "cauliflower", 941 | "zucchini", 942 | "spaghetti squash", 943 | "acorn squash", 944 | "butternut squash", 945 | "cucumber", 946 | "artichoke", 947 | "bell pepper", 948 | "cardoon", 949 | "mushroom", 950 | "Granny Smith", 951 | "strawberry", 952 | "orange", 953 | "lemon", 954 | "fig", 955 | "pineapple", 956 | "banana", 957 | "jackfruit", 958 | "custard apple", 959 | "pomegranate", 960 | "hay", 961 | "carbonara", 962 | "chocolate sauce", 963 | "dough", 964 | "meat loaf", 965 | "pizza", 966 | "potpie", 967 | "burrito", 968 | "red wine", 969 | "espresso", 970 | "cup", 971 | "eggnog", 972 | "alp", 973 | "bubble", 974 | "cliff", 975 | "coral reef", 976 | "geyser", 977 | "lakeside", 978 | "promontory", 979 | "sandbar", 980 | "seashore", 981 | "valley", 982 | "volcano", 983 | "ballplayer", 984 | "groom", 985 | 
"scuba diver", 986 | "rapeseed", 987 | "daisy", 988 | "yellow lady's slipper", 989 | "corn", 990 | "acorn", 991 | "hip", 992 | "buckeye", 993 | "coral fungus", 994 | "agaric", 995 | "gyromitra", 996 | "stinkhorn", 997 | "earthstar", 998 | "hen-of-the-woods", 999 | "bolete", 1000 | "ear", 1001 | "toilet tissue" 1002 | ] 1003 | --------------------------------------------------------------------------------